pixeltable 0.3.2-py3-none-any.whl → 0.3.4-py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic.

Files changed (150)
  1. pixeltable/__init__.py +64 -11
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/catalog.py +50 -27
  5. pixeltable/catalog/column.py +27 -11
  6. pixeltable/catalog/dir.py +6 -4
  7. pixeltable/catalog/globals.py +8 -1
  8. pixeltable/catalog/insertable_table.py +22 -12
  9. pixeltable/catalog/named_function.py +10 -6
  10. pixeltable/catalog/path.py +3 -2
  11. pixeltable/catalog/path_dict.py +8 -6
  12. pixeltable/catalog/schema_object.py +2 -1
  13. pixeltable/catalog/table.py +121 -101
  14. pixeltable/catalog/table_version.py +291 -142
  15. pixeltable/catalog/table_version_path.py +8 -5
  16. pixeltable/catalog/view.py +67 -26
  17. pixeltable/dataframe.py +106 -81
  18. pixeltable/env.py +28 -24
  19. pixeltable/exec/__init__.py +2 -2
  20. pixeltable/exec/aggregation_node.py +10 -4
  21. pixeltable/exec/cache_prefetch_node.py +5 -3
  22. pixeltable/exec/component_iteration_node.py +9 -9
  23. pixeltable/exec/data_row_batch.py +21 -10
  24. pixeltable/exec/exec_context.py +10 -3
  25. pixeltable/exec/exec_node.py +23 -12
  26. pixeltable/exec/expr_eval/evaluators.py +13 -7
  27. pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
  28. pixeltable/exec/expr_eval/globals.py +30 -7
  29. pixeltable/exec/expr_eval/row_buffer.py +5 -6
  30. pixeltable/exec/expr_eval/schedulers.py +151 -31
  31. pixeltable/exec/in_memory_data_node.py +8 -7
  32. pixeltable/exec/row_update_node.py +15 -5
  33. pixeltable/exec/sql_node.py +56 -27
  34. pixeltable/exprs/__init__.py +2 -2
  35. pixeltable/exprs/arithmetic_expr.py +57 -26
  36. pixeltable/exprs/array_slice.py +1 -1
  37. pixeltable/exprs/column_property_ref.py +2 -1
  38. pixeltable/exprs/column_ref.py +20 -15
  39. pixeltable/exprs/comparison.py +6 -2
  40. pixeltable/exprs/compound_predicate.py +1 -3
  41. pixeltable/exprs/data_row.py +2 -2
  42. pixeltable/exprs/expr.py +108 -72
  43. pixeltable/exprs/expr_dict.py +2 -1
  44. pixeltable/exprs/expr_set.py +3 -1
  45. pixeltable/exprs/function_call.py +39 -41
  46. pixeltable/exprs/globals.py +1 -0
  47. pixeltable/exprs/in_predicate.py +2 -2
  48. pixeltable/exprs/inline_expr.py +20 -17
  49. pixeltable/exprs/json_mapper.py +4 -2
  50. pixeltable/exprs/json_path.py +12 -18
  51. pixeltable/exprs/literal.py +5 -9
  52. pixeltable/exprs/method_ref.py +1 -0
  53. pixeltable/exprs/object_ref.py +1 -1
  54. pixeltable/exprs/row_builder.py +32 -17
  55. pixeltable/exprs/rowid_ref.py +14 -5
  56. pixeltable/exprs/similarity_expr.py +11 -6
  57. pixeltable/exprs/sql_element_cache.py +1 -1
  58. pixeltable/exprs/type_cast.py +24 -9
  59. pixeltable/ext/__init__.py +1 -0
  60. pixeltable/ext/functions/__init__.py +1 -0
  61. pixeltable/ext/functions/whisperx.py +2 -2
  62. pixeltable/ext/functions/yolox.py +11 -11
  63. pixeltable/func/aggregate_function.py +17 -13
  64. pixeltable/func/callable_function.py +6 -6
  65. pixeltable/func/expr_template_function.py +15 -14
  66. pixeltable/func/function.py +16 -16
  67. pixeltable/func/function_registry.py +11 -8
  68. pixeltable/func/globals.py +4 -2
  69. pixeltable/func/query_template_function.py +12 -13
  70. pixeltable/func/signature.py +18 -9
  71. pixeltable/func/tools.py +10 -17
  72. pixeltable/func/udf.py +106 -11
  73. pixeltable/functions/__init__.py +21 -2
  74. pixeltable/functions/anthropic.py +16 -12
  75. pixeltable/functions/fireworks.py +63 -5
  76. pixeltable/functions/gemini.py +13 -3
  77. pixeltable/functions/globals.py +18 -6
  78. pixeltable/functions/huggingface.py +20 -38
  79. pixeltable/functions/image.py +7 -3
  80. pixeltable/functions/json.py +1 -0
  81. pixeltable/functions/llama_cpp.py +1 -4
  82. pixeltable/functions/mistralai.py +31 -20
  83. pixeltable/functions/ollama.py +4 -18
  84. pixeltable/functions/openai.py +231 -113
  85. pixeltable/functions/replicate.py +11 -10
  86. pixeltable/functions/string.py +70 -7
  87. pixeltable/functions/timestamp.py +21 -8
  88. pixeltable/functions/together.py +66 -52
  89. pixeltable/functions/video.py +1 -0
  90. pixeltable/functions/vision.py +14 -11
  91. pixeltable/functions/whisper.py +2 -1
  92. pixeltable/globals.py +60 -26
  93. pixeltable/index/__init__.py +1 -1
  94. pixeltable/index/btree.py +5 -3
  95. pixeltable/index/embedding_index.py +15 -14
  96. pixeltable/io/__init__.py +1 -1
  97. pixeltable/io/external_store.py +30 -25
  98. pixeltable/io/fiftyone.py +6 -14
  99. pixeltable/io/globals.py +33 -27
  100. pixeltable/io/hf_datasets.py +2 -1
  101. pixeltable/io/label_studio.py +77 -68
  102. pixeltable/io/pandas.py +36 -23
  103. pixeltable/io/parquet.py +9 -12
  104. pixeltable/iterators/__init__.py +1 -0
  105. pixeltable/iterators/audio.py +205 -0
  106. pixeltable/iterators/document.py +19 -8
  107. pixeltable/iterators/image.py +6 -24
  108. pixeltable/iterators/string.py +3 -6
  109. pixeltable/iterators/video.py +1 -7
  110. pixeltable/metadata/__init__.py +7 -1
  111. pixeltable/metadata/converters/convert_10.py +2 -2
  112. pixeltable/metadata/converters/convert_15.py +1 -5
  113. pixeltable/metadata/converters/convert_16.py +2 -4
  114. pixeltable/metadata/converters/convert_17.py +2 -4
  115. pixeltable/metadata/converters/convert_18.py +2 -4
  116. pixeltable/metadata/converters/convert_19.py +2 -5
  117. pixeltable/metadata/converters/convert_20.py +1 -4
  118. pixeltable/metadata/converters/convert_21.py +4 -6
  119. pixeltable/metadata/converters/convert_22.py +1 -0
  120. pixeltable/metadata/converters/convert_23.py +5 -5
  121. pixeltable/metadata/converters/convert_24.py +12 -13
  122. pixeltable/metadata/converters/convert_26.py +23 -0
  123. pixeltable/metadata/converters/util.py +3 -4
  124. pixeltable/metadata/notes.py +1 -0
  125. pixeltable/metadata/schema.py +13 -2
  126. pixeltable/plan.py +173 -98
  127. pixeltable/share/__init__.py +0 -0
  128. pixeltable/share/packager.py +218 -0
  129. pixeltable/store.py +42 -26
  130. pixeltable/type_system.py +102 -75
  131. pixeltable/utils/arrow.py +7 -8
  132. pixeltable/utils/coco.py +16 -17
  133. pixeltable/utils/code.py +1 -1
  134. pixeltable/utils/console_output.py +6 -3
  135. pixeltable/utils/description_helper.py +7 -7
  136. pixeltable/utils/documents.py +3 -1
  137. pixeltable/utils/filecache.py +12 -7
  138. pixeltable/utils/http_server.py +9 -8
  139. pixeltable/utils/iceberg.py +14 -0
  140. pixeltable/utils/media_store.py +3 -2
  141. pixeltable/utils/pytorch.py +11 -14
  142. pixeltable/utils/s3.py +1 -0
  143. pixeltable/utils/sql.py +1 -0
  144. pixeltable/utils/transactional_directory.py +2 -2
  145. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/METADATA +9 -9
  146. pixeltable-0.3.4.dist-info/RECORD +166 -0
  147. pixeltable-0.3.2.dist-info/RECORD +0 -161
  148. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/LICENSE +0 -0
  149. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/WHEEL +0 -0
  150. {pixeltable-0.3.2.dist-info → pixeltable-0.3.4.dist-info}/entry_points.txt +0 -0
@@ -8,11 +8,11 @@ from dataclasses import dataclass
 from typing import Any, Optional
 from uuid import UUID

-import pixeltable.exceptions as excs
-import pixeltable.type_system as ts
-from pixeltable import Table, Column
 import sqlalchemy as sql

+import pixeltable.exceptions as excs
+import pixeltable.type_system as ts
+from pixeltable import Column, Table
 from pixeltable.catalog import TableVersion

 _logger = logging.getLogger('pixeltable')
@@ -148,7 +148,9 @@ class Project(ExternalStore, abc.ABC):
         """
         from pixeltable import exprs

-        assert col.col_type.is_media_type() and not (col.is_stored and col.is_computed) and col not in self.stored_proxies
+        assert (
+            col.col_type.is_media_type() and not (col.is_stored and col.is_computed) and col not in self.stored_proxies
+        )
         proxy_col = Column(
             name=None,
             # Force images in the proxy column to be materialized inside the media store, in a normalized format.
@@ -159,7 +161,7 @@
             stored=True,
             col_id=tbl_version.next_col_id,
             sa_col_type=col.col_type.to_sa_type(),
-            schema_version_add=tbl_version.schema_version
+            schema_version_add=tbl_version.schema_version,
         )
         proxy_col.tbl = tbl_version
         tbl_version.next_col_id += 1
@@ -197,11 +199,11 @@ class Project(ExternalStore, abc.ABC):

     @classmethod
     def validate_columns(
-            cls,
-            table: Table,
-            export_cols: dict[str, ts.ColumnType],
-            import_cols: dict[str, ts.ColumnType],
-            col_mapping: Optional[dict[str, str]]
+        cls,
+        table: Table,
+        export_cols: dict[str, ts.ColumnType],
+        import_cols: dict[str, ts.ColumnType],
+        col_mapping: Optional[dict[str, str]],
     ) -> dict[Column, str]:
         """
         Verifies that the specified `col_mapping` is valid. In particular, checks that:
@@ -294,7 +296,7 @@ class SyncStatus:
             external_rows_deleted=self.external_rows_deleted + other.external_rows_deleted,
             external_rows_updated=self.external_rows_updated + other.external_rows_updated,
             pxt_rows_updated=self.pxt_rows_updated + other.pxt_rows_updated,
-            num_excs=self.num_excs + other.num_excs
+            num_excs=self.num_excs + other.num_excs,
         )

     @classmethod
@@ -304,13 +306,14 @@

 class MockProject(Project):
     """A project that cannot be synced, used mainly for testing."""
+
     def __init__(
-            self,
-            name: str,
-            export_cols: dict[str, ts.ColumnType],
-            import_cols: dict[str, ts.ColumnType],
-            col_mapping: dict[Column, str],
-            stored_proxies: Optional[dict[Column, Column]] = None
+        self,
+        name: str,
+        export_cols: dict[str, ts.ColumnType],
+        import_cols: dict[str, ts.ColumnType],
+        col_mapping: dict[Column, str],
+        stored_proxies: Optional[dict[Column, Column]] = None,
     ):
         super().__init__(name, col_mapping, stored_proxies)
         self.export_cols = export_cols
@@ -319,12 +322,12 @@ class MockProject(Project):

     @classmethod
     def create(
-            cls,
-            t: Table,
-            name: str,
-            export_cols: dict[str, ts.ColumnType],
-            import_cols: dict[str, ts.ColumnType],
-            col_mapping: Optional[dict[str, str]] = None
+        cls,
+        t: Table,
+        name: str,
+        export_cols: dict[str, ts.ColumnType],
+        import_cols: dict[str, ts.ColumnType],
+        col_mapping: Optional[dict[str, str]] = None,
     ) -> 'MockProject':
         col_mapping = cls.validate_columns(t, export_cols, import_cols, col_mapping)
         return cls(name, export_cols, import_cols, col_mapping)
@@ -351,7 +354,9 @@ class MockProject(Project):
             'export_cols': {k: v.as_dict() for k, v in self.export_cols.items()},
             'import_cols': {k: v.as_dict() for k, v in self.import_cols.items()},
             'col_mapping': [[self._column_as_dict(k), v] for k, v in self.col_mapping.items()],
-            'stored_proxies': [[self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()]
+            'stored_proxies': [
+                [self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()
+            ],
         }

     @classmethod
@@ -361,7 +366,7 @@ class MockProject(Project):
             {k: ts.ColumnType.from_dict(v) for k, v in md['export_cols'].items()},
             {k: ts.ColumnType.from_dict(v) for k, v in md['import_cols'].items()},
             {cls._column_from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
-            {cls._column_from_dict(entry[0]): cls._column_from_dict(entry[1]) for entry in md['stored_proxies']}
+            {cls._column_from_dict(entry[0]): cls._column_from_dict(entry[1]) for entry in md['stored_proxies']},
         )

     def __eq__(self, other: Any) -> bool:
pixeltable/io/fiftyone.py CHANGED
@@ -16,6 +16,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
     """
     Implementation of a FiftyOne `DatasetImporter` that reads image data from a Pixeltable table.
     """
+
     __image_format: str  # format to use for any exported images that are not already stored on disk
     __labels: dict[str, tuple[exprs.Expr, type[fo.Label]]]  # label_name -> (expr, label_cls)
     __image_idx: int  # index of the image expr in the select list
@@ -34,12 +35,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
         seed: Union[int, float, str, bytes, bytearray, None] = None,
         max_samples: Optional[int] = None,
     ):
-        super().__init__(
-            dataset_dir=dataset_dir,
-            shuffle=shuffle,
-            seed=seed,
-            max_samples=max_samples
-        )
+        super().__init__(dataset_dir=dataset_dir, shuffle=shuffle, seed=seed, max_samples=max_samples)

         self.__image_format = image_format

@@ -54,9 +50,9 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
         if isinstance(exprs_, dict):
             for label_name, expr in exprs_.items():
                 if not label_name.isidentifier():
-                    raise excs.Error(f"Invalid label name: {label_name}")
+                    raise excs.Error(f'Invalid label name: {label_name}')
                 if label_name in self.__labels:
-                    raise excs.Error(f"Duplicate label name: {label_name}")
+                    raise excs.Error(f'Duplicate label name: {label_name}')
                 self.__labels[label_name] = (expr, label_cls)

         # Now add the remaining labels, assigning unused default names.
@@ -137,13 +133,9 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
     def __as_fo_classifications(self, data: list) -> list[fo.Classification]:
         if not isinstance(data, list) or any('label' not in entry for entry in data):
             raise excs.Error(
-                f'Invalid classifications data: {data}\n'
-                "(Expected a list of dicts, each containing a 'label' key)"
+                f"Invalid classifications data: {data}\n(Expected a list of dicts, each containing a 'label' key)"
             )
-        return [
-            fo.Classification(label=entry['label'], confidence=entry.get('confidence'))
-            for entry in data
-        ]
+        return [fo.Classification(label=entry['label'], confidence=entry.get('confidence')) for entry in data]

     def __as_fo_detections(self, data: list) -> list[fo.Detections]:
         if not isinstance(data, list) or any('label' not in entry or 'bounding_box' not in entry for entry in data):
pixeltable/io/globals.py CHANGED
@@ -11,15 +11,15 @@ if TYPE_CHECKING:


 def create_label_studio_project(
-        t: Table,
-        label_config: str,
-        name: Optional[str] = None,
-        title: Optional[str] = None,
-        media_import_method: Literal['post', 'file', 'url'] = 'post',
-        col_mapping: Optional[dict[str, str]] = None,
-        sync_immediately: bool = True,
-        s3_configuration: Optional[dict[str, Any]] = None,
-        **kwargs: Any
+    t: Table,
+    label_config: str,
+    name: Optional[str] = None,
+    title: Optional[str] = None,
+    media_import_method: Literal['post', 'file', 'url'] = 'post',
+    col_mapping: Optional[dict[str, str]] = None,
+    sync_immediately: bool = True,
+    s3_configuration: Optional[dict[str, Any]] = None,
+    **kwargs: Any,
 ) -> SyncStatus:
     """
     Create a new Label Studio project and link it to the specified [`Table`][pixeltable.Table].
@@ -125,14 +125,7 @@ def create_label_studio_project(
     from pixeltable.io.label_studio import LabelStudioProject

     ls_project = LabelStudioProject.create(
-        t,
-        label_config,
-        name,
-        title,
-        media_import_method,
-        col_mapping,
-        s3_configuration,
-        **kwargs
+        t, label_config, name, title, media_import_method, col_mapping, s3_configuration, **kwargs
     )

     # Link the project to `t`, and sync if appropriate.
@@ -150,8 +143,8 @@ def import_rows(
     schema_overrides: Optional[dict[str, pxt.ColumnType]] = None,
     primary_key: Optional[Union[str, list[str]]] = None,
     num_retained_versions: int = 10,
-    comment: str = ''
-    ) -> Table:
+    comment: str = '',
+) -> Table:
     """
     Creates a new base table from a list of dictionaries. The dictionaries must be of the
     form `{column_name: value, ...}`. Pixeltable will attempt to infer the schema of the table from the
@@ -194,7 +187,9 @@
             # The column type will always be nullable by default.
             col_type = pxt.ColumnType.infer_literal_type(value, nullable=True)
             if col_type is None:
-                raise excs.Error(f'Could not infer type for column `{col_name}`; the value in row {n} has an unsupported type: {type(value)}')
+                raise excs.Error(
+                    f'Could not infer type for column `{col_name}`; the value in row {n} has an unsupported type: {type(value)}'
+                )
             if col_name not in schema:
                 schema[col_name] = col_type
             else:
@@ -210,7 +205,9 @@

     extraneous_keys = schema_overrides.keys() - schema.keys()
     if len(extraneous_keys) > 0:
-        raise excs.Error(f'The following columns specified in `schema_overrides` are not present in the data: {", ".join(extraneous_keys)}')
+        raise excs.Error(
+            f'The following columns specified in `schema_overrides` are not present in the data: {", ".join(extraneous_keys)}'
+        )

     entirely_none_cols = cols_with_nones - schema.keys()
     if len(entirely_none_cols) > 0:
@@ -221,7 +218,9 @@
             'Consider specifying the type(s) explicitly in `schema_overrides`.'
         )

-    t = pxt.create_table(tbl_path, schema, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)
+    t = pxt.create_table(
+        tbl_path, schema, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment
+    )
     t.insert(rows)
     return t

@@ -234,7 +233,7 @@ def import_json(
     primary_key: Optional[Union[str, list[str]]] = None,
     num_retained_versions: int = 10,
     comment: str = '',
-    **kwargs: Any
+    **kwargs: Any,
 ) -> Table:
     """
     Creates a new base table from a JSON file. This is a convenience method and is
@@ -272,7 +271,14 @@
         # URL
         contents = urllib.request.urlopen(filepath_or_url).read()
         data = json.loads(contents, **kwargs)
-    return import_rows(tbl_path, data, schema_overrides=schema_overrides, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)
+    return import_rows(
+        tbl_path,
+        data,
+        schema_overrides=schema_overrides,
+        primary_key=primary_key,
+        num_retained_versions=num_retained_versions,
+        comment=comment,
+    )


 def export_images_as_fo_dataset(
@@ -358,6 +364,6 @@
     if not images.col_type.is_image_type():
         raise excs.Error(f'`images` must be an expression of type Image (got {images.col_type._to_base_str()})')

-    return fo.Dataset.from_importer(PxtImageDatasetImporter(
-        tbl, images, image_format, classifications=classifications, detections=detections
-    ))
+    return fo.Dataset.from_importer(
+        PxtImageDatasetImporter(tbl, images, image_format, classifications=classifications, detections=detections)
+    )
pixeltable/io/hf_datasets.py CHANGED
@@ -4,7 +4,7 @@ import logging
 import math
 import random
 import typing
-from typing import Union, Optional, Any
+from typing import Any, Optional, Union

 import pixeltable as pxt
 import pixeltable.type_system as ts
@@ -103,6 +103,7 @@ def import_huggingface_dataset(
         A handle to the newly created [`Table`][pixeltable.Table].
     """
     import datasets
+
     import pixeltable as pxt

     if table_path in pxt.list_tables():
pixeltable/io/label_studio.py CHANGED
@@ -47,12 +47,12 @@ class LabelStudioProject(Project):
     """

     def __init__(
-            self,
-            name: str,
-            project_id: int,
-            media_import_method: Literal['post', 'file', 'url'],
-            col_mapping: dict[Column, str],
-            stored_proxies: Optional[dict[Column, Column]] = None
+        self,
+        name: str,
+        project_id: int,
+        media_import_method: Literal['post', 'file', 'url'],
+        col_mapping: dict[Column, str],
+        stored_proxies: Optional[dict[Column, Column]] = None,
     ):
         """
         The constructor will NOT create a new Label Studio project; it is also used when loading
@@ -70,8 +70,10 @@ class LabelStudioProject(Project):
         try:
             self._project = _label_studio_client().get_project(self.project_id)
         except HTTPError as exc:
-            raise excs.Error(f'Could not locate Label Studio project: {self.project_id} '
-                             '(cannot connect to server or project no longer exists)') from exc
+            raise excs.Error(
+                f'Could not locate Label Studio project: {self.project_id} '
+                '(cannot connect to server or project no longer exists)'
+            ) from exc
         return self._project

     @property
@@ -105,8 +107,10 @@ class LabelStudioProject(Project):
         return {ANNOTATIONS_COLUMN: pxt.JsonType(nullable=True)}

     def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
-        _logger.info(f'Syncing Label Studio project "{self.project_title}" with table `{t._name}`'
-                     f' (export: {export_data}, import: {import_data}).')
+        _logger.info(
+            f'Syncing Label Studio project "{self.project_title}" with table `{t._name}`'
+            f' (export: {export_data}, import: {import_data}).'
+        )
         # Collect all existing tasks into a dict with entries `rowid: task`
         tasks = {tuple(task['meta']['rowid']): task for task in self.__fetch_all_tasks()}
         sync_status = SyncStatus.empty()
@@ -148,18 +152,14 @@ class LabelStudioProject(Project):
         config = self.__project_config

         # Columns in `t` that map to Label Studio data keys
-        t_data_cols = [
-            t_col for t_col, ext_col_name in self.col_mapping.items()
-            if ext_col_name in config.data_keys
-        ]
+        t_data_cols = [t_col for t_col, ext_col_name in self.col_mapping.items() if ext_col_name in config.data_keys]

         if len(t_data_cols) == 0:
             return SyncStatus.empty()

         # Columns in `t` that map to `rectanglelabels` preannotations
         t_rl_cols = [
-            t_col for t_col, ext_col_name in self.col_mapping.items()
-            if ext_col_name in config.rectangle_labels
+            t_col for t_col, ext_col_name in self.col_mapping.items() if ext_col_name in config.rectangle_labels
         ]

         # Destinations for `rectanglelabels` preannotations
@@ -180,12 +180,12 @@ class LabelStudioProject(Project):
             assert False

     def __update_tasks_by_post(
-            self,
-            t: Table,
-            existing_tasks: dict[tuple, dict],
-            media_col: Column,
-            t_rl_cols: list[Column],
-            rl_info: list['_RectangleLabel']
+        self,
+        t: Table,
+        existing_tasks: dict[tuple, dict],
+        media_col: Column,
+        t_rl_cols: list[Column],
+        rl_info: list['_RectangleLabel'],
     ) -> SyncStatus:
         is_stored = media_col.is_stored
         # If it's a stored column, we can use `localpath`
@@ -197,7 +197,7 @@ class LabelStudioProject(Project):

         for row in rows._exec():
             media_col_idx = rows._select_list_exprs[0].slot_idx
-            rl_col_idxs = [expr.slot_idx for expr in rows._select_list_exprs[1: 1 + len(t_rl_cols)]]
+            rl_col_idxs = [expr.slot_idx for expr in rows._select_list_exprs[1 : 1 + len(t_rl_cols)]]
             row_ids_in_pxt.add(row.rowid)
             if row.rowid not in existing_tasks:
                 # Upload the media file to Label Studio
@@ -239,12 +239,12 @@ class LabelStudioProject(Project):
         return sync_status.combine(deletion_sync_status)

     def __update_tasks_by_files(
-            self,
-            t: Table,
-            existing_tasks: dict[tuple, dict],
-            t_data_cols: list[Column],
-            t_rl_cols: list[Column],
-            rl_info: list['_RectangleLabel']
+        self,
+        t: Table,
+        existing_tasks: dict[tuple, dict],
+        t_data_cols: list[Column],
+        t_rl_cols: list[Column],
+        rl_info: list['_RectangleLabel'],
     ) -> SyncStatus:
         ext_data_cols = [self.col_mapping[col] for col in t_data_cols]
         expr_refs: dict[str, Expr] = {}  # kwargs for the select statement
@@ -301,21 +301,23 @@ class LabelStudioProject(Project):
             return {
                 'data': dict(zip(ext_data_cols, data_vals)),
                 'meta': {'rowid': row.rowid},
-                'predictions': predictions
+                'predictions': predictions,
             }

         for row in df._exec():
             if rl_col_idxs is None:
-                rl_col_idxs = [expr.slot_idx for expr in df._select_list_exprs[:len(t_rl_cols)]]
-                data_col_idxs = [expr.slot_idx for expr in df._select_list_exprs[len(t_rl_cols):]]
+                rl_col_idxs = [expr.slot_idx for expr in df._select_list_exprs[: len(t_rl_cols)]]
+                data_col_idxs = [expr.slot_idx for expr in df._select_list_exprs[len(t_rl_cols) :]]
             row_ids_in_pxt.add(row.rowid)
             task_info = create_task_info(row)
             # TODO(aaron-siegel): Implement more efficient update logic (currently involves a full table scan)
             if row.rowid in existing_tasks:
                 # A task for this row already exists; see if it needs an update.
                 existing_task = existing_tasks[row.rowid]
-                if task_info['data'] != existing_task['data'] or \
-                        task_info['predictions'] != existing_task['predictions']:
+                if (
+                    task_info['data'] != existing_task['data']
+                    or task_info['predictions'] != existing_task['predictions']
+                ):
                     _logger.debug(f'Updating task for rowid {row.rowid}.')
                     self.project.update_task(existing_tasks[row.rowid]['id'], **task_info)
                     tasks_updated += 1
@@ -330,7 +332,9 @@ class LabelStudioProject(Project):
         if len(page) > 0:
             self.project.import_tasks(page)

-        env.Env.get().console_logger.info(f'Created {tasks_created} new task(s) and updated {tasks_updated} existing task(s) in {self}.')
+        env.Env.get().console_logger.info(
+            f'Created {tasks_created} new task(s) and updated {tasks_updated} existing task(s) in {self}.'
+        )

         sync_status = SyncStatus(external_rows_created=tasks_created, external_rows_updated=tasks_updated)

@@ -355,7 +359,9 @@ class LabelStudioProject(Project):
         relpath = Path(localpath).relative_to(env.Env.get().home)
         return f'/data/local-files/?d={str(relpath)}'

-    def __delete_stale_tasks(self, existing_tasks: dict[tuple, dict], row_ids_in_pxt: set[tuple], tasks_created: int) -> SyncStatus:
+    def __delete_stale_tasks(
+        self, existing_tasks: dict[tuple, dict], row_ids_in_pxt: set[tuple], tasks_created: int
+    ) -> SyncStatus:
         deleted_rowids = set(existing_tasks.keys()) - row_ids_in_pxt
         # Sanity check the math
         assert len(deleted_rowids) == len(existing_tasks) + tasks_created - len(row_ids_in_pxt)
@@ -363,7 +369,9 @@ class LabelStudioProject(Project):

         if len(tasks_to_delete) > 0:
             self.project.delete_tasks(tasks_to_delete)
-            env.Env.get().console_logger.info(f'Deleted {len(tasks_to_delete)} tasks(s) in {self} that are no longer present in Pixeltable.')
+            env.Env.get().console_logger.info(
+                f'Deleted {len(tasks_to_delete)} tasks(s) in {self} that are no longer present in Pixeltable.'
+            )

         # Remove them from the `existing_tasks` dict so that future updates are applied correctly
         for rowid in deleted_rowids:
@@ -417,7 +425,9 @@ class LabelStudioProject(Project):
             'project_id': self.project_id,
             'media_import_method': self.media_import_method,
             'col_mapping': [[self._column_as_dict(k), v] for k, v in self.col_mapping.items()],
-            'stored_proxies': [[self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()]
+            'stored_proxies': [
+                [self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()
+            ],
         }

     @classmethod
@@ -427,7 +437,7 @@ class LabelStudioProject(Project):
             md['project_id'],
             md['media_import_method'],
             {cls._column_from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
-            {cls._column_from_dict(entry[0]): cls._column_from_dict(entry[1]) for entry in md['stored_proxies']}
+            {cls._column_from_dict(entry[0]): cls._column_from_dict(entry[1]) for entry in md['stored_proxies']},
         )

     def __repr__(self) -> str:
@@ -444,8 +454,7 @@ class LabelStudioProject(Project):
         if root.tag.lower() != 'view':
             raise excs.Error('Root of Label Studio config must be a `View`')
         config = _LabelStudioConfig(
-            data_keys=cls.__parse_data_keys_config(root),
-            rectangle_labels=cls.__parse_rectangle_labels_config(root)
+            data_keys=cls.__parse_data_keys_config(root), rectangle_labels=cls.__parse_rectangle_labels_config(root)
         )
         config.validate()
         return config
@@ -474,10 +483,7 @@ class LabelStudioProject(Project):
            if element.tag.lower() == 'rectanglelabels':
                name = element.attrib['name']
                to_name = element.attrib['toName']
-                labels = [
-                    child.attrib['value']
-                    for child in element if child.tag.lower() == 'label'
-                ]
+                labels = [child.attrib['value'] for child in element if child.tag.lower() == 'label']
                for label in labels:
                    if label not in coco.COCO_2017_CATEGORIES.values():
                        raise excs.Error(f'Label in `rectanglelabels` config is not a valid COCO object name: {label}')
@@ -486,11 +492,7 @@ class LabelStudioProject(Project):

     @classmethod
     def __coco_to_predictions(
-            cls,
-            coco_annotations: dict[str, Any],
-            from_name: str,
-            rl_info: '_RectangleLabel',
-            task_id: Optional[int] = None
+        cls, coco_annotations: dict[str, Any], from_name: str, rl_info: '_RectangleLabel', task_id: Optional[int] = None
     ) -> dict[str, Any]:
         width = coco_annotations['image']['width']
         height = coco_annotations['image']['height']
@@ -510,8 +512,8 @@ class LabelStudioProject(Project):
                     'y': entry['bbox'][1] * 100.0 / height,
                     'width': entry['bbox'][2] * 100.0 / width,
                     'height': entry['bbox'][3] * 100.0 / height,
-                    'rectanglelabels': [coco.COCO_2017_CATEGORIES[entry['category']]]
-                }
+                    'rectanglelabels': [coco.COCO_2017_CATEGORIES[entry['category']]],
+                },
             }
             for i, entry in enumerate(coco_annotations['annotations'])
             # include only the COCO labels that match a rectanglelabel name
@@ -539,15 +541,15 @@ class LabelStudioProject(Project):

     @classmethod
     def create(
-            cls,
-            t: Table,
-            label_config: str,
-            name: Optional[str],
-            title: Optional[str],
-            media_import_method: Literal['post', 'file', 'url'],
-            col_mapping: Optional[dict[str, str]],
-            s3_configuration: Optional[dict[str, Any]],
-            **kwargs: Any
+        cls,
+        t: Table,
+        label_config: str,
+        name: Optional[str],
+        title: Optional[str],
+        media_import_method: Literal['post', 'file', 'url'],
+        col_mapping: Optional[dict[str, str]],
+        s3_configuration: Optional[dict[str, Any]],
+        **kwargs: Any,
     ) -> 'LabelStudioProject':
         """
         Creates a new Label Studio project, using the Label Studio client configured in Pixeltable.
@@ -577,7 +579,8 @@ class LabelStudioProject(Project):
             t.add_columns({local_annotations_column: pxt.JsonType(nullable=True)})

         resolved_col_mapping = cls.validate_columns(
-            t, config.export_columns, {ANNOTATIONS_COLUMN: pxt.JsonType(nullable=True)}, col_mapping)
+            t, config.export_columns, {ANNOTATIONS_COLUMN: pxt.JsonType(nullable=True)}, col_mapping
+        )

         # Perform some additional validation
         if media_import_method == 'post' and len(config.data_keys) > 1:
@@ -591,12 +594,15 @@ class LabelStudioProject(Project):
                 raise excs.Error('`s3_configuration` must contain a `bucket` field')
             if not 'title' in s3_configuration:
                 s3_configuration['title'] = 'Pixeltable-S3-Import-Storage'
-            if ('aws_access_key_id' not in s3_configuration and
-                    'aws_secret_access_key' not in s3_configuration and
-                    'aws_session_token' not in s3_configuration):
+            if (
+                'aws_access_key_id' not in s3_configuration
+                and 'aws_secret_access_key' not in s3_configuration
+                and 'aws_session_token' not in s3_configuration
+            ):
                 # Attempt to fill any missing credentials from the environment
                 try:
                     import boto3
+
                     s3_credentials = boto3.Session().get_credentials().get_frozen_credentials()
                     _logger.info(f'Using AWS credentials from the environment for Label Studio project: {title}')
                     s3_configuration['aws_access_key_id'] = s3_credentials.access_key
@@ -618,8 +624,11 @@ class LabelStudioProject(Project):
         except HTTPError as exc:
             if exc.errno == 400:
                 response: dict = json.loads(exc.response.text)
-                if 'validation_errors' in response and 'non_field_errors' in response['validation_errors'] \
-                        and 'LOCAL_FILES_SERVING_ENABLED' in response['validation_errors']['non_field_errors'][0]:
+                if (
+                    'validation_errors' in response
+                    and 'non_field_errors' in response['validation_errors']
+                    and 'LOCAL_FILES_SERVING_ENABLED' in response['validation_errors']['non_field_errors'][0]
+                ):
                     raise excs.Error(
                         '`media_import_method` is set to `file`, but your Label Studio server is not configured '
                         'for local file storage.\nPlease set the `LABEL_STUDIO_LOCAL_FILES_SERVING_ENABLED` '
@@ -675,5 +684,5 @@ _LS_TAG_MAP = {
     'text': pxt.StringType(),
     'image': pxt.ImageType(),
     'video': pxt.VideoType(),
-    'audio': pxt.AudioType()
+    'audio': pxt.AudioType(),
 }