pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/io/label_studio.py
CHANGED
|
@@ -4,20 +4,22 @@ import logging
|
|
|
4
4
|
import os
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Any, Iterator, Literal
|
|
8
|
-
from xml.etree import ElementTree
|
|
7
|
+
from typing import Any, Iterator, Literal
|
|
8
|
+
from xml.etree import ElementTree as ET
|
|
9
9
|
|
|
10
|
-
import label_studio_sdk
|
|
10
|
+
import label_studio_sdk
|
|
11
11
|
import PIL.Image
|
|
12
12
|
from requests.exceptions import HTTPError
|
|
13
13
|
|
|
14
|
-
import pixeltable as
|
|
15
|
-
import
|
|
16
|
-
|
|
17
|
-
from pixeltable import
|
|
14
|
+
import pixeltable.type_system as ts
|
|
15
|
+
from pixeltable import Column, Table, env, exceptions as excs
|
|
16
|
+
from pixeltable.catalog import ColumnHandle
|
|
17
|
+
from pixeltable.catalog.update_status import RowCountStats, UpdateStatus
|
|
18
|
+
from pixeltable.config import Config
|
|
18
19
|
from pixeltable.exprs import ColumnRef, DataRow, Expr
|
|
19
|
-
from pixeltable.io.external_store import Project
|
|
20
|
+
from pixeltable.io.external_store import Project
|
|
20
21
|
from pixeltable.utils import coco
|
|
22
|
+
from pixeltable.utils.local_store import TempStore
|
|
21
23
|
|
|
22
24
|
# label_studio_sdk>=1 and label_studio_sdk<1 are not compatible, so we need to try
|
|
23
25
|
# the import two different ways to insure intercompatibility
|
|
@@ -26,7 +28,7 @@ try:
|
|
|
26
28
|
import label_studio_sdk.project as ls_project # type: ignore
|
|
27
29
|
except ImportError:
|
|
28
30
|
# label_studio_sdk>=1 compatibility
|
|
29
|
-
import label_studio_sdk._legacy.project as ls_project
|
|
31
|
+
import label_studio_sdk._legacy.project as ls_project
|
|
30
32
|
|
|
31
33
|
_logger = logging.getLogger('pixeltable')
|
|
32
34
|
|
|
@@ -44,23 +46,26 @@ class LabelStudioProject(Project):
|
|
|
44
46
|
"""
|
|
45
47
|
An [`ExternalStore`][pixeltable.io.ExternalStore] that represents a Label Studio project, providing functionality
|
|
46
48
|
for synchronizing between a Pixeltable table and a Label Studio project.
|
|
49
|
+
|
|
50
|
+
The constructor will NOT create a new Label Studio project; it is also used when loading
|
|
51
|
+
metadata for existing projects.
|
|
47
52
|
"""
|
|
48
53
|
|
|
54
|
+
project_id: int # Label Studio project ID
|
|
55
|
+
media_import_method: Literal['post', 'file', 'url']
|
|
56
|
+
_project: ls_project.Project | None
|
|
57
|
+
|
|
49
58
|
def __init__(
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
59
|
+
self,
|
|
60
|
+
name: str,
|
|
61
|
+
project_id: int,
|
|
62
|
+
media_import_method: Literal['post', 'file', 'url'],
|
|
63
|
+
col_mapping: dict[ColumnHandle, str],
|
|
64
|
+
stored_proxies: dict[ColumnHandle, ColumnHandle] | None = None,
|
|
56
65
|
):
|
|
57
|
-
"""
|
|
58
|
-
The constructor will NOT create a new Label Studio project; it is also used when loading
|
|
59
|
-
metadata for existing projects.
|
|
60
|
-
"""
|
|
61
66
|
self.project_id = project_id
|
|
62
67
|
self.media_import_method = media_import_method
|
|
63
|
-
self._project
|
|
68
|
+
self._project = None
|
|
64
69
|
super().__init__(name, col_mapping, stored_proxies)
|
|
65
70
|
|
|
66
71
|
@property
|
|
@@ -70,8 +75,10 @@ class LabelStudioProject(Project):
|
|
|
70
75
|
try:
|
|
71
76
|
self._project = _label_studio_client().get_project(self.project_id)
|
|
72
77
|
except HTTPError as exc:
|
|
73
|
-
raise excs.Error(
|
|
74
|
-
|
|
78
|
+
raise excs.Error(
|
|
79
|
+
f'Could not locate Label Studio project: {self.project_id} '
|
|
80
|
+
'(cannot connect to server or project no longer exists)'
|
|
81
|
+
) from exc
|
|
75
82
|
return self._project
|
|
76
83
|
|
|
77
84
|
@property
|
|
@@ -88,34 +95,36 @@ class LabelStudioProject(Project):
|
|
|
88
95
|
def __project_config(self) -> '_LabelStudioConfig':
|
|
89
96
|
return self.__parse_project_config(self.project_params['label_config'])
|
|
90
97
|
|
|
91
|
-
def get_export_columns(self) -> dict[str,
|
|
98
|
+
def get_export_columns(self) -> dict[str, ts.ColumnType]:
|
|
92
99
|
"""
|
|
93
100
|
The data keys and preannotation fields specified in this Label Studio project.
|
|
94
101
|
"""
|
|
95
102
|
return self.__project_config.export_columns
|
|
96
103
|
|
|
97
|
-
def get_import_columns(self) -> dict[str,
|
|
104
|
+
def get_import_columns(self) -> dict[str, ts.ColumnType]:
|
|
98
105
|
"""
|
|
99
106
|
Always contains a single entry:
|
|
100
107
|
|
|
101
108
|
```
|
|
102
|
-
{"annotations":
|
|
109
|
+
{"annotations": ts.JsonType(nullable=True)}
|
|
103
110
|
```
|
|
104
111
|
"""
|
|
105
|
-
return {ANNOTATIONS_COLUMN:
|
|
112
|
+
return {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}
|
|
106
113
|
|
|
107
|
-
def sync(self, t: Table, export_data: bool, import_data: bool) ->
|
|
108
|
-
_logger.info(
|
|
109
|
-
|
|
114
|
+
def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
|
|
115
|
+
_logger.info(
|
|
116
|
+
f'Syncing Label Studio project "{self.project_title}" with table `{t._name}`'
|
|
117
|
+
f' (export: {export_data}, import: {import_data}).'
|
|
118
|
+
)
|
|
110
119
|
# Collect all existing tasks into a dict with entries `rowid: task`
|
|
111
120
|
tasks = {tuple(task['meta']['rowid']): task for task in self.__fetch_all_tasks()}
|
|
112
|
-
sync_status =
|
|
121
|
+
sync_status = UpdateStatus()
|
|
113
122
|
if export_data:
|
|
114
123
|
export_sync_status = self.__update_tasks(t, tasks)
|
|
115
|
-
sync_status
|
|
124
|
+
sync_status += export_sync_status
|
|
116
125
|
if import_data:
|
|
117
126
|
import_sync_status = self.__update_table_from_tasks(t, tasks)
|
|
118
|
-
sync_status
|
|
127
|
+
sync_status += import_sync_status
|
|
119
128
|
return sync_status
|
|
120
129
|
|
|
121
130
|
def __fetch_all_tasks(self) -> Iterator[dict[str, Any]]:
|
|
@@ -135,10 +144,11 @@ class LabelStudioProject(Project):
|
|
|
135
144
|
page += 1
|
|
136
145
|
if unknown_task_count > 0:
|
|
137
146
|
_logger.warning(
|
|
138
|
-
f'Skipped {unknown_task_count} unrecognized task(s) when syncing
|
|
147
|
+
f'Skipped {unknown_task_count} unrecognized task(s) when syncing '
|
|
148
|
+
f'Label Studio project {self.project_title!r}.'
|
|
139
149
|
)
|
|
140
150
|
|
|
141
|
-
def __update_tasks(self, t: Table, existing_tasks: dict[tuple, dict]) ->
|
|
151
|
+
def __update_tasks(self, t: Table, existing_tasks: dict[tuple, dict]) -> UpdateStatus:
|
|
142
152
|
"""
|
|
143
153
|
Updates all tasks in this Label Studio project based on the Pixeltable data:
|
|
144
154
|
- Creates new tasks for rows that don't map to any existing task;
|
|
@@ -148,18 +158,14 @@ class LabelStudioProject(Project):
|
|
|
148
158
|
config = self.__project_config
|
|
149
159
|
|
|
150
160
|
# Columns in `t` that map to Label Studio data keys
|
|
151
|
-
t_data_cols = [
|
|
152
|
-
t_col for t_col, ext_col_name in self.col_mapping.items()
|
|
153
|
-
if ext_col_name in config.data_keys
|
|
154
|
-
]
|
|
161
|
+
t_data_cols = [t_col for t_col, ext_col_name in self.col_mapping.items() if ext_col_name in config.data_keys]
|
|
155
162
|
|
|
156
163
|
if len(t_data_cols) == 0:
|
|
157
|
-
return
|
|
164
|
+
return UpdateStatus()
|
|
158
165
|
|
|
159
166
|
# Columns in `t` that map to `rectanglelabels` preannotations
|
|
160
167
|
t_rl_cols = [
|
|
161
|
-
t_col for t_col, ext_col_name in self.col_mapping.items()
|
|
162
|
-
if ext_col_name in config.rectangle_labels
|
|
168
|
+
t_col for t_col, ext_col_name in self.col_mapping.items() if ext_col_name in config.rectangle_labels
|
|
163
169
|
]
|
|
164
170
|
|
|
165
171
|
# Destinations for `rectanglelabels` preannotations
|
|
@@ -173,31 +179,31 @@ class LabelStudioProject(Project):
|
|
|
173
179
|
# Send media to Label Studio by HTTP post.
|
|
174
180
|
assert len(t_data_cols) == 1 # This was verified when the project was set up
|
|
175
181
|
return self.__update_tasks_by_post(t, existing_tasks, t_data_cols[0], t_rl_cols, rl_info)
|
|
176
|
-
elif self.media_import_method
|
|
182
|
+
elif self.media_import_method in ('file', 'url'):
|
|
177
183
|
# Send media to Label Studio by file reference (local file or URL).
|
|
178
184
|
return self.__update_tasks_by_files(t, existing_tasks, t_data_cols, t_rl_cols, rl_info)
|
|
179
185
|
else:
|
|
180
|
-
|
|
186
|
+
raise AssertionError()
|
|
181
187
|
|
|
182
188
|
def __update_tasks_by_post(
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
) ->
|
|
190
|
-
is_stored = media_col.is_stored
|
|
189
|
+
self,
|
|
190
|
+
t: Table,
|
|
191
|
+
existing_tasks: dict[tuple, dict],
|
|
192
|
+
media_col: ColumnHandle,
|
|
193
|
+
t_rl_cols: list[ColumnHandle],
|
|
194
|
+
rl_info: list['_RectangleLabel'],
|
|
195
|
+
) -> UpdateStatus:
|
|
196
|
+
is_stored = media_col.get().is_stored
|
|
191
197
|
# If it's a stored column, we can use `localpath`
|
|
192
|
-
localpath_col_opt = [t[media_col.name].localpath] if is_stored else []
|
|
198
|
+
localpath_col_opt = [t[media_col.get().name].localpath] if is_stored else []
|
|
193
199
|
# Select the media column, rectanglelabels columns, and localpath (if appropriate)
|
|
194
|
-
rows = t.select(t[media_col.name], *[t[col.name] for col in t_rl_cols], *localpath_col_opt)
|
|
200
|
+
rows = t.select(t[media_col.get().name], *[t[col.get().name] for col in t_rl_cols], *localpath_col_opt)
|
|
195
201
|
tasks_created = 0
|
|
196
202
|
row_ids_in_pxt: set[tuple] = set()
|
|
197
203
|
|
|
198
204
|
for row in rows._exec():
|
|
199
205
|
media_col_idx = rows._select_list_exprs[0].slot_idx
|
|
200
|
-
rl_col_idxs = [expr.slot_idx for expr in rows._select_list_exprs[1: 1 + len(t_rl_cols)]]
|
|
206
|
+
rl_col_idxs = [expr.slot_idx for expr in rows._select_list_exprs[1 : 1 + len(t_rl_cols)]]
|
|
201
207
|
row_ids_in_pxt.add(row.rowid)
|
|
202
208
|
if row.rowid not in existing_tasks:
|
|
203
209
|
# Upload the media file to Label Studio
|
|
@@ -209,7 +215,7 @@ class LabelStudioProject(Project):
|
|
|
209
215
|
else:
|
|
210
216
|
# No localpath; create a temp file and upload it
|
|
211
217
|
assert isinstance(row[media_col_idx], PIL.Image.Image)
|
|
212
|
-
file =
|
|
218
|
+
file = TempStore.create_path(extension='.png')
|
|
213
219
|
row[media_col_idx].save(file, format='png')
|
|
214
220
|
task_id = self.project.import_tasks(file)[0]
|
|
215
221
|
os.remove(file)
|
|
@@ -226,54 +232,54 @@ class LabelStudioProject(Project):
|
|
|
226
232
|
)
|
|
227
233
|
for i in range(len(coco_annotations))
|
|
228
234
|
]
|
|
229
|
-
_logger.debug(
|
|
235
|
+
_logger.debug('`predictions`: {%s}', predictions)
|
|
230
236
|
self.project.create_predictions(predictions)
|
|
231
237
|
tasks_created += 1
|
|
232
238
|
|
|
233
|
-
|
|
239
|
+
env.Env.get().console_logger.info(f'Created {tasks_created} new task(s) in {self}.')
|
|
234
240
|
|
|
235
|
-
sync_status =
|
|
241
|
+
sync_status = UpdateStatus(ext_row_count_stats=RowCountStats(ins_rows=tasks_created))
|
|
236
242
|
|
|
237
243
|
deletion_sync_status = self.__delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
|
|
238
|
-
|
|
239
|
-
return sync_status
|
|
244
|
+
sync_status += deletion_sync_status
|
|
245
|
+
return sync_status
|
|
240
246
|
|
|
241
247
|
def __update_tasks_by_files(
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
) ->
|
|
248
|
+
self,
|
|
249
|
+
t: Table,
|
|
250
|
+
existing_tasks: dict[tuple, dict],
|
|
251
|
+
t_data_cols: list[ColumnHandle],
|
|
252
|
+
t_rl_cols: list[ColumnHandle],
|
|
253
|
+
rl_info: list['_RectangleLabel'],
|
|
254
|
+
) -> UpdateStatus:
|
|
249
255
|
ext_data_cols = [self.col_mapping[col] for col in t_data_cols]
|
|
250
256
|
expr_refs: dict[str, Expr] = {} # kwargs for the select statement
|
|
251
257
|
for col in t_data_cols:
|
|
252
|
-
col_name = col.name
|
|
258
|
+
col_name = col.get().name
|
|
253
259
|
if self.media_import_method == 'url':
|
|
254
260
|
expr_refs[col_name] = t[col_name].fileurl
|
|
255
261
|
else:
|
|
256
262
|
assert self.media_import_method == 'file'
|
|
257
|
-
if not col.col_type.is_media_type():
|
|
263
|
+
if not col.get().col_type.is_media_type():
|
|
258
264
|
# Not a media column; query the data directly
|
|
259
|
-
expr_refs[col_name] =
|
|
265
|
+
expr_refs[col_name] = t[col_name]
|
|
260
266
|
elif col in self.stored_proxies:
|
|
261
267
|
# Media column that has a stored proxy; use it. We have to give it a name,
|
|
262
268
|
# since it's an anonymous column
|
|
263
|
-
stored_proxy_col = self.stored_proxies[col]
|
|
269
|
+
stored_proxy_col = self.stored_proxies[col].get()
|
|
264
270
|
expr_refs[f'{col_name}_proxy'] = ColumnRef(stored_proxy_col).localpath
|
|
265
271
|
else:
|
|
266
272
|
# Media column without a stored proxy; this means it's a stored computed column,
|
|
267
273
|
# and we can just use the localpath
|
|
268
274
|
expr_refs[col_name] = t[col_name].localpath
|
|
269
275
|
|
|
270
|
-
|
|
276
|
+
query = t.select(*[t[col.get().name] for col in t_rl_cols], **expr_refs)
|
|
271
277
|
# The following buffers will hold `DataRow` indices that correspond to each of the selected
|
|
272
278
|
# columns. `rl_col_idxs` holds the indices for the columns that map to RectangleLabels
|
|
273
279
|
# preannotations; `data_col_idxs` holds the indices for the columns that map to data fields.
|
|
274
280
|
# We have to wait until we begin iterating to populate them, so they're initially `None`.
|
|
275
|
-
rl_col_idxs:
|
|
276
|
-
data_col_idxs:
|
|
281
|
+
rl_col_idxs: list[int] | None = None
|
|
282
|
+
data_col_idxs: list[int] | None = None
|
|
277
283
|
|
|
278
284
|
row_ids_in_pxt: set[tuple] = set()
|
|
279
285
|
tasks_created = 0
|
|
@@ -286,11 +292,11 @@ class LabelStudioProject(Project):
|
|
|
286
292
|
data_vals = [row[idx] for idx in data_col_idxs]
|
|
287
293
|
coco_annotations = [row[idx] for idx in rl_col_idxs]
|
|
288
294
|
for i in range(len(t_data_cols)):
|
|
289
|
-
if t_data_cols[i].col_type.is_media_type():
|
|
295
|
+
if t_data_cols[i].get().col_type.is_media_type():
|
|
290
296
|
# Special handling for media columns
|
|
291
297
|
assert isinstance(data_vals[i], str)
|
|
292
298
|
if self.media_import_method == 'url':
|
|
293
|
-
data_vals[i] = self.__validate_fileurl(t_data_cols[i], data_vals[i])
|
|
299
|
+
data_vals[i] = self.__validate_fileurl(t_data_cols[i].get(), data_vals[i])
|
|
294
300
|
else:
|
|
295
301
|
assert self.media_import_method == 'file'
|
|
296
302
|
data_vals[i] = self.__localpath_to_lspath(data_vals[i])
|
|
@@ -301,21 +307,23 @@ class LabelStudioProject(Project):
|
|
|
301
307
|
return {
|
|
302
308
|
'data': dict(zip(ext_data_cols, data_vals)),
|
|
303
309
|
'meta': {'rowid': row.rowid},
|
|
304
|
-
'predictions': predictions
|
|
310
|
+
'predictions': predictions,
|
|
305
311
|
}
|
|
306
312
|
|
|
307
|
-
for row in
|
|
313
|
+
for row in query._exec():
|
|
308
314
|
if rl_col_idxs is None:
|
|
309
|
-
rl_col_idxs = [expr.slot_idx for expr in
|
|
310
|
-
data_col_idxs = [expr.slot_idx for expr in
|
|
315
|
+
rl_col_idxs = [expr.slot_idx for expr in query._select_list_exprs[: len(t_rl_cols)]]
|
|
316
|
+
data_col_idxs = [expr.slot_idx for expr in query._select_list_exprs[len(t_rl_cols) :]]
|
|
311
317
|
row_ids_in_pxt.add(row.rowid)
|
|
312
318
|
task_info = create_task_info(row)
|
|
313
319
|
# TODO(aaron-siegel): Implement more efficient update logic (currently involves a full table scan)
|
|
314
320
|
if row.rowid in existing_tasks:
|
|
315
321
|
# A task for this row already exists; see if it needs an update.
|
|
316
322
|
existing_task = existing_tasks[row.rowid]
|
|
317
|
-
if
|
|
318
|
-
|
|
323
|
+
if (
|
|
324
|
+
task_info['data'] != existing_task['data']
|
|
325
|
+
or task_info['predictions'] != existing_task['predictions']
|
|
326
|
+
):
|
|
319
327
|
_logger.debug(f'Updating task for rowid {row.rowid}.')
|
|
320
328
|
self.project.update_task(existing_tasks[row.rowid]['id'], **task_info)
|
|
321
329
|
tasks_updated += 1
|
|
@@ -330,16 +338,18 @@ class LabelStudioProject(Project):
|
|
|
330
338
|
if len(page) > 0:
|
|
331
339
|
self.project.import_tasks(page)
|
|
332
340
|
|
|
333
|
-
|
|
341
|
+
env.Env.get().console_logger.info(
|
|
342
|
+
f'Created {tasks_created} new task(s) and updated {tasks_updated} existing task(s) in {self}.'
|
|
343
|
+
)
|
|
334
344
|
|
|
335
|
-
sync_status =
|
|
345
|
+
sync_status = UpdateStatus(ext_row_count_stats=RowCountStats(ins_rows=tasks_created, upd_rows=tasks_updated))
|
|
336
346
|
|
|
337
347
|
deletion_sync_status = self.__delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
|
|
338
|
-
|
|
339
|
-
return sync_status
|
|
348
|
+
sync_status += deletion_sync_status
|
|
349
|
+
return sync_status
|
|
340
350
|
|
|
341
351
|
@classmethod
|
|
342
|
-
def __validate_fileurl(cls, col: Column, url: str) ->
|
|
352
|
+
def __validate_fileurl(cls, col: Column, url: str) -> str | None:
|
|
343
353
|
# Check that the URL is one that will be visible to Label Studio. If it isn't, log an info message
|
|
344
354
|
# to help users debug the issue.
|
|
345
355
|
if not (url.startswith('http://') or url.startswith('https://')):
|
|
@@ -352,10 +362,12 @@ class LabelStudioProject(Project):
|
|
|
352
362
|
@classmethod
|
|
353
363
|
def __localpath_to_lspath(cls, localpath: str) -> str:
|
|
354
364
|
# Transform the local path into Label Studio's bespoke path format.
|
|
355
|
-
relpath = Path(localpath).relative_to(
|
|
356
|
-
return f'/data/local-files/?d={
|
|
365
|
+
relpath = Path(localpath).relative_to(Config.get().home)
|
|
366
|
+
return f'/data/local-files/?d={relpath}'
|
|
357
367
|
|
|
358
|
-
def __delete_stale_tasks(
|
|
368
|
+
def __delete_stale_tasks(
|
|
369
|
+
self, existing_tasks: dict[tuple, dict], row_ids_in_pxt: set[tuple], tasks_created: int
|
|
370
|
+
) -> UpdateStatus:
|
|
359
371
|
deleted_rowids = set(existing_tasks.keys()) - row_ids_in_pxt
|
|
360
372
|
# Sanity check the math
|
|
361
373
|
assert len(deleted_rowids) == len(existing_tasks) + tasks_created - len(row_ids_in_pxt)
|
|
@@ -363,17 +375,19 @@ class LabelStudioProject(Project):
|
|
|
363
375
|
|
|
364
376
|
if len(tasks_to_delete) > 0:
|
|
365
377
|
self.project.delete_tasks(tasks_to_delete)
|
|
366
|
-
|
|
378
|
+
env.Env.get().console_logger.info(
|
|
379
|
+
f'Deleted {len(tasks_to_delete)} tasks(s) in {self} that are no longer present in Pixeltable.'
|
|
380
|
+
)
|
|
367
381
|
|
|
368
382
|
# Remove them from the `existing_tasks` dict so that future updates are applied correctly
|
|
369
383
|
for rowid in deleted_rowids:
|
|
370
384
|
del existing_tasks[rowid]
|
|
371
385
|
|
|
372
|
-
return
|
|
386
|
+
return UpdateStatus(ext_row_count_stats=RowCountStats(del_rows=len(deleted_rowids)))
|
|
373
387
|
|
|
374
|
-
def __update_table_from_tasks(self, t: Table, tasks: dict[tuple, dict]) ->
|
|
388
|
+
def __update_table_from_tasks(self, t: Table, tasks: dict[tuple, dict]) -> UpdateStatus:
|
|
375
389
|
if ANNOTATIONS_COLUMN not in self.col_mapping.values():
|
|
376
|
-
return
|
|
390
|
+
return UpdateStatus()
|
|
377
391
|
|
|
378
392
|
annotations = {
|
|
379
393
|
# Replace [] by None to indicate no annotations. We do want to sync rows with no annotations,
|
|
@@ -383,7 +397,7 @@ class LabelStudioProject(Project):
|
|
|
383
397
|
for task in tasks.values()
|
|
384
398
|
}
|
|
385
399
|
|
|
386
|
-
local_annotations_col = next(k for k, v in self.col_mapping.items() if v == ANNOTATIONS_COLUMN)
|
|
400
|
+
local_annotations_col = next(k for k, v in self.col_mapping.items() if v == ANNOTATIONS_COLUMN).get()
|
|
387
401
|
|
|
388
402
|
# Prune the annotations down to just the ones that have actually changed.
|
|
389
403
|
rows = t.select(t[local_annotations_col.name])
|
|
@@ -396,28 +410,29 @@ class LabelStudioProject(Project):
|
|
|
396
410
|
updates = [{'_rowid': rowid, local_annotations_col.name: ann} for rowid, ann in annotations.items()]
|
|
397
411
|
if len(updates) > 0:
|
|
398
412
|
_logger.info(
|
|
399
|
-
f'Updating table
|
|
413
|
+
f'Updating table {t._name!r}, column {local_annotations_col.name!r} '
|
|
414
|
+
f'with {len(updates)} total annotations.'
|
|
400
415
|
)
|
|
401
416
|
# batch_update currently doesn't propagate from views to base tables. As a workaround, we call
|
|
402
417
|
# batch_update on the actual ancestor table that holds the annotations column.
|
|
403
418
|
# TODO(aaron-siegel): Simplify this once propagation is properly implemented in batch_update
|
|
404
419
|
ancestor = t
|
|
405
|
-
while local_annotations_col not in ancestor._tbl_version.cols:
|
|
406
|
-
assert ancestor.
|
|
407
|
-
ancestor = ancestor.
|
|
420
|
+
while local_annotations_col not in ancestor._tbl_version.get().cols:
|
|
421
|
+
assert ancestor._get_base_table is not None
|
|
422
|
+
ancestor = ancestor._get_base_table()
|
|
408
423
|
update_status = ancestor.batch_update(updates)
|
|
409
|
-
|
|
410
|
-
return
|
|
424
|
+
env.Env.get().console_logger.info(f'Updated annotation(s) from {len(updates)} task(s) in {self}.')
|
|
425
|
+
return update_status
|
|
411
426
|
else:
|
|
412
|
-
return
|
|
427
|
+
return UpdateStatus()
|
|
413
428
|
|
|
414
429
|
def as_dict(self) -> dict[str, Any]:
|
|
415
430
|
return {
|
|
416
431
|
'name': self.name,
|
|
417
432
|
'project_id': self.project_id,
|
|
418
433
|
'media_import_method': self.media_import_method,
|
|
419
|
-
'col_mapping': [[
|
|
420
|
-
'stored_proxies': [[
|
|
434
|
+
'col_mapping': [[k.as_dict(), v] for k, v in self.col_mapping.items()],
|
|
435
|
+
'stored_proxies': [[k.as_dict(), v.as_dict()] for k, v in self.stored_proxies.items()],
|
|
421
436
|
}
|
|
422
437
|
|
|
423
438
|
@classmethod
|
|
@@ -426,8 +441,8 @@ class LabelStudioProject(Project):
|
|
|
426
441
|
md['name'],
|
|
427
442
|
md['project_id'],
|
|
428
443
|
md['media_import_method'],
|
|
429
|
-
{
|
|
430
|
-
{
|
|
444
|
+
{ColumnHandle.from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
|
|
445
|
+
{ColumnHandle.from_dict(entry[0]): ColumnHandle.from_dict(entry[1]) for entry in md['stored_proxies']},
|
|
431
446
|
)
|
|
432
447
|
|
|
433
448
|
def __repr__(self) -> str:
|
|
@@ -440,18 +455,17 @@ class LabelStudioProject(Project):
|
|
|
440
455
|
Parses a Label Studio XML config, extracting the names and Pixeltable types of
|
|
441
456
|
all input variables.
|
|
442
457
|
"""
|
|
443
|
-
root:
|
|
458
|
+
root: ET.Element = ET.fromstring(xml_config)
|
|
444
459
|
if root.tag.lower() != 'view':
|
|
445
460
|
raise excs.Error('Root of Label Studio config must be a `View`')
|
|
446
461
|
config = _LabelStudioConfig(
|
|
447
|
-
data_keys=cls.__parse_data_keys_config(root),
|
|
448
|
-
rectangle_labels=cls.__parse_rectangle_labels_config(root)
|
|
462
|
+
data_keys=cls.__parse_data_keys_config(root), rectangle_labels=cls.__parse_rectangle_labels_config(root)
|
|
449
463
|
)
|
|
450
464
|
config.validate()
|
|
451
465
|
return config
|
|
452
466
|
|
|
453
467
|
@classmethod
|
|
454
|
-
def __parse_data_keys_config(cls, root:
|
|
468
|
+
def __parse_data_keys_config(cls, root: ET.Element) -> dict[str, '_DataKey']:
|
|
455
469
|
"""Parses the data keys from a Label Studio XML config."""
|
|
456
470
|
config: dict[str, '_DataKey'] = {}
|
|
457
471
|
for element in root:
|
|
@@ -467,17 +481,14 @@ class LabelStudioProject(Project):
|
|
|
467
481
|
return config
|
|
468
482
|
|
|
469
483
|
@classmethod
|
|
470
|
-
def __parse_rectangle_labels_config(cls, root:
|
|
484
|
+
def __parse_rectangle_labels_config(cls, root: ET.Element) -> dict[str, '_RectangleLabel']:
|
|
471
485
|
"""Parses the RectangleLabels from a Label Studio XML config."""
|
|
472
486
|
config: dict[str, '_RectangleLabel'] = {}
|
|
473
487
|
for element in root:
|
|
474
488
|
if element.tag.lower() == 'rectanglelabels':
|
|
475
489
|
name = element.attrib['name']
|
|
476
490
|
to_name = element.attrib['toName']
|
|
477
|
-
labels = [
|
|
478
|
-
child.attrib['value']
|
|
479
|
-
for child in element if child.tag.lower() == 'label'
|
|
480
|
-
]
|
|
491
|
+
labels = [child.attrib['value'] for child in element if child.tag.lower() == 'label']
|
|
481
492
|
for label in labels:
|
|
482
493
|
if label not in coco.COCO_2017_CATEGORIES.values():
|
|
483
494
|
raise excs.Error(f'Label in `rectanglelabels` config is not a valid COCO object name: {label}')
|
|
@@ -486,11 +497,7 @@ class LabelStudioProject(Project):
|
|
|
486
497
|
|
|
487
498
|
@classmethod
|
|
488
499
|
def __coco_to_predictions(
|
|
489
|
-
|
|
490
|
-
coco_annotations: dict[str, Any],
|
|
491
|
-
from_name: str,
|
|
492
|
-
rl_info: '_RectangleLabel',
|
|
493
|
-
task_id: Optional[int] = None
|
|
500
|
+
cls, coco_annotations: dict[str, Any], from_name: str, rl_info: '_RectangleLabel', task_id: int | None = None
|
|
494
501
|
) -> dict[str, Any]:
|
|
495
502
|
width = coco_annotations['image']['width']
|
|
496
503
|
height = coco_annotations['image']['height']
|
|
@@ -510,8 +517,8 @@ class LabelStudioProject(Project):
|
|
|
510
517
|
'y': entry['bbox'][1] * 100.0 / height,
|
|
511
518
|
'width': entry['bbox'][2] * 100.0 / width,
|
|
512
519
|
'height': entry['bbox'][3] * 100.0 / height,
|
|
513
|
-
'rectanglelabels': [coco.COCO_2017_CATEGORIES[entry['category']]]
|
|
514
|
-
}
|
|
520
|
+
'rectanglelabels': [coco.COCO_2017_CATEGORIES[entry['category']]],
|
|
521
|
+
},
|
|
515
522
|
}
|
|
516
523
|
for i, entry in enumerate(coco_annotations['annotations'])
|
|
517
524
|
# include only the COCO labels that match a rectanglelabel name
|
|
@@ -529,9 +536,9 @@ class LabelStudioProject(Project):
|
|
|
529
536
|
"""
|
|
530
537
|
title = self.project_title
|
|
531
538
|
_label_studio_client().delete_project(self.project_id)
|
|
532
|
-
|
|
539
|
+
env.Env.get().console_logger.info(f'Deleted Label Studio project: {title}')
|
|
533
540
|
|
|
534
|
-
def __eq__(self, other) -> bool:
|
|
541
|
+
def __eq__(self, other: object) -> bool:
|
|
535
542
|
return isinstance(other, LabelStudioProject) and self.project_id == other.project_id
|
|
536
543
|
|
|
537
544
|
def __hash__(self) -> int:
|
|
@@ -539,15 +546,15 @@ class LabelStudioProject(Project):
|
|
|
539
546
|
|
|
540
547
|
@classmethod
|
|
541
548
|
def create(
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
549
|
+
cls,
|
|
550
|
+
t: Table,
|
|
551
|
+
label_config: str,
|
|
552
|
+
name: str | None,
|
|
553
|
+
title: str | None,
|
|
554
|
+
media_import_method: Literal['post', 'file', 'url'],
|
|
555
|
+
col_mapping: dict[str, str] | None,
|
|
556
|
+
s3_configuration: dict[str, Any] | None,
|
|
557
|
+
**kwargs: Any,
|
|
551
558
|
) -> 'LabelStudioProject':
|
|
552
559
|
"""
|
|
553
560
|
Creates a new Label Studio project, using the Label Studio client configured in Pixeltable.
|
|
@@ -557,7 +564,7 @@ class LabelStudioProject(Project):
|
|
|
557
564
|
|
|
558
565
|
if name is None:
|
|
559
566
|
# Create a default name that's unique to the table
|
|
560
|
-
all_stores = t.external_stores
|
|
567
|
+
all_stores = t.external_stores()
|
|
561
568
|
n = 0
|
|
562
569
|
while f'ls_project_{n}' in all_stores:
|
|
563
570
|
n += 1
|
|
@@ -573,11 +580,12 @@ class LabelStudioProject(Project):
|
|
|
573
580
|
local_annotations_column = ANNOTATIONS_COLUMN
|
|
574
581
|
else:
|
|
575
582
|
local_annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
|
|
576
|
-
if local_annotations_column not in t.
|
|
577
|
-
t
|
|
583
|
+
if local_annotations_column not in t._get_schema():
|
|
584
|
+
t.add_columns({local_annotations_column: ts.Json})
|
|
578
585
|
|
|
579
586
|
resolved_col_mapping = cls.validate_columns(
|
|
580
|
-
t, config.export_columns, {ANNOTATIONS_COLUMN:
|
|
587
|
+
t, config.export_columns, {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}, col_mapping
|
|
588
|
+
)
|
|
581
589
|
|
|
582
590
|
# Perform some additional validation
|
|
583
591
|
if media_import_method == 'post' and len(config.data_keys) > 1:
|
|
@@ -587,16 +595,19 @@ class LabelStudioProject(Project):
|
|
|
587
595
|
if media_import_method != 'url':
|
|
588
596
|
raise excs.Error("`s3_configuration` is only valid when `media_import_method == 'url'`")
|
|
589
597
|
s3_configuration = copy.copy(s3_configuration)
|
|
590
|
-
if
|
|
598
|
+
if 'bucket' not in s3_configuration:
|
|
591
599
|
raise excs.Error('`s3_configuration` must contain a `bucket` field')
|
|
592
|
-
if
|
|
600
|
+
if 'title' not in s3_configuration:
|
|
593
601
|
s3_configuration['title'] = 'Pixeltable-S3-Import-Storage'
|
|
594
|
-
if (
|
|
595
|
-
'
|
|
596
|
-
'
|
|
602
|
+
if (
|
|
603
|
+
'aws_access_key_id' not in s3_configuration
|
|
604
|
+
and 'aws_secret_access_key' not in s3_configuration
|
|
605
|
+
and 'aws_session_token' not in s3_configuration
|
|
606
|
+
):
|
|
597
607
|
# Attempt to fill any missing credentials from the environment
|
|
598
608
|
try:
|
|
599
609
|
import boto3
|
|
610
|
+
|
|
600
611
|
s3_credentials = boto3.Session().get_credentials().get_frozen_credentials()
|
|
601
612
|
_logger.info(f'Using AWS credentials from the environment for Label Studio project: {title}')
|
|
602
613
|
s3_configuration['aws_access_key_id'] = s3_credentials.access_key
|
|
@@ -612,18 +623,22 @@ class LabelStudioProject(Project):
|
|
|
612
623
|
|
|
613
624
|
if media_import_method == 'file':
|
|
614
625
|
# We need to set up a local storage connection to receive media files
|
|
615
|
-
os.environ['LABEL_STUDIO_LOCAL_FILES_DOCUMENT_ROOT'] = str(
|
|
626
|
+
os.environ['LABEL_STUDIO_LOCAL_FILES_DOCUMENT_ROOT'] = str(Config.get().home)
|
|
616
627
|
try:
|
|
617
628
|
project.connect_local_import_storage(local_store_path=str(env.Env.get().media_dir))
|
|
618
629
|
except HTTPError as exc:
|
|
619
630
|
if exc.errno == 400:
|
|
620
631
|
response: dict = json.loads(exc.response.text)
|
|
621
|
-
if
|
|
622
|
-
|
|
632
|
+
if (
|
|
633
|
+
'validation_errors' in response
|
|
634
|
+
and 'non_field_errors' in response['validation_errors']
|
|
635
|
+
and 'LOCAL_FILES_SERVING_ENABLED' in response['validation_errors']['non_field_errors'][0]
|
|
636
|
+
):
|
|
623
637
|
raise excs.Error(
|
|
624
638
|
'`media_import_method` is set to `file`, but your Label Studio server is not configured '
|
|
625
639
|
'for local file storage.\nPlease set the `LABEL_STUDIO_LOCAL_FILES_SERVING_ENABLED` '
|
|
626
|
-
'environment variable to `true` in the environment where your Label Studio server
|
|
640
|
+
'environment variable to `true` in the environment where your Label Studio server '
|
|
641
|
+
'is running.'
|
|
627
642
|
) from exc
|
|
628
643
|
raise # Handle any other exception type normally
|
|
629
644
|
|
|
@@ -637,8 +652,8 @@ class LabelStudioProject(Project):
|
|
|
637
652
|
|
|
638
653
|
@dataclass(frozen=True)
|
|
639
654
|
class _DataKey:
|
|
640
|
-
name:
|
|
641
|
-
column_type:
|
|
655
|
+
name: str | None # The 'name' attribute of the data key; may differ from the field name
|
|
656
|
+
column_type: ts.ColumnType
|
|
642
657
|
|
|
643
658
|
|
|
644
659
|
@dataclass(frozen=True)
|
|
@@ -653,7 +668,7 @@ class _LabelStudioConfig:
|
|
|
653
668
|
rectangle_labels: dict[str, _RectangleLabel]
|
|
654
669
|
|
|
655
670
|
def validate(self) -> None:
|
|
656
|
-
data_key_names =
|
|
671
|
+
data_key_names = {key.name for key in self.data_keys.values() if key.name is not None}
|
|
657
672
|
for name, rl in self.rectangle_labels.items():
|
|
658
673
|
if rl.to_name not in data_key_names:
|
|
659
674
|
raise excs.Error(
|
|
@@ -662,18 +677,18 @@ class _LabelStudioConfig:
|
|
|
662
677
|
)
|
|
663
678
|
|
|
664
679
|
@property
|
|
665
|
-
def export_columns(self) -> dict[str,
|
|
680
|
+
def export_columns(self) -> dict[str, ts.ColumnType]:
|
|
666
681
|
data_key_cols = {key_id: key_info.column_type for key_id, key_info in self.data_keys.items()}
|
|
667
|
-
rl_cols = {name:
|
|
682
|
+
rl_cols = {name: ts.JsonType() for name in self.rectangle_labels}
|
|
668
683
|
return {**data_key_cols, **rl_cols}
|
|
669
684
|
|
|
670
685
|
|
|
671
686
|
ANNOTATIONS_COLUMN = 'annotations'
|
|
672
687
|
_PAGE_SIZE = 100 # This is the default used in the LS SDK
|
|
673
688
|
_LS_TAG_MAP = {
|
|
674
|
-
'header':
|
|
675
|
-
'text':
|
|
676
|
-
'image':
|
|
677
|
-
'video':
|
|
678
|
-
'audio':
|
|
689
|
+
'header': ts.StringType(),
|
|
690
|
+
'text': ts.StringType(),
|
|
691
|
+
'image': ts.ImageType(),
|
|
692
|
+
'video': ts.VideoType(),
|
|
693
|
+
'audio': ts.AudioType(),
|
|
679
694
|
}
|