label-studio-sdk 0.0.34__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- label_studio_sdk/__init__.py +206 -9
- label_studio_sdk/_extensions/label_studio_tools/__init__.py +0 -0
- label_studio_sdk/_extensions/label_studio_tools/core/__init__.py +0 -0
- label_studio_sdk/_extensions/label_studio_tools/core/label_config.py +163 -0
- label_studio_sdk/_extensions/label_studio_tools/core/utils/__init__.py +0 -0
- label_studio_sdk/_extensions/label_studio_tools/core/utils/exceptions.py +2 -0
- label_studio_sdk/_extensions/label_studio_tools/core/utils/io.py +228 -0
- label_studio_sdk/_extensions/label_studio_tools/core/utils/params.py +45 -0
- label_studio_sdk/_extensions/label_studio_tools/etl/__init__.py +1 -0
- label_studio_sdk/_extensions/label_studio_tools/etl/beam.py +34 -0
- label_studio_sdk/_extensions/label_studio_tools/etl/example.py +17 -0
- label_studio_sdk/_extensions/label_studio_tools/etl/registry.py +67 -0
- label_studio_sdk/_extensions/label_studio_tools/postprocessing/__init__.py +0 -0
- label_studio_sdk/_extensions/label_studio_tools/postprocessing/video.py +97 -0
- label_studio_sdk/_legacy/__init__.py +11 -0
- label_studio_sdk/_legacy/client.py +471 -0
- label_studio_sdk/_legacy/label_interface/data_examples.json +96 -0
- label_studio_sdk/{label_interface → _legacy/label_interface}/interface.py +9 -6
- label_studio_sdk/{project.py → _legacy/project.py} +2 -2
- label_studio_sdk/actions/__init__.py +2 -0
- label_studio_sdk/actions/client.py +150 -0
- label_studio_sdk/annotations/__init__.py +2 -0
- label_studio_sdk/annotations/client.py +750 -0
- label_studio_sdk/client.py +162 -450
- label_studio_sdk/converter/__init__.py +7 -0
- label_studio_sdk/converter/audio.py +56 -0
- label_studio_sdk/converter/brush.py +452 -0
- label_studio_sdk/converter/converter.py +1175 -0
- label_studio_sdk/converter/exports/__init__.py +0 -0
- label_studio_sdk/converter/exports/csv.py +82 -0
- label_studio_sdk/converter/exports/csv2.py +103 -0
- label_studio_sdk/converter/funsd.py +85 -0
- label_studio_sdk/converter/imports/__init__.py +0 -0
- label_studio_sdk/converter/imports/coco.py +314 -0
- label_studio_sdk/converter/imports/colors.py +198 -0
- label_studio_sdk/converter/imports/label_config.py +45 -0
- label_studio_sdk/converter/imports/pathtrack.py +269 -0
- label_studio_sdk/converter/imports/yolo.py +236 -0
- label_studio_sdk/converter/main.py +202 -0
- label_studio_sdk/converter/utils.py +473 -0
- label_studio_sdk/core/__init__.py +33 -0
- label_studio_sdk/core/api_error.py +15 -0
- label_studio_sdk/core/client_wrapper.py +55 -0
- label_studio_sdk/core/datetime_utils.py +28 -0
- label_studio_sdk/core/file.py +38 -0
- label_studio_sdk/core/http_client.py +443 -0
- label_studio_sdk/core/jsonable_encoder.py +99 -0
- label_studio_sdk/core/pagination.py +87 -0
- label_studio_sdk/core/pydantic_utilities.py +28 -0
- label_studio_sdk/core/query_encoder.py +33 -0
- label_studio_sdk/core/remove_none_from_dict.py +11 -0
- label_studio_sdk/core/request_options.py +32 -0
- label_studio_sdk/environment.py +7 -0
- label_studio_sdk/errors/__init__.py +6 -0
- label_studio_sdk/errors/bad_request_error.py +8 -0
- label_studio_sdk/errors/internal_server_error.py +8 -0
- label_studio_sdk/export_storage/__init__.py +28 -0
- label_studio_sdk/export_storage/azure/__init__.py +5 -0
- label_studio_sdk/export_storage/azure/client.py +722 -0
- label_studio_sdk/export_storage/azure/types/__init__.py +6 -0
- label_studio_sdk/export_storage/azure/types/azure_create_response.py +52 -0
- label_studio_sdk/export_storage/azure/types/azure_update_response.py +52 -0
- label_studio_sdk/export_storage/client.py +107 -0
- label_studio_sdk/export_storage/gcs/__init__.py +5 -0
- label_studio_sdk/export_storage/gcs/client.py +722 -0
- label_studio_sdk/export_storage/gcs/types/__init__.py +6 -0
- label_studio_sdk/export_storage/gcs/types/gcs_create_response.py +52 -0
- label_studio_sdk/export_storage/gcs/types/gcs_update_response.py +52 -0
- label_studio_sdk/export_storage/local/__init__.py +5 -0
- label_studio_sdk/export_storage/local/client.py +688 -0
- label_studio_sdk/export_storage/local/types/__init__.py +6 -0
- label_studio_sdk/export_storage/local/types/local_create_response.py +47 -0
- label_studio_sdk/export_storage/local/types/local_update_response.py +47 -0
- label_studio_sdk/export_storage/redis/__init__.py +5 -0
- label_studio_sdk/export_storage/redis/client.py +714 -0
- label_studio_sdk/export_storage/redis/types/__init__.py +6 -0
- label_studio_sdk/export_storage/redis/types/redis_create_response.py +57 -0
- label_studio_sdk/export_storage/redis/types/redis_update_response.py +57 -0
- label_studio_sdk/export_storage/s3/__init__.py +5 -0
- label_studio_sdk/export_storage/s3/client.py +820 -0
- label_studio_sdk/export_storage/s3/types/__init__.py +6 -0
- label_studio_sdk/export_storage/s3/types/s3create_response.py +74 -0
- label_studio_sdk/export_storage/s3/types/s3update_response.py +74 -0
- label_studio_sdk/export_storage/types/__init__.py +5 -0
- label_studio_sdk/export_storage/types/export_storage_list_types_response_item.py +30 -0
- label_studio_sdk/files/__init__.py +2 -0
- label_studio_sdk/files/client.py +556 -0
- label_studio_sdk/import_storage/__init__.py +28 -0
- label_studio_sdk/import_storage/azure/__init__.py +5 -0
- label_studio_sdk/import_storage/azure/client.py +812 -0
- label_studio_sdk/import_storage/azure/types/__init__.py +6 -0
- label_studio_sdk/import_storage/azure/types/azure_create_response.py +72 -0
- label_studio_sdk/import_storage/azure/types/azure_update_response.py +72 -0
- label_studio_sdk/import_storage/client.py +107 -0
- label_studio_sdk/import_storage/gcs/__init__.py +5 -0
- label_studio_sdk/import_storage/gcs/client.py +812 -0
- label_studio_sdk/import_storage/gcs/types/__init__.py +6 -0
- label_studio_sdk/import_storage/gcs/types/gcs_create_response.py +72 -0
- label_studio_sdk/import_storage/gcs/types/gcs_update_response.py +72 -0
- label_studio_sdk/import_storage/local/__init__.py +5 -0
- label_studio_sdk/import_storage/local/client.py +690 -0
- label_studio_sdk/import_storage/local/types/__init__.py +6 -0
- label_studio_sdk/import_storage/local/types/local_create_response.py +47 -0
- label_studio_sdk/import_storage/local/types/local_update_response.py +47 -0
- label_studio_sdk/import_storage/redis/__init__.py +5 -0
- label_studio_sdk/import_storage/redis/client.py +768 -0
- label_studio_sdk/import_storage/redis/types/__init__.py +6 -0
- label_studio_sdk/import_storage/redis/types/redis_create_response.py +62 -0
- label_studio_sdk/import_storage/redis/types/redis_update_response.py +62 -0
- label_studio_sdk/import_storage/s3/__init__.py +5 -0
- label_studio_sdk/import_storage/s3/client.py +912 -0
- label_studio_sdk/import_storage/s3/types/__init__.py +6 -0
- label_studio_sdk/import_storage/s3/types/s3create_response.py +99 -0
- label_studio_sdk/import_storage/s3/types/s3update_response.py +99 -0
- label_studio_sdk/import_storage/types/__init__.py +5 -0
- label_studio_sdk/import_storage/types/import_storage_list_types_response_item.py +30 -0
- label_studio_sdk/ml/__init__.py +19 -0
- label_studio_sdk/ml/client.py +981 -0
- label_studio_sdk/ml/types/__init__.py +17 -0
- label_studio_sdk/ml/types/ml_create_request_auth_method.py +5 -0
- label_studio_sdk/ml/types/ml_create_response.py +78 -0
- label_studio_sdk/ml/types/ml_create_response_auth_method.py +5 -0
- label_studio_sdk/ml/types/ml_update_request_auth_method.py +5 -0
- label_studio_sdk/ml/types/ml_update_response.py +78 -0
- label_studio_sdk/ml/types/ml_update_response_auth_method.py +5 -0
- label_studio_sdk/predictions/__init__.py +2 -0
- label_studio_sdk/predictions/client.py +638 -0
- label_studio_sdk/projects/__init__.py +6 -0
- label_studio_sdk/projects/client.py +1053 -0
- label_studio_sdk/projects/exports/__init__.py +2 -0
- label_studio_sdk/projects/exports/client.py +930 -0
- label_studio_sdk/projects/types/__init__.py +7 -0
- label_studio_sdk/projects/types/projects_create_response.py +96 -0
- label_studio_sdk/projects/types/projects_import_tasks_response.py +71 -0
- label_studio_sdk/projects/types/projects_list_response.py +33 -0
- label_studio_sdk/py.typed +0 -0
- label_studio_sdk/tasks/__init__.py +5 -0
- label_studio_sdk/tasks/client.py +811 -0
- label_studio_sdk/tasks/types/__init__.py +6 -0
- label_studio_sdk/tasks/types/tasks_list_request_fields.py +5 -0
- label_studio_sdk/tasks/types/tasks_list_response.py +48 -0
- label_studio_sdk/types/__init__.py +115 -0
- label_studio_sdk/types/annotation.py +116 -0
- label_studio_sdk/types/annotation_filter_options.py +42 -0
- label_studio_sdk/types/annotation_last_action.py +19 -0
- label_studio_sdk/types/azure_blob_export_storage.py +112 -0
- label_studio_sdk/types/azure_blob_export_storage_status.py +7 -0
- label_studio_sdk/types/azure_blob_import_storage.py +113 -0
- label_studio_sdk/types/azure_blob_import_storage_status.py +7 -0
- label_studio_sdk/types/base_task.py +113 -0
- label_studio_sdk/types/base_user.py +42 -0
- label_studio_sdk/types/converted_format.py +36 -0
- label_studio_sdk/types/converted_format_status.py +5 -0
- label_studio_sdk/types/export.py +48 -0
- label_studio_sdk/types/export_convert.py +32 -0
- label_studio_sdk/types/export_create.py +54 -0
- label_studio_sdk/types/export_create_status.py +5 -0
- label_studio_sdk/types/export_status.py +5 -0
- label_studio_sdk/types/file_upload.py +30 -0
- label_studio_sdk/types/filter.py +53 -0
- label_studio_sdk/types/filter_group.py +35 -0
- label_studio_sdk/types/gcs_export_storage.py +112 -0
- label_studio_sdk/types/gcs_export_storage_status.py +7 -0
- label_studio_sdk/types/gcs_import_storage.py +113 -0
- label_studio_sdk/types/gcs_import_storage_status.py +7 -0
- label_studio_sdk/types/local_files_export_storage.py +97 -0
- label_studio_sdk/types/local_files_export_storage_status.py +7 -0
- label_studio_sdk/types/local_files_import_storage.py +92 -0
- label_studio_sdk/types/local_files_import_storage_status.py +7 -0
- label_studio_sdk/types/ml_backend.py +89 -0
- label_studio_sdk/types/ml_backend_auth_method.py +5 -0
- label_studio_sdk/types/ml_backend_state.py +5 -0
- label_studio_sdk/types/prediction.py +78 -0
- label_studio_sdk/types/project.py +198 -0
- label_studio_sdk/types/project_import.py +63 -0
- label_studio_sdk/types/project_import_status.py +5 -0
- label_studio_sdk/types/project_label_config.py +32 -0
- label_studio_sdk/types/project_sampling.py +7 -0
- label_studio_sdk/types/project_skip_queue.py +5 -0
- label_studio_sdk/types/redis_export_storage.py +117 -0
- label_studio_sdk/types/redis_export_storage_status.py +7 -0
- label_studio_sdk/types/redis_import_storage.py +112 -0
- label_studio_sdk/types/redis_import_storage_status.py +7 -0
- label_studio_sdk/types/s3export_storage.py +134 -0
- label_studio_sdk/types/s3export_storage_status.py +7 -0
- label_studio_sdk/types/s3import_storage.py +140 -0
- label_studio_sdk/types/s3import_storage_status.py +7 -0
- label_studio_sdk/types/serialization_option.py +36 -0
- label_studio_sdk/types/serialization_options.py +45 -0
- label_studio_sdk/types/task.py +157 -0
- label_studio_sdk/types/task_filter_options.py +49 -0
- label_studio_sdk/types/user_simple.py +37 -0
- label_studio_sdk/types/view.py +55 -0
- label_studio_sdk/types/webhook.py +67 -0
- label_studio_sdk/types/webhook_actions_item.py +21 -0
- label_studio_sdk/types/webhook_serializer_for_update.py +67 -0
- label_studio_sdk/types/webhook_serializer_for_update_actions_item.py +21 -0
- label_studio_sdk/users/__init__.py +5 -0
- label_studio_sdk/users/client.py +830 -0
- label_studio_sdk/users/types/__init__.py +6 -0
- label_studio_sdk/users/types/users_get_token_response.py +36 -0
- label_studio_sdk/users/types/users_reset_token_response.py +36 -0
- label_studio_sdk/version.py +4 -0
- label_studio_sdk/views/__init__.py +31 -0
- label_studio_sdk/views/client.py +564 -0
- label_studio_sdk/views/types/__init__.py +29 -0
- label_studio_sdk/views/types/views_create_request_data.py +43 -0
- label_studio_sdk/views/types/views_create_request_data_filters.py +43 -0
- label_studio_sdk/views/types/views_create_request_data_filters_conjunction.py +5 -0
- label_studio_sdk/views/types/views_create_request_data_filters_items_item.py +47 -0
- label_studio_sdk/views/types/views_create_request_data_ordering_item.py +38 -0
- label_studio_sdk/views/types/views_create_request_data_ordering_item_direction.py +5 -0
- label_studio_sdk/views/types/views_update_request_data.py +43 -0
- label_studio_sdk/views/types/views_update_request_data_filters.py +43 -0
- label_studio_sdk/views/types/views_update_request_data_filters_conjunction.py +5 -0
- label_studio_sdk/views/types/views_update_request_data_filters_items_item.py +47 -0
- label_studio_sdk/views/types/views_update_request_data_ordering_item.py +38 -0
- label_studio_sdk/views/types/views_update_request_data_ordering_item_direction.py +5 -0
- label_studio_sdk/webhooks/__init__.py +5 -0
- label_studio_sdk/webhooks/client.py +636 -0
- label_studio_sdk/webhooks/types/__init__.py +5 -0
- label_studio_sdk/webhooks/types/webhooks_update_request_actions_item.py +21 -0
- label_studio_sdk-1.0.0.dist-info/METADATA +307 -0
- label_studio_sdk-1.0.0.dist-info/RECORD +239 -0
- {label_studio_sdk-0.0.34.dist-info → label_studio_sdk-1.0.0.dist-info}/WHEEL +1 -2
- label_studio_sdk-0.0.34.dist-info/LICENSE +0 -201
- label_studio_sdk-0.0.34.dist-info/METADATA +0 -24
- label_studio_sdk-0.0.34.dist-info/RECORD +0 -37
- label_studio_sdk-0.0.34.dist-info/top_level.txt +0 -2
- tests/test_client.py +0 -37
- tests/test_export.py +0 -105
- tests/test_interface/__init__.py +0 -1
- tests/test_interface/configs.py +0 -137
- tests/test_interface/mockups.py +0 -22
- tests/test_interface/test_compat.py +0 -64
- tests/test_interface/test_control_tags.py +0 -55
- tests/test_interface/test_data_generation.py +0 -45
- tests/test_interface/test_lpi.py +0 -15
- tests/test_interface/test_main.py +0 -196
- tests/test_interface/test_object_tags.py +0 -36
- tests/test_interface/test_region.py +0 -36
- tests/test_interface/test_validate_summary.py +0 -35
- tests/test_interface/test_validation.py +0 -59
- {tests → label_studio_sdk/_extensions}/__init__.py +0 -0
- /label_studio_sdk/{exceptions.py → _legacy/exceptions.py} +0 -0
- /label_studio_sdk/{label_interface → _legacy/label_interface}/__init__.py +0 -0
- /label_studio_sdk/{label_interface → _legacy/label_interface}/base.py +0 -0
- /label_studio_sdk/{label_interface → _legacy/label_interface}/control_tags.py +0 -0
- /label_studio_sdk/{label_interface → _legacy/label_interface}/label_tags.py +0 -0
- /label_studio_sdk/{label_interface → _legacy/label_interface}/object_tags.py +0 -0
- /label_studio_sdk/{label_interface → _legacy/label_interface}/region.py +0 -0
- /label_studio_sdk/{objects.py → _legacy/objects.py} +0 -0
- /label_studio_sdk/{schema → _legacy/schema}/label_config_schema.json +0 -0
- /label_studio_sdk/{users.py → _legacy/users.py} +0 -0
- /label_studio_sdk/{utils.py → _legacy/utils.py} +0 -0
- /label_studio_sdk/{workspaces.py → _legacy/workspaces.py} +0 -0
|
File without changes
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# this csv converter is not used in GUI export, see convert_to_csv function
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import os
|
|
4
|
+
import json
|
|
5
|
+
|
|
6
|
+
from copy import deepcopy
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ExportToCSV(object):
    """Flatten Label Studio tasks and their annotations into tabular records.

    Each record carries the task id, the annotation id, the annotator and the
    task ``data`` fields, merged with either one labeling result (flat mode)
    or the full result list of the annotation.
    """

    def __init__(self, tasks):
        """Store the tasks to export.

        :param tasks: path to a ``.json`` file holding the tasks, or the
            already loaded tasks (an iterable of task dicts)
        :raises Exception: if a ``.json`` path is given but does not exist
        """
        if isinstance(tasks, str) and tasks.endswith(".json"):
            if not os.path.exists(tasks):
                raise Exception(f"Task file not found {tasks}")
            # input is a file
            with open(tasks) as f:
                self.tasks = json.load(f)
        else:
            # input is a JSON object
            self.tasks = tasks

    def _get_result_name(self, result):
        """Return the ``from_name`` of a result, or None when it is absent."""
        return result.get("from_name")

    def _minify_result(self, result):
        """Collapse a single-key result value into ``{from_name: payload}``.

        A value with exactly one key is reduced to its payload: an empty
        payload becomes None, a one-element sequence is unwrapped, anything
        else (including scalars such as a rating number) is kept as is.
        A value with any other number of keys is returned unchanged.
        """
        value = result["value"]
        name = self._get_result_name(result)
        if len(value) != 1:
            return value

        item = next(iter(value.values()))
        if isinstance(item, (list, tuple, str, dict)) and len(item) == 0:
            return {name: None}
        # Only sequences can be unwrapped by position; scalars have no len()
        # and mappings cannot be indexed with 0.
        if isinstance(item, (list, tuple, str)) and len(item) == 1:
            return {name: item[0]}
        return {name: item}

    def _get_annotation_results(self, annotation, minify, flat_regions):
        """Yield the dicts to merge into the base record of one annotation.

        :param annotation: annotation dict with a ``result`` list
        :param minify: when true, each region is reduced by ``_minify_result``
        :param flat_regions: when true, yield one dict per region; otherwise
            yield a single ``{"result": [...]}`` dict
        """
        results = annotation["result"]
        if not flat_regions:
            yield {"result": results}
            # The aggregated record replaces the per-region records; without
            # this return both kinds were emitted for the same annotation.
            return

        for result in results:
            if minify:
                yield self._minify_result(result)
            else:
                yield {self._get_result_name(result): result}

    def _get_annotator_id(self, annotation):
        """Return the annotator taken from ``completed_by``.

        An int is returned as is; for a dict the email is preferred over the
        id. Any other type yields None.
        """
        annotator = annotation.get("completed_by", {})
        if isinstance(annotator, int):
            return annotator
        elif isinstance(annotator, dict):
            return annotator.get("email") or annotator.get("id")

    def to_records(self, minify=True, flat_regions=True):
        """Build the list of flat record dicts for all tasks.

        :param minify: passed to ``_get_annotation_results``
        :param flat_regions: passed to ``_get_annotation_results``
        :return: list of dicts, one per yielded annotation result
        """
        records = []
        for task in self.tasks:
            annotations = task.get("annotations")
            if annotations is None:
                # Temp legacy fix
                annotations = task["completions"]
            for annotation in annotations:
                record = {
                    "id": task["id"],
                    "annotation_id": annotation.get("id"),
                    "annotator": self._get_annotator_id(annotation),
                }
                record.update(task["data"])
                for result in self._get_annotation_results(
                    annotation, minify, flat_regions
                ):
                    # copy so that nested task data is not shared between rows
                    rec = deepcopy(record)
                    rec.update(result)
                    records.append(rec)
        return records

    def to_dataframe(self, minify=True, flat_regions=True):
        """Return the records as a pandas DataFrame."""
        return pd.DataFrame.from_records(self.to_records(minify, flat_regions))

    def to_file(self, file, minify=True, flat_regions=True, **kwargs):
        """Write the records to ``file`` with ``DataFrame.to_csv``.

        Extra keyword arguments are forwarded to ``to_csv``; its return value
        is returned.
        """
        return self.to_dataframe(minify, flat_regions).to_csv(file, **kwargs)
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import csv
|
|
3
|
+
import time
|
|
4
|
+
import logging
|
|
5
|
+
import ujson as json
|
|
6
|
+
|
|
7
|
+
from copy import deepcopy, copy
|
|
8
|
+
|
|
9
|
+
from label_studio_sdk.converter.utils import ensure_dir, get_annotator, prettify_result
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
logger.setLevel("DEBUG")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def convert(item_iterator, input_data, output_dir, **kwargs):
    """Write the annotations produced by ``item_iterator`` to a CSV file.

    The input is traversed twice: first to collect the union of column names
    (the CSV header must be known before any row is written), then to write
    the rows.

    :param item_iterator: callable invoked as ``item_iterator(input_data)``
        on each pass; must return a fresh iterable of annotation items
    :param input_data: opaque value forwarded to ``item_iterator``
    :param output_dir: target ``.csv`` file path, or a directory in which
        ``result.csv`` is written
    :param kwargs: ``sep`` - the CSV delimiter (defaults to ``","``)
    """
    start_time = time.time()
    logger.debug("Convert CSV started")
    if str(output_dir).endswith(".csv"):
        output_file = output_dir
    else:
        # presumably creates the directory when missing - see converter.utils
        ensure_dir(output_dir)
        output_file = os.path.join(output_dir, "result.csv")

    # these keys are always present
    keys = {"annotator", "annotation_id", "created_at", "updated_at", "lead_time"}

    # make 2 passes: the first pass is to get keys, otherwise we can't write csv without headers
    logger.debug("Prepare column names for CSV ...")
    for item in item_iterator(input_data):
        record = prepare_annotation_keys(item)
        keys.update(record)

    # the second pass is to write records to csv
    logger.debug(
        f"Prepare done in {time.time()-start_time:0.2f} sec. Write CSV rows now ..."
    )
    # newline="" lets the csv module control line endings itself; without it
    # every row ends with "\r\r\n" on platforms that translate "\n".
    with open(output_file, "w", encoding="utf8", newline="") as outfile:
        writer = csv.DictWriter(
            outfile,
            fieldnames=sorted(keys),
            quoting=csv.QUOTE_NONNUMERIC,
            delimiter=kwargs.get("sep", ","),
        )
        writer.writeheader()

        for item in item_iterator(input_data):
            record = prepare_annotation(item)
            writer.writerow(record)

    logger.debug(f"CSV conversion finished in {time.time()-start_time:0.2f} sec")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def prepare_annotation(item):
    """Build one CSV row (a flat dict) from an annotation item.

    Output values go through ``prettify_result`` and are JSON-encoded unless
    the prettified value is already a string. Input (task data) values that
    are dicts or lists are JSON-encoded, other values are copied as is.
    The annotator and the bookkeeping fields are added last, followed by the
    optional ``agreement`` and a non-empty ``history``.

    :param item: annotation item with at least ``output``, ``input``,
        ``annotation_id``, ``created_at``, ``updated_at`` and ``lead_time``
    :return: dict mapping column name to cell value
    """
    row = {}

    task_id = item.get("id")
    if task_id is not None:
        row["id"] = task_id

    for column, raw in item["output"].items():
        pretty = prettify_result(raw)
        if not isinstance(pretty, str):
            pretty = json.dumps(pretty, ensure_ascii=False)
        row[column] = pretty

    for column, raw in item["input"].items():
        # flat dicts and arrays from task.data to json strings
        nested = isinstance(raw, (dict, list))
        row[column] = json.dumps(raw, ensure_ascii=False) if nested else raw

    row["annotator"] = get_annotator(item)
    for column in ("annotation_id", "created_at", "updated_at", "lead_time"):
        row[column] = item[column]

    if "agreement" in item:
        row["agreement"] = item["agreement"]

    if "history" in item:
        history = item["history"]
        if history:
            row["history"] = json.dumps(history, ensure_ascii=False)

    return row
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def prepare_annotation_keys(item):
    """Return the set of CSV column names that ``item`` contributes.

    The set holds every ``input`` and ``output`` key, plus ``id`` when the
    item has a non-None id, ``agreement`` when that key is present and
    ``history`` when it is present and non-empty.

    :param item: annotation item with ``input`` and ``output`` mappings
    :return: set of column names
    """
    columns = set(item["input"])  # only key names are needed, no deepcopy
    if item.get("id") is not None:
        columns.add("id")

    # iterating a mapping yields its keys, the values are not needed here
    columns.update(item["output"])

    if "agreement" in item:
        columns.add("agreement")

    if "history" in item and item["history"]:
        columns.add("history")

    return columns
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""This code allows to export Label Studio Export JSON to FUNSD format.
|
|
2
|
+
It is only a basic converter: it converts every bbox into a separate word.
|
|
3
|
+
Check this github issue for more details:
|
|
4
|
+
https://github.com/heartexlabs/label-studio/issues/2634#issuecomment-1251648670
|
|
5
|
+
|
|
6
|
+
Usage: funsd.py export.json
|
|
7
|
+
This command will export your LS OCR annotations to "./funsd/" directory.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import json
|
|
12
|
+
from collections import defaultdict
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def convert_annotation_to_fund(result):
    """Convert the ``result`` list of one annotation into a FUNSD form dict.

    Results sharing the same region ``id`` are merged into one record: the
    first label, the first text and the bounding box taken from the first
    result seen for that region. Percent coordinates are scaled to pixels
    with ``original_width`` / ``original_height``.

    :param result: list of Label Studio result dicts; each needs ``id``,
        ``original_width``, ``original_height`` and a ``value`` holding
        ``x``, ``y``, ``width`` and ``height``
    :return: ``{"form": [...]}`` with one entry per region, numbered from 1
    :raises KeyError: if a region ends up without a label or without text
    """
    # collect all LS results and combine labels, text, coordinates into one record
    pre = defaultdict(dict)
    for item in result:
        o = pre[item["id"]]

        labels = item.get("value", {}).get("labels", None)
        if labels:
            o["label"] = labels[0]

        text = item.get("value", {}).get("text", None)
        if text:
            o["text"] = text[0]

        if "box" not in o:
            w, h = item["original_width"], item["original_height"]
            v = item.get("value")
            x1 = v["x"] / 100.0 * w
            y1 = v["y"] / 100.0 * h
            x2 = x1 + v["width"] / 100.0 * w
            y2 = y1 + v["height"] / 100.0 * h
            # FUNSD boxes are [left, top, right, bottom]
            o["box"] = [x1, y1, x2, y2]

    # make FUNSD output
    output = []
    for counter, region in enumerate(pre.values(), start=1):
        output.append(
            {
                "id": counter,
                "box": region["box"],
                "text": region["text"],
                "label": region["label"],
                # basic converter: the whole region is its single word
                "words": [{"box": region["box"], "text": region["text"]}],
                "linking": [],
            }
        )

    return {"form": output}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def ls_to_funsd_converter(
    ls_export_path="export.json", funsd_dir="funsd", data_key="ocr"
):
    """Convert a Label Studio JSON export into one FUNSD file per annotation.

    Every annotation of every task is written to
    ``<funsd_dir>/task-<task id>-annotation-<annotation id>-<basename>.json``,
    where ``<basename>`` is the base name of ``task["data"][data_key]``.

    :param ls_export_path: path to the Label Studio export JSON file
    :param funsd_dir: output directory, created when missing
    :param data_key: key in the task ``data`` that holds the document path/URL
    """
    # JSON exports are UTF-8; do not depend on the locale encoding
    with open(ls_export_path, encoding="utf8") as export_file:
        tasks = json.load(export_file)

    os.makedirs(funsd_dir, exist_ok=True)

    for task in tasks:
        source_name = os.path.basename(task["data"][data_key])
        for annotation in task["annotations"]:
            output = convert_annotation_to_fund(annotation["result"])
            target = os.path.join(
                funsd_dir,
                f'task-{task["id"]}-annotation-{annotation["id"]}-'
                f"{source_name}.json",
            )

            with open(target, "w", encoding="utf8") as funsd_file:
                json.dump(output, funsd_file)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
if __name__ == "__main__":
    import sys

    print("Usage:", sys.argv[0], "export.json")
    print('This command will export your LS OCR annotations to "./funsd/" directory')

    # Without the export path there is nothing to convert: stop after the
    # usage text instead of failing with an IndexError.
    if len(sys.argv) < 2:
        sys.exit(1)

    ls_to_funsd_converter(sys.argv[1])
|
|
File without changes
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json # better to use "import ujson as json" for the best performance
|
|
3
|
+
import uuid
|
|
4
|
+
import logging
|
|
5
|
+
from PIL import Image
|
|
6
|
+
|
|
7
|
+
from label_studio_sdk.converter.utils import ExpandFullPath
|
|
8
|
+
from label_studio_sdk.converter.imports.label_config import generate_label_config
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger("root")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def new_task(out_type, root_url, file_name):
    """Create an empty Label Studio task for one image.

    :param out_type: key that holds the results, 'annotations' or 'predictions'
    :param root_url: root the image path is joined onto
    :param file_name: image file name
    :return: task dict with the image reference and a single empty,
        non-ground-truth result container stored under ``out_type``
    """
    image_ref = os.path.join(root_url, file_name)
    empty_container = {
        "result": [],
        "ground_truth": False,
    }
    task = {"data": {"image": image_ref}}
    task[out_type] = [empty_container]
    return task
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def create_bbox(annotation, categories, from_name, image_height, image_width, to_name):
    """Build a Label Studio ``rectanglelabels`` region from a COCO annotation.

    The ``bbox`` entries are read as x, y, width, height and converted to
    percentages of the image size.

    :param annotation: COCO annotation with ``category_id`` and ``bbox``
    :param categories: mapping of int category id to label name
    :param from_name: control tag name stored in the region
    :param image_height: image height used for scaling
    :param image_width: image width used for scaling
    :param to_name: object tag name stored in the region
    :return: region dict with a random 10-character hex id
    """
    label = categories[int(annotation["category_id"])]

    raw_x, raw_y, raw_w, raw_h = annotation["bbox"]
    left, top = float(raw_x), float(raw_y)
    box_w, box_h = float(raw_w), float(raw_h)

    value = {
        "x": left / image_width * 100.0,
        "y": top / image_height * 100.0,
        "width": box_w / image_width * 100.0,
        "height": box_h / image_height * 100.0,
        "rotation": 0,
        "rectanglelabels": [label],
    }
    return {
        "id": uuid.uuid4().hex[0:10],
        "type": "rectanglelabels",
        "value": value,
        "to_name": to_name,
        "from_name": from_name,
        "image_rotation": 0,
        "original_width": image_width,
        "original_height": image_height,
    }
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def create_segmentation(
    annotation, categories, from_name, image_height, image_width, to_name
):
    """Build a Label Studio ``polygonlabels`` region from a COCO annotation.

    Only the first polygon of ``segmentation`` is used. Its flat
    ``[x0, y0, x1, y1, ...]`` list is paired into points and converted to
    percentages of the image size; a trailing unpaired number is dropped.

    :param annotation: COCO annotation with ``category_id`` and ``segmentation``
    :param categories: mapping of int category id to label name
    :param from_name: control tag name stored in the region
    :param image_height: image height used for scaling
    :param image_width: image width used for scaling
    :param to_name: object tag name stored in the region
    :return: region dict with a random 10-character hex id
    """
    label = categories[int(annotation["category_id"])]

    # zipping one iterator with itself consumes the flat list two at a time
    coords = iter(annotation["segmentation"][0])
    points = [
        [x / image_width * 100.0, y / image_height * 100.0]
        for x, y in zip(coords, coords)
    ]

    item = {
        "id": uuid.uuid4().hex[0:10],
        "type": "polygonlabels",
        "value": {"points": points, "polygonlabels": [label]},
        "to_name": to_name,
        "from_name": from_name,
        "image_rotation": 0,
        "original_width": image_width,
        "original_height": image_height,
    }
    return item
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def create_keypoints(
    annotation, categories, from_name, to_name, image_height, image_width, point_width
):
    """Build Label Studio ``keypointlabels`` regions from a COCO annotation.

    ``keypoints`` is read as consecutive (x, y, visibility) triples. Each
    triple becomes one region positioned in percentages of the image size;
    triples with visibility below 2 are marked ``hidden``.

    :param annotation: COCO annotation with ``category_id`` and ``keypoints``
    :param categories: mapping of int category id to label name
    :param from_name: control tag name stored in every region
    :param to_name: object tag name stored in every region
    :param image_height: image height used for scaling
    :param image_width: image width used for scaling
    :param point_width: value stored as the ``width`` of every key point
    :return: list of region dicts, one per triple
    """
    label = categories[int(annotation["category_id"])]
    flat = annotation["keypoints"]
    regions = []

    for offset in range(0, len(flat), 3):
        raw_x, raw_y, raw_v = flat[offset : offset + 3]  # x, y, visibility
        px, py, visibility = float(raw_x), float(raw_y), int(raw_v)

        value = {
            "x": px / image_width * 100.0,
            "y": py / image_height * 100.0,
            "width": point_width,
            "keypointlabels": [label],
        }
        # visibility
        if visibility < 2:
            value["hidden"] = True

        regions.append(
            {
                "id": uuid.uuid4().hex[0:10],
                "type": "keypointlabels",
                "value": value,
                "to_name": to_name,
                "from_name": from_name,
                "image_rotation": 0,
                "original_width": image_width,
                "original_height": image_height,
            }
        )

    return regions
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def convert_coco_to_ls(
    input_file,
    out_file,
    to_name="image",
    from_name="label",
    out_type="annotations",
    image_root_url="/data/local-files/?d=",
    use_super_categories=False,
    point_width=1.0,
):
    """Convert COCO labeling to Label Studio JSON

    One task is created per entry of ``coco["images"]``; every COCO annotation
    then contributes rectangle, polygon and/or key point results to the task
    of its image. A labeling config is generated next to ``out_file`` and the
    tasks are dumped to ``out_file`` as a JSON list sorted by image id.

    Segmentations that are not a non-empty list (e.g. RLE dicts) are skipped:
    RLE is not supported and is reported once through the logger.

    :param input_file: file with COCO json
    :param out_file: output file with Label Studio JSON tasks
    :param to_name: object name from Label Studio labeling config
    :param from_name: control tag name from Label Studio labeling config
    :param out_type: annotation type - "annotations" or "predictions"
    :param image_root_url: root URL path where images will be hosted, e.g.: http://example.com/images
    :param use_super_categories: use super categories from categories if they are presented
    :param point_width: key point width
    :return: None
    :raises KeyError: if an annotation references an image id or category id
        that is missing from the COCO file
    """
    logger.info("Reading COCO notes and categories from %s", input_file)

    with open(input_file, encoding="utf8") as f:
        coco = json.load(f)

    # list to dict conversion: [...] => {category_id: category_item}
    raw_categories = {
        int(category["id"]): category for category in coco["categories"]
    }

    # mapping: id => category name, inserted in ascending order of origin ids
    categories = {}
    for category_id in sorted(raw_categories):
        category = raw_categories[category_id]
        name = category["name"]
        if use_super_categories and "supercategory" in category:
            name = category["supercategory"] + ":" + name
        categories[category_id] = name

    # mapping: image id => image
    images = {item["id"]: item for item in coco["images"]}

    logger.info(
        "Found %d categories, %d images and %d annotations",
        len(categories),
        len(images),
        len(coco["annotations"]),
    )

    # flags for labeling config composing
    segmentation = bbox = keypoints = rle = False
    segmentation_once = bbox_once = keypoints_once = rle_once = False
    rectangles_from_name = from_name + "_rectangles"
    keypoints_from_name = from_name + "_keypoints"
    # NOTE: no "_" separator here, unlike the two names above; kept as is so
    # that generated tag names stay compatible with earlier outputs.
    segmentation_from_name = from_name + "polygons"
    tags = {}

    # create tasks: image_id => task
    tasks = {
        image["id"]: new_task(out_type, image_root_url, image["file_name"])
        for image in coco["images"]
    }

    for annotation in coco["annotations"]:
        polygons = annotation.get("segmentation")

        segmentation |= "segmentation" in annotation
        bbox |= "bbox" in annotation
        keypoints |= "keypoints" in annotation
        # iscrowd == 0 - polygons are in segmentation, otherwise rle;
        # a dict-shaped segmentation is RLE regardless of the flag
        rle |= annotation.get("iscrowd") == 1 or isinstance(polygons, dict)

        if rle and not rle_once:  # not supported
            logger.error("RLE in segmentation is not yet supported in COCO")
            rle_once = True
        if keypoints and not keypoints_once:
            logger.warning("Keypoints are partially supported without skeletons")
            tags.update({keypoints_from_name: "KeyPointLabels"})
            keypoints_once = True
        if segmentation and not segmentation_once:
            logger.warning("Segmentation in COCO is experimental")
            tags.update({segmentation_from_name: "PolygonLabels"})
            segmentation_once = True
        if bbox and not bbox_once:
            tags.update({rectangles_from_name: "RectangleLabels"})
            bbox_once = True

        # read image sizes
        image_id = annotation["image_id"]
        image = images[image_id]
        image_width, image_height = image["width"], image["height"]

        # the task dict is mutated in place, no need to store it back
        results = tasks[image_id][out_type][0]["result"]

        if "bbox" in annotation:
            results.append(
                create_bbox(
                    annotation,
                    categories,
                    rectangles_from_name,
                    image_height,
                    image_width,
                    to_name,
                )
            )

        # Only polygon lists can be converted; indexing an RLE dict with [0]
        # inside create_segmentation would raise KeyError.
        if isinstance(polygons, list) and polygons:
            results.append(
                create_segmentation(
                    annotation,
                    categories,
                    segmentation_from_name,
                    image_height,
                    image_width,
                    to_name,
                )
            )

        if "keypoints" in annotation:
            results.extend(
                create_keypoints(
                    annotation,
                    categories,
                    keypoints_from_name,
                    to_name,
                    image_height,
                    image_width,
                    point_width,
                )
            )

    # generate and save labeling config; strip only a trailing ".json" so that
    # other ".json" occurrences inside the path are left untouched
    json_suffix = ".json"
    if out_file.endswith(json_suffix):
        config_stem = out_file[: -len(json_suffix)]
    else:
        config_stem = out_file
    label_config_file = config_stem + ".label_config.xml"
    generate_label_config(categories, tags, to_name, from_name, label_config_file)

    if tasks:
        tasks = [tasks[key] for key in sorted(tasks)]
        logger.info("Saving Label Studio JSON to %s", out_file)
        with open(out_file, "w") as out:
            json.dump(tasks, out)

        print(
            "\n"
            f" 1. Create a new project in Label Studio\n"
            f' 2. Use Labeling Config from "{label_config_file}"\n'
            f" 3. Setup serving for images [e.g. you can use Local Storage (or others):\n"
            f" https://labelstud.io/guide/storage.html#Local-storage]\n"
            f' 4. Import "{out_file}" to the project\n'
        )
    else:
        logger.error("No labels converted")
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def add_parser(subparsers):
    """Register the ``coco`` sub-command and its command-line options.

    :param subparsers: object whose ``add_parser`` creates the sub-command
        parser (NOTE(review): presumably the result of argparse
        ``add_subparsers()`` - confirm against the caller)
    """
    coco = subparsers.add_parser("coco")

    # (option strings, keyword arguments), listed in registration order
    option_table = (
        (
            ("-i", "--input"),
            {
                "dest": "input",
                "required": True,
                "help": "directory with COCO where images, labels, notes.json are located",
                "action": ExpandFullPath,
            },
        ),
        (
            ("-o", "--output"),
            {
                "dest": "output",
                "help": "output file with Label Studio JSON tasks",
                "default": "output.json",
                "action": ExpandFullPath,
            },
        ),
        (
            ("--to-name",),
            {
                "dest": "to_name",
                "help": "object name from Label Studio labeling config",
                "default": "image",
            },
        ),
        (
            ("--from-name",),
            {
                "dest": "from_name",
                "help": "control tag name from Label Studio labeling config",
                "default": "label",
            },
        ),
        (
            ("--out-type",),
            {
                "dest": "out_type",
                "help": 'annotation type - "annotations" or "predictions"',
                "default": "annotations",
            },
        ),
        (
            ("--image-root-url",),
            {
                "dest": "image_root_url",
                "help": "root URL path where images will be hosted, e.g.: http://example.com/images",
                "default": "/data/local-files/?d=",
            },
        ),
        (
            ("--point-width",),
            {
                "dest": "point_width",
                "help": "key point width (size)",
                "default": 1.0,
                "type": float,
            },
        ),
    )

    for option_strings, settings in option_table:
        coco.add_argument(*option_strings, **settings)
|