label-studio-sdk 0.0.34__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- label_studio_sdk/__init__.py +206 -9
- label_studio_sdk/_extensions/label_studio_tools/__init__.py +0 -0
- label_studio_sdk/_extensions/label_studio_tools/core/__init__.py +0 -0
- label_studio_sdk/_extensions/label_studio_tools/core/label_config.py +163 -0
- label_studio_sdk/_extensions/label_studio_tools/core/utils/__init__.py +0 -0
- label_studio_sdk/_extensions/label_studio_tools/core/utils/exceptions.py +2 -0
- label_studio_sdk/_extensions/label_studio_tools/core/utils/io.py +228 -0
- label_studio_sdk/_extensions/label_studio_tools/core/utils/params.py +45 -0
- label_studio_sdk/_extensions/label_studio_tools/etl/__init__.py +1 -0
- label_studio_sdk/_extensions/label_studio_tools/etl/beam.py +34 -0
- label_studio_sdk/_extensions/label_studio_tools/etl/example.py +17 -0
- label_studio_sdk/_extensions/label_studio_tools/etl/registry.py +67 -0
- label_studio_sdk/_extensions/label_studio_tools/postprocessing/__init__.py +0 -0
- label_studio_sdk/_extensions/label_studio_tools/postprocessing/video.py +97 -0
- label_studio_sdk/_legacy/__init__.py +11 -0
- label_studio_sdk/_legacy/client.py +471 -0
- label_studio_sdk/_legacy/label_interface/data_examples.json +96 -0
- label_studio_sdk/{label_interface → _legacy/label_interface}/interface.py +9 -6
- label_studio_sdk/{project.py → _legacy/project.py} +2 -2
- label_studio_sdk/actions/__init__.py +2 -0
- label_studio_sdk/actions/client.py +150 -0
- label_studio_sdk/annotations/__init__.py +2 -0
- label_studio_sdk/annotations/client.py +750 -0
- label_studio_sdk/client.py +162 -450
- label_studio_sdk/converter/__init__.py +7 -0
- label_studio_sdk/converter/audio.py +56 -0
- label_studio_sdk/converter/brush.py +452 -0
- label_studio_sdk/converter/converter.py +1175 -0
- label_studio_sdk/converter/exports/__init__.py +0 -0
- label_studio_sdk/converter/exports/csv.py +82 -0
- label_studio_sdk/converter/exports/csv2.py +103 -0
- label_studio_sdk/converter/funsd.py +85 -0
- label_studio_sdk/converter/imports/__init__.py +0 -0
- label_studio_sdk/converter/imports/coco.py +314 -0
- label_studio_sdk/converter/imports/colors.py +198 -0
- label_studio_sdk/converter/imports/label_config.py +45 -0
- label_studio_sdk/converter/imports/pathtrack.py +269 -0
- label_studio_sdk/converter/imports/yolo.py +236 -0
- label_studio_sdk/converter/main.py +202 -0
- label_studio_sdk/converter/utils.py +473 -0
- label_studio_sdk/core/__init__.py +33 -0
- label_studio_sdk/core/api_error.py +15 -0
- label_studio_sdk/core/client_wrapper.py +55 -0
- label_studio_sdk/core/datetime_utils.py +28 -0
- label_studio_sdk/core/file.py +38 -0
- label_studio_sdk/core/http_client.py +443 -0
- label_studio_sdk/core/jsonable_encoder.py +99 -0
- label_studio_sdk/core/pagination.py +87 -0
- label_studio_sdk/core/pydantic_utilities.py +28 -0
- label_studio_sdk/core/query_encoder.py +33 -0
- label_studio_sdk/core/remove_none_from_dict.py +11 -0
- label_studio_sdk/core/request_options.py +32 -0
- label_studio_sdk/environment.py +7 -0
- label_studio_sdk/errors/__init__.py +6 -0
- label_studio_sdk/errors/bad_request_error.py +8 -0
- label_studio_sdk/errors/internal_server_error.py +8 -0
- label_studio_sdk/export_storage/__init__.py +28 -0
- label_studio_sdk/export_storage/azure/__init__.py +5 -0
- label_studio_sdk/export_storage/azure/client.py +722 -0
- label_studio_sdk/export_storage/azure/types/__init__.py +6 -0
- label_studio_sdk/export_storage/azure/types/azure_create_response.py +52 -0
- label_studio_sdk/export_storage/azure/types/azure_update_response.py +52 -0
- label_studio_sdk/export_storage/client.py +107 -0
- label_studio_sdk/export_storage/gcs/__init__.py +5 -0
- label_studio_sdk/export_storage/gcs/client.py +722 -0
- label_studio_sdk/export_storage/gcs/types/__init__.py +6 -0
- label_studio_sdk/export_storage/gcs/types/gcs_create_response.py +52 -0
- label_studio_sdk/export_storage/gcs/types/gcs_update_response.py +52 -0
- label_studio_sdk/export_storage/local/__init__.py +5 -0
- label_studio_sdk/export_storage/local/client.py +688 -0
- label_studio_sdk/export_storage/local/types/__init__.py +6 -0
- label_studio_sdk/export_storage/local/types/local_create_response.py +47 -0
- label_studio_sdk/export_storage/local/types/local_update_response.py +47 -0
- label_studio_sdk/export_storage/redis/__init__.py +5 -0
- label_studio_sdk/export_storage/redis/client.py +714 -0
- label_studio_sdk/export_storage/redis/types/__init__.py +6 -0
- label_studio_sdk/export_storage/redis/types/redis_create_response.py +57 -0
- label_studio_sdk/export_storage/redis/types/redis_update_response.py +57 -0
- label_studio_sdk/export_storage/s3/__init__.py +5 -0
- label_studio_sdk/export_storage/s3/client.py +820 -0
- label_studio_sdk/export_storage/s3/types/__init__.py +6 -0
- label_studio_sdk/export_storage/s3/types/s3create_response.py +74 -0
- label_studio_sdk/export_storage/s3/types/s3update_response.py +74 -0
- label_studio_sdk/export_storage/types/__init__.py +5 -0
- label_studio_sdk/export_storage/types/export_storage_list_types_response_item.py +30 -0
- label_studio_sdk/files/__init__.py +2 -0
- label_studio_sdk/files/client.py +556 -0
- label_studio_sdk/import_storage/__init__.py +28 -0
- label_studio_sdk/import_storage/azure/__init__.py +5 -0
- label_studio_sdk/import_storage/azure/client.py +812 -0
- label_studio_sdk/import_storage/azure/types/__init__.py +6 -0
- label_studio_sdk/import_storage/azure/types/azure_create_response.py +72 -0
- label_studio_sdk/import_storage/azure/types/azure_update_response.py +72 -0
- label_studio_sdk/import_storage/client.py +107 -0
- label_studio_sdk/import_storage/gcs/__init__.py +5 -0
- label_studio_sdk/import_storage/gcs/client.py +812 -0
- label_studio_sdk/import_storage/gcs/types/__init__.py +6 -0
- label_studio_sdk/import_storage/gcs/types/gcs_create_response.py +72 -0
- label_studio_sdk/import_storage/gcs/types/gcs_update_response.py +72 -0
- label_studio_sdk/import_storage/local/__init__.py +5 -0
- label_studio_sdk/import_storage/local/client.py +690 -0
- label_studio_sdk/import_storage/local/types/__init__.py +6 -0
- label_studio_sdk/import_storage/local/types/local_create_response.py +47 -0
- label_studio_sdk/import_storage/local/types/local_update_response.py +47 -0
- label_studio_sdk/import_storage/redis/__init__.py +5 -0
- label_studio_sdk/import_storage/redis/client.py +768 -0
- label_studio_sdk/import_storage/redis/types/__init__.py +6 -0
- label_studio_sdk/import_storage/redis/types/redis_create_response.py +62 -0
- label_studio_sdk/import_storage/redis/types/redis_update_response.py +62 -0
- label_studio_sdk/import_storage/s3/__init__.py +5 -0
- label_studio_sdk/import_storage/s3/client.py +912 -0
- label_studio_sdk/import_storage/s3/types/__init__.py +6 -0
- label_studio_sdk/import_storage/s3/types/s3create_response.py +99 -0
- label_studio_sdk/import_storage/s3/types/s3update_response.py +99 -0
- label_studio_sdk/import_storage/types/__init__.py +5 -0
- label_studio_sdk/import_storage/types/import_storage_list_types_response_item.py +30 -0
- label_studio_sdk/ml/__init__.py +19 -0
- label_studio_sdk/ml/client.py +981 -0
- label_studio_sdk/ml/types/__init__.py +17 -0
- label_studio_sdk/ml/types/ml_create_request_auth_method.py +5 -0
- label_studio_sdk/ml/types/ml_create_response.py +78 -0
- label_studio_sdk/ml/types/ml_create_response_auth_method.py +5 -0
- label_studio_sdk/ml/types/ml_update_request_auth_method.py +5 -0
- label_studio_sdk/ml/types/ml_update_response.py +78 -0
- label_studio_sdk/ml/types/ml_update_response_auth_method.py +5 -0
- label_studio_sdk/predictions/__init__.py +2 -0
- label_studio_sdk/predictions/client.py +638 -0
- label_studio_sdk/projects/__init__.py +6 -0
- label_studio_sdk/projects/client.py +1053 -0
- label_studio_sdk/projects/exports/__init__.py +2 -0
- label_studio_sdk/projects/exports/client.py +930 -0
- label_studio_sdk/projects/types/__init__.py +7 -0
- label_studio_sdk/projects/types/projects_create_response.py +96 -0
- label_studio_sdk/projects/types/projects_import_tasks_response.py +71 -0
- label_studio_sdk/projects/types/projects_list_response.py +33 -0
- label_studio_sdk/py.typed +0 -0
- label_studio_sdk/tasks/__init__.py +5 -0
- label_studio_sdk/tasks/client.py +811 -0
- label_studio_sdk/tasks/types/__init__.py +6 -0
- label_studio_sdk/tasks/types/tasks_list_request_fields.py +5 -0
- label_studio_sdk/tasks/types/tasks_list_response.py +48 -0
- label_studio_sdk/types/__init__.py +115 -0
- label_studio_sdk/types/annotation.py +116 -0
- label_studio_sdk/types/annotation_filter_options.py +42 -0
- label_studio_sdk/types/annotation_last_action.py +19 -0
- label_studio_sdk/types/azure_blob_export_storage.py +112 -0
- label_studio_sdk/types/azure_blob_export_storage_status.py +7 -0
- label_studio_sdk/types/azure_blob_import_storage.py +113 -0
- label_studio_sdk/types/azure_blob_import_storage_status.py +7 -0
- label_studio_sdk/types/base_task.py +113 -0
- label_studio_sdk/types/base_user.py +42 -0
- label_studio_sdk/types/converted_format.py +36 -0
- label_studio_sdk/types/converted_format_status.py +5 -0
- label_studio_sdk/types/export.py +48 -0
- label_studio_sdk/types/export_convert.py +32 -0
- label_studio_sdk/types/export_create.py +54 -0
- label_studio_sdk/types/export_create_status.py +5 -0
- label_studio_sdk/types/export_status.py +5 -0
- label_studio_sdk/types/file_upload.py +30 -0
- label_studio_sdk/types/filter.py +53 -0
- label_studio_sdk/types/filter_group.py +35 -0
- label_studio_sdk/types/gcs_export_storage.py +112 -0
- label_studio_sdk/types/gcs_export_storage_status.py +7 -0
- label_studio_sdk/types/gcs_import_storage.py +113 -0
- label_studio_sdk/types/gcs_import_storage_status.py +7 -0
- label_studio_sdk/types/local_files_export_storage.py +97 -0
- label_studio_sdk/types/local_files_export_storage_status.py +7 -0
- label_studio_sdk/types/local_files_import_storage.py +92 -0
- label_studio_sdk/types/local_files_import_storage_status.py +7 -0
- label_studio_sdk/types/ml_backend.py +89 -0
- label_studio_sdk/types/ml_backend_auth_method.py +5 -0
- label_studio_sdk/types/ml_backend_state.py +5 -0
- label_studio_sdk/types/prediction.py +78 -0
- label_studio_sdk/types/project.py +198 -0
- label_studio_sdk/types/project_import.py +63 -0
- label_studio_sdk/types/project_import_status.py +5 -0
- label_studio_sdk/types/project_label_config.py +32 -0
- label_studio_sdk/types/project_sampling.py +7 -0
- label_studio_sdk/types/project_skip_queue.py +5 -0
- label_studio_sdk/types/redis_export_storage.py +117 -0
- label_studio_sdk/types/redis_export_storage_status.py +7 -0
- label_studio_sdk/types/redis_import_storage.py +112 -0
- label_studio_sdk/types/redis_import_storage_status.py +7 -0
- label_studio_sdk/types/s3export_storage.py +134 -0
- label_studio_sdk/types/s3export_storage_status.py +7 -0
- label_studio_sdk/types/s3import_storage.py +140 -0
- label_studio_sdk/types/s3import_storage_status.py +7 -0
- label_studio_sdk/types/serialization_option.py +36 -0
- label_studio_sdk/types/serialization_options.py +45 -0
- label_studio_sdk/types/task.py +157 -0
- label_studio_sdk/types/task_filter_options.py +49 -0
- label_studio_sdk/types/user_simple.py +37 -0
- label_studio_sdk/types/view.py +55 -0
- label_studio_sdk/types/webhook.py +67 -0
- label_studio_sdk/types/webhook_actions_item.py +21 -0
- label_studio_sdk/types/webhook_serializer_for_update.py +67 -0
- label_studio_sdk/types/webhook_serializer_for_update_actions_item.py +21 -0
- label_studio_sdk/users/__init__.py +5 -0
- label_studio_sdk/users/client.py +830 -0
- label_studio_sdk/users/types/__init__.py +6 -0
- label_studio_sdk/users/types/users_get_token_response.py +36 -0
- label_studio_sdk/users/types/users_reset_token_response.py +36 -0
- label_studio_sdk/version.py +4 -0
- label_studio_sdk/views/__init__.py +31 -0
- label_studio_sdk/views/client.py +564 -0
- label_studio_sdk/views/types/__init__.py +29 -0
- label_studio_sdk/views/types/views_create_request_data.py +43 -0
- label_studio_sdk/views/types/views_create_request_data_filters.py +43 -0
- label_studio_sdk/views/types/views_create_request_data_filters_conjunction.py +5 -0
- label_studio_sdk/views/types/views_create_request_data_filters_items_item.py +47 -0
- label_studio_sdk/views/types/views_create_request_data_ordering_item.py +38 -0
- label_studio_sdk/views/types/views_create_request_data_ordering_item_direction.py +5 -0
- label_studio_sdk/views/types/views_update_request_data.py +43 -0
- label_studio_sdk/views/types/views_update_request_data_filters.py +43 -0
- label_studio_sdk/views/types/views_update_request_data_filters_conjunction.py +5 -0
- label_studio_sdk/views/types/views_update_request_data_filters_items_item.py +47 -0
- label_studio_sdk/views/types/views_update_request_data_ordering_item.py +38 -0
- label_studio_sdk/views/types/views_update_request_data_ordering_item_direction.py +5 -0
- label_studio_sdk/webhooks/__init__.py +5 -0
- label_studio_sdk/webhooks/client.py +636 -0
- label_studio_sdk/webhooks/types/__init__.py +5 -0
- label_studio_sdk/webhooks/types/webhooks_update_request_actions_item.py +21 -0
- label_studio_sdk-1.0.0.dist-info/METADATA +307 -0
- label_studio_sdk-1.0.0.dist-info/RECORD +239 -0
- {label_studio_sdk-0.0.34.dist-info → label_studio_sdk-1.0.0.dist-info}/WHEEL +1 -2
- label_studio_sdk-0.0.34.dist-info/LICENSE +0 -201
- label_studio_sdk-0.0.34.dist-info/METADATA +0 -24
- label_studio_sdk-0.0.34.dist-info/RECORD +0 -37
- label_studio_sdk-0.0.34.dist-info/top_level.txt +0 -2
- tests/test_client.py +0 -37
- tests/test_export.py +0 -105
- tests/test_interface/__init__.py +0 -1
- tests/test_interface/configs.py +0 -137
- tests/test_interface/mockups.py +0 -22
- tests/test_interface/test_compat.py +0 -64
- tests/test_interface/test_control_tags.py +0 -55
- tests/test_interface/test_data_generation.py +0 -45
- tests/test_interface/test_lpi.py +0 -15
- tests/test_interface/test_main.py +0 -196
- tests/test_interface/test_object_tags.py +0 -36
- tests/test_interface/test_region.py +0 -36
- tests/test_interface/test_validate_summary.py +0 -35
- tests/test_interface/test_validation.py +0 -59
- {tests → label_studio_sdk/_extensions}/__init__.py +0 -0
- /label_studio_sdk/{exceptions.py → _legacy/exceptions.py} +0 -0
- /label_studio_sdk/{label_interface → _legacy/label_interface}/__init__.py +0 -0
- /label_studio_sdk/{label_interface → _legacy/label_interface}/base.py +0 -0
- /label_studio_sdk/{label_interface → _legacy/label_interface}/control_tags.py +0 -0
- /label_studio_sdk/{label_interface → _legacy/label_interface}/label_tags.py +0 -0
- /label_studio_sdk/{label_interface → _legacy/label_interface}/object_tags.py +0 -0
- /label_studio_sdk/{label_interface → _legacy/label_interface}/region.py +0 -0
- /label_studio_sdk/{objects.py → _legacy/objects.py} +0 -0
- /label_studio_sdk/{schema → _legacy/schema}/label_config_schema.json +0 -0
- /label_studio_sdk/{users.py → _legacy/users.py} +0 -0
- /label_studio_sdk/{utils.py → _legacy/utils.py} +0 -0
- /label_studio_sdk/{workspaces.py → _legacy/workspaces.py} +0 -0
|
@@ -0,0 +1,1175 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import logging
|
|
3
|
+
import math
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
import xml.dom
|
|
7
|
+
import xml.dom.minidom
|
|
8
|
+
from collections import defaultdict
|
|
9
|
+
from copy import deepcopy
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from enum import Enum
|
|
12
|
+
from glob import glob
|
|
13
|
+
from operator import itemgetter
|
|
14
|
+
from shutil import copy2
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
17
|
+
import ijson
|
|
18
|
+
import ujson as json
|
|
19
|
+
from PIL import Image
|
|
20
|
+
from label_studio_sdk.converter import brush
|
|
21
|
+
from label_studio_sdk.converter.audio import convert_to_asr_json_manifest
|
|
22
|
+
from label_studio_sdk.converter.exports import csv2
|
|
23
|
+
from label_studio_sdk.converter.utils import (
|
|
24
|
+
parse_config,
|
|
25
|
+
create_tokens_and_tags,
|
|
26
|
+
download,
|
|
27
|
+
get_image_size_and_channels,
|
|
28
|
+
ensure_dir,
|
|
29
|
+
get_polygon_area,
|
|
30
|
+
get_polygon_bounding_box,
|
|
31
|
+
get_annotator,
|
|
32
|
+
get_json_root_type,
|
|
33
|
+
prettify_result,
|
|
34
|
+
convert_annotation_to_yolo,
|
|
35
|
+
convert_annotation_to_yolo_obb,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class FormatNotSupportedError(NotImplementedError):
    """Raised when a requested export format cannot be produced for the current label config."""
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class Format(Enum):
    """Export formats understood by the converter.

    The integer values are stable identifiers; ``str(member)`` yields the
    member name, which is what ``from_string`` accepts back.
    """

    JSON = 1
    JSON_MIN = 2
    CSV = 3
    TSV = 4
    CONLL2003 = 5
    COCO = 6
    VOC = 7
    BRUSH_TO_NUMPY = 8
    BRUSH_TO_PNG = 9
    ASR_MANIFEST = 10
    YOLO = 11
    YOLO_OBB = 12
    CSV_OLD = 13

    def __str__(self):
        return self.name

    @classmethod
    def from_string(cls, s):
        """Return the Format member whose name is *s*.

        :param s: format name, e.g. ``"JSON"`` or ``"YOLO_OBB"``
        :raises ValueError: when *s* is not a known format name
            (fix: the original raised a bare ``ValueError()`` with no
            message, hiding which name was rejected)
        """
        try:
            return Format[s]
        except KeyError:
            raise ValueError(f"'{s}' is not a supported export format") from None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class Converter(object):
|
|
72
|
+
_FORMAT_INFO = {
|
|
73
|
+
Format.JSON: {
|
|
74
|
+
"title": "JSON",
|
|
75
|
+
"description": "List of items in raw JSON format stored in one JSON file. Use to export both the data "
|
|
76
|
+
"and the annotations for a dataset. It's Label Studio Common Format",
|
|
77
|
+
"link": "https://labelstud.io/guide/export.html#JSON",
|
|
78
|
+
},
|
|
79
|
+
Format.JSON_MIN: {
|
|
80
|
+
"title": "JSON-MIN",
|
|
81
|
+
"description": 'List of items where only "from_name", "to_name" values from the raw JSON format are '
|
|
82
|
+
"exported. Use to export only the annotations for a dataset.",
|
|
83
|
+
"link": "https://labelstud.io/guide/export.html#JSON-MIN",
|
|
84
|
+
},
|
|
85
|
+
Format.CSV: {
|
|
86
|
+
"title": "CSV",
|
|
87
|
+
"description": "Results are stored as comma-separated values with the column names specified by the "
|
|
88
|
+
'values of the "from_name" and "to_name" fields.',
|
|
89
|
+
"link": "https://labelstud.io/guide/export.html#CSV",
|
|
90
|
+
},
|
|
91
|
+
Format.TSV: {
|
|
92
|
+
"title": "TSV",
|
|
93
|
+
"description": "Results are stored in tab-separated tabular file with column names specified by "
|
|
94
|
+
'"from_name" "to_name" values',
|
|
95
|
+
"link": "https://labelstud.io/guide/export.html#TSV",
|
|
96
|
+
},
|
|
97
|
+
Format.CONLL2003: {
|
|
98
|
+
"title": "CONLL2003",
|
|
99
|
+
"description": "Popular format used for the CoNLL-2003 named entity recognition challenge.",
|
|
100
|
+
"link": "https://labelstud.io/guide/export.html#CONLL2003",
|
|
101
|
+
"tags": ["sequence labeling", "text tagging", "named entity recognition"],
|
|
102
|
+
},
|
|
103
|
+
Format.COCO: {
|
|
104
|
+
"title": "COCO",
|
|
105
|
+
"description": "Popular machine learning format used by the COCO dataset for object detection and image "
|
|
106
|
+
"segmentation tasks with polygons and rectangles.",
|
|
107
|
+
"link": "https://labelstud.io/guide/export.html#COCO",
|
|
108
|
+
"tags": ["image segmentation", "object detection"],
|
|
109
|
+
},
|
|
110
|
+
Format.VOC: {
|
|
111
|
+
"title": "Pascal VOC XML",
|
|
112
|
+
"description": "Popular XML format used for object detection and polygon image segmentation tasks.",
|
|
113
|
+
"link": "https://labelstud.io/guide/export.html#Pascal-VOC-XML",
|
|
114
|
+
"tags": ["image segmentation", "object detection"],
|
|
115
|
+
},
|
|
116
|
+
Format.YOLO: {
|
|
117
|
+
"title": "YOLO",
|
|
118
|
+
"description": "Popular TXT format is created for each image file. Each txt file contains annotations for "
|
|
119
|
+
"the corresponding image file, that is object class, object coordinates, height & width.",
|
|
120
|
+
"link": "https://labelstud.io/guide/export.html#YOLO",
|
|
121
|
+
"tags": ["image segmentation", "object detection"],
|
|
122
|
+
},
|
|
123
|
+
Format.YOLO_OBB: {
|
|
124
|
+
"title": "YOLOv8 OBB",
|
|
125
|
+
"description": "Popular TXT format is created for each image file. Each txt file contains annotations for "
|
|
126
|
+
"the corresponding image file. The YOLO OBB format designates bounding boxes by their four corner points "
|
|
127
|
+
"with coordinates normalized between 0 and 1, so it is possible to export rotated objects.",
|
|
128
|
+
"link": "https://labelstud.io/guide/export.html#YOLO",
|
|
129
|
+
"tags": ["image segmentation", "object detection"],
|
|
130
|
+
},
|
|
131
|
+
Format.BRUSH_TO_NUMPY: {
|
|
132
|
+
"title": "Brush labels to NumPy",
|
|
133
|
+
"description": "Export your brush labels as NumPy 2d arrays. Each label outputs as one image.",
|
|
134
|
+
"link": "https://labelstud.io/guide/export.html#Brush-labels-to-NumPy-amp-PNG",
|
|
135
|
+
"tags": ["image segmentation"],
|
|
136
|
+
},
|
|
137
|
+
Format.BRUSH_TO_PNG: {
|
|
138
|
+
"title": "Brush labels to PNG",
|
|
139
|
+
"description": "Export your brush labels as PNG images. Each label outputs as one image.",
|
|
140
|
+
"link": "https://labelstud.io/guide/export.html#Brush-labels-to-NumPy-amp-PNG",
|
|
141
|
+
"tags": ["image segmentation"],
|
|
142
|
+
},
|
|
143
|
+
Format.ASR_MANIFEST: {
|
|
144
|
+
"title": "ASR Manifest",
|
|
145
|
+
"description": "Export audio transcription labels for automatic speech recognition as the JSON manifest "
|
|
146
|
+
"format expected by NVIDIA NeMo models.",
|
|
147
|
+
"link": "https://labelstud.io/guide/export.html#ASR-MANIFEST",
|
|
148
|
+
"tags": ["speech recognition"],
|
|
149
|
+
},
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
def all_formats(self):
    """Return the mapping of every known Format to its descriptive metadata."""
    return self._FORMAT_INFO
|
|
154
|
+
|
|
155
|
+
def __init__(
    self,
    config,
    project_dir,
    output_tags=None,
    upload_dir=None,
    download_resources=True,
):
    """Initialize Label Studio Converter for Exports.

    :param config: string or dict: XML string with Label studio labeling config or path to this file or parsed_config
    :param project_dir: upload root directory for images, audio and other labeling files
    :param output_tags: it will be calculated automatically, contains label names
    :param upload_dir: upload root directory with files that were imported using LS GUI
    :param download_resources: if True, LS will try to download images, audio, etc and include them to export
    """
    self.project_dir = project_dir
    self.upload_dir = upload_dir
    self.download_resources = download_resources
    self._schema = None

    if isinstance(config, dict):
        # Already-parsed schema: use it as-is.
        self._schema = config
    elif isinstance(config, str):
        # Either a path to an XML config file or the XML string itself.
        if os.path.isfile(config):
            with open(config) as fin:
                config_string = fin.read()
        else:
            config_string = config
        self._schema = parse_config(config_string)

    if self._schema is None:
        logger.warning(
            "Label config or schema for Converter is not provided, "
            "it might be critical for some export formats, now set schema to empty dict"
        )
        self._schema = {}

    self._data_keys, self._output_tags = self._get_data_keys_and_output_tags(
        output_tags
    )
    self._supported_formats = self._get_supported_formats()
|
|
197
|
+
|
|
198
|
+
def convert(self, input_data, output_data, format, is_dir=True, **kwargs):
    """Dispatch conversion of *input_data* to the requested export *format*.

    :param input_data: directory of task JSON files (``is_dir=True``) or a single JSON file
    :param output_data: output file or directory for the converted export
    :param format: a :class:`Format` member or its string name
    :param is_dir: whether *input_data* is a directory of JSON files
    :param kwargs: format-specific options: ``csv_header``, ``csv_separator``,
        ``image_dir``, ``label_dir``
    """
    if isinstance(format, str):
        format = Format.from_string(format)

    def iter_tasks():
        # Task source used by the item-based exporters below.
        if is_dir:
            return self.iter_from_dir(input_data)
        return self.iter_from_json_file(input_data)

    if format == Format.JSON:
        self.convert_to_json(input_data, output_data, is_dir=is_dir)
    elif format == Format.JSON_MIN:
        self.convert_to_json_min(input_data, output_data, is_dir=is_dir)
    elif format in (Format.CSV, Format.TSV):
        # CSV and TSV share one implementation; only the default separator differs.
        default_sep = "," if format == Format.CSV else "\t"
        self.convert_to_csv(
            input_data,
            output_data,
            sep=kwargs.get("csv_separator", default_sep),
            header=kwargs.get("csv_header", True),
            is_dir=is_dir,
        )
    elif format == Format.CONLL2003:
        self.convert_to_conll2003(input_data, output_data, is_dir=is_dir)
    elif format == Format.COCO:
        self.convert_to_coco(
            input_data,
            output_data,
            output_image_dir=kwargs.get("image_dir"),
            is_dir=is_dir,
        )
    elif format in (Format.YOLO, Format.YOLO_OBB):
        self.convert_to_yolo(
            input_data,
            output_data,
            output_image_dir=kwargs.get("image_dir"),
            output_label_dir=kwargs.get("label_dir"),
            is_dir=is_dir,
            is_obb=format == Format.YOLO_OBB,
        )
    elif format == Format.VOC:
        self.convert_to_voc(
            input_data,
            output_data,
            output_image_dir=kwargs.get("image_dir"),
            is_dir=is_dir,
        )
    elif format == Format.BRUSH_TO_NUMPY:
        brush.convert_task_dir(iter_tasks(), output_data, out_format="numpy")
    elif format == Format.BRUSH_TO_PNG:
        brush.convert_task_dir(iter_tasks(), output_data, out_format="png")
    elif format == Format.ASR_MANIFEST:
        convert_to_asr_json_manifest(
            iter_tasks(),
            output_data,
            data_key=self._data_keys[0],
            project_dir=self.project_dir,
            upload_dir=self.upload_dir,
            download_resources=self.download_resources,
        )
|
|
269
|
+
|
|
270
|
+
def _get_data_keys_and_output_tags(self, output_tags=None):
|
|
271
|
+
data_keys = set()
|
|
272
|
+
output_tag_names = []
|
|
273
|
+
if output_tags is not None:
|
|
274
|
+
for tag in output_tags:
|
|
275
|
+
if tag not in self._schema:
|
|
276
|
+
logger.debug(
|
|
277
|
+
'Specified tag "{tag}" not found in config schema: '
|
|
278
|
+
"available options are {schema_keys}".format(
|
|
279
|
+
tag=tag, schema_keys=str(list(self._schema.keys()))
|
|
280
|
+
)
|
|
281
|
+
)
|
|
282
|
+
for name, info in self._schema.items():
|
|
283
|
+
if output_tags is not None and name not in output_tags:
|
|
284
|
+
continue
|
|
285
|
+
data_keys |= set(map(itemgetter("value"), info["inputs"]))
|
|
286
|
+
output_tag_names.append(name)
|
|
287
|
+
|
|
288
|
+
return list(data_keys), output_tag_names
|
|
289
|
+
|
|
290
|
+
def _get_supported_formats(self):
    """Derive which export formats the current label config supports.

    Multi-input configs only support the generic tabular/JSON formats;
    otherwise formats are filtered out based on the input/output tag types
    found in the schema.
    """
    if len(self._data_keys) > 1:
        return [
            Format.JSON.name,
            Format.JSON_MIN.name,
            Format.CSV.name,
            Format.TSV.name,
        ]

    out_types = set()
    in_types = set()
    for info in self._schema.values():
        out_types.add(info["type"])
        for inp in info["inputs"]:
            if inp["type"] == "Text" and inp.get("valueType") == "url":
                logger.error('valueType="url" are not supported for text inputs')
                continue
            in_types.add(inp["type"])

    formats = [f.name for f in Format]

    def drop(*names):
        # Remove the named formats from the candidate list.
        for name in names:
            formats.remove(name)

    has_image = "Image" in in_types
    has_labels = "Labels" in out_types

    if not ("Text" in in_types and has_labels):
        drop(Format.CONLL2003.name)

    voc_ok = has_image and (
        "RectangleLabels" in out_types
        or ("Rectangle" in out_types and has_labels)
    )
    if not voc_ok:
        drop(Format.VOC.name)

    # NOTE: precedence preserved from the original expression — the
    # Rectangle+Labels / PolygonLabels+Labels alternatives do not require Image.
    coco_ok = (
        (has_image and ("RectangleLabels" in out_types or "PolygonLabels" in out_types))
        or ("Rectangle" in out_types and has_labels)
        or ("PolygonLabels" in out_types and has_labels)
    )
    if not coco_ok:
        drop(Format.COCO.name, Format.YOLO.name)

    brush_ok = has_image and (
        "BrushLabels" in out_types
        or "brushlabels" in out_types
        or ("Brush" in out_types and has_labels)
    )
    if not brush_ok:
        drop(Format.BRUSH_TO_NUMPY.name, Format.BRUSH_TO_PNG.name)

    audio_ok = (
        "Audio" in in_types or "AudioPlus" in in_types
    ) and "TextArea" in out_types
    if not audio_ok:
        drop(Format.ASR_MANIFEST.name)

    return formats
|
|
351
|
+
|
|
352
|
+
@property
def supported_formats(self):
    """Export formats available for the current label config (list of names)."""
    return self._supported_formats
|
|
355
|
+
|
|
356
|
+
def iter_from_dir(self, input_dir):
    """Yield non-empty annotation results from every ``*.json`` file in *input_dir*.

    :param input_dir: directory containing task JSON files
    :raises FileNotFoundError: when *input_dir* does not exist
    """
    if not os.path.exists(input_dir):
        raise FileNotFoundError(
            "{input_dir} doesn't exist".format(input_dir=input_dir)
        )
    for json_file in glob(os.path.join(input_dir, "*.json")):
        # Falsy items (None / empty) are filtered out.
        yield from (
            item for item in self.iter_from_json_file(json_file) if item
        )
|
|
365
|
+
|
|
366
|
+
def iter_from_json_file(self, json_file):
    """Extract annotation results from json file

    param json_file: path to task list or dict with annotations
    """
    root_type = get_json_root_type(json_file)

    if root_type == "dict":
        # Single task: load it fully, then emit its annotation results.
        # (Renamed the file handle — the original rebound the json_file parameter.)
        with open(json_file, "r") as fin:
            task = json.load(fin)
        for item in self.annotation_result_from_task(task):
            yield item
    elif root_type == "list":
        # Many tasks: stream the JSON array with ijson to keep memory bounded.
        with open(json_file, "rb") as fin:
            logger.debug(f"ijson backend in use: {ijson.backend}")
            # 'item' tells ijson to iterate the elements of the top-level array.
            tasks = ijson.items(fin, "item", use_float=True)
            for task in tasks:
                for item in self.annotation_result_from_task(task):
                    if item is not None:
                        yield item
|
|
391
|
+
|
|
392
|
+
def _maybe_matching_tag_from_schema(self, from_name: str) -> Optional[str]:
|
|
393
|
+
"""If the from name exactly matches an output tag from the schema, return that tag.
|
|
394
|
+
|
|
395
|
+
Otherwise, certain tags (like those from Repeater) contain
|
|
396
|
+
placeholders like {{idx}}. Such placeholders are mapped to a regex in self._schema.
|
|
397
|
+
For example, if "my_output_tag_{{idx}}" is a tag in the schema,
|
|
398
|
+
then the from_name "my_output_tag_0" should match it, and we should return "my_output_tag_{{idx}}".
|
|
399
|
+
"""
|
|
400
|
+
|
|
401
|
+
for tag_name, tag_info in self._schema.items():
|
|
402
|
+
if tag_name == from_name:
|
|
403
|
+
return tag_name
|
|
404
|
+
|
|
405
|
+
if not tag_info.get("regex"):
|
|
406
|
+
continue
|
|
407
|
+
|
|
408
|
+
tag_name_pattern = tag_name
|
|
409
|
+
for variable, regex in tag_info["regex"].items():
|
|
410
|
+
tag_name_pattern = tag_name_pattern.replace(variable, regex)
|
|
411
|
+
|
|
412
|
+
if re.compile(tag_name_pattern).match(from_name):
|
|
413
|
+
return tag_name
|
|
414
|
+
|
|
415
|
+
return None
|
|
416
|
+
|
|
417
|
+
    def annotation_result_from_task(self, task):
        """Yield one flat result dict (shape of ``Converter.get_data``) per
        non-cancelled annotation of ``task``, newest first.

        The ``return None`` statements below simply terminate the generator
        (a generator's return value is discarded by callers iterating it).
        """
        has_annotations = "completions" in task or "annotations" in task
        if not has_annotations:
            logger.warning(
                'Each task dict item should contain "annotations" or "completions" [deprecated], '
                "where value is list of dicts"
            )
            return None

        # get last not skipped completion and make result from it
        annotations = (
            task["annotations"] if "annotations" in task else task["completions"]
        )

        # return task with empty annotations
        # NOTE: execution continues past this yield; with an empty list the
        # filter below also yields an empty list, so the generator then stops.
        if not annotations:
            data = Converter.get_data(task, {}, {})
            yield data

        # skip cancelled annotations
        # (despite its name, this predicate is True for NOT-cancelled items)
        cancelled = lambda x: not (
            x.get("skipped", False) or x.get("was_cancelled", False)
        )
        annotations = list(filter(cancelled, annotations))
        if not annotations:
            return None

        # sort by creation time
        # newest first; a missing timestamp sorts as 0, i.e. oldest
        annotations = sorted(
            annotations, key=lambda x: x.get("created_at", 0), reverse=True
        )

        for annotation in annotations:
            result = annotation["result"]
            outputs = defaultdict(list)

            # get results only as output
            for r in result:
                # keep only regions whose from_name matches a control tag in
                # the labeling schema (including Repeater {{idx}} patterns)
                if "from_name" in r and (
                    tag_name := self._maybe_matching_tag_from_schema(r["from_name"])
                ):
                    v = deepcopy(r["value"])
                    # the tag type comes from the matched schema entry, while
                    # results are grouped under the raw from_name below
                    v["type"] = self._schema[tag_name]["type"]
                    if "original_width" in r:
                        v["original_width"] = r["original_width"]
                    if "original_height" in r:
                        v["original_height"] = r["original_height"]
                    outputs[r["from_name"]].append(v)

            data = Converter.get_data(task, outputs, annotation)
            if "agreement" in task:
                data["agreement"] = task["agreement"]
            yield data
@staticmethod
|
|
472
|
+
def get_data(task, outputs, annotation):
|
|
473
|
+
return {
|
|
474
|
+
"id": task["id"],
|
|
475
|
+
"input": task["data"],
|
|
476
|
+
"output": outputs or {},
|
|
477
|
+
"completed_by": annotation.get("completed_by", {}),
|
|
478
|
+
"annotation_id": annotation.get("id"),
|
|
479
|
+
"created_at": annotation.get("created_at"),
|
|
480
|
+
"updated_at": annotation.get("updated_at"),
|
|
481
|
+
"lead_time": annotation.get("lead_time"),
|
|
482
|
+
"history": annotation.get("history"),
|
|
483
|
+
"was_cancelled": annotation.get("was_cancelled"),
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
def _check_format(self, fmt):
|
|
487
|
+
pass
|
|
488
|
+
|
|
489
|
+
def convert_to_json(self, input_data, output_dir, is_dir=True):
|
|
490
|
+
self._check_format(Format.JSON)
|
|
491
|
+
ensure_dir(output_dir)
|
|
492
|
+
output_file = os.path.join(output_dir, "result.json")
|
|
493
|
+
records = []
|
|
494
|
+
if is_dir:
|
|
495
|
+
for json_file in glob(os.path.join(input_data, "*.json")):
|
|
496
|
+
with io.open(json_file, encoding="utf8") as f:
|
|
497
|
+
records.append(json.load(f))
|
|
498
|
+
with io.open(output_file, mode="w", encoding="utf8") as fout:
|
|
499
|
+
json.dump(records, fout, indent=2, ensure_ascii=False)
|
|
500
|
+
else:
|
|
501
|
+
copy2(input_data, output_file)
|
|
502
|
+
|
|
503
|
+
def convert_to_json_min(self, input_data, output_dir, is_dir=True):
|
|
504
|
+
self._check_format(Format.JSON_MIN)
|
|
505
|
+
ensure_dir(output_dir)
|
|
506
|
+
output_file = os.path.join(output_dir, "result.json")
|
|
507
|
+
records = []
|
|
508
|
+
item_iterator = self.iter_from_dir if is_dir else self.iter_from_json_file
|
|
509
|
+
|
|
510
|
+
for item in item_iterator(input_data):
|
|
511
|
+
record = deepcopy(item["input"])
|
|
512
|
+
if item.get("id") is not None:
|
|
513
|
+
record["id"] = item["id"]
|
|
514
|
+
for name, value in item["output"].items():
|
|
515
|
+
record[name] = prettify_result(value)
|
|
516
|
+
record["annotator"] = get_annotator(item, int_id=True)
|
|
517
|
+
record["annotation_id"] = item["annotation_id"]
|
|
518
|
+
record["created_at"] = item["created_at"]
|
|
519
|
+
record["updated_at"] = item["updated_at"]
|
|
520
|
+
record["lead_time"] = item["lead_time"]
|
|
521
|
+
if "agreement" in item:
|
|
522
|
+
record["agreement"] = item["agreement"]
|
|
523
|
+
records.append(record)
|
|
524
|
+
|
|
525
|
+
with io.open(output_file, mode="w", encoding="utf8") as fout:
|
|
526
|
+
json.dump(records, fout, indent=2, ensure_ascii=False)
|
|
527
|
+
|
|
528
|
+
def convert_to_csv(self, input_data, output_dir, is_dir=True, **kwargs):
|
|
529
|
+
self._check_format(Format.CSV)
|
|
530
|
+
item_iterator = self.iter_from_dir if is_dir else self.iter_from_json_file
|
|
531
|
+
return csv2.convert(item_iterator, input_data, output_dir, **kwargs)
|
|
532
|
+
|
|
533
|
+
def convert_to_conll2003(self, input_data, output_dir, is_dir=True):
|
|
534
|
+
self._check_format(Format.CONLL2003)
|
|
535
|
+
ensure_dir(output_dir)
|
|
536
|
+
output_file = os.path.join(output_dir, "result.conll")
|
|
537
|
+
data_key = self._data_keys[0]
|
|
538
|
+
with io.open(output_file, "w", encoding="utf8") as fout:
|
|
539
|
+
fout.write("-DOCSTART- -X- O\n")
|
|
540
|
+
item_iterator = self.iter_from_dir if is_dir else self.iter_from_json_file
|
|
541
|
+
|
|
542
|
+
for item in item_iterator(input_data):
|
|
543
|
+
filtered_output = list(
|
|
544
|
+
filter(
|
|
545
|
+
lambda x: x[0]["type"].lower() == "labels",
|
|
546
|
+
item["output"].values(),
|
|
547
|
+
)
|
|
548
|
+
)
|
|
549
|
+
tokens, tags = create_tokens_and_tags(
|
|
550
|
+
text=item["input"][data_key],
|
|
551
|
+
spans=next(iter(filtered_output), None),
|
|
552
|
+
)
|
|
553
|
+
for token, tag in zip(tokens, tags):
|
|
554
|
+
fout.write("{token} -X- _ {tag}\n".format(token=token, tag=tag))
|
|
555
|
+
fout.write("\n")
|
|
556
|
+
|
|
557
|
+
def convert_to_coco(
|
|
558
|
+
self, input_data, output_dir, output_image_dir=None, is_dir=True
|
|
559
|
+
):
|
|
560
|
+
def add_image(images, width, height, image_id, image_path):
|
|
561
|
+
images.append(
|
|
562
|
+
{
|
|
563
|
+
"width": width,
|
|
564
|
+
"height": height,
|
|
565
|
+
"id": image_id,
|
|
566
|
+
"file_name": image_path,
|
|
567
|
+
}
|
|
568
|
+
)
|
|
569
|
+
return images
|
|
570
|
+
|
|
571
|
+
self._check_format(Format.COCO)
|
|
572
|
+
ensure_dir(output_dir)
|
|
573
|
+
output_file = os.path.join(output_dir, "result.json")
|
|
574
|
+
if output_image_dir is not None:
|
|
575
|
+
ensure_dir(output_image_dir)
|
|
576
|
+
else:
|
|
577
|
+
output_image_dir = os.path.join(output_dir, "images")
|
|
578
|
+
os.makedirs(output_image_dir, exist_ok=True)
|
|
579
|
+
images, categories, annotations = [], [], []
|
|
580
|
+
categories, category_name_to_id = self._get_labels()
|
|
581
|
+
data_key = self._data_keys[0]
|
|
582
|
+
item_iterator = (
|
|
583
|
+
self.iter_from_dir(input_data)
|
|
584
|
+
if is_dir
|
|
585
|
+
else self.iter_from_json_file(input_data)
|
|
586
|
+
)
|
|
587
|
+
for item_idx, item in enumerate(item_iterator):
|
|
588
|
+
image_path = item["input"][data_key]
|
|
589
|
+
image_id = len(images)
|
|
590
|
+
width = None
|
|
591
|
+
height = None
|
|
592
|
+
# download all images of the dataset, including the ones without annotations
|
|
593
|
+
if not os.path.exists(image_path):
|
|
594
|
+
try:
|
|
595
|
+
image_path = download(
|
|
596
|
+
image_path,
|
|
597
|
+
output_image_dir,
|
|
598
|
+
project_dir=self.project_dir,
|
|
599
|
+
return_relative_path=True,
|
|
600
|
+
upload_dir=self.upload_dir,
|
|
601
|
+
download_resources=self.download_resources,
|
|
602
|
+
)
|
|
603
|
+
except:
|
|
604
|
+
logger.info(
|
|
605
|
+
"Unable to download {image_path}. The image of {item} will be skipped".format(
|
|
606
|
+
image_path=image_path, item=item
|
|
607
|
+
),
|
|
608
|
+
exc_info=True,
|
|
609
|
+
)
|
|
610
|
+
# add image to final images list
|
|
611
|
+
try:
|
|
612
|
+
with Image.open(os.path.join(output_dir, image_path)) as img:
|
|
613
|
+
width, height = img.size
|
|
614
|
+
images = add_image(images, width, height, image_id, image_path)
|
|
615
|
+
except:
|
|
616
|
+
logger.info(
|
|
617
|
+
"Unable to open {image_path}, can't extract width and height for COCO export".format(
|
|
618
|
+
image_path=image_path, item=item
|
|
619
|
+
),
|
|
620
|
+
exc_info=True,
|
|
621
|
+
)
|
|
622
|
+
|
|
623
|
+
# skip tasks without annotations
|
|
624
|
+
if not item["output"]:
|
|
625
|
+
# image wasn't load and there are no labels
|
|
626
|
+
if not width:
|
|
627
|
+
images = add_image(images, width, height, image_id, image_path)
|
|
628
|
+
|
|
629
|
+
logger.warning("No annotations found for item #" + str(item_idx))
|
|
630
|
+
continue
|
|
631
|
+
|
|
632
|
+
# concatenate results over all tag names
|
|
633
|
+
labels = []
|
|
634
|
+
for key in item["output"]:
|
|
635
|
+
labels += item["output"][key]
|
|
636
|
+
|
|
637
|
+
if len(labels) == 0:
|
|
638
|
+
logger.debug(f'Empty bboxes for {item["output"]}')
|
|
639
|
+
continue
|
|
640
|
+
|
|
641
|
+
for label in labels:
|
|
642
|
+
category_name = None
|
|
643
|
+
for key in ["rectanglelabels", "polygonlabels", "labels"]:
|
|
644
|
+
if key in label and len(label[key]) > 0:
|
|
645
|
+
category_name = label[key][0]
|
|
646
|
+
break
|
|
647
|
+
|
|
648
|
+
if category_name is None:
|
|
649
|
+
logger.warning("Unknown label type or labels are empty")
|
|
650
|
+
continue
|
|
651
|
+
|
|
652
|
+
if not height or not width:
|
|
653
|
+
if "original_width" not in label or "original_height" not in label:
|
|
654
|
+
logger.debug(
|
|
655
|
+
f"original_width or original_height not found in {image_path}"
|
|
656
|
+
)
|
|
657
|
+
continue
|
|
658
|
+
|
|
659
|
+
width, height = label["original_width"], label["original_height"]
|
|
660
|
+
images = add_image(images, width, height, image_id, image_path)
|
|
661
|
+
|
|
662
|
+
if category_name not in category_name_to_id:
|
|
663
|
+
category_id = len(categories)
|
|
664
|
+
category_name_to_id[category_name] = category_id
|
|
665
|
+
categories.append({"id": category_id, "name": category_name})
|
|
666
|
+
category_id = category_name_to_id[category_name]
|
|
667
|
+
|
|
668
|
+
annotation_id = len(annotations)
|
|
669
|
+
|
|
670
|
+
if "rectanglelabels" in label or "labels" in label:
|
|
671
|
+
xywh = self.rotated_rectangle(label)
|
|
672
|
+
if xywh is None:
|
|
673
|
+
continue
|
|
674
|
+
|
|
675
|
+
x, y, w, h = xywh
|
|
676
|
+
x = x * label["original_width"] / 100
|
|
677
|
+
y = y * label["original_height"] / 100
|
|
678
|
+
w = w * label["original_width"] / 100
|
|
679
|
+
h = h * label["original_height"] / 100
|
|
680
|
+
|
|
681
|
+
annotations.append(
|
|
682
|
+
{
|
|
683
|
+
"id": annotation_id,
|
|
684
|
+
"image_id": image_id,
|
|
685
|
+
"category_id": category_id,
|
|
686
|
+
"segmentation": [],
|
|
687
|
+
"bbox": [x, y, w, h],
|
|
688
|
+
"ignore": 0,
|
|
689
|
+
"iscrowd": 0,
|
|
690
|
+
"area": w * h,
|
|
691
|
+
}
|
|
692
|
+
)
|
|
693
|
+
elif "polygonlabels" in label:
|
|
694
|
+
points_abs = [
|
|
695
|
+
(x / 100 * width, y / 100 * height) for x, y in label["points"]
|
|
696
|
+
]
|
|
697
|
+
x, y = zip(*points_abs)
|
|
698
|
+
|
|
699
|
+
annotations.append(
|
|
700
|
+
{
|
|
701
|
+
"id": annotation_id,
|
|
702
|
+
"image_id": image_id,
|
|
703
|
+
"category_id": category_id,
|
|
704
|
+
"segmentation": [
|
|
705
|
+
[coord for point in points_abs for coord in point]
|
|
706
|
+
],
|
|
707
|
+
"bbox": get_polygon_bounding_box(x, y),
|
|
708
|
+
"ignore": 0,
|
|
709
|
+
"iscrowd": 0,
|
|
710
|
+
"area": get_polygon_area(x, y),
|
|
711
|
+
}
|
|
712
|
+
)
|
|
713
|
+
else:
|
|
714
|
+
raise ValueError("Unknown label type")
|
|
715
|
+
|
|
716
|
+
if os.getenv("LABEL_STUDIO_FORCE_ANNOTATOR_EXPORT"):
|
|
717
|
+
annotations[-1].update({"annotator": get_annotator(item)})
|
|
718
|
+
|
|
719
|
+
with io.open(output_file, mode="w", encoding="utf8") as fout:
|
|
720
|
+
json.dump(
|
|
721
|
+
{
|
|
722
|
+
"images": images,
|
|
723
|
+
"categories": categories,
|
|
724
|
+
"annotations": annotations,
|
|
725
|
+
"info": {
|
|
726
|
+
"year": datetime.now().year,
|
|
727
|
+
"version": "1.0",
|
|
728
|
+
"description": "",
|
|
729
|
+
"contributor": "Label Studio",
|
|
730
|
+
"url": "",
|
|
731
|
+
"date_created": str(datetime.now()),
|
|
732
|
+
},
|
|
733
|
+
},
|
|
734
|
+
fout,
|
|
735
|
+
indent=2,
|
|
736
|
+
)
|
|
737
|
+
|
|
738
|
+
    def convert_to_yolo(
        self,
        input_data,
        output_dir,
        output_image_dir=None,
        output_label_dir=None,
        is_dir=True,
        split_labelers=False,
        is_obb=False,
    ):
        """Convert data in a specific format to the YOLO format.

        Parameters
        ----------
        input_data : str
            The input data, either a directory or a JSON file.
        output_dir : str
            The directory to store the output files in.
        output_image_dir : str, optional
            The directory to store the image files in. If not provided, it will default to a subdirectory called 'images' in output_dir.
        output_label_dir : str, optional
            The directory to store the label files in. If not provided, it will default to a subdirectory called 'labels' in output_dir.
        is_dir : bool, optional
            A boolean indicating whether `input_data` is a directory (True) or a JSON file (False).
        split_labelers : bool, optional
            A boolean indicating whether to create a dedicated subfolder for each labeler in the output label directory.
        is_obb : bool, optional
            A boolean indicating whether to convert to Oriented Bounding Box (OBB) format.
        """
        # OBB and plain YOLO are validated as distinct export formats
        if is_obb:
            self._check_format(Format.YOLO_OBB)
        else:
            self._check_format(Format.YOLO)
        ensure_dir(output_dir)
        notes_file = os.path.join(output_dir, "notes.json")
        class_file = os.path.join(output_dir, "classes.txt")
        if output_image_dir is not None:
            ensure_dir(output_image_dir)
        else:
            output_image_dir = os.path.join(output_dir, "images")
            os.makedirs(output_image_dir, exist_ok=True)
        if output_label_dir is not None:
            ensure_dir(output_label_dir)
        else:
            output_label_dir = os.path.join(output_dir, "labels")
            os.makedirs(output_label_dir, exist_ok=True)
        categories, category_name_to_id = self._get_labels()
        data_key = self._data_keys[0]
        item_iterator = (
            self.iter_from_dir(input_data)
            if is_dir
            else self.iter_from_json_file(input_data)
        )
        for item_idx, item in enumerate(item_iterator):
            # get image path(s) and label file path
            image_paths = item["input"][data_key]
            image_paths = [image_paths] if isinstance(image_paths, str) else image_paths
            # download image(s)
            image_path = None
            # TODO: for multi-page annotation, this code won't produce correct relationships between page and annotated shapes
            # fixing the issue in RND-84
            # NOTE: after this loop, image_path holds the LAST value iterated,
            # i.e. the first entry of image_paths (the list is reversed)
            for image_path in reversed(image_paths):
                if not os.path.exists(image_path):
                    try:
                        image_path = download(
                            image_path,
                            output_image_dir,
                            project_dir=self.project_dir,
                            return_relative_path=True,
                            upload_dir=self.upload_dir,
                            download_resources=self.download_resources,
                        )
                    except:
                        logger.info(
                            "Unable to download {image_path}. The item {item} will be skipped".format(
                                image_path=image_path, item=item
                            ),
                            exc_info=True,
                        )
            if not image_path:
                logger.error(f"No image path found for item #{item_idx}")
                continue

            # create dedicated subfolder for each labeler if split_labelers=True
            labeler_subfolder = str(item["completed_by"]) if split_labelers else ""
            os.makedirs(
                os.path.join(output_label_dir, labeler_subfolder), exist_ok=True
            )

            # identify label file path
            filename = os.path.splitext(os.path.basename(image_path))[0]
            filename = filename[
                0 : 255 - 4
            ]  # urls might be too long, use 255 bytes (-4 for .txt) limit for filenames
            label_path = os.path.join(
                output_label_dir, labeler_subfolder, filename + ".txt"
            )

            # Skip tasks without annotations
            # (an empty .txt is still created so YOLO sees a negative sample)
            if not item["output"]:
                logger.warning("No completions found for item #" + str(item_idx))
                if not os.path.exists(label_path):
                    with open(label_path, "x"):
                        pass
                continue

            # concatenate results over all tag names
            labels = []
            for key in item["output"]:
                labels += item["output"][key]

            if len(labels) == 0:
                logger.warning(f'Empty bboxes for {item["output"]}')
                if not os.path.exists(label_path):
                    with open(label_path, "x"):
                        pass
                continue

            annotations = []
            for label in labels:
                category_name = None
                category_names = []  # considering multi-label
                for key in ["rectanglelabels", "polygonlabels", "labels"]:
                    if key in label and len(label[key]) > 0:
                        # change to save multi-label
                        for category_name in label[key]:
                            category_names.append(category_name)

                if len(category_names) == 0:
                    logger.debug(
                        "Unknown label type or labels are empty: " + str(label)
                    )
                    continue

                # one YOLO line per (region, class) pair for multi-label regions
                for category_name in category_names:
                    if category_name not in category_name_to_id:
                        # unseen label: allocate the next sequential class id
                        category_id = len(categories)
                        category_name_to_id[category_name] = category_id
                        categories.append({"id": category_id, "name": category_name})
                    category_id = category_name_to_id[category_name]

                    if (
                        "rectanglelabels" in label
                        or "rectangle" in label
                        or "labels" in label
                    ):
                        # yolo obb
                        if is_obb:
                            obb_annotation = convert_annotation_to_yolo_obb(label)
                            if obb_annotation is None:
                                continue

                            top_left, top_right, bottom_right, bottom_left = (
                                obb_annotation
                            )
                            x1, y1 = top_left
                            x2, y2 = top_right
                            x3, y3 = bottom_right
                            x4, y4 = bottom_left
                            annotations.append(
                                [category_id, x1, y1, x2, y2, x3, y3, x4, y4]
                            )

                        # simple yolo
                        else:
                            annotation = convert_annotation_to_yolo(label)
                            if annotation is None:
                                continue

                            (
                                x,
                                y,
                                w,
                                h,
                            ) = annotation
                            annotations.append([category_id, x, y, w, h])

                    elif "polygonlabels" in label or "polygon" in label:
                        # polygon export: normalized (0..1) flat coordinate list
                        points_abs = [(x / 100, y / 100) for x, y in label["points"]]
                        annotations.append(
                            [category_id]
                            + [coord for point in points_abs for coord in point]
                        )
                    else:
                        raise ValueError(f"Unknown label type {label}")
            # one space-separated line per annotation row
            with open(label_path, "w") as f:
                for annotation in annotations:
                    for idx, l in enumerate(annotation):
                        if idx == len(annotation) - 1:
                            f.write(f"{l}\n")
                        else:
                            f.write(f"{l} ")
        # class index file: line number == class id
        with open(class_file, "w", encoding="utf8") as f:
            for c in categories:
                f.write(c["name"] + "\n")
        with io.open(notes_file, mode="w", encoding="utf8") as fout:
            json.dump(
                {
                    "categories": categories,
                    "info": {
                        "year": datetime.now().year,
                        "version": "1.0",
                        "contributor": "Label Studio",
                    },
                },
                fout,
                indent=2,
            )
@staticmethod
|
|
948
|
+
def rotated_rectangle(label):
|
|
949
|
+
if not (
|
|
950
|
+
"x" in label and "y" in label and "width" in label and "height" in label
|
|
951
|
+
):
|
|
952
|
+
return None
|
|
953
|
+
|
|
954
|
+
label_x, label_y, label_w, label_h, label_r = (
|
|
955
|
+
label["x"],
|
|
956
|
+
label["y"],
|
|
957
|
+
label["width"],
|
|
958
|
+
label["height"],
|
|
959
|
+
label["rotation"] if "rotation" in label else 0.0,
|
|
960
|
+
)
|
|
961
|
+
|
|
962
|
+
if abs(label_r) > 0:
|
|
963
|
+
alpha = math.atan(label_h / label_w)
|
|
964
|
+
beta = math.pi * (
|
|
965
|
+
label_r / 180
|
|
966
|
+
) # Label studio defines the angle towards the vertical axis
|
|
967
|
+
|
|
968
|
+
radius = math.sqrt((label_w / 2) ** 2 + (label_h / 2) ** 2)
|
|
969
|
+
|
|
970
|
+
# Label studio saves the position of top left corner after rotation
|
|
971
|
+
x_0 = (
|
|
972
|
+
label_x
|
|
973
|
+
- radius
|
|
974
|
+
* (math.cos(math.pi - alpha - beta) - math.cos(math.pi - alpha))
|
|
975
|
+
+ label_w / 2
|
|
976
|
+
)
|
|
977
|
+
y_0 = (
|
|
978
|
+
label_y
|
|
979
|
+
+ radius
|
|
980
|
+
* (math.sin(math.pi - alpha - beta) - math.sin(math.pi - alpha))
|
|
981
|
+
+ label_h / 2
|
|
982
|
+
)
|
|
983
|
+
|
|
984
|
+
theta_1 = alpha + beta
|
|
985
|
+
theta_2 = math.pi - alpha + beta
|
|
986
|
+
theta_3 = math.pi + alpha + beta
|
|
987
|
+
theta_4 = 2 * math.pi - alpha + beta
|
|
988
|
+
|
|
989
|
+
x_coord = [
|
|
990
|
+
x_0 + radius * math.cos(theta_1),
|
|
991
|
+
x_0 + radius * math.cos(theta_2),
|
|
992
|
+
x_0 + radius * math.cos(theta_3),
|
|
993
|
+
x_0 + radius * math.cos(theta_4),
|
|
994
|
+
]
|
|
995
|
+
y_coord = [
|
|
996
|
+
y_0 + radius * math.sin(theta_1),
|
|
997
|
+
y_0 + radius * math.sin(theta_2),
|
|
998
|
+
y_0 + radius * math.sin(theta_3),
|
|
999
|
+
y_0 + radius * math.sin(theta_4),
|
|
1000
|
+
]
|
|
1001
|
+
|
|
1002
|
+
label_x = min(x_coord)
|
|
1003
|
+
label_y = min(y_coord)
|
|
1004
|
+
label_w = max(x_coord) - label_x
|
|
1005
|
+
label_h = max(y_coord) - label_y
|
|
1006
|
+
|
|
1007
|
+
return label_x, label_y, label_w, label_h
|
|
1008
|
+
|
|
1009
|
+
def convert_to_voc(
|
|
1010
|
+
self, input_data, output_dir, output_image_dir=None, is_dir=True
|
|
1011
|
+
):
|
|
1012
|
+
ensure_dir(output_dir)
|
|
1013
|
+
if output_image_dir is not None:
|
|
1014
|
+
ensure_dir(output_image_dir)
|
|
1015
|
+
output_image_dir_rel = output_image_dir
|
|
1016
|
+
else:
|
|
1017
|
+
output_image_dir = os.path.join(output_dir, "images")
|
|
1018
|
+
os.makedirs(output_image_dir, exist_ok=True)
|
|
1019
|
+
output_image_dir_rel = "images"
|
|
1020
|
+
|
|
1021
|
+
def create_child_node(doc, tag, attr, parent_node):
|
|
1022
|
+
child_node = doc.createElement(tag)
|
|
1023
|
+
text_node = doc.createTextNode(attr)
|
|
1024
|
+
child_node.appendChild(text_node)
|
|
1025
|
+
parent_node.appendChild(child_node)
|
|
1026
|
+
|
|
1027
|
+
data_key = self._data_keys[0]
|
|
1028
|
+
item_iterator = (
|
|
1029
|
+
self.iter_from_dir(input_data)
|
|
1030
|
+
if is_dir
|
|
1031
|
+
else self.iter_from_json_file(input_data)
|
|
1032
|
+
)
|
|
1033
|
+
for item_idx, item in enumerate(item_iterator):
|
|
1034
|
+
image_path = item["input"][data_key]
|
|
1035
|
+
annotations_dir = os.path.join(output_dir, "Annotations")
|
|
1036
|
+
if not os.path.exists(annotations_dir):
|
|
1037
|
+
os.makedirs(annotations_dir)
|
|
1038
|
+
# Download image
|
|
1039
|
+
channels = 3
|
|
1040
|
+
if not os.path.exists(image_path):
|
|
1041
|
+
try:
|
|
1042
|
+
image_path = download(
|
|
1043
|
+
image_path,
|
|
1044
|
+
output_image_dir,
|
|
1045
|
+
project_dir=self.project_dir,
|
|
1046
|
+
upload_dir=self.upload_dir,
|
|
1047
|
+
return_relative_path=True,
|
|
1048
|
+
download_resources=self.download_resources,
|
|
1049
|
+
)
|
|
1050
|
+
except:
|
|
1051
|
+
logger.info(
|
|
1052
|
+
"Unable to download {image_path}. The item {item} will be skipped".format(
|
|
1053
|
+
image_path=image_path, item=item
|
|
1054
|
+
),
|
|
1055
|
+
exc_info=True,
|
|
1056
|
+
)
|
|
1057
|
+
else:
|
|
1058
|
+
full_image_path = os.path.join(
|
|
1059
|
+
output_image_dir, os.path.basename(image_path)
|
|
1060
|
+
)
|
|
1061
|
+
# retrieve number of channels from downloaded image
|
|
1062
|
+
try:
|
|
1063
|
+
_, _, channels = get_image_size_and_channels(full_image_path)
|
|
1064
|
+
except:
|
|
1065
|
+
logger.warning(f"Can't read channels from image")
|
|
1066
|
+
|
|
1067
|
+
# skip tasks without annotations
|
|
1068
|
+
if not item["output"]:
|
|
1069
|
+
logger.warning("No annotations found for item #" + str(item_idx))
|
|
1070
|
+
continue
|
|
1071
|
+
|
|
1072
|
+
image_name = os.path.basename(image_path)
|
|
1073
|
+
xml_name = os.path.splitext(image_name)[0] + ".xml"
|
|
1074
|
+
|
|
1075
|
+
# concatenate results over all tag names
|
|
1076
|
+
bboxes = []
|
|
1077
|
+
for key in item["output"]:
|
|
1078
|
+
bboxes += item["output"][key]
|
|
1079
|
+
|
|
1080
|
+
if len(bboxes) == 0:
|
|
1081
|
+
logger.debug(f'Empty bboxes for {item["output"]}')
|
|
1082
|
+
continue
|
|
1083
|
+
|
|
1084
|
+
if "original_width" not in bboxes[0] or "original_height" not in bboxes[0]:
|
|
1085
|
+
logger.debug(
|
|
1086
|
+
f"original_width or original_height not found in {image_name}"
|
|
1087
|
+
)
|
|
1088
|
+
continue
|
|
1089
|
+
|
|
1090
|
+
width, height = bboxes[0]["original_width"], bboxes[0]["original_height"]
|
|
1091
|
+
xml_filepath = os.path.join(annotations_dir, xml_name)
|
|
1092
|
+
|
|
1093
|
+
my_dom = xml.dom.getDOMImplementation()
|
|
1094
|
+
doc = my_dom.createDocument(None, "annotation", None)
|
|
1095
|
+
root_node = doc.documentElement
|
|
1096
|
+
create_child_node(doc, "folder", output_image_dir_rel, root_node)
|
|
1097
|
+
create_child_node(doc, "filename", image_name, root_node)
|
|
1098
|
+
|
|
1099
|
+
source_node = doc.createElement("source")
|
|
1100
|
+
create_child_node(doc, "database", "MyDatabase", source_node)
|
|
1101
|
+
create_child_node(doc, "annotation", "COCO2017", source_node)
|
|
1102
|
+
create_child_node(doc, "image", "flickr", source_node)
|
|
1103
|
+
create_child_node(doc, "flickrid", "NULL", source_node)
|
|
1104
|
+
create_child_node(doc, "annotator", get_annotator(item, ""), source_node)
|
|
1105
|
+
root_node.appendChild(source_node)
|
|
1106
|
+
|
|
1107
|
+
owner_node = doc.createElement("owner")
|
|
1108
|
+
create_child_node(doc, "flickrid", "NULL", owner_node)
|
|
1109
|
+
create_child_node(doc, "name", "Label Studio", owner_node)
|
|
1110
|
+
root_node.appendChild(owner_node)
|
|
1111
|
+
size_node = doc.createElement("size")
|
|
1112
|
+
create_child_node(doc, "width", str(width), size_node)
|
|
1113
|
+
create_child_node(doc, "height", str(height), size_node)
|
|
1114
|
+
create_child_node(doc, "depth", str(channels), size_node)
|
|
1115
|
+
root_node.appendChild(size_node)
|
|
1116
|
+
create_child_node(doc, "segmented", "0", root_node)
|
|
1117
|
+
|
|
1118
|
+
for bbox in bboxes:
|
|
1119
|
+
key = (
|
|
1120
|
+
"rectanglelabels"
|
|
1121
|
+
if "rectanglelabels" in bbox
|
|
1122
|
+
else ("labels" if "labels" in bbox else None)
|
|
1123
|
+
)
|
|
1124
|
+
if key is None or len(bbox[key]) == 0:
|
|
1125
|
+
continue
|
|
1126
|
+
|
|
1127
|
+
name = bbox[key][0]
|
|
1128
|
+
x = int(bbox["x"] / 100 * width)
|
|
1129
|
+
y = int(bbox["y"] / 100 * height)
|
|
1130
|
+
w = int(bbox["width"] / 100 * width)
|
|
1131
|
+
h = int(bbox["height"] / 100 * height)
|
|
1132
|
+
|
|
1133
|
+
object_node = doc.createElement("object")
|
|
1134
|
+
create_child_node(doc, "name", name, object_node)
|
|
1135
|
+
create_child_node(doc, "pose", "Unspecified", object_node)
|
|
1136
|
+
create_child_node(doc, "truncated", "0", object_node)
|
|
1137
|
+
create_child_node(doc, "difficult", "0", object_node)
|
|
1138
|
+
bndbox_node = doc.createElement("bndbox")
|
|
1139
|
+
create_child_node(doc, "xmin", str(x), bndbox_node)
|
|
1140
|
+
create_child_node(doc, "ymin", str(y), bndbox_node)
|
|
1141
|
+
create_child_node(doc, "xmax", str(x + w), bndbox_node)
|
|
1142
|
+
create_child_node(doc, "ymax", str(y + h), bndbox_node)
|
|
1143
|
+
|
|
1144
|
+
object_node.appendChild(bndbox_node)
|
|
1145
|
+
root_node.appendChild(object_node)
|
|
1146
|
+
|
|
1147
|
+
with io.open(xml_filepath, mode="w", encoding="utf8") as fout:
|
|
1148
|
+
doc.writexml(fout, addindent="" * 4, newl="\n", encoding="utf-8")
|
|
1149
|
+
|
|
1150
|
+
def _get_labels(self):
|
|
1151
|
+
labels = set()
|
|
1152
|
+
categories = list()
|
|
1153
|
+
category_name_to_id = dict()
|
|
1154
|
+
|
|
1155
|
+
for name, info in self._schema.items():
|
|
1156
|
+
labels |= set(info["labels"])
|
|
1157
|
+
attrs = info["labels_attrs"]
|
|
1158
|
+
for label in attrs:
|
|
1159
|
+
if attrs[label].get("category"):
|
|
1160
|
+
categories.append(
|
|
1161
|
+
{"id": attrs[label].get("category"), "name": label}
|
|
1162
|
+
)
|
|
1163
|
+
category_name_to_id[label] = attrs[label].get("category")
|
|
1164
|
+
labels_to_add = set(labels) - set(list(category_name_to_id.keys()))
|
|
1165
|
+
labels_to_add = sorted(list(labels_to_add))
|
|
1166
|
+
idx = 0
|
|
1167
|
+
while idx in list(category_name_to_id.values()):
|
|
1168
|
+
idx += 1
|
|
1169
|
+
for label in labels_to_add:
|
|
1170
|
+
categories.append({"id": idx, "name": label})
|
|
1171
|
+
category_name_to_id[label] = idx
|
|
1172
|
+
idx += 1
|
|
1173
|
+
while idx in list(category_name_to_id.values()):
|
|
1174
|
+
idx += 1
|
|
1175
|
+
return categories, category_name_to_id
|