label-studio-sdk 0.0.34__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256) hide show
  1. label_studio_sdk/__init__.py +206 -9
  2. label_studio_sdk/_extensions/label_studio_tools/__init__.py +0 -0
  3. label_studio_sdk/_extensions/label_studio_tools/core/__init__.py +0 -0
  4. label_studio_sdk/_extensions/label_studio_tools/core/label_config.py +163 -0
  5. label_studio_sdk/_extensions/label_studio_tools/core/utils/__init__.py +0 -0
  6. label_studio_sdk/_extensions/label_studio_tools/core/utils/exceptions.py +2 -0
  7. label_studio_sdk/_extensions/label_studio_tools/core/utils/io.py +228 -0
  8. label_studio_sdk/_extensions/label_studio_tools/core/utils/params.py +45 -0
  9. label_studio_sdk/_extensions/label_studio_tools/etl/__init__.py +1 -0
  10. label_studio_sdk/_extensions/label_studio_tools/etl/beam.py +34 -0
  11. label_studio_sdk/_extensions/label_studio_tools/etl/example.py +17 -0
  12. label_studio_sdk/_extensions/label_studio_tools/etl/registry.py +67 -0
  13. label_studio_sdk/_extensions/label_studio_tools/postprocessing/__init__.py +0 -0
  14. label_studio_sdk/_extensions/label_studio_tools/postprocessing/video.py +97 -0
  15. label_studio_sdk/_legacy/__init__.py +11 -0
  16. label_studio_sdk/_legacy/client.py +471 -0
  17. label_studio_sdk/_legacy/label_interface/data_examples.json +96 -0
  18. label_studio_sdk/{label_interface → _legacy/label_interface}/interface.py +9 -6
  19. label_studio_sdk/{project.py → _legacy/project.py} +2 -2
  20. label_studio_sdk/actions/__init__.py +2 -0
  21. label_studio_sdk/actions/client.py +150 -0
  22. label_studio_sdk/annotations/__init__.py +2 -0
  23. label_studio_sdk/annotations/client.py +750 -0
  24. label_studio_sdk/client.py +162 -450
  25. label_studio_sdk/converter/__init__.py +7 -0
  26. label_studio_sdk/converter/audio.py +56 -0
  27. label_studio_sdk/converter/brush.py +452 -0
  28. label_studio_sdk/converter/converter.py +1175 -0
  29. label_studio_sdk/converter/exports/__init__.py +0 -0
  30. label_studio_sdk/converter/exports/csv.py +82 -0
  31. label_studio_sdk/converter/exports/csv2.py +103 -0
  32. label_studio_sdk/converter/funsd.py +85 -0
  33. label_studio_sdk/converter/imports/__init__.py +0 -0
  34. label_studio_sdk/converter/imports/coco.py +314 -0
  35. label_studio_sdk/converter/imports/colors.py +198 -0
  36. label_studio_sdk/converter/imports/label_config.py +45 -0
  37. label_studio_sdk/converter/imports/pathtrack.py +269 -0
  38. label_studio_sdk/converter/imports/yolo.py +236 -0
  39. label_studio_sdk/converter/main.py +202 -0
  40. label_studio_sdk/converter/utils.py +473 -0
  41. label_studio_sdk/core/__init__.py +33 -0
  42. label_studio_sdk/core/api_error.py +15 -0
  43. label_studio_sdk/core/client_wrapper.py +55 -0
  44. label_studio_sdk/core/datetime_utils.py +28 -0
  45. label_studio_sdk/core/file.py +38 -0
  46. label_studio_sdk/core/http_client.py +443 -0
  47. label_studio_sdk/core/jsonable_encoder.py +99 -0
  48. label_studio_sdk/core/pagination.py +87 -0
  49. label_studio_sdk/core/pydantic_utilities.py +28 -0
  50. label_studio_sdk/core/query_encoder.py +33 -0
  51. label_studio_sdk/core/remove_none_from_dict.py +11 -0
  52. label_studio_sdk/core/request_options.py +32 -0
  53. label_studio_sdk/environment.py +7 -0
  54. label_studio_sdk/errors/__init__.py +6 -0
  55. label_studio_sdk/errors/bad_request_error.py +8 -0
  56. label_studio_sdk/errors/internal_server_error.py +8 -0
  57. label_studio_sdk/export_storage/__init__.py +28 -0
  58. label_studio_sdk/export_storage/azure/__init__.py +5 -0
  59. label_studio_sdk/export_storage/azure/client.py +722 -0
  60. label_studio_sdk/export_storage/azure/types/__init__.py +6 -0
  61. label_studio_sdk/export_storage/azure/types/azure_create_response.py +52 -0
  62. label_studio_sdk/export_storage/azure/types/azure_update_response.py +52 -0
  63. label_studio_sdk/export_storage/client.py +107 -0
  64. label_studio_sdk/export_storage/gcs/__init__.py +5 -0
  65. label_studio_sdk/export_storage/gcs/client.py +722 -0
  66. label_studio_sdk/export_storage/gcs/types/__init__.py +6 -0
  67. label_studio_sdk/export_storage/gcs/types/gcs_create_response.py +52 -0
  68. label_studio_sdk/export_storage/gcs/types/gcs_update_response.py +52 -0
  69. label_studio_sdk/export_storage/local/__init__.py +5 -0
  70. label_studio_sdk/export_storage/local/client.py +688 -0
  71. label_studio_sdk/export_storage/local/types/__init__.py +6 -0
  72. label_studio_sdk/export_storage/local/types/local_create_response.py +47 -0
  73. label_studio_sdk/export_storage/local/types/local_update_response.py +47 -0
  74. label_studio_sdk/export_storage/redis/__init__.py +5 -0
  75. label_studio_sdk/export_storage/redis/client.py +714 -0
  76. label_studio_sdk/export_storage/redis/types/__init__.py +6 -0
  77. label_studio_sdk/export_storage/redis/types/redis_create_response.py +57 -0
  78. label_studio_sdk/export_storage/redis/types/redis_update_response.py +57 -0
  79. label_studio_sdk/export_storage/s3/__init__.py +5 -0
  80. label_studio_sdk/export_storage/s3/client.py +820 -0
  81. label_studio_sdk/export_storage/s3/types/__init__.py +6 -0
  82. label_studio_sdk/export_storage/s3/types/s3create_response.py +74 -0
  83. label_studio_sdk/export_storage/s3/types/s3update_response.py +74 -0
  84. label_studio_sdk/export_storage/types/__init__.py +5 -0
  85. label_studio_sdk/export_storage/types/export_storage_list_types_response_item.py +30 -0
  86. label_studio_sdk/files/__init__.py +2 -0
  87. label_studio_sdk/files/client.py +556 -0
  88. label_studio_sdk/import_storage/__init__.py +28 -0
  89. label_studio_sdk/import_storage/azure/__init__.py +5 -0
  90. label_studio_sdk/import_storage/azure/client.py +812 -0
  91. label_studio_sdk/import_storage/azure/types/__init__.py +6 -0
  92. label_studio_sdk/import_storage/azure/types/azure_create_response.py +72 -0
  93. label_studio_sdk/import_storage/azure/types/azure_update_response.py +72 -0
  94. label_studio_sdk/import_storage/client.py +107 -0
  95. label_studio_sdk/import_storage/gcs/__init__.py +5 -0
  96. label_studio_sdk/import_storage/gcs/client.py +812 -0
  97. label_studio_sdk/import_storage/gcs/types/__init__.py +6 -0
  98. label_studio_sdk/import_storage/gcs/types/gcs_create_response.py +72 -0
  99. label_studio_sdk/import_storage/gcs/types/gcs_update_response.py +72 -0
  100. label_studio_sdk/import_storage/local/__init__.py +5 -0
  101. label_studio_sdk/import_storage/local/client.py +690 -0
  102. label_studio_sdk/import_storage/local/types/__init__.py +6 -0
  103. label_studio_sdk/import_storage/local/types/local_create_response.py +47 -0
  104. label_studio_sdk/import_storage/local/types/local_update_response.py +47 -0
  105. label_studio_sdk/import_storage/redis/__init__.py +5 -0
  106. label_studio_sdk/import_storage/redis/client.py +768 -0
  107. label_studio_sdk/import_storage/redis/types/__init__.py +6 -0
  108. label_studio_sdk/import_storage/redis/types/redis_create_response.py +62 -0
  109. label_studio_sdk/import_storage/redis/types/redis_update_response.py +62 -0
  110. label_studio_sdk/import_storage/s3/__init__.py +5 -0
  111. label_studio_sdk/import_storage/s3/client.py +912 -0
  112. label_studio_sdk/import_storage/s3/types/__init__.py +6 -0
  113. label_studio_sdk/import_storage/s3/types/s3create_response.py +99 -0
  114. label_studio_sdk/import_storage/s3/types/s3update_response.py +99 -0
  115. label_studio_sdk/import_storage/types/__init__.py +5 -0
  116. label_studio_sdk/import_storage/types/import_storage_list_types_response_item.py +30 -0
  117. label_studio_sdk/ml/__init__.py +19 -0
  118. label_studio_sdk/ml/client.py +981 -0
  119. label_studio_sdk/ml/types/__init__.py +17 -0
  120. label_studio_sdk/ml/types/ml_create_request_auth_method.py +5 -0
  121. label_studio_sdk/ml/types/ml_create_response.py +78 -0
  122. label_studio_sdk/ml/types/ml_create_response_auth_method.py +5 -0
  123. label_studio_sdk/ml/types/ml_update_request_auth_method.py +5 -0
  124. label_studio_sdk/ml/types/ml_update_response.py +78 -0
  125. label_studio_sdk/ml/types/ml_update_response_auth_method.py +5 -0
  126. label_studio_sdk/predictions/__init__.py +2 -0
  127. label_studio_sdk/predictions/client.py +638 -0
  128. label_studio_sdk/projects/__init__.py +6 -0
  129. label_studio_sdk/projects/client.py +1053 -0
  130. label_studio_sdk/projects/exports/__init__.py +2 -0
  131. label_studio_sdk/projects/exports/client.py +930 -0
  132. label_studio_sdk/projects/types/__init__.py +7 -0
  133. label_studio_sdk/projects/types/projects_create_response.py +96 -0
  134. label_studio_sdk/projects/types/projects_import_tasks_response.py +71 -0
  135. label_studio_sdk/projects/types/projects_list_response.py +33 -0
  136. label_studio_sdk/py.typed +0 -0
  137. label_studio_sdk/tasks/__init__.py +5 -0
  138. label_studio_sdk/tasks/client.py +811 -0
  139. label_studio_sdk/tasks/types/__init__.py +6 -0
  140. label_studio_sdk/tasks/types/tasks_list_request_fields.py +5 -0
  141. label_studio_sdk/tasks/types/tasks_list_response.py +48 -0
  142. label_studio_sdk/types/__init__.py +115 -0
  143. label_studio_sdk/types/annotation.py +116 -0
  144. label_studio_sdk/types/annotation_filter_options.py +42 -0
  145. label_studio_sdk/types/annotation_last_action.py +19 -0
  146. label_studio_sdk/types/azure_blob_export_storage.py +112 -0
  147. label_studio_sdk/types/azure_blob_export_storage_status.py +7 -0
  148. label_studio_sdk/types/azure_blob_import_storage.py +113 -0
  149. label_studio_sdk/types/azure_blob_import_storage_status.py +7 -0
  150. label_studio_sdk/types/base_task.py +113 -0
  151. label_studio_sdk/types/base_user.py +42 -0
  152. label_studio_sdk/types/converted_format.py +36 -0
  153. label_studio_sdk/types/converted_format_status.py +5 -0
  154. label_studio_sdk/types/export.py +48 -0
  155. label_studio_sdk/types/export_convert.py +32 -0
  156. label_studio_sdk/types/export_create.py +54 -0
  157. label_studio_sdk/types/export_create_status.py +5 -0
  158. label_studio_sdk/types/export_status.py +5 -0
  159. label_studio_sdk/types/file_upload.py +30 -0
  160. label_studio_sdk/types/filter.py +53 -0
  161. label_studio_sdk/types/filter_group.py +35 -0
  162. label_studio_sdk/types/gcs_export_storage.py +112 -0
  163. label_studio_sdk/types/gcs_export_storage_status.py +7 -0
  164. label_studio_sdk/types/gcs_import_storage.py +113 -0
  165. label_studio_sdk/types/gcs_import_storage_status.py +7 -0
  166. label_studio_sdk/types/local_files_export_storage.py +97 -0
  167. label_studio_sdk/types/local_files_export_storage_status.py +7 -0
  168. label_studio_sdk/types/local_files_import_storage.py +92 -0
  169. label_studio_sdk/types/local_files_import_storage_status.py +7 -0
  170. label_studio_sdk/types/ml_backend.py +89 -0
  171. label_studio_sdk/types/ml_backend_auth_method.py +5 -0
  172. label_studio_sdk/types/ml_backend_state.py +5 -0
  173. label_studio_sdk/types/prediction.py +78 -0
  174. label_studio_sdk/types/project.py +198 -0
  175. label_studio_sdk/types/project_import.py +63 -0
  176. label_studio_sdk/types/project_import_status.py +5 -0
  177. label_studio_sdk/types/project_label_config.py +32 -0
  178. label_studio_sdk/types/project_sampling.py +7 -0
  179. label_studio_sdk/types/project_skip_queue.py +5 -0
  180. label_studio_sdk/types/redis_export_storage.py +117 -0
  181. label_studio_sdk/types/redis_export_storage_status.py +7 -0
  182. label_studio_sdk/types/redis_import_storage.py +112 -0
  183. label_studio_sdk/types/redis_import_storage_status.py +7 -0
  184. label_studio_sdk/types/s3export_storage.py +134 -0
  185. label_studio_sdk/types/s3export_storage_status.py +7 -0
  186. label_studio_sdk/types/s3import_storage.py +140 -0
  187. label_studio_sdk/types/s3import_storage_status.py +7 -0
  188. label_studio_sdk/types/serialization_option.py +36 -0
  189. label_studio_sdk/types/serialization_options.py +45 -0
  190. label_studio_sdk/types/task.py +157 -0
  191. label_studio_sdk/types/task_filter_options.py +49 -0
  192. label_studio_sdk/types/user_simple.py +37 -0
  193. label_studio_sdk/types/view.py +55 -0
  194. label_studio_sdk/types/webhook.py +67 -0
  195. label_studio_sdk/types/webhook_actions_item.py +21 -0
  196. label_studio_sdk/types/webhook_serializer_for_update.py +67 -0
  197. label_studio_sdk/types/webhook_serializer_for_update_actions_item.py +21 -0
  198. label_studio_sdk/users/__init__.py +5 -0
  199. label_studio_sdk/users/client.py +830 -0
  200. label_studio_sdk/users/types/__init__.py +6 -0
  201. label_studio_sdk/users/types/users_get_token_response.py +36 -0
  202. label_studio_sdk/users/types/users_reset_token_response.py +36 -0
  203. label_studio_sdk/version.py +4 -0
  204. label_studio_sdk/views/__init__.py +31 -0
  205. label_studio_sdk/views/client.py +564 -0
  206. label_studio_sdk/views/types/__init__.py +29 -0
  207. label_studio_sdk/views/types/views_create_request_data.py +43 -0
  208. label_studio_sdk/views/types/views_create_request_data_filters.py +43 -0
  209. label_studio_sdk/views/types/views_create_request_data_filters_conjunction.py +5 -0
  210. label_studio_sdk/views/types/views_create_request_data_filters_items_item.py +47 -0
  211. label_studio_sdk/views/types/views_create_request_data_ordering_item.py +38 -0
  212. label_studio_sdk/views/types/views_create_request_data_ordering_item_direction.py +5 -0
  213. label_studio_sdk/views/types/views_update_request_data.py +43 -0
  214. label_studio_sdk/views/types/views_update_request_data_filters.py +43 -0
  215. label_studio_sdk/views/types/views_update_request_data_filters_conjunction.py +5 -0
  216. label_studio_sdk/views/types/views_update_request_data_filters_items_item.py +47 -0
  217. label_studio_sdk/views/types/views_update_request_data_ordering_item.py +38 -0
  218. label_studio_sdk/views/types/views_update_request_data_ordering_item_direction.py +5 -0
  219. label_studio_sdk/webhooks/__init__.py +5 -0
  220. label_studio_sdk/webhooks/client.py +636 -0
  221. label_studio_sdk/webhooks/types/__init__.py +5 -0
  222. label_studio_sdk/webhooks/types/webhooks_update_request_actions_item.py +21 -0
  223. label_studio_sdk-1.0.0.dist-info/METADATA +307 -0
  224. label_studio_sdk-1.0.0.dist-info/RECORD +239 -0
  225. {label_studio_sdk-0.0.34.dist-info → label_studio_sdk-1.0.0.dist-info}/WHEEL +1 -2
  226. label_studio_sdk-0.0.34.dist-info/LICENSE +0 -201
  227. label_studio_sdk-0.0.34.dist-info/METADATA +0 -24
  228. label_studio_sdk-0.0.34.dist-info/RECORD +0 -37
  229. label_studio_sdk-0.0.34.dist-info/top_level.txt +0 -2
  230. tests/test_client.py +0 -37
  231. tests/test_export.py +0 -105
  232. tests/test_interface/__init__.py +0 -1
  233. tests/test_interface/configs.py +0 -137
  234. tests/test_interface/mockups.py +0 -22
  235. tests/test_interface/test_compat.py +0 -64
  236. tests/test_interface/test_control_tags.py +0 -55
  237. tests/test_interface/test_data_generation.py +0 -45
  238. tests/test_interface/test_lpi.py +0 -15
  239. tests/test_interface/test_main.py +0 -196
  240. tests/test_interface/test_object_tags.py +0 -36
  241. tests/test_interface/test_region.py +0 -36
  242. tests/test_interface/test_validate_summary.py +0 -35
  243. tests/test_interface/test_validation.py +0 -59
  244. {tests → label_studio_sdk/_extensions}/__init__.py +0 -0
  245. /label_studio_sdk/{exceptions.py → _legacy/exceptions.py} +0 -0
  246. /label_studio_sdk/{label_interface → _legacy/label_interface}/__init__.py +0 -0
  247. /label_studio_sdk/{label_interface → _legacy/label_interface}/base.py +0 -0
  248. /label_studio_sdk/{label_interface → _legacy/label_interface}/control_tags.py +0 -0
  249. /label_studio_sdk/{label_interface → _legacy/label_interface}/label_tags.py +0 -0
  250. /label_studio_sdk/{label_interface → _legacy/label_interface}/object_tags.py +0 -0
  251. /label_studio_sdk/{label_interface → _legacy/label_interface}/region.py +0 -0
  252. /label_studio_sdk/{objects.py → _legacy/objects.py} +0 -0
  253. /label_studio_sdk/{schema → _legacy/schema}/label_config_schema.json +0 -0
  254. /label_studio_sdk/{users.py → _legacy/users.py} +0 -0
  255. /label_studio_sdk/{utils.py → _legacy/utils.py} +0 -0
  256. /label_studio_sdk/{workspaces.py → _legacy/workspaces.py} +0 -0
@@ -0,0 +1,1175 @@
1
+ import io
2
+ import logging
3
+ import math
4
+ import os
5
+ import re
6
+ import xml.dom
7
+ import xml.dom.minidom
8
+ from collections import defaultdict
9
+ from copy import deepcopy
10
+ from datetime import datetime
11
+ from enum import Enum
12
+ from glob import glob
13
+ from operator import itemgetter
14
+ from shutil import copy2
15
+ from typing import Optional
16
+
17
+ import ijson
18
+ import ujson as json
19
+ from PIL import Image
20
+ from label_studio_sdk.converter import brush
21
+ from label_studio_sdk.converter.audio import convert_to_asr_json_manifest
22
+ from label_studio_sdk.converter.exports import csv2
23
+ from label_studio_sdk.converter.utils import (
24
+ parse_config,
25
+ create_tokens_and_tags,
26
+ download,
27
+ get_image_size_and_channels,
28
+ ensure_dir,
29
+ get_polygon_area,
30
+ get_polygon_bounding_box,
31
+ get_annotator,
32
+ get_json_root_type,
33
+ prettify_result,
34
+ convert_annotation_to_yolo,
35
+ convert_annotation_to_yolo_obb,
36
+ )
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+
41
class FormatNotSupportedError(NotImplementedError):
    """NotImplementedError subclass for signalling an unsupported export format."""
43
+
44
+
45
class Format(Enum):
    """Export formats known to the converter.

    ``str(member)`` is the member name, so members round-trip through
    :meth:`from_string`.
    """

    JSON = 1
    JSON_MIN = 2
    CSV = 3
    TSV = 4
    CONLL2003 = 5
    COCO = 6
    VOC = 7
    BRUSH_TO_NUMPY = 8
    BRUSH_TO_PNG = 9
    ASR_MANIFEST = 10
    YOLO = 11
    YOLO_OBB = 12
    CSV_OLD = 13

    def __str__(self):
        return self.name

    @classmethod
    def from_string(cls, s):
        """Return the member whose name is exactly ``s``.

        :param s: member name, e.g. ``"JSON_MIN"`` (case-sensitive)
        :raises ValueError: if ``s`` is not the name of a member
        """
        try:
            return cls[s]
        except KeyError:
            # Keep ValueError as the exception type, but say which name was
            # rejected and what would have been accepted.
            known = ", ".join(member.name for member in cls)
            raise ValueError(
                f"Unknown format {s!r}; expected one of: {known}"
            ) from None
69
+
70
+
71
+ class Converter(object):
72
# Display metadata per export format: "title", "description", "link" and,
# for some formats, "tags". Format.CSV_OLD has no entry here.
_FORMAT_INFO = {
    # --- generic formats -------------------------------------------------
    Format.JSON: dict(
        title="JSON",
        description=(
            "List of items in raw JSON format stored in one JSON file. Use to export both the data "
            "and the annotations for a dataset. It's Label Studio Common Format"
        ),
        link="https://labelstud.io/guide/export.html#JSON",
    ),
    Format.JSON_MIN: dict(
        title="JSON-MIN",
        description=(
            'List of items where only "from_name", "to_name" values from the raw JSON format are '
            "exported. Use to export only the annotations for a dataset."
        ),
        link="https://labelstud.io/guide/export.html#JSON-MIN",
    ),
    Format.CSV: dict(
        title="CSV",
        description=(
            "Results are stored as comma-separated values with the column names specified by the "
            'values of the "from_name" and "to_name" fields.'
        ),
        link="https://labelstud.io/guide/export.html#CSV",
    ),
    Format.TSV: dict(
        title="TSV",
        description=(
            "Results are stored in tab-separated tabular file with column names specified by "
            '"from_name" "to_name" values'
        ),
        link="https://labelstud.io/guide/export.html#TSV",
    ),
    # --- text ------------------------------------------------------------
    Format.CONLL2003: dict(
        title="CONLL2003",
        description="Popular format used for the CoNLL-2003 named entity recognition challenge.",
        link="https://labelstud.io/guide/export.html#CONLL2003",
        tags=["sequence labeling", "text tagging", "named entity recognition"],
    ),
    # --- images ----------------------------------------------------------
    Format.COCO: dict(
        title="COCO",
        description=(
            "Popular machine learning format used by the COCO dataset for object detection and image "
            "segmentation tasks with polygons and rectangles."
        ),
        link="https://labelstud.io/guide/export.html#COCO",
        tags=["image segmentation", "object detection"],
    ),
    Format.VOC: dict(
        title="Pascal VOC XML",
        description="Popular XML format used for object detection and polygon image segmentation tasks.",
        link="https://labelstud.io/guide/export.html#Pascal-VOC-XML",
        tags=["image segmentation", "object detection"],
    ),
    Format.YOLO: dict(
        title="YOLO",
        description=(
            "Popular TXT format is created for each image file. Each txt file contains annotations for "
            "the corresponding image file, that is object class, object coordinates, height & width."
        ),
        link="https://labelstud.io/guide/export.html#YOLO",
        tags=["image segmentation", "object detection"],
    ),
    Format.YOLO_OBB: dict(
        title="YOLOv8 OBB",
        description=(
            "Popular TXT format is created for each image file. Each txt file contains annotations for "
            "the corresponding image file. The YOLO OBB format designates bounding boxes by their four corner points "
            "with coordinates normalized between 0 and 1, so it is possible to export rotated objects."
        ),
        link="https://labelstud.io/guide/export.html#YOLO",
        tags=["image segmentation", "object detection"],
    ),
    Format.BRUSH_TO_NUMPY: dict(
        title="Brush labels to NumPy",
        description="Export your brush labels as NumPy 2d arrays. Each label outputs as one image.",
        link="https://labelstud.io/guide/export.html#Brush-labels-to-NumPy-amp-PNG",
        tags=["image segmentation"],
    ),
    Format.BRUSH_TO_PNG: dict(
        title="Brush labels to PNG",
        description="Export your brush labels as PNG images. Each label outputs as one image.",
        link="https://labelstud.io/guide/export.html#Brush-labels-to-NumPy-amp-PNG",
        tags=["image segmentation"],
    ),
    # --- audio -----------------------------------------------------------
    Format.ASR_MANIFEST: dict(
        title="ASR Manifest",
        description=(
            "Export audio transcription labels for automatic speech recognition as the JSON manifest "
            "format expected by NVIDIA NeMo models."
        ),
        link="https://labelstud.io/guide/export.html#ASR-MANIFEST",
        tags=["speech recognition"],
    ),
}
151
+
152
def all_formats(self):
    """Return the format-to-metadata mapping stored in ``_FORMAT_INFO``.

    The mapping object itself is returned, not a copy.
    """
    format_info = self._FORMAT_INFO
    return format_info
154
+
155
def __init__(
    self,
    config,
    project_dir,
    output_tags=None,
    upload_dir=None,
    download_resources=True,
):
    """Initialize Label Studio Converter for Exports

    :param config: string or dict: XML string with Label studio labeling config or path to this file or parsed_config
    :param project_dir: upload root directory for images, audio and other labeling files
    :param output_tags: it will be calculated automatically, contains label names
    :param upload_dir: upload root directory with files that were imported using LS GUI
    :param download_resources: if True, LS will try to download images, audio, etc and include them to export
    """
    self.project_dir = project_dir
    self.upload_dir = upload_dir
    self.download_resources = download_resources
    self._schema = None

    if isinstance(config, dict):
        # Already-parsed config: used as the schema as-is.
        self._schema = config
    elif isinstance(config, str):
        if os.path.isfile(config):
            # Read with an explicit encoding so the result does not depend
            # on the platform's locale default.
            with io.open(config, encoding="utf8") as f:
                config_string = f.read()
        else:
            # Not a path to an existing file: treat the string as the config.
            config_string = config
        self._schema = parse_config(config_string)

    # Any other config type (including None) leaves the schema unset.
    if self._schema is None:
        logger.warning(
            "Label config or schema for Converter is not provided, "
            "it might be critical for some export formats, now set schema to empty dict"
        )
        self._schema = {}

    self._data_keys, self._output_tags = self._get_data_keys_and_output_tags(
        output_tags
    )
    self._supported_formats = self._get_supported_formats()
197
+
198
def convert(self, input_data, output_data, format, is_dir=True, **kwargs):
    """Dispatch an export to the converter method for ``format``.

    :param input_data: directory of JSON files if ``is_dir`` else a single JSON file
    :param output_data: output location, passed through to the format-specific converter
    :param format: a ``Format`` member or the name of one
    :param is_dir: whether ``input_data`` is a directory
    :param kwargs: per-format options: ``csv_header`` and ``csv_separator``
        (CSV/TSV), ``image_dir`` (COCO/YOLO/VOC), ``label_dir`` (YOLO)
    :raises ValueError: if ``format`` is a string that names no ``Format`` member
    :raises FormatNotSupportedError: if there is no converter for ``format``
    """
    if isinstance(format, str):
        format = Format.from_string(format)

    if format == Format.JSON:
        self.convert_to_json(input_data, output_data, is_dir=is_dir)
    elif format == Format.JSON_MIN:
        self.convert_to_json_min(input_data, output_data, is_dir=is_dir)
    elif format in (Format.CSV, Format.TSV):
        # CSV and TSV share one writer and differ only in the default separator.
        header = kwargs.get("csv_header", True)
        default_sep = "," if format == Format.CSV else "\t"
        sep = kwargs.get("csv_separator", default_sep)
        self.convert_to_csv(
            input_data, output_data, sep=sep, header=header, is_dir=is_dir
        )
    elif format == Format.CONLL2003:
        self.convert_to_conll2003(input_data, output_data, is_dir=is_dir)
    elif format == Format.COCO:
        image_dir = kwargs.get("image_dir")
        self.convert_to_coco(
            input_data, output_data, output_image_dir=image_dir, is_dir=is_dir
        )
    elif format in (Format.YOLO, Format.YOLO_OBB):
        image_dir = kwargs.get("image_dir")
        label_dir = kwargs.get("label_dir")
        self.convert_to_yolo(
            input_data,
            output_data,
            output_image_dir=image_dir,
            output_label_dir=label_dir,
            is_dir=is_dir,
            is_obb=(format == Format.YOLO_OBB),
        )
    elif format == Format.VOC:
        image_dir = kwargs.get("image_dir")
        self.convert_to_voc(
            input_data, output_data, output_image_dir=image_dir, is_dir=is_dir
        )
    elif format in (
        Format.BRUSH_TO_NUMPY,
        Format.BRUSH_TO_PNG,
        Format.ASR_MANIFEST,
    ):
        # These converters take an iterator of already-extracted items.
        items = (
            self.iter_from_dir(input_data)
            if is_dir
            else self.iter_from_json_file(input_data)
        )
        if format == Format.BRUSH_TO_NUMPY:
            brush.convert_task_dir(items, output_data, out_format="numpy")
        elif format == Format.BRUSH_TO_PNG:
            brush.convert_task_dir(items, output_data, out_format="png")
        else:
            convert_to_asr_json_manifest(
                items,
                output_data,
                data_key=self._data_keys[0],
                project_dir=self.project_dir,
                upload_dir=self.upload_dir,
                download_resources=self.download_resources,
            )
    else:
        # Previously a format without a branch fell through and nothing was
        # written; report it instead.
        raise FormatNotSupportedError(
            f"No converter is implemented for format {format!r}"
        )
269
+
270
def _get_data_keys_and_output_tags(self, output_tags=None):
    """Collect input data keys and output tag names from the schema.

    :param output_tags: optional collection of tag names; when given, only
        schema entries with these names are used. Names missing from the
        schema are reported at debug level and otherwise ignored.
    :return: ``(data_keys, output_tag_names)``. ``data_keys`` holds the
        distinct ``"value"`` entries of the selected tags' ``"inputs"``, in
        first-seen order; ``output_tag_names`` holds the selected tag names
        in schema order.
    """
    if output_tags is not None:
        for tag in output_tags:
            if tag not in self._schema:
                logger.debug(
                    'Specified tag "%s" not found in config schema: '
                    "available options are %s",
                    tag,
                    list(self._schema.keys()),
                )

    # A dict is used as an insertion-ordered set so the key order (and thus
    # data_keys[0]) is the same on every run instead of following hash order.
    data_keys = {}
    output_tag_names = []
    for name, info in self._schema.items():
        if output_tags is not None and name not in output_tags:
            continue
        for input_tag in info["inputs"]:
            data_keys.setdefault(input_tag["value"], None)
        output_tag_names.append(name)

    return list(data_keys), output_tag_names
289
+
290
def _get_supported_formats(self):
    """Return the names of the formats considered available for the schema.

    With more than one data key only JSON, JSON_MIN, CSV and TSV are
    returned. Otherwise the result starts from every ``Format`` member and
    drops CONLL2003, VOC, COCO/YOLO, the two brush formats and ASR_MANIFEST
    unless the input/output tag types in the schema satisfy that format's
    condition below.
    """
    if len(self._data_keys) > 1:
        generic = (Format.JSON, Format.JSON_MIN, Format.CSV, Format.TSV)
        return [fmt.name for fmt in generic]

    output_tag_types = set()
    input_tag_types = set()
    for tag_info in self._schema.values():
        output_tag_types.add(tag_info["type"])
        for input_tag in tag_info["inputs"]:
            input_type = input_tag["type"]
            if input_type == "Text" and input_tag.get("valueType") == "url":
                # Such inputs are reported and left out of the type set.
                logger.error('valueType="url" are not supported for text inputs')
                continue
            input_tag_types.add(input_type)

    has_image = "Image" in input_tag_types
    has_labels = "Labels" in output_tag_types

    conll_ok = "Text" in input_tag_types and has_labels
    voc_ok = has_image and (
        "RectangleLabels" in output_tag_types
        or ("Rectangle" in output_tag_types and has_labels)
    )
    # Only the first alternative requires an Image input.
    coco_yolo_ok = (
        (
            has_image
            and (
                "RectangleLabels" in output_tag_types
                or "PolygonLabels" in output_tag_types
            )
        )
        or ("Rectangle" in output_tag_types and has_labels)
        or ("PolygonLabels" in output_tag_types and has_labels)
    )
    brush_ok = has_image and (
        "BrushLabels" in output_tag_types
        or "brushlabels" in output_tag_types
        or ("Brush" in output_tag_types and has_labels)
    )
    asr_ok = (
        "Audio" in input_tag_types or "AudioPlus" in input_tag_types
    ) and "TextArea" in output_tag_types

    unavailable = set()
    if not conll_ok:
        unavailable.add(Format.CONLL2003)
    if not voc_ok:
        unavailable.add(Format.VOC)
    if not coco_yolo_ok:
        unavailable.update((Format.COCO, Format.YOLO))
    if not brush_ok:
        unavailable.update((Format.BRUSH_TO_NUMPY, Format.BRUSH_TO_PNG))
    if not asr_ok:
        unavailable.add(Format.ASR_MANIFEST)

    return [fmt.name for fmt in Format if fmt not in unavailable]
351
+
352
@property
def supported_formats(self):
    """The value stored in ``self._supported_formats`` (read-only)."""
    formats = self._supported_formats
    return formats
355
+
356
def iter_from_dir(self, input_dir):
    """Yield the truthy items from every ``*.json`` file directly in ``input_dir``.

    Each matching file is handed to ``iter_from_json_file``; falsy items are
    dropped. Because this is a generator, a missing ``input_dir`` is only
    reported once iteration starts.

    :raises FileNotFoundError: if ``input_dir`` does not exist
    """
    if not os.path.exists(input_dir):
        raise FileNotFoundError(f"{input_dir} doesn't exist")

    pattern = os.path.join(input_dir, "*.json")
    for path in glob(pattern):
        yield from filter(None, self.iter_from_json_file(path))
365
+
366
def iter_from_json_file(self, json_file):
    """Extract annotation results from json file

    :param json_file: path to a JSON file whose root is either a single task
        (dict) or a list of tasks
    :return: generator over the items produced by
        ``annotation_result_from_task`` for each task; yields nothing when
        the root type is neither ``"dict"`` nor ``"list"``
    """
    data_type = get_json_root_type(json_file)

    # one task
    if data_type == "dict":
        # Explicit encoding so decoding does not depend on the locale, and a
        # separate handle name so the path parameter is not shadowed. The
        # file is closed before any item is yielded.
        with open(json_file, "r", encoding="utf-8") as f:
            data = json.load(f)
        for item in self.annotation_result_from_task(data):
            yield item

    # many tasks
    elif data_type == "list":
        # Streamed with ijson, so the file must stay open while the consumer
        # iterates.
        with io.open(json_file, "rb") as f:
            logger.debug("ijson backend in use: %s", ijson.backend)
            # 'item' means to read array of dicts
            for task in ijson.items(f, "item", use_float=True):
                for item in self.annotation_result_from_task(task):
                    if item is not None:
                        yield item
391
+
392
def _maybe_matching_tag_from_schema(self, from_name: str) -> Optional[str]:
    """If the from name exactly matches an output tag from the schema, return that tag.

    Otherwise, certain tags (like those from Repeater) contain
    placeholders like {{idx}}. Such placeholders are mapped to a regex in self._schema.
    For example, if "my_output_tag_{{idx}}" is a tag in the schema,
    then the from_name "my_output_tag_0" should match it, and we should return "my_output_tag_{{idx}}".

    :param from_name: the ``from_name`` of an annotation result
    :return: the matching schema tag name, or ``None`` if nothing matches
    """
    # Exact names win over placeholder patterns regardless of schema order.
    # Checking both in one pass let an earlier placeholder tag claim a name
    # that is itself a schema key.
    if from_name in self._schema:
        return from_name

    for tag_name, tag_info in self._schema.items():
        placeholder_regexes = tag_info.get("regex")
        if not placeholder_regexes:
            continue

        # Substitute each placeholder with its regex to build the pattern.
        tag_name_pattern = tag_name
        for variable, regex in placeholder_regexes.items():
            tag_name_pattern = tag_name_pattern.replace(variable, regex)

        # re.match anchors at the start only (unchanged behavior).
        if re.match(tag_name_pattern, from_name):
            return tag_name

    return None
416
+
417
def annotation_result_from_task(self, task):
    """Yield one export record per usable annotation of ``task``.

    :param task: task dict with an ``"annotations"`` list (or the deprecated
        ``"completions"``), plus the ``"id"`` and ``"data"`` keys read by
        ``get_data``
    :return: generator of records built by ``get_data``. A task with no
        annotations yields a single record with empty output. Otherwise
        skipped/cancelled annotations are dropped and the rest are yielded
        newest first by ``created_at``. Nothing is yielded when the task has
        neither key or when every annotation was skipped/cancelled.
    """
    if "annotations" in task:
        annotations = task["annotations"]
    elif "completions" in task:
        annotations = task["completions"]
    else:
        logger.warning(
            'Each task dict item should contain "annotations" or "completions" [deprecated], '
            "where value is list of dicts"
        )
        return

    # return task with empty annotations
    if not annotations:
        # get_data is reached through self rather than the hard-coded class
        # name. Returning here also covers ``annotations`` being None, which
        # used to fail in the filtering step below.
        yield self.get_data(task, {}, {})
        return

    # skip cancelled annotations
    kept = [
        annotation
        for annotation in annotations
        if not (
            annotation.get("skipped", False)
            or annotation.get("was_cancelled", False)
        )
    ]
    if not kept:
        return

    def created_at_key(annotation):
        # Annotations without a timestamp sort after those with one. The
        # leading flag keeps a missing value from ever being compared with
        # a real timestamp, which the old default of 0 did not.
        created_at = annotation.get("created_at")
        if created_at is None:
            return (False, 0)
        return (True, created_at)

    # sort by creation time, newest first
    kept.sort(key=created_at_key, reverse=True)

    for annotation in kept:
        outputs = defaultdict(list)

        # get results only as output
        for r in annotation["result"]:
            if "from_name" in r and (
                tag_name := self._maybe_matching_tag_from_schema(r["from_name"])
            ):
                # Copy so the task's own value dict is not mutated.
                v = deepcopy(r["value"])
                v["type"] = self._schema[tag_name]["type"]
                if "original_width" in r:
                    v["original_width"] = r["original_width"]
                if "original_height" in r:
                    v["original_height"] = r["original_height"]
                outputs[r["from_name"]].append(v)

        data = self.get_data(task, outputs, annotation)
        if "agreement" in task:
            data["agreement"] = task["agreement"]
        yield data
470
+
471
@staticmethod
def get_data(task, outputs, annotation):
    """Build the flat export record for one task/annotation pair.

    Parameters
    ----------
    task : dict
        Must contain ``"data"``. ``"id"`` is optional and exported as
        ``None`` when absent (``convert_to_json_min`` already treats a
        missing id as optional).
    outputs : dict or None
        Mapping of from_name to result values; a falsy value becomes ``{}``.
    annotation : dict
        Source annotation; may be empty. Missing fields are exported as
        ``None`` (``completed_by`` as ``{}``).

    Returns
    -------
    dict
        The record with keys ``id``, ``input``, ``output``, ``completed_by``,
        ``annotation_id``, ``created_at``, ``updated_at``, ``lead_time``,
        ``history`` and ``was_cancelled``.
    """
    return {
        "id": task.get("id"),
        "input": task["data"],
        "output": outputs or {},
        "completed_by": annotation.get("completed_by", {}),
        "annotation_id": annotation.get("id"),
        "created_at": annotation.get("created_at"),
        "updated_at": annotation.get("updated_at"),
        "lead_time": annotation.get("lead_time"),
        "history": annotation.get("history"),
        "was_cancelled": annotation.get("was_cancelled"),
    }
485
+
486
+ def _check_format(self, fmt):
487
+ pass
488
+
489
def convert_to_json(self, input_data, output_dir, is_dir=True):
    """Write ``<output_dir>/result.json`` from ``input_data``.

    When ``is_dir`` is true, every ``*.json`` file directly inside
    ``input_data`` is parsed and the parsed documents are dumped together as
    one JSON list. Otherwise ``input_data`` is treated as a single file and
    copied to the output path unchanged.
    """
    self._check_format(Format.JSON)
    ensure_dir(output_dir)
    output_file = os.path.join(output_dir, "result.json")

    if not is_dir:
        # Single-file input: plain copy, no parsing.
        copy2(input_data, output_file)
        return

    records = []
    for json_file in glob(os.path.join(input_data, "*.json")):
        with io.open(json_file, encoding="utf8") as f:
            parsed = json.load(f)
        records.append(parsed)

    with io.open(output_file, mode="w", encoding="utf8") as fout:
        json.dump(records, fout, indent=2, ensure_ascii=False)
502
+
503
def convert_to_json_min(self, input_data, output_dir, is_dir=True):
    """Write a flattened ``<output_dir>/result.json`` with one record per item.

    Each record starts as a deep copy of the item's ``input`` and is then
    extended with the item id (when not ``None``), one prettified entry per
    output name, the annotator, the annotation bookkeeping fields and, when
    present, the agreement value.
    """
    self._check_format(Format.JSON_MIN)
    ensure_dir(output_dir)
    output_file = os.path.join(output_dir, "result.json")

    if is_dir:
        read_items = self.iter_from_dir
    else:
        read_items = self.iter_from_json_file

    records = []
    for item in read_items(input_data):
        record = deepcopy(item["input"])
        if item.get("id") is not None:
            record["id"] = item["id"]
        for name, value in item["output"].items():
            record[name] = prettify_result(value)
        record["annotator"] = get_annotator(item, int_id=True)
        # Bookkeeping fields are copied verbatim from the item.
        for field in ("annotation_id", "created_at", "updated_at", "lead_time"):
            record[field] = item[field]
        if "agreement" in item:
            record["agreement"] = item["agreement"]
        records.append(record)

    with io.open(output_file, mode="w", encoding="utf8") as fout:
        json.dump(records, fout, indent=2, ensure_ascii=False)
527
+
528
def convert_to_csv(self, input_data, output_dir, is_dir=True, **kwargs):
    """Delegate CSV export to ``csv2.convert`` and return its result.

    The item reader handed to ``csv2.convert`` is ``iter_from_dir`` when
    ``is_dir`` is true and ``iter_from_json_file`` otherwise; extra keyword
    arguments are forwarded unchanged.
    """
    self._check_format(Format.CSV)
    if is_dir:
        item_iterator = self.iter_from_dir
    else:
        item_iterator = self.iter_from_json_file
    return csv2.convert(item_iterator, input_data, output_dir, **kwargs)
532
+
533
def convert_to_conll2003(self, input_data, output_dir, is_dir=True):
    """Write ``<output_dir>/result.conll`` in CoNLL-2003 style.

    The text is read from the first data key of each item. The spans are the
    first output whose leading value has type ``labels`` (case-insensitive),
    or ``None`` when there is no such output. Each item contributes one
    ``token -X- _ tag`` line per token followed by a blank line.
    """
    self._check_format(Format.CONLL2003)
    ensure_dir(output_dir)
    output_file = os.path.join(output_dir, "result.conll")
    data_key = self._data_keys[0]

    with io.open(output_file, "w", encoding="utf8") as fout:
        fout.write("-DOCSTART- -X- O\n")
        read_items = self.iter_from_dir if is_dir else self.iter_from_json_file

        for item in read_items(input_data):
            # Every output is inspected (not just up to the first hit).
            label_outputs = [
                values
                for values in item["output"].values()
                if values[0]["type"].lower() == "labels"
            ]
            spans = label_outputs[0] if label_outputs else None

            tokens, tags = create_tokens_and_tags(
                text=item["input"][data_key],
                spans=spans,
            )
            for token, tag in zip(tokens, tags):
                fout.write("{token} -X- _ {tag}\n".format(token=token, tag=tag))
            fout.write("\n")
556
+
557
def convert_to_coco(
    self, input_data, output_dir, output_image_dir=None, is_dir=True
):
    """Export items as a COCO-style ``result.json`` inside ``output_dir``.

    Parameters
    ----------
    input_data : str
        Directory of task files (``is_dir=True``) or a single JSON file.
    output_dir : str
        Directory receiving ``result.json``.
    output_image_dir : str, optional
        Directory handed to ``download`` for images that do not exist
        locally. Defaults to ``<output_dir>/images``.
    is_dir : bool, optional
        Selects ``iter_from_dir`` (True) or ``iter_from_json_file`` (False).

    Notes
    -----
    Download and image-open failures are logged and the item is still
    processed; in that case the image size is taken from the first label
    carrying ``original_width`` / ``original_height``. Only the first class
    of each region is exported. Setting the environment variable
    ``LABEL_STUDIO_FORCE_ANNOTATOR_EXPORT`` adds an ``annotator`` field to
    every annotation.
    """

    def add_image(images, width, height, image_id, image_path):
        # Appends in place and returns the same list for call-site symmetry.
        images.append(
            {
                "width": width,
                "height": height,
                "id": image_id,
                "file_name": image_path,
            }
        )
        return images

    self._check_format(Format.COCO)
    ensure_dir(output_dir)
    output_file = os.path.join(output_dir, "result.json")
    if output_image_dir is not None:
        ensure_dir(output_image_dir)
    else:
        output_image_dir = os.path.join(output_dir, "images")
        os.makedirs(output_image_dir, exist_ok=True)
    images, annotations = [], []
    categories, category_name_to_id = self._get_labels()
    data_key = self._data_keys[0]
    item_iterator = (
        self.iter_from_dir(input_data)
        if is_dir
        else self.iter_from_json_file(input_data)
    )
    for item_idx, item in enumerate(item_iterator):
        image_path = item["input"][data_key]
        image_id = len(images)
        width = None
        height = None
        # download all images of the dataset, including the ones without annotations
        if not os.path.exists(image_path):
            try:
                image_path = download(
                    image_path,
                    output_image_dir,
                    project_dir=self.project_dir,
                    return_relative_path=True,
                    upload_dir=self.upload_dir,
                    download_resources=self.download_resources,
                )
            except Exception:
                # Best effort: keep exporting, but never swallow
                # KeyboardInterrupt / SystemExit (a bare except would).
                logger.info(
                    "Unable to download {image_path}. The image of {item} will be skipped".format(
                        image_path=image_path, item=item
                    ),
                    exc_info=True,
                )
        # add image to final images list
        try:
            with Image.open(os.path.join(output_dir, image_path)) as img:
                width, height = img.size
            images = add_image(images, width, height, image_id, image_path)
        except Exception:
            logger.info(
                "Unable to open {image_path}, can't extract width and height for COCO export".format(
                    image_path=image_path
                ),
                exc_info=True,
            )

        # skip tasks without annotations
        if not item["output"]:
            # image wasn't load and there are no labels
            if not width:
                images = add_image(images, width, height, image_id, image_path)

            logger.warning("No annotations found for item #" + str(item_idx))
            continue

        # concatenate results over all tag names
        labels = []
        for key in item["output"]:
            labels += item["output"][key]

        if len(labels) == 0:
            logger.debug(f'Empty bboxes for {item["output"]}')
            continue

        for label in labels:
            category_name = None
            for key in ["rectanglelabels", "polygonlabels", "labels"]:
                if key in label and len(label[key]) > 0:
                    category_name = label[key][0]
                    break

            if category_name is None:
                logger.warning("Unknown label type or labels are empty")
                continue

            if not height or not width:
                # The image could not be opened: take its size from the label.
                if "original_width" not in label or "original_height" not in label:
                    logger.debug(
                        f"original_width or original_height not found in {image_path}"
                    )
                    continue

                width, height = label["original_width"], label["original_height"]
                images = add_image(images, width, height, image_id, image_path)

            if category_name not in category_name_to_id:
                category_id = len(categories)
                category_name_to_id[category_name] = category_id
                categories.append({"id": category_id, "name": category_name})
            category_id = category_name_to_id[category_name]

            annotation_id = len(annotations)

            if "rectanglelabels" in label or "labels" in label:
                xywh = self.rotated_rectangle(label)
                if xywh is None:
                    continue

                # Percent -> pixels. Prefer the size stored on the label and
                # fall back to the size resolved for the image, so a label
                # without original_* no longer raises KeyError.
                box_width = label.get("original_width", width)
                box_height = label.get("original_height", height)
                x, y, w, h = xywh
                x = x * box_width / 100
                y = y * box_height / 100
                w = w * box_width / 100
                h = h * box_height / 100

                annotations.append(
                    {
                        "id": annotation_id,
                        "image_id": image_id,
                        "category_id": category_id,
                        "segmentation": [],
                        "bbox": [x, y, w, h],
                        "ignore": 0,
                        "iscrowd": 0,
                        "area": w * h,
                    }
                )
            elif "polygonlabels" in label:
                points_abs = [
                    (x / 100 * width, y / 100 * height) for x, y in label["points"]
                ]
                x, y = zip(*points_abs)

                annotations.append(
                    {
                        "id": annotation_id,
                        "image_id": image_id,
                        "category_id": category_id,
                        "segmentation": [
                            [coord for point in points_abs for coord in point]
                        ],
                        "bbox": get_polygon_bounding_box(x, y),
                        "ignore": 0,
                        "iscrowd": 0,
                        "area": get_polygon_area(x, y),
                    }
                )
            else:
                # Defensive: category_name is only set from the keys tested above.
                raise ValueError("Unknown label type")

            if os.getenv("LABEL_STUDIO_FORCE_ANNOTATOR_EXPORT"):
                annotations[-1].update({"annotator": get_annotator(item)})

    with io.open(output_file, mode="w", encoding="utf8") as fout:
        json.dump(
            {
                "images": images,
                "categories": categories,
                "annotations": annotations,
                "info": {
                    "year": datetime.now().year,
                    "version": "1.0",
                    "description": "",
                    "contributor": "Label Studio",
                    "url": "",
                    "date_created": str(datetime.now()),
                },
            },
            fout,
            indent=2,
        )
737
+
738
def convert_to_yolo(
    self,
    input_data,
    output_dir,
    output_image_dir=None,
    output_label_dir=None,
    is_dir=True,
    split_labelers=False,
    is_obb=False,
):
    """Convert data in a specific format to the YOLO format.

    Parameters
    ----------
    input_data : str
        The input data, either a directory or a JSON file.
    output_dir : str
        The directory to store the output files in.
    output_image_dir : str, optional
        The directory to store the image files in. If not provided, it will default to a subdirectory called 'images' in output_dir.
    output_label_dir : str, optional
        The directory to store the label files in. If not provided, it will default to a subdirectory called 'labels' in output_dir.
    is_dir : bool, optional
        A boolean indicating whether `input_data` is a directory (True) or a JSON file (False).
    split_labelers : bool, optional
        A boolean indicating whether to create a dedicated subfolder for each labeler in the output label directory.
    is_obb : bool, optional
        A boolean indicating whether to convert to Oriented Bounding Box (OBB) format.

    Notes
    -----
    Besides one ``.txt`` label file per item, ``classes.txt`` and
    ``notes.json`` are written into ``output_dir``. Items without output get
    an empty label file. A region with several classes produces one label
    line per class.
    """
    if is_obb:
        self._check_format(Format.YOLO_OBB)
    else:
        self._check_format(Format.YOLO)
    ensure_dir(output_dir)
    notes_file = os.path.join(output_dir, "notes.json")
    class_file = os.path.join(output_dir, "classes.txt")
    if output_image_dir is not None:
        ensure_dir(output_image_dir)
    else:
        output_image_dir = os.path.join(output_dir, "images")
        os.makedirs(output_image_dir, exist_ok=True)
    if output_label_dir is not None:
        ensure_dir(output_label_dir)
    else:
        output_label_dir = os.path.join(output_dir, "labels")
        os.makedirs(output_label_dir, exist_ok=True)
    categories, category_name_to_id = self._get_labels()
    data_key = self._data_keys[0]
    item_iterator = (
        self.iter_from_dir(input_data)
        if is_dir
        else self.iter_from_json_file(input_data)
    )
    for item_idx, item in enumerate(item_iterator):
        # get image path(s) and label file path
        image_paths = item["input"][data_key]
        image_paths = [image_paths] if isinstance(image_paths, str) else image_paths
        # download image(s)
        image_path = None
        # TODO: for multi-page annotation, this code won't produce correct relationships between page and annotated shapes
        # fixing the issue in RND-84
        # Iterating in reverse leaves ``image_path`` pointing at the first
        # entry of ``image_paths`` once the loop is done.
        for image_path in reversed(image_paths):
            if not os.path.exists(image_path):
                try:
                    image_path = download(
                        image_path,
                        output_image_dir,
                        project_dir=self.project_dir,
                        return_relative_path=True,
                        upload_dir=self.upload_dir,
                        download_resources=self.download_resources,
                    )
                except Exception:
                    # Best effort: keep exporting, but never swallow
                    # KeyboardInterrupt / SystemExit (a bare except would).
                    logger.info(
                        "Unable to download {image_path}. The item {item} will be skipped".format(
                            image_path=image_path, item=item
                        ),
                        exc_info=True,
                    )
        if not image_path:
            logger.error(f"No image path found for item #{item_idx}")
            continue

        # create dedicated subfolder for each labeler if split_labelers=True
        labeler_subfolder = str(item["completed_by"]) if split_labelers else ""
        os.makedirs(
            os.path.join(output_label_dir, labeler_subfolder), exist_ok=True
        )

        # identify label file path
        filename = os.path.splitext(os.path.basename(image_path))[0]
        filename = filename[
            0 : 255 - 4
        ]  # urls might be too long, use 255 bytes (-4 for .txt) limit for filenames
        label_path = os.path.join(
            output_label_dir, labeler_subfolder, filename + ".txt"
        )

        # Skip tasks without annotations
        if not item["output"]:
            logger.warning("No completions found for item #" + str(item_idx))
            # leave an empty label file, without touching an existing one
            if not os.path.exists(label_path):
                with open(label_path, "x"):
                    pass
            continue

        # concatenate results over all tag names
        labels = []
        for key in item["output"]:
            labels += item["output"][key]

        if len(labels) == 0:
            logger.warning(f'Empty bboxes for {item["output"]}')
            if not os.path.exists(label_path):
                with open(label_path, "x"):
                    pass
            continue

        annotations = []
        for label in labels:
            category_names = []  # considering multi-label
            for key in ["rectanglelabels", "polygonlabels", "labels"]:
                if key in label and len(label[key]) > 0:
                    # change to save multi-label
                    category_names.extend(label[key])

            if len(category_names) == 0:
                logger.debug(
                    "Unknown label type or labels are empty: " + str(label)
                )
                continue

            for category_name in category_names:
                if category_name not in category_name_to_id:
                    category_id = len(categories)
                    category_name_to_id[category_name] = category_id
                    categories.append({"id": category_id, "name": category_name})
                category_id = category_name_to_id[category_name]

                if (
                    "rectanglelabels" in label
                    or "rectangle" in label
                    or "labels" in label
                ):
                    # yolo obb
                    if is_obb:
                        obb_annotation = convert_annotation_to_yolo_obb(label)
                        if obb_annotation is None:
                            continue

                        top_left, top_right, bottom_right, bottom_left = (
                            obb_annotation
                        )
                        x1, y1 = top_left
                        x2, y2 = top_right
                        x3, y3 = bottom_right
                        x4, y4 = bottom_left
                        annotations.append(
                            [category_id, x1, y1, x2, y2, x3, y3, x4, y4]
                        )

                    # simple yolo
                    else:
                        annotation = convert_annotation_to_yolo(label)
                        if annotation is None:
                            continue

                        x, y, w, h = annotation
                        annotations.append([category_id, x, y, w, h])

                elif "polygonlabels" in label or "polygon" in label:
                    points_abs = [(x / 100, y / 100) for x, y in label["points"]]
                    annotations.append(
                        [category_id]
                        + [coord for point in points_abs for coord in point]
                    )
                else:
                    raise ValueError(f"Unknown label type {label}")
        with open(label_path, "w") as f:
            # one space-separated line per annotation
            for annotation in annotations:
                f.write(" ".join(f"{value}" for value in annotation) + "\n")
    with open(class_file, "w", encoding="utf8") as f:
        for c in categories:
            f.write(c["name"] + "\n")
    with io.open(notes_file, mode="w", encoding="utf8") as fout:
        json.dump(
            {
                "categories": categories,
                "info": {
                    "year": datetime.now().year,
                    "version": "1.0",
                    "contributor": "Label Studio",
                },
            },
            fout,
            indent=2,
        )
946
+
947
@staticmethod
def rotated_rectangle(label):
    """Return the axis-aligned box ``(x, y, width, height)`` enclosing ``label``.

    ``label`` must provide ``x``, ``y``, ``width`` and ``height``; if any of
    them is missing ``None`` is returned. ``rotation`` (degrees) is optional
    and defaults to ``0.0``. Without rotation the four values are returned
    unchanged; with rotation the four corners of the rotated rectangle are
    computed and their bounding box is returned, in the same units as the
    input.

    A rotated rectangle with zero width is handled (the diagonal angle is
    computed with ``atan2``) instead of raising ``ZeroDivisionError``.
    """
    if not all(key in label for key in ("x", "y", "width", "height")):
        return None

    label_x = label["x"]
    label_y = label["y"]
    label_w = label["width"]
    label_h = label["height"]
    label_r = label["rotation"] if "rotation" in label else 0.0

    if abs(label_r) > 0:
        # Angle between the box diagonal and its top edge. atan2 equals
        # atan(h / w) for w > 0 and stays defined for w == 0.
        alpha = math.atan2(label_h, label_w)
        beta = math.pi * (
            label_r / 180
        )  # Label studio defines the angle towards the vertical axis

        # Half of the diagonal: distance from the centre to every corner.
        radius = math.sqrt((label_w / 2) ** 2 + (label_h / 2) ** 2)

        # Label studio saves the position of top left corner after rotation
        x_0 = (
            label_x
            - radius
            * (math.cos(math.pi - alpha - beta) - math.cos(math.pi - alpha))
            + label_w / 2
        )
        y_0 = (
            label_y
            + radius
            * (math.sin(math.pi - alpha - beta) - math.sin(math.pi - alpha))
            + label_h / 2
        )

        # Polar angles of the four corners around the centre (x_0, y_0).
        thetas = (
            alpha + beta,
            math.pi - alpha + beta,
            math.pi + alpha + beta,
            2 * math.pi - alpha + beta,
        )
        x_coord = [x_0 + radius * math.cos(theta) for theta in thetas]
        y_coord = [y_0 + radius * math.sin(theta) for theta in thetas]

        label_x = min(x_coord)
        label_y = min(y_coord)
        label_w = max(x_coord) - label_x
        label_h = max(y_coord) - label_y

    return label_x, label_y, label_w, label_h
1008
+
1009
def convert_to_voc(
    self, input_data, output_dir, output_image_dir=None, is_dir=True
):
    """Export items as Pascal VOC style XML files under ``<output_dir>/Annotations``.

    Parameters
    ----------
    input_data : str
        Directory of task files (``is_dir=True``) or a single JSON file.
    output_dir : str
        Root directory of the export.
    output_image_dir : str, optional
        Directory handed to ``download`` for images that do not exist
        locally. Defaults to ``<output_dir>/images``.
    is_dir : bool, optional
        Selects ``iter_from_dir`` (True) or ``iter_from_json_file`` (False).

    Notes
    -----
    One XML file is written per item, named after the image. Items without
    output, without regions, or whose first region lacks
    ``original_width`` / ``original_height`` are skipped. Only regions with
    a non-empty ``rectanglelabels`` or ``labels`` list are exported, using
    their first class. The image depth defaults to 3 channels when it cannot
    be read from a downloaded image.
    """
    ensure_dir(output_dir)
    if output_image_dir is not None:
        ensure_dir(output_image_dir)
        output_image_dir_rel = output_image_dir
    else:
        output_image_dir = os.path.join(output_dir, "images")
        os.makedirs(output_image_dir, exist_ok=True)
        output_image_dir_rel = "images"

    def create_child_node(doc, tag, attr, parent_node):
        # Appends <tag>attr</tag> to parent_node.
        child_node = doc.createElement(tag)
        text_node = doc.createTextNode(attr)
        child_node.appendChild(text_node)
        parent_node.appendChild(child_node)

    data_key = self._data_keys[0]
    item_iterator = (
        self.iter_from_dir(input_data)
        if is_dir
        else self.iter_from_json_file(input_data)
    )
    for item_idx, item in enumerate(item_iterator):
        image_path = item["input"][data_key]
        annotations_dir = os.path.join(output_dir, "Annotations")
        # Created lazily, so an export without items creates no directory.
        os.makedirs(annotations_dir, exist_ok=True)
        # Download image
        channels = 3
        if not os.path.exists(image_path):
            try:
                image_path = download(
                    image_path,
                    output_image_dir,
                    project_dir=self.project_dir,
                    upload_dir=self.upload_dir,
                    return_relative_path=True,
                    download_resources=self.download_resources,
                )
            except Exception:
                # Best effort: keep exporting, but never swallow
                # KeyboardInterrupt / SystemExit (a bare except would).
                logger.info(
                    "Unable to download {image_path}. The item {item} will be skipped".format(
                        image_path=image_path, item=item
                    ),
                    exc_info=True,
                )
            else:
                full_image_path = os.path.join(
                    output_image_dir, os.path.basename(image_path)
                )
                # retrieve number of channels from downloaded image
                try:
                    _, _, channels = get_image_size_and_channels(full_image_path)
                except Exception:
                    logger.warning("Can't read channels from image")

        # skip tasks without annotations
        if not item["output"]:
            logger.warning("No annotations found for item #" + str(item_idx))
            continue

        image_name = os.path.basename(image_path)
        xml_name = os.path.splitext(image_name)[0] + ".xml"

        # concatenate results over all tag names
        bboxes = []
        for key in item["output"]:
            bboxes += item["output"][key]

        if len(bboxes) == 0:
            logger.debug(f'Empty bboxes for {item["output"]}')
            continue

        # The image size is taken from the first region only.
        if "original_width" not in bboxes[0] or "original_height" not in bboxes[0]:
            logger.debug(
                f"original_width or original_height not found in {image_name}"
            )
            continue

        width, height = bboxes[0]["original_width"], bboxes[0]["original_height"]
        xml_filepath = os.path.join(annotations_dir, xml_name)

        my_dom = xml.dom.getDOMImplementation()
        doc = my_dom.createDocument(None, "annotation", None)
        root_node = doc.documentElement
        create_child_node(doc, "folder", output_image_dir_rel, root_node)
        create_child_node(doc, "filename", image_name, root_node)

        source_node = doc.createElement("source")
        create_child_node(doc, "database", "MyDatabase", source_node)
        create_child_node(doc, "annotation", "COCO2017", source_node)
        create_child_node(doc, "image", "flickr", source_node)
        create_child_node(doc, "flickrid", "NULL", source_node)
        create_child_node(doc, "annotator", get_annotator(item, ""), source_node)
        root_node.appendChild(source_node)

        owner_node = doc.createElement("owner")
        create_child_node(doc, "flickrid", "NULL", owner_node)
        create_child_node(doc, "name", "Label Studio", owner_node)
        root_node.appendChild(owner_node)
        size_node = doc.createElement("size")
        create_child_node(doc, "width", str(width), size_node)
        create_child_node(doc, "height", str(height), size_node)
        create_child_node(doc, "depth", str(channels), size_node)
        root_node.appendChild(size_node)
        create_child_node(doc, "segmented", "0", root_node)

        for bbox in bboxes:
            if "rectanglelabels" in bbox:
                key = "rectanglelabels"
            elif "labels" in bbox:
                key = "labels"
            else:
                key = None
            if key is None or len(bbox[key]) == 0:
                continue

            name = bbox[key][0]
            # percent -> integer pixels
            x = int(bbox["x"] / 100 * width)
            y = int(bbox["y"] / 100 * height)
            w = int(bbox["width"] / 100 * width)
            h = int(bbox["height"] / 100 * height)

            object_node = doc.createElement("object")
            create_child_node(doc, "name", name, object_node)
            create_child_node(doc, "pose", "Unspecified", object_node)
            create_child_node(doc, "truncated", "0", object_node)
            create_child_node(doc, "difficult", "0", object_node)
            bndbox_node = doc.createElement("bndbox")
            create_child_node(doc, "xmin", str(x), bndbox_node)
            create_child_node(doc, "ymin", str(y), bndbox_node)
            create_child_node(doc, "xmax", str(x + w), bndbox_node)
            create_child_node(doc, "ymax", str(y + h), bndbox_node)

            object_node.appendChild(bndbox_node)
            root_node.appendChild(object_node)

        with io.open(xml_filepath, mode="w", encoding="utf8") as fout:
            # NOTE(review): the indent was written as `"" * 4`, which is the
            # empty string; `" " * 4` was probably intended. Kept empty so
            # exported files stay byte-identical - confirm before changing.
            doc.writexml(fout, addindent="", newl="\n", encoding="utf-8")
1149
+
1150
+ def _get_labels(self):
1151
+ labels = set()
1152
+ categories = list()
1153
+ category_name_to_id = dict()
1154
+
1155
+ for name, info in self._schema.items():
1156
+ labels |= set(info["labels"])
1157
+ attrs = info["labels_attrs"]
1158
+ for label in attrs:
1159
+ if attrs[label].get("category"):
1160
+ categories.append(
1161
+ {"id": attrs[label].get("category"), "name": label}
1162
+ )
1163
+ category_name_to_id[label] = attrs[label].get("category")
1164
+ labels_to_add = set(labels) - set(list(category_name_to_id.keys()))
1165
+ labels_to_add = sorted(list(labels_to_add))
1166
+ idx = 0
1167
+ while idx in list(category_name_to_id.values()):
1168
+ idx += 1
1169
+ for label in labels_to_add:
1170
+ categories.append({"id": idx, "name": label})
1171
+ category_name_to_id[label] = idx
1172
+ idx += 1
1173
+ while idx in list(category_name_to_id.values()):
1174
+ idx += 1
1175
+ return categories, category_name_to_id