label-studio-sdk 0.0.32__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. label_studio_sdk/__init__.py +206 -6
  2. label_studio_sdk/_extensions/label_studio_tools/__init__.py +0 -0
  3. label_studio_sdk/_extensions/label_studio_tools/core/__init__.py +0 -0
  4. label_studio_sdk/_extensions/label_studio_tools/core/label_config.py +163 -0
  5. label_studio_sdk/_extensions/label_studio_tools/core/utils/__init__.py +0 -0
  6. label_studio_sdk/_extensions/label_studio_tools/core/utils/exceptions.py +2 -0
  7. label_studio_sdk/_extensions/label_studio_tools/core/utils/io.py +228 -0
  8. label_studio_sdk/_extensions/label_studio_tools/core/utils/params.py +45 -0
  9. label_studio_sdk/_extensions/label_studio_tools/etl/__init__.py +1 -0
  10. label_studio_sdk/_extensions/label_studio_tools/etl/beam.py +34 -0
  11. label_studio_sdk/_extensions/label_studio_tools/etl/example.py +17 -0
  12. label_studio_sdk/_extensions/label_studio_tools/etl/registry.py +67 -0
  13. label_studio_sdk/_extensions/label_studio_tools/postprocessing/__init__.py +0 -0
  14. label_studio_sdk/_extensions/label_studio_tools/postprocessing/video.py +97 -0
  15. label_studio_sdk/_legacy/__init__.py +11 -0
  16. label_studio_sdk/_legacy/client.py +471 -0
  17. label_studio_sdk/_legacy/exceptions.py +10 -0
  18. label_studio_sdk/_legacy/label_interface/__init__.py +1 -0
  19. label_studio_sdk/_legacy/label_interface/base.py +77 -0
  20. label_studio_sdk/_legacy/label_interface/control_tags.py +756 -0
  21. label_studio_sdk/_legacy/label_interface/data_examples.json +96 -0
  22. label_studio_sdk/_legacy/label_interface/interface.py +925 -0
  23. label_studio_sdk/_legacy/label_interface/label_tags.py +72 -0
  24. label_studio_sdk/_legacy/label_interface/object_tags.py +292 -0
  25. label_studio_sdk/_legacy/label_interface/region.py +43 -0
  26. label_studio_sdk/_legacy/objects.py +35 -0
  27. label_studio_sdk/{project.py → _legacy/project.py} +711 -258
  28. label_studio_sdk/_legacy/schema/label_config_schema.json +226 -0
  29. label_studio_sdk/{users.py → _legacy/users.py} +15 -13
  30. label_studio_sdk/{utils.py → _legacy/utils.py} +31 -30
  31. label_studio_sdk/{workspaces.py → _legacy/workspaces.py} +13 -11
  32. label_studio_sdk/actions/__init__.py +2 -0
  33. label_studio_sdk/actions/client.py +150 -0
  34. label_studio_sdk/annotations/__init__.py +2 -0
  35. label_studio_sdk/annotations/client.py +750 -0
  36. label_studio_sdk/client.py +164 -436
  37. label_studio_sdk/converter/__init__.py +7 -0
  38. label_studio_sdk/converter/audio.py +56 -0
  39. label_studio_sdk/converter/brush.py +452 -0
  40. label_studio_sdk/converter/converter.py +1175 -0
  41. label_studio_sdk/converter/exports/__init__.py +0 -0
  42. label_studio_sdk/converter/exports/csv.py +82 -0
  43. label_studio_sdk/converter/exports/csv2.py +103 -0
  44. label_studio_sdk/converter/funsd.py +85 -0
  45. label_studio_sdk/converter/imports/__init__.py +0 -0
  46. label_studio_sdk/converter/imports/coco.py +314 -0
  47. label_studio_sdk/converter/imports/colors.py +198 -0
  48. label_studio_sdk/converter/imports/label_config.py +45 -0
  49. label_studio_sdk/converter/imports/pathtrack.py +269 -0
  50. label_studio_sdk/converter/imports/yolo.py +236 -0
  51. label_studio_sdk/converter/main.py +202 -0
  52. label_studio_sdk/converter/utils.py +473 -0
  53. label_studio_sdk/core/__init__.py +33 -0
  54. label_studio_sdk/core/api_error.py +15 -0
  55. label_studio_sdk/core/client_wrapper.py +55 -0
  56. label_studio_sdk/core/datetime_utils.py +28 -0
  57. label_studio_sdk/core/file.py +38 -0
  58. label_studio_sdk/core/http_client.py +443 -0
  59. label_studio_sdk/core/jsonable_encoder.py +99 -0
  60. label_studio_sdk/core/pagination.py +87 -0
  61. label_studio_sdk/core/pydantic_utilities.py +28 -0
  62. label_studio_sdk/core/query_encoder.py +33 -0
  63. label_studio_sdk/core/remove_none_from_dict.py +11 -0
  64. label_studio_sdk/core/request_options.py +32 -0
  65. label_studio_sdk/data_manager.py +32 -23
  66. label_studio_sdk/environment.py +7 -0
  67. label_studio_sdk/errors/__init__.py +6 -0
  68. label_studio_sdk/errors/bad_request_error.py +8 -0
  69. label_studio_sdk/errors/internal_server_error.py +8 -0
  70. label_studio_sdk/export_storage/__init__.py +28 -0
  71. label_studio_sdk/export_storage/azure/__init__.py +5 -0
  72. label_studio_sdk/export_storage/azure/client.py +722 -0
  73. label_studio_sdk/export_storage/azure/types/__init__.py +6 -0
  74. label_studio_sdk/export_storage/azure/types/azure_create_response.py +52 -0
  75. label_studio_sdk/export_storage/azure/types/azure_update_response.py +52 -0
  76. label_studio_sdk/export_storage/client.py +107 -0
  77. label_studio_sdk/export_storage/gcs/__init__.py +5 -0
  78. label_studio_sdk/export_storage/gcs/client.py +722 -0
  79. label_studio_sdk/export_storage/gcs/types/__init__.py +6 -0
  80. label_studio_sdk/export_storage/gcs/types/gcs_create_response.py +52 -0
  81. label_studio_sdk/export_storage/gcs/types/gcs_update_response.py +52 -0
  82. label_studio_sdk/export_storage/local/__init__.py +5 -0
  83. label_studio_sdk/export_storage/local/client.py +688 -0
  84. label_studio_sdk/export_storage/local/types/__init__.py +6 -0
  85. label_studio_sdk/export_storage/local/types/local_create_response.py +47 -0
  86. label_studio_sdk/export_storage/local/types/local_update_response.py +47 -0
  87. label_studio_sdk/export_storage/redis/__init__.py +5 -0
  88. label_studio_sdk/export_storage/redis/client.py +714 -0
  89. label_studio_sdk/export_storage/redis/types/__init__.py +6 -0
  90. label_studio_sdk/export_storage/redis/types/redis_create_response.py +57 -0
  91. label_studio_sdk/export_storage/redis/types/redis_update_response.py +57 -0
  92. label_studio_sdk/export_storage/s3/__init__.py +5 -0
  93. label_studio_sdk/export_storage/s3/client.py +820 -0
  94. label_studio_sdk/export_storage/s3/types/__init__.py +6 -0
  95. label_studio_sdk/export_storage/s3/types/s3create_response.py +74 -0
  96. label_studio_sdk/export_storage/s3/types/s3update_response.py +74 -0
  97. label_studio_sdk/export_storage/types/__init__.py +5 -0
  98. label_studio_sdk/export_storage/types/export_storage_list_types_response_item.py +30 -0
  99. label_studio_sdk/files/__init__.py +2 -0
  100. label_studio_sdk/files/client.py +556 -0
  101. label_studio_sdk/import_storage/__init__.py +28 -0
  102. label_studio_sdk/import_storage/azure/__init__.py +5 -0
  103. label_studio_sdk/import_storage/azure/client.py +812 -0
  104. label_studio_sdk/import_storage/azure/types/__init__.py +6 -0
  105. label_studio_sdk/import_storage/azure/types/azure_create_response.py +72 -0
  106. label_studio_sdk/import_storage/azure/types/azure_update_response.py +72 -0
  107. label_studio_sdk/import_storage/client.py +107 -0
  108. label_studio_sdk/import_storage/gcs/__init__.py +5 -0
  109. label_studio_sdk/import_storage/gcs/client.py +812 -0
  110. label_studio_sdk/import_storage/gcs/types/__init__.py +6 -0
  111. label_studio_sdk/import_storage/gcs/types/gcs_create_response.py +72 -0
  112. label_studio_sdk/import_storage/gcs/types/gcs_update_response.py +72 -0
  113. label_studio_sdk/import_storage/local/__init__.py +5 -0
  114. label_studio_sdk/import_storage/local/client.py +690 -0
  115. label_studio_sdk/import_storage/local/types/__init__.py +6 -0
  116. label_studio_sdk/import_storage/local/types/local_create_response.py +47 -0
  117. label_studio_sdk/import_storage/local/types/local_update_response.py +47 -0
  118. label_studio_sdk/import_storage/redis/__init__.py +5 -0
  119. label_studio_sdk/import_storage/redis/client.py +768 -0
  120. label_studio_sdk/import_storage/redis/types/__init__.py +6 -0
  121. label_studio_sdk/import_storage/redis/types/redis_create_response.py +62 -0
  122. label_studio_sdk/import_storage/redis/types/redis_update_response.py +62 -0
  123. label_studio_sdk/import_storage/s3/__init__.py +5 -0
  124. label_studio_sdk/import_storage/s3/client.py +912 -0
  125. label_studio_sdk/import_storage/s3/types/__init__.py +6 -0
  126. label_studio_sdk/import_storage/s3/types/s3create_response.py +99 -0
  127. label_studio_sdk/import_storage/s3/types/s3update_response.py +99 -0
  128. label_studio_sdk/import_storage/types/__init__.py +5 -0
  129. label_studio_sdk/import_storage/types/import_storage_list_types_response_item.py +30 -0
  130. label_studio_sdk/ml/__init__.py +19 -0
  131. label_studio_sdk/ml/client.py +981 -0
  132. label_studio_sdk/ml/types/__init__.py +17 -0
  133. label_studio_sdk/ml/types/ml_create_request_auth_method.py +5 -0
  134. label_studio_sdk/ml/types/ml_create_response.py +78 -0
  135. label_studio_sdk/ml/types/ml_create_response_auth_method.py +5 -0
  136. label_studio_sdk/ml/types/ml_update_request_auth_method.py +5 -0
  137. label_studio_sdk/ml/types/ml_update_response.py +78 -0
  138. label_studio_sdk/ml/types/ml_update_response_auth_method.py +5 -0
  139. label_studio_sdk/predictions/__init__.py +2 -0
  140. label_studio_sdk/predictions/client.py +638 -0
  141. label_studio_sdk/projects/__init__.py +6 -0
  142. label_studio_sdk/projects/client.py +1053 -0
  143. label_studio_sdk/projects/exports/__init__.py +2 -0
  144. label_studio_sdk/projects/exports/client.py +930 -0
  145. label_studio_sdk/projects/types/__init__.py +7 -0
  146. label_studio_sdk/projects/types/projects_create_response.py +96 -0
  147. label_studio_sdk/projects/types/projects_import_tasks_response.py +71 -0
  148. label_studio_sdk/projects/types/projects_list_response.py +33 -0
  149. label_studio_sdk/py.typed +0 -0
  150. label_studio_sdk/tasks/__init__.py +5 -0
  151. label_studio_sdk/tasks/client.py +811 -0
  152. label_studio_sdk/tasks/types/__init__.py +6 -0
  153. label_studio_sdk/tasks/types/tasks_list_request_fields.py +5 -0
  154. label_studio_sdk/tasks/types/tasks_list_response.py +48 -0
  155. label_studio_sdk/types/__init__.py +115 -0
  156. label_studio_sdk/types/annotation.py +116 -0
  157. label_studio_sdk/types/annotation_filter_options.py +42 -0
  158. label_studio_sdk/types/annotation_last_action.py +19 -0
  159. label_studio_sdk/types/azure_blob_export_storage.py +112 -0
  160. label_studio_sdk/types/azure_blob_export_storage_status.py +7 -0
  161. label_studio_sdk/types/azure_blob_import_storage.py +113 -0
  162. label_studio_sdk/types/azure_blob_import_storage_status.py +7 -0
  163. label_studio_sdk/types/base_task.py +113 -0
  164. label_studio_sdk/types/base_user.py +42 -0
  165. label_studio_sdk/types/converted_format.py +36 -0
  166. label_studio_sdk/types/converted_format_status.py +5 -0
  167. label_studio_sdk/types/export.py +48 -0
  168. label_studio_sdk/types/export_convert.py +32 -0
  169. label_studio_sdk/types/export_create.py +54 -0
  170. label_studio_sdk/types/export_create_status.py +5 -0
  171. label_studio_sdk/types/export_status.py +5 -0
  172. label_studio_sdk/types/file_upload.py +30 -0
  173. label_studio_sdk/types/filter.py +53 -0
  174. label_studio_sdk/types/filter_group.py +35 -0
  175. label_studio_sdk/types/gcs_export_storage.py +112 -0
  176. label_studio_sdk/types/gcs_export_storage_status.py +7 -0
  177. label_studio_sdk/types/gcs_import_storage.py +113 -0
  178. label_studio_sdk/types/gcs_import_storage_status.py +7 -0
  179. label_studio_sdk/types/local_files_export_storage.py +97 -0
  180. label_studio_sdk/types/local_files_export_storage_status.py +7 -0
  181. label_studio_sdk/types/local_files_import_storage.py +92 -0
  182. label_studio_sdk/types/local_files_import_storage_status.py +7 -0
  183. label_studio_sdk/types/ml_backend.py +89 -0
  184. label_studio_sdk/types/ml_backend_auth_method.py +5 -0
  185. label_studio_sdk/types/ml_backend_state.py +5 -0
  186. label_studio_sdk/types/prediction.py +78 -0
  187. label_studio_sdk/types/project.py +198 -0
  188. label_studio_sdk/types/project_import.py +63 -0
  189. label_studio_sdk/types/project_import_status.py +5 -0
  190. label_studio_sdk/types/project_label_config.py +32 -0
  191. label_studio_sdk/types/project_sampling.py +7 -0
  192. label_studio_sdk/types/project_skip_queue.py +5 -0
  193. label_studio_sdk/types/redis_export_storage.py +117 -0
  194. label_studio_sdk/types/redis_export_storage_status.py +7 -0
  195. label_studio_sdk/types/redis_import_storage.py +112 -0
  196. label_studio_sdk/types/redis_import_storage_status.py +7 -0
  197. label_studio_sdk/types/s3export_storage.py +134 -0
  198. label_studio_sdk/types/s3export_storage_status.py +7 -0
  199. label_studio_sdk/types/s3import_storage.py +140 -0
  200. label_studio_sdk/types/s3import_storage_status.py +7 -0
  201. label_studio_sdk/types/serialization_option.py +36 -0
  202. label_studio_sdk/types/serialization_options.py +45 -0
  203. label_studio_sdk/types/task.py +157 -0
  204. label_studio_sdk/types/task_filter_options.py +49 -0
  205. label_studio_sdk/types/user_simple.py +37 -0
  206. label_studio_sdk/types/view.py +55 -0
  207. label_studio_sdk/types/webhook.py +67 -0
  208. label_studio_sdk/types/webhook_actions_item.py +21 -0
  209. label_studio_sdk/types/webhook_serializer_for_update.py +67 -0
  210. label_studio_sdk/types/webhook_serializer_for_update_actions_item.py +21 -0
  211. label_studio_sdk/users/__init__.py +5 -0
  212. label_studio_sdk/users/client.py +830 -0
  213. label_studio_sdk/users/types/__init__.py +6 -0
  214. label_studio_sdk/users/types/users_get_token_response.py +36 -0
  215. label_studio_sdk/users/types/users_reset_token_response.py +36 -0
  216. label_studio_sdk/version.py +4 -0
  217. label_studio_sdk/views/__init__.py +31 -0
  218. label_studio_sdk/views/client.py +564 -0
  219. label_studio_sdk/views/types/__init__.py +29 -0
  220. label_studio_sdk/views/types/views_create_request_data.py +43 -0
  221. label_studio_sdk/views/types/views_create_request_data_filters.py +43 -0
  222. label_studio_sdk/views/types/views_create_request_data_filters_conjunction.py +5 -0
  223. label_studio_sdk/views/types/views_create_request_data_filters_items_item.py +47 -0
  224. label_studio_sdk/views/types/views_create_request_data_ordering_item.py +38 -0
  225. label_studio_sdk/views/types/views_create_request_data_ordering_item_direction.py +5 -0
  226. label_studio_sdk/views/types/views_update_request_data.py +43 -0
  227. label_studio_sdk/views/types/views_update_request_data_filters.py +43 -0
  228. label_studio_sdk/views/types/views_update_request_data_filters_conjunction.py +5 -0
  229. label_studio_sdk/views/types/views_update_request_data_filters_items_item.py +47 -0
  230. label_studio_sdk/views/types/views_update_request_data_ordering_item.py +38 -0
  231. label_studio_sdk/views/types/views_update_request_data_ordering_item_direction.py +5 -0
  232. label_studio_sdk/webhooks/__init__.py +5 -0
  233. label_studio_sdk/webhooks/client.py +636 -0
  234. label_studio_sdk/webhooks/types/__init__.py +5 -0
  235. label_studio_sdk/webhooks/types/webhooks_update_request_actions_item.py +21 -0
  236. label_studio_sdk-1.0.0.dist-info/METADATA +307 -0
  237. label_studio_sdk-1.0.0.dist-info/RECORD +239 -0
  238. {label_studio_sdk-0.0.32.dist-info → label_studio_sdk-1.0.0.dist-info}/WHEEL +1 -2
  239. docs/__init__.py +0 -3
  240. label_studio_sdk-0.0.32.dist-info/LICENSE +0 -201
  241. label_studio_sdk-0.0.32.dist-info/METADATA +0 -22
  242. label_studio_sdk-0.0.32.dist-info/RECORD +0 -15
  243. label_studio_sdk-0.0.32.dist-info/top_level.txt +0 -3
  244. tests/test_client.py +0 -26
  245. {tests → label_studio_sdk/_extensions}/__init__.py +0 -0
File without changes
@@ -0,0 +1,82 @@
1
+ # this csv converter is not used in GUI export, see convert_to_csv function
2
+ import pandas as pd
3
+ import os
4
+ import json
5
+
6
+ from copy import deepcopy
7
+
8
+
9
class ExportToCSV(object):
    """Flatten Label Studio tasks into per-result records, a DataFrame or a CSV file.

    Each produced record combines the task id, the annotation id, the
    annotator, the task ``data`` fields and one annotation result.
    """

    def __init__(self, tasks):
        """Store the tasks to convert.

        :param tasks: either a path (string ending with ``.json``) to a file
            with tasks, or the already loaded tasks object
        :raises FileNotFoundError: when a ``.json`` path is given but the
            file does not exist
        """
        if isinstance(tasks, str) and tasks.endswith(".json"):
            if not os.path.exists(tasks):
                # FileNotFoundError is still an Exception, so existing
                # ``except Exception`` handlers keep working.
                raise FileNotFoundError(f"Task file not found {tasks}")
            # input is a file; read it as UTF-8 regardless of the platform
            # default encoding
            with open(tasks, encoding="utf-8") as f:
                self.tasks = json.load(f)
        else:
            # input is a JSON object
            self.tasks = tasks

    def _get_result_name(self, result):
        """Return the ``from_name`` of a result, or None when it is absent."""
        return result.get("from_name")

    def _minify_result(self, result):
        """Collapse a result to ``{from_name: payload}`` when its value has one key.

        With a single-key value the payload is unwrapped: an empty container
        becomes None, a one-element sequence becomes that element, anything
        else is kept as is. Values with several keys are returned unchanged.
        """
        value = result["value"]
        name = self._get_result_name(result)
        if len(value) != 1:
            return value

        item = next(iter(value.values()))
        try:
            size = len(item)
        except TypeError:
            # scalar payloads (numbers, booleans, None) have no length
            return {name: item}

        if size == 0:
            return {name: None}
        if size == 1 and not isinstance(item, dict):
            # a one-key dict cannot be unwrapped by position
            return {name: item[0]}
        return {name: item}

    def _get_annotation_results(self, annotation, minify, flat_regions):
        """Yield the dicts that are merged into the records of one annotation.

        When ``flat_regions`` is false, a dict holding the complete result
        list under ``"result"`` is yielded first. Then one dict per result
        follows: minified if ``minify`` is true, otherwise the raw result
        keyed by its ``from_name``.
        """
        results = annotation["result"]
        if not flat_regions:
            yield {"result": results}

        for result in results:
            if minify:
                yield self._minify_result(result)
            else:
                yield {self._get_result_name(result): result}

    def _get_annotator_id(self, annotation):
        """Return the annotator taken from ``completed_by``.

        An integer is returned as is; for a dict the ``email`` is preferred
        over the ``id``. Any other type yields None.
        """
        annotator = annotation.get("completed_by", {})
        if isinstance(annotator, int):
            return annotator
        if isinstance(annotator, dict):
            return annotator.get("email") or annotator.get("id")
        return None

    def to_records(self, minify=True, flat_regions=True):
        """Return a list of flat dict records, one per yielded annotation result.

        :param minify: unwrap single-key result values (see ``_minify_result``)
        :param flat_regions: when false, an extra record with the whole
            result list is emitted per annotation
        """
        records = []
        for task in self.tasks:
            annotations = task.get("annotations")
            if annotations is None:
                # Temp legacy fix
                annotations = task["completions"]
            for annotation in annotations:
                record = {
                    "id": task["id"],
                    "annotation_id": annotation.get("id"),
                    "annotator": self._get_annotator_id(annotation),
                }
                record.update(task["data"])
                for result in self._get_annotation_results(
                    annotation, minify, flat_regions
                ):
                    # copy, so that records never share nested task data
                    rec = deepcopy(record)
                    rec.update(result)
                    records.append(rec)
        return records

    def to_dataframe(self, minify=True, flat_regions=True):
        """Return the records of ``to_records`` as a pandas DataFrame."""
        return pd.DataFrame.from_records(self.to_records(minify, flat_regions))

    def to_file(self, file, minify=True, flat_regions=True, **kwargs):
        """Write the records as CSV to ``file``; ``kwargs`` go to ``DataFrame.to_csv``."""
        return self.to_dataframe(minify, flat_regions).to_csv(file, **kwargs)
@@ -0,0 +1,103 @@
1
+ import os
2
+ import csv
3
+ import time
4
+ import logging
5
+ import ujson as json
6
+
7
+ from copy import deepcopy, copy
8
+
9
+ from label_studio_sdk.converter.utils import ensure_dir, get_annotator, prettify_result
10
+
11
+
12
+ logger = logging.getLogger(__name__)
13
+ logger.setLevel("DEBUG")
14
+
15
+
16
def convert(item_iterator, input_data, output_dir, **kwargs):
    """Write the items produced by ``item_iterator(input_data)`` to a CSV file.

    The data is iterated twice: the first pass collects the column names
    (the CSV header has to be known before any row is written), the second
    pass writes the rows.

    :param item_iterator: callable returning an iterable of items when
        called with ``input_data``; it is called twice
    :param input_data: argument passed to ``item_iterator``
    :param output_dir: target ``.csv`` file, or a directory in which
        ``result.csv`` is created
    :param kwargs: ``sep`` sets the column delimiter (defaults to ``","``)
    """
    start_time = time.time()
    logger.debug("Convert CSV started")
    if str(output_dir).endswith(".csv"):
        output_file = output_dir
    else:
        ensure_dir(output_dir)
        output_file = os.path.join(output_dir, "result.csv")

    # these keys are always present
    keys = {"annotator", "annotation_id", "created_at", "updated_at", "lead_time"}

    # make 2 passes: the first pass is to get keys, otherwise we can't write csv without headers
    logger.debug("Prepare column names for CSV ...")
    for item in item_iterator(input_data):
        keys.update(prepare_annotation_keys(item))

    # the second pass is to write records to csv
    logger.debug(
        f"Prepare done in {time.time()-start_time:0.2f} sec. Write CSV rows now ..."
    )
    # newline="" lets the csv module control line endings itself; without it
    # platforms that translate "\n" produce blank lines between rows
    with open(output_file, "w", encoding="utf8", newline="") as outfile:
        writer = csv.DictWriter(
            outfile,
            fieldnames=sorted(keys),
            quoting=csv.QUOTE_NONNUMERIC,
            delimiter=kwargs.get("sep", ","),
        )
        writer.writeheader()

        for item in item_iterator(input_data):
            writer.writerow(prepare_annotation(item))

    logger.debug(f"CSV conversion finished in {time.time()-start_time:0.2f} sec")
52
+
53
+
54
def prepare_annotation(item):
    """Build one flat CSV row (a dict) from a converter item.

    Output values go through ``prettify_result`` and are JSON-encoded unless
    the prettified value is already a string. Input values that are dicts or
    lists are JSON-encoded, all other input values are copied unchanged.
    """
    row = {}
    if item.get("id") is not None:
        row["id"] = item["id"]

    for column, raw in item["output"].items():
        pretty = prettify_result(raw)
        if not isinstance(pretty, str):
            pretty = json.dumps(pretty, ensure_ascii=False)
        row[column] = pretty

    for column, raw in item["input"].items():
        # flat dicts and arrays from task.data to json strings
        if isinstance(raw, (dict, list)):
            row[column] = json.dumps(raw, ensure_ascii=False)
        else:
            row[column] = raw

    row["annotator"] = get_annotator(item)
    for field in ("annotation_id", "created_at", "updated_at", "lead_time"):
        row[field] = item[field]

    if "agreement" in item:
        row["agreement"] = item["agreement"]

    # history is only exported when it is present and non-empty
    if item.get("history"):
        row["history"] = json.dumps(item["history"], ensure_ascii=False)

    return row
87
+
88
+
89
def prepare_annotation_keys(item):
    """Return the set of item-specific column names found in ``item``.

    The set holds every key of ``item["input"]`` and ``item["output"]``,
    plus ``"id"`` when the item has a non-None id, ``"agreement"`` when that
    key exists and ``"history"`` when the history is present and non-empty.
    """
    # iterating a dict yields its keys, so no copy of the values is needed
    columns = set(item["input"])
    if item.get("id") is not None:
        columns.add("id")

    columns.update(item["output"])

    if "agreement" in item:
        columns.add("agreement")

    if item.get("history"):
        columns.add("history")

    return columns
@@ -0,0 +1,85 @@
1
+ """This code allows to export Label Studio Export JSON to FUNSD format.
2
+ It is only a basic converter: it treats every bbox as a separate word.
3
+ Check this github issue for more details:
4
+ https://github.com/heartexlabs/label-studio/issues/2634#issuecomment-1251648670
5
+
6
+ Usage: funsd.py export.json
7
+ This command will export your LS OCR annotations to "./funsd/" directory.
8
+ """
9
+
10
+ import os
11
+ import json
12
+ from collections import defaultdict
13
+
14
+
15
def convert_annotation_to_fund(result):
    """Convert the result list of one Label Studio annotation to a FUNSD form.

    Results sharing the same ``id`` are merged into one entity: the first
    label, the first text and the bounding box are combined. Every entity
    is emitted with a single word that repeats its box and text.

    :param result: list of Label Studio result items; each needs ``id``,
        ``original_width``, ``original_height`` and a ``value`` with
        percent-based ``x``, ``y``, ``width`` and ``height``
    :return: ``{"form": [...]}`` with entities numbered from 1 in the order
        their ids first appear
    """
    # collect all LS results and combine labels, text, coordinates into one record
    regions = defaultdict(dict)
    for item in result:
        region = regions[item["id"]]
        value = item.get("value", {})

        labels = value.get("labels")
        if labels:
            region["label"] = labels[0]

        text = value.get("text")
        if text:
            region["text"] = text[0]

        if "box" not in region:
            # LS stores coordinates in percent of the image size
            w, h = item["original_width"], item["original_height"]
            x1 = value["x"] / 100.0 * w
            y1 = value["y"] / 100.0 * h
            x2 = x1 + value["width"] / 100.0 * w
            y2 = y1 + value["height"] / 100.0 * h
            # FUNSD boxes are [left, top, right, bottom]
            region["box"] = [x1, y1, x2, y2]

    # make FUNSD output
    form = []
    for counter, region in enumerate(regions.values(), start=1):
        # a region may lack a transcription or a label; export it with an
        # empty string instead of failing with KeyError
        text = region.get("text", "")
        form.append(
            {
                "id": counter,
                "box": region["box"],
                "text": text,
                "label": region.get("label", ""),
                "words": [{"box": region["box"], "text": text}],
                "linking": [],
            }
        )

    return {"form": form}
55
+
56
+
57
def ls_to_funsd_converter(
    ls_export_path="export.json", funsd_dir="funsd", data_key="ocr"
):
    """Write one FUNSD JSON file per annotation of a Label Studio export.

    :param ls_export_path: path to the Label Studio JSON export
    :param funsd_dir: output directory, created when missing
    :param data_key: key in ``task["data"]`` holding the source file
        reference; its basename becomes part of the output file name
    """
    # read as UTF-8 regardless of the platform default encoding
    with open(ls_export_path, encoding="utf-8") as export_file:
        tasks = json.load(export_file)

    os.makedirs(funsd_dir, exist_ok=True)

    for task in tasks:
        for annotation in task["annotations"]:
            output = convert_annotation_to_fund(annotation["result"])
            source_name = os.path.basename(task["data"][data_key])
            out_path = os.path.join(
                funsd_dir,
                f'task-{task["id"]}-annotation-{annotation["id"]}-'
                f"{source_name}.json",
            )

            with open(out_path, "w", encoding="utf-8") as out:
                json.dump(output, out)
77
+
78
+
79
if __name__ == "__main__":
    import sys

    print("Usage:", sys.argv[0], "export.json")
    print('This command will export your LS OCR annotations to "./funsd/" directory')

    # Without the export path there is nothing to convert: stop after the
    # usage text instead of failing with IndexError on sys.argv[1].
    if len(sys.argv) < 2:
        sys.exit(1)

    ls_to_funsd_converter(sys.argv[1])
File without changes
@@ -0,0 +1,314 @@
1
+ import os
2
+ import json # better to use "imports ujson as json" for the best performance
3
+ import uuid
4
+ import logging
5
+ from PIL import Image
6
+
7
+ from label_studio_sdk.converter.utils import ExpandFullPath
8
+ from label_studio_sdk.converter.imports.label_config import generate_label_config
9
+
10
+ logger = logging.getLogger("root")
11
+
12
+
13
def new_task(out_type, root_url, file_name):
    """Return an empty Label Studio task for one image.

    :param out_type: key for the result container, 'annotations' or 'predictions'
    :param root_url: root URL path where the images are hosted
    :param file_name: image file name, joined to ``root_url`` with ``/``
    :return: task dict with the image URL and one empty result list
    """
    # The image reference is a URL, so it must always be joined with forward
    # slashes; os.path.join would insert backslashes on Windows.
    import posixpath

    return {
        "data": {"image": posixpath.join(root_url, file_name)},
        # 'annotations' or 'predictions'
        out_type: [
            {
                "result": [],
                "ground_truth": False,
            }
        ],
    }
24
+
25
+
26
def create_bbox(annotation, categories, from_name, image_height, image_width, to_name):
    """Build a ``rectanglelabels`` result from the ``bbox`` of a COCO annotation.

    The four ``bbox`` numbers are read as x, y, width, height in pixels and
    converted to percentages of the image size.
    """
    label = categories[int(annotation["category_id"])]
    raw_x, raw_y, raw_w, raw_h = annotation["bbox"]
    left, top, box_w, box_h = map(float, (raw_x, raw_y, raw_w, raw_h))

    # absolute pixels -> percent of the image dimensions
    value = {
        "x": left / image_width * 100.0,
        "y": top / image_height * 100.0,
        "width": box_w / image_width * 100.0,
        "height": box_h / image_height * 100.0,
        "rotation": 0,
        "rectanglelabels": [label],
    }
    return {
        "id": uuid.uuid4().hex[:10],
        "type": "rectanglelabels",
        "value": value,
        "to_name": to_name,
        "from_name": from_name,
        "image_rotation": 0,
        "original_width": image_width,
        "original_height": image_height,
    }
48
+
49
+
50
def create_segmentation(
    annotation, categories, from_name, image_height, image_width, to_name
):
    """Build a ``polygonlabels`` result from a COCO polygon segmentation.

    Only the first polygon in ``annotation["segmentation"]`` is converted.
    Its flat coordinate list is read as consecutive (x, y) pixel pairs and
    converted to percentages of the image size; a trailing unpaired number
    is dropped.
    """
    label = categories[int(annotation["category_id"])]
    flat = annotation["segmentation"][0]
    # even positions are x, odd positions are y
    points = [
        [x / image_width * 100.0, y / image_height * 100.0]
        for x, y in zip(flat[0::2], flat[1::2])
    ]

    return {
        "id": uuid.uuid4().hex[0:10],
        "type": "polygonlabels",
        "value": {"points": points, "polygonlabels": [label]},
        "to_name": to_name,
        "from_name": from_name,
        "image_rotation": 0,
        "original_width": image_width,
        "original_height": image_height,
    }
72
+
73
+
74
def create_keypoints(
    annotation, categories, from_name, to_name, image_height, image_width, point_width
):
    """Build one ``keypointlabels`` result per keypoint of a COCO annotation.

    ``annotation["keypoints"]`` is consumed in groups of three numbers
    (x, y, visibility). Every keypoint gets the category label of the
    annotation; keypoints whose visibility is below 2 are marked hidden.
    """
    label = categories[int(annotation["category_id"])]
    flat = annotation["keypoints"]
    regions = []

    for start in range(0, len(flat), 3):
        raw_x, raw_y, raw_v = flat[start : start + 3]  # x, y, visibility
        px, py, visibility = float(raw_x), float(raw_y), int(raw_v)

        region = {
            "id": uuid.uuid4().hex[:10],
            "type": "keypointlabels",
            "value": {
                "x": px / image_width * 100.0,
                "y": py / image_height * 100.0,
                "width": point_width,
                "keypointlabels": [label],
            },
            "to_name": to_name,
            "from_name": from_name,
            "image_rotation": 0,
            "original_width": image_width,
            "original_height": image_height,
        }

        # visibility
        if visibility < 2:
            region["value"]["hidden"] = True

        regions.append(region)

    return regions
106
+
107
+
108
def convert_coco_to_ls(
    input_file,
    out_file,
    to_name="image",
    from_name="label",
    out_type="annotations",
    image_root_url="/data/local-files/?d=",
    use_super_categories=False,
    point_width=1.0,
):
    """Convert COCO labeling to Label Studio JSON

    One task is created per COCO image; bounding boxes, polygon
    segmentations and keypoints of its annotations become results of that
    task. A labeling config is generated next to ``out_file``. RLE
    segmentations are reported as unsupported and skipped.

    :param input_file: file with COCO json
    :param out_file: output file with Label Studio JSON tasks
    :param to_name: object name from Label Studio labeling config
    :param from_name: control tag name from Label Studio labeling config
    :param out_type: annotation type - "annotations" or "predictions"
    :param image_root_url: root URL path where images will be hosted, e.g.: http://example.com/images
    :param use_super_categories: use super categories from categories if they are presented
    :param point_width: key point width
    """

    tasks = {}  # image_id => task
    logger.info("Reading COCO notes and categories from %s", input_file)

    with open(input_file, encoding="utf8") as f:
        coco = json.load(f)

    # list to dict conversion: [...] => {category_id: category_item}
    raw_categories = {int(category["id"]): category for category in coco["categories"]}

    # mapping: id => category name, sorted by the original ids
    categories = {}
    for category_id in sorted(raw_categories):
        category = raw_categories[category_id]
        name = category["name"]
        if use_super_categories and "supercategory" in category:
            name = category["supercategory"] + ":" + name
        categories[category_id] = name

    # mapping: image id => image
    images = {item["id"]: item for item in coco["images"]}

    logger.info(
        "Found %d categories, %d images and %d annotations",
        len(categories),
        len(images),
        len(coco["annotations"]),
    )

    # flags for labeling config composing
    segmentation = bbox = keypoints = rle = False
    segmentation_once = bbox_once = keypoints_once = rle_once = False
    rectangles_from_name, keypoints_from_name = (
        from_name + "_rectangles",
        from_name + "_keypoints",
    )
    # NOTE: unlike the two names above there is no underscore here; kept
    # as is because the name ends up in generated configs and results
    segmentation_from_name = from_name + "polygons"
    tags = {}

    # create tasks
    for image in coco["images"]:
        tasks[image["id"]] = new_task(out_type, image_root_url, image["file_name"])

    for annotation in coco["annotations"]:
        segmentation |= "segmentation" in annotation
        bbox |= "bbox" in annotation
        keypoints |= "keypoints" in annotation
        rle |= (
            annotation.get("iscrowd") == 1
        )  # 0 - polygons are in segmentation, otherwise rle

        # each kind is reported / registered only the first time it is seen
        if rle and not rle_once:  # not supported
            logger.error("RLE in segmentation is not yet supported in COCO")
            rle_once = True
        if keypoints and not keypoints_once:
            logger.warning("Keypoints are partially supported without skeletons")
            tags.update({keypoints_from_name: "KeyPointLabels"})
            keypoints_once = True
        if segmentation and not segmentation_once:  # not supported
            logger.warning("Segmentation in COCO is experimental")
            tags.update({segmentation_from_name: "PolygonLabels"})
            segmentation_once = True
        if bbox and not bbox_once:
            tags.update({rectangles_from_name: "RectangleLabels"})
            bbox_once = True

        # read image sizes
        image_id = annotation["image_id"]
        image = images[image_id]
        image_width, image_height = image["width"], image["height"]

        results = tasks[image_id][out_type][0]["result"]

        if "bbox" in annotation:
            results.append(
                create_bbox(
                    annotation,
                    categories,
                    rectangles_from_name,
                    image_height,
                    image_width,
                    to_name,
                )
            )

        # Polygons are a non-empty list of flat coordinate lists. RLE masks
        # are stored as a dict and cannot be converted, so they are skipped
        # instead of crashing in create_segmentation.
        polygons = annotation.get("segmentation")
        if isinstance(polygons, list) and polygons:
            results.append(
                create_segmentation(
                    annotation,
                    categories,
                    segmentation_from_name,
                    image_height,
                    image_width,
                    to_name,
                )
            )

        if "keypoints" in annotation:
            results += create_keypoints(
                annotation,
                categories,
                keypoints_from_name,
                to_name,
                image_height,
                image_width,
                point_width,
            )

    # generate and save labeling config; only a trailing ".json" is removed
    # so that other parts of the path stay untouched
    config_base = out_file[: -len(".json")] if out_file.endswith(".json") else out_file
    label_config_file = config_base + ".label_config.xml"
    generate_label_config(categories, tags, to_name, from_name, label_config_file)

    if tasks:
        tasks = [tasks[key] for key in sorted(tasks.keys())]
        logger.info("Saving Label Studio JSON to %s", out_file)
        with open(out_file, "w", encoding="utf8") as out:
            json.dump(tasks, out)

        print(
            "\n"
            f" 1. Create a new project in Label Studio\n"
            f' 2. Use Labeling Config from "{label_config_file}"\n'
            f" 3. Setup serving for images [e.g. you can use Local Storage (or others):\n"
            f" https://labelstud.io/guide/storage.html#Local-storage]\n"
            f' 4. Import "{out_file}" to the project\n'
        )
    else:
        logger.error("No labels converted")
263
+
264
+
265
def add_parser(subparsers):
    """Register the ``coco`` sub-command and its options on ``subparsers``."""
    coco = subparsers.add_parser("coco")

    # (flags, keyword arguments) for every option, in registration order
    option_specs = (
        (
            ("-i", "--input"),
            {
                "dest": "input",
                "required": True,
                "help": "directory with COCO where images, labels, notes.json are located",
                "action": ExpandFullPath,
            },
        ),
        (
            ("-o", "--output"),
            {
                "dest": "output",
                "help": "output file with Label Studio JSON tasks",
                "default": "output.json",
                "action": ExpandFullPath,
            },
        ),
        (
            ("--to-name",),
            {
                "dest": "to_name",
                "help": "object name from Label Studio labeling config",
                "default": "image",
            },
        ),
        (
            ("--from-name",),
            {
                "dest": "from_name",
                "help": "control tag name from Label Studio labeling config",
                "default": "label",
            },
        ),
        (
            ("--out-type",),
            {
                "dest": "out_type",
                "help": 'annotation type - "annotations" or "predictions"',
                "default": "annotations",
            },
        ),
        (
            ("--image-root-url",),
            {
                "dest": "image_root_url",
                "help": "root URL path where images will be hosted, e.g.: http://example.com/images",
                "default": "/data/local-files/?d=",
            },
        ),
        (
            ("--point-width",),
            {
                "dest": "point_width",
                "help": "key point width (size)",
                "default": 1.0,
                "type": float,
            },
        ),
    )

    for flags, options in option_specs:
        coco.add_argument(*flags, **options)