label-studio-sdk 0.0.30__py3-none-any.whl → 0.0.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of label-studio-sdk might be problematic; see the registry's security advisory for this release for more details.

Files changed (38)
  1. label_studio_sdk/__init__.py +4 -1
  2. label_studio_sdk/client.py +104 -85
  3. label_studio_sdk/data_manager.py +32 -23
  4. label_studio_sdk/exceptions.py +10 -0
  5. label_studio_sdk/label_interface/__init__.py +1 -0
  6. label_studio_sdk/label_interface/base.py +77 -0
  7. label_studio_sdk/label_interface/control_tags.py +756 -0
  8. label_studio_sdk/label_interface/interface.py +922 -0
  9. label_studio_sdk/label_interface/label_tags.py +72 -0
  10. label_studio_sdk/label_interface/object_tags.py +292 -0
  11. label_studio_sdk/label_interface/region.py +43 -0
  12. label_studio_sdk/objects.py +35 -0
  13. label_studio_sdk/project.py +725 -262
  14. label_studio_sdk/schema/label_config_schema.json +226 -0
  15. label_studio_sdk/users.py +15 -13
  16. label_studio_sdk/utils.py +31 -30
  17. label_studio_sdk/workspaces.py +13 -11
  18. {label_studio_sdk-0.0.30.dist-info → label_studio_sdk-0.0.34.dist-info}/METADATA +7 -5
  19. label_studio_sdk-0.0.34.dist-info/RECORD +37 -0
  20. {label_studio_sdk-0.0.30.dist-info → label_studio_sdk-0.0.34.dist-info}/WHEEL +1 -1
  21. {label_studio_sdk-0.0.30.dist-info → label_studio_sdk-0.0.34.dist-info}/top_level.txt +0 -1
  22. tests/test_client.py +21 -10
  23. tests/test_export.py +105 -0
  24. tests/test_interface/__init__.py +1 -0
  25. tests/test_interface/configs.py +137 -0
  26. tests/test_interface/mockups.py +22 -0
  27. tests/test_interface/test_compat.py +64 -0
  28. tests/test_interface/test_control_tags.py +55 -0
  29. tests/test_interface/test_data_generation.py +45 -0
  30. tests/test_interface/test_lpi.py +15 -0
  31. tests/test_interface/test_main.py +196 -0
  32. tests/test_interface/test_object_tags.py +36 -0
  33. tests/test_interface/test_region.py +36 -0
  34. tests/test_interface/test_validate_summary.py +35 -0
  35. tests/test_interface/test_validation.py +59 -0
  36. docs/__init__.py +0 -3
  37. label_studio_sdk-0.0.30.dist-info/RECORD +0 -15
  38. {label_studio_sdk-0.0.30.dist-info → label_studio_sdk-0.0.34.dist-info}/LICENSE +0 -0
@@ -1,22 +1,23 @@
1
1
  """ .. include::../docs/project.md
2
2
  """
3
- import os
3
+
4
4
  import json
5
5
  import logging
6
+ import os
6
7
  import pathlib
7
8
  import time
8
-
9
9
  from enum import Enum, auto
10
- from random import sample, shuffle
11
- from requests.exceptions import HTTPError, InvalidSchema, MissingSchema
12
- from requests import Response
13
10
  from pathlib import Path
11
+ from random import sample, shuffle
14
12
  from typing import Optional, Union, List, Dict, Callable
15
- from .client import Client
16
- from .utils import parse_config, chunk
17
13
 
18
- from label_studio_tools.core.utils.io import get_local_path
19
14
  from label_studio_tools.core.label_config import parse_config
15
+ from label_studio_tools.core.utils.io import get_local_path
16
+ from requests import Response
17
+ from requests.exceptions import HTTPError, InvalidSchema, MissingSchema
18
+
19
+ from .client import Client
20
+ from .utils import parse_config, chunk
20
21
 
21
22
  logger = logging.getLogger(__name__)
22
23
 
@@ -32,28 +33,28 @@ class LabelStudioAttributeError(LabelStudioException):
32
33
  class ProjectSampling(Enum):
33
34
  """Enumerate the available task sampling modes for labeling."""
34
35
 
35
- RANDOM = 'Uniform sampling'
36
+ RANDOM = "Uniform sampling"
36
37
  """ Uniform random sampling of tasks """
37
- SEQUENCE = 'Sequential sampling'
38
+ SEQUENCE = "Sequential sampling"
38
39
  """ Sequential sampling of tasks using task IDs """
39
- UNCERTAINTY = 'Uncertainty sampling'
40
+ UNCERTAINTY = "Uncertainty sampling"
40
41
  """ Sample tasks based on prediction scores, such as for active learning (Enterprise only)"""
41
42
 
42
43
 
43
44
  class ProjectStorage(Enum):
44
45
  """Enumerate the available types of external source and target storage for labeling projects."""
45
46
 
46
- GOOGLE = 'gcs'
47
+ GOOGLE = "gcs"
47
48
  """ Google Cloud Storage """
48
- S3 = 's3'
49
+ S3 = "s3"
49
50
  """ Amazon S3 Storage """
50
- AZURE = 'azure_blob'
51
+ AZURE = "azure_blob"
51
52
  """ Microsoft Azure Blob Storage """
52
- LOCAL = 'localfiles'
53
+ LOCAL = "localfiles"
53
54
  """ Label Studio Local File Storage """
54
- REDIS = 'redis'
55
+ REDIS = "redis"
55
56
  """ Redis Storage """
56
- S3_SECURED = 's3s'
57
+ S3_SECURED = "s3s"
57
58
  """ Amazon S3 Storage secured by IAM roles (Enterprise only) """
58
59
 
59
60
 
@@ -62,13 +63,13 @@ class AssignmentSamplingMethod(Enum):
62
63
 
63
64
 
64
65
  class ExportSnapshotStatus:
65
- CREATED = 'created'
66
+ CREATED = "created"
66
67
  """ Export snapshot is created """
67
- IN_PROGRESS = 'in_progress'
68
+ IN_PROGRESS = "in_progress"
68
69
  """ Export snapshot is in progress """
69
- FAILED = 'failed'
70
+ FAILED = "failed"
70
71
  """ Export snapshot failed with errors """
71
- COMPLETED = 'completed'
72
+ COMPLETED = "completed"
72
73
  """ Export snapshot was created and can be downloaded """
73
74
 
74
75
  def __init__(self, response):
@@ -77,30 +78,30 @@ class ExportSnapshotStatus:
77
78
  def is_created(self):
78
79
  """Export snapshot is created"""
79
80
  assert (
80
- 'status' in self.response
81
+ "status" in self.response
81
82
  ), '"status" field not found in export snapshot status response'
82
- return self.response['status'] == self.CREATED
83
+ return self.response["status"] == self.CREATED
83
84
 
84
85
  def is_in_progress(self):
85
86
  """Export snapshot is in progress"""
86
87
  assert (
87
- 'status' in self.response
88
+ "status" in self.response
88
89
  ), '"status" field not found in export_snapshot_status response'
89
- return self.response['status'] == self.IN_PROGRESS
90
+ return self.response["status"] == self.IN_PROGRESS
90
91
 
91
92
  def is_failed(self):
92
93
  """Export snapshot failed with errors"""
93
94
  assert (
94
- 'status' in self.response
95
+ "status" in self.response
95
96
  ), '"status" field not found in export_snapshot_status response'
96
- return self.response['status'] == self.FAILED
97
+ return self.response["status"] == self.FAILED
97
98
 
98
99
  def is_completed(self):
99
100
  """Export snapshot was created and can be downloaded"""
100
101
  assert (
101
- 'status' in self.response
102
+ "status" in self.response
102
103
  ), '"status" field not found in export_snapshot_status response'
103
- return self.response['status'] == self.COMPLETED
104
+ return self.response["status"] == self.COMPLETED
104
105
 
105
106
 
106
107
  class Project(Client):
@@ -161,10 +162,10 @@ class Project(Client):
161
162
  "Use get_users() instead."
162
163
  )
163
164
 
164
- response = self.make_request('GET', f'/api/projects/{self.id}/members')
165
+ response = self.make_request("GET", f"/api/projects/{self.id}/members")
165
166
  users = []
166
167
  for user_data in response.json():
167
- user_data['client'] = self
168
+ user_data["client"] = self
168
169
  users.append(User(**user_data))
169
170
  return users
170
171
 
@@ -181,9 +182,9 @@ class Project(Client):
181
182
  Dict with created member
182
183
 
183
184
  """
184
- payload = {'user': user.id}
185
+ payload = {"user": user.id}
185
186
  response = self.make_request(
186
- 'POST', f'/api/projects/{self.id}/members', json=payload
187
+ "POST", f"/api/projects/{self.id}/members", json=payload
187
188
  )
188
189
  return response.json()
189
190
 
@@ -201,20 +202,20 @@ class Project(Client):
201
202
  Dict with counter of created assignments
202
203
 
203
204
  """
204
- final_response = {'assignments': 0}
205
+ final_response = {"assignments": 0}
205
206
  users_ids = [user.id for user in users]
206
207
  # Assign tasks to users with batches
207
208
  for c in chunk(tasks_ids, 1000):
208
209
  logger.debug(f"Starting assignment for: {users_ids}")
209
210
  payload = {
210
- 'users': users_ids,
211
- 'selectedItems': {'all': False, 'included': c},
212
- 'type': 'AN',
211
+ "users": users_ids,
212
+ "selectedItems": {"all": False, "included": c},
213
+ "type": "AN",
213
214
  }
214
215
  response = self.make_request(
215
- 'POST', f'/api/projects/{self.id}/tasks/assignees', json=payload
216
+ "POST", f"/api/projects/{self.id}/tasks/assignees", json=payload
216
217
  )
217
- final_response['assignments'] += response.json()['assignments']
218
+ final_response["assignments"] += response.json()["assignments"]
218
219
  return final_response
219
220
 
220
221
  def delete_annotators_assignment(self, tasks_ids):
@@ -230,10 +231,10 @@ class Project(Client):
230
231
  Dict with counter of deleted annotator assignments
231
232
 
232
233
  """
233
- payload = {'selectedItems': {'all': False, 'included': tasks_ids}}
234
+ payload = {"selectedItems": {"all": False, "included": tasks_ids}}
234
235
  response = self.make_request(
235
- 'POST',
236
- f'/api/dm/actions?id=delete_annotators&project={self.id}',
236
+ "POST",
237
+ f"/api/dm/actions?id=delete_annotators&project={self.id}",
237
238
  json=payload,
238
239
  )
239
240
  return response.json()
@@ -251,10 +252,10 @@ class Project(Client):
251
252
  Dict with counter of deleted reviewer assignments
252
253
 
253
254
  """
254
- payload = {'selectedItems': {'all': False, 'included': tasks_ids}}
255
+ payload = {"selectedItems": {"all": False, "included": tasks_ids}}
255
256
  response = self.make_request(
256
- 'POST',
257
- f'/api/dm/actions?id=delete_reviewers&project={self.id}',
257
+ "POST",
258
+ f"/api/dm/actions?id=delete_reviewers&project={self.id}",
258
259
  json=payload,
259
260
  )
260
261
  return response.json()
@@ -274,12 +275,12 @@ class Project(Client):
274
275
 
275
276
  """
276
277
  payload = {
277
- 'users': [user.id for user in users],
278
- 'selectedItems': {'all': False, 'included': tasks_ids},
279
- 'type': 'RE',
278
+ "users": [user.id for user in users],
279
+ "selectedItems": {"all": False, "included": tasks_ids},
280
+ "type": "RE",
280
281
  }
281
282
  response = self.make_request(
282
- 'POST', f'/api/projects/{self.id}/tasks/assignees', json=payload
283
+ "POST", f"/api/projects/{self.id}/tasks/assignees", json=payload
283
284
  )
284
285
  return response.json()
285
286
 
@@ -354,7 +355,7 @@ class Project(Client):
354
355
  Retrieve and display predictions when loading a task
355
356
 
356
357
  """
357
- response = self.make_request('GET', f'/api/projects/{self.id}')
358
+ response = self.make_request("GET", f"/api/projects/{self.id}")
358
359
  return response.json()
359
360
 
360
361
  def get_model_versions(self):
@@ -366,7 +367,7 @@ class Project(Client):
366
367
  Model versions
367
368
 
368
369
  """
369
- response = self.make_request('GET', f'/api/projects/{self.id}/model-versions')
370
+ response = self.make_request("GET", f"/api/projects/{self.id}/model-versions")
370
371
  return response.json()
371
372
 
372
373
  def update_params(self):
@@ -434,11 +435,11 @@ class Project(Client):
434
435
  Raises LabelStudioException in case of errors.
435
436
 
436
437
  """
437
- response = self.make_request('POST', '/api/projects', json=kwargs)
438
+ response = self.make_request("POST", "/api/projects", json=kwargs)
438
439
  if response.status_code == 201:
439
440
  self.params = response.json()
440
441
  else:
441
- raise LabelStudioException('Project not created')
442
+ raise LabelStudioException("Project not created")
442
443
 
443
444
  @classmethod
444
445
  def _create_from_id(cls, client, project_id, params=None):
@@ -453,7 +454,7 @@ class Project(Client):
453
454
  if params and isinstance(params, dict):
454
455
  # TODO: validate project parameters
455
456
  project.params = params
456
- project.params['id'] = project_id
457
+ project.params["id"] = project_id
457
458
  return project
458
459
 
459
460
  @classmethod
@@ -494,26 +495,28 @@ class Project(Client):
494
495
  Imported task IDs
495
496
 
496
497
  """
497
- params = {'return_task_ids': '1'}
498
+ params = {"return_task_ids": "1"}
498
499
  if preannotated_from_fields:
499
- params['preannotated_from_fields'] = ','.join(preannotated_from_fields)
500
+ params["preannotated_from_fields"] = ",".join(preannotated_from_fields)
500
501
  if isinstance(tasks, (list, dict)):
501
502
  response = self.make_request(
502
- method='POST',
503
- url=f'/api/projects/{self.id}/import',
503
+ method="POST",
504
+ url=f"/api/projects/{self.id}/import",
504
505
  json=tasks,
505
506
  params=params,
507
+ timeout=(10, 600),
506
508
  )
507
509
  elif isinstance(tasks, (str, Path)):
508
510
  # try import from file
509
511
  if not os.path.isfile(tasks):
510
- raise LabelStudioException(f'Not found import tasks file {tasks}')
511
- with open(tasks, mode='rb') as f:
512
+ raise LabelStudioException(f"Not found import tasks file {tasks}")
513
+ with open(tasks, mode="rb") as f:
512
514
  response = self.make_request(
513
- method='POST',
514
- url=f'/api/projects/{self.id}/import',
515
- files={'file': f},
515
+ method="POST",
516
+ url=f"/api/projects/{self.id}/import",
517
+ files={"file": f},
516
518
  params=params,
519
+ timeout=(10, 600),
517
520
  )
518
521
  else:
519
522
  raise TypeError(
@@ -521,7 +524,7 @@ class Project(Client):
521
524
  )
522
525
  response = response.json()
523
526
 
524
- if 'import' in response:
527
+ if "import" in response:
525
528
  # check import status
526
529
  timeout = 300
527
530
  fibonacci_backoff = [1, 1]
@@ -530,18 +533,18 @@ class Project(Client):
530
533
 
531
534
  while True:
532
535
  import_status = self.make_request(
533
- method='GET',
536
+ method="GET",
534
537
  url=f'/api/projects/{self.id}/imports/{response["import"]}',
535
538
  ).json()
536
539
 
537
- if import_status['status'] == 'completed':
538
- return import_status['task_ids']
540
+ if import_status["status"] == "completed":
541
+ return import_status["task_ids"]
539
542
 
540
- if import_status['status'] == 'failed':
541
- raise LabelStudioException(import_status['error'])
543
+ if import_status["status"] == "failed":
544
+ raise LabelStudioException(import_status["error"])
542
545
 
543
546
  if time.time() - start_time >= timeout:
544
- raise LabelStudioException('Import timeout')
547
+ raise LabelStudioException("Import timeout")
545
548
 
546
549
  time.sleep(fibonacci_backoff[0])
547
550
  fibonacci_backoff = [
@@ -549,11 +552,11 @@ class Project(Client):
549
552
  fibonacci_backoff[0] + fibonacci_backoff[1],
550
553
  ]
551
554
 
552
- return response['task_ids']
555
+ return response["task_ids"]
553
556
 
554
557
  def export_tasks(
555
558
  self,
556
- export_type: str = 'JSON',
559
+ export_type: str = "JSON",
557
560
  download_all_tasks: bool = False,
558
561
  download_resources: bool = False,
559
562
  ids: Optional[List[int]] = None,
@@ -590,19 +593,19 @@ class Project(Client):
590
593
 
591
594
  """
592
595
  params = {
593
- 'exportType': export_type,
594
- 'download_all_tasks': download_all_tasks,
595
- 'download_resources': download_resources,
596
+ "exportType": export_type,
597
+ "download_all_tasks": download_all_tasks,
598
+ "download_resources": download_resources,
596
599
  }
597
600
  if ids:
598
- params['ids'] = ids
601
+ params["ids"] = ids
599
602
  response = self.make_request(
600
- method='GET', url=f'/api/projects/{self.id}/export', params=params
603
+ method="GET", url=f"/api/projects/{self.id}/export", params=params
601
604
  )
602
605
  if export_location is None:
603
- if 'JSON' not in export_type.upper():
606
+ if "JSON" not in export_type.upper():
604
607
  raise ValueError(
605
- f'{export_type} export type requires an export location to be specified'
608
+ f"{export_type} export type requires an export location to be specified"
606
609
  )
607
610
  return response.json()
608
611
 
@@ -621,7 +624,7 @@ class Project(Client):
621
624
 
622
625
  def set_params(self, **kwargs):
623
626
  """Low level function to set project parameters."""
624
- response = self.make_request('PATCH', f'/api/projects/{self.id}', json=kwargs)
627
+ response = self.make_request("PATCH", f"/api/projects/{self.id}", json=kwargs)
625
628
  assert response.status_code == 200
626
629
 
627
630
  def set_sampling(self, sampling: ProjectSampling):
@@ -702,7 +705,8 @@ class Project(Client):
702
705
 
703
706
  page = 1
704
707
  result = []
705
- while True:
708
+ data = {}
709
+ while not data.get("end_pagination"):
706
710
  try:
707
711
  data = self.get_paginated_tasks(
708
712
  filters=filters,
@@ -713,11 +717,10 @@ class Project(Client):
713
717
  page=page,
714
718
  page_size=100,
715
719
  )
716
- result += data['tasks']
720
+ result += data["tasks"]
717
721
  page += 1
718
- # we'll get 404 from API on empty page
719
722
  except LabelStudioException as e:
720
- logger.debug(f'End of pagination: {e}')
723
+ logger.debug(f"Error during pagination: {e}")
721
724
  break
722
725
  return result
723
726
 
@@ -728,7 +731,7 @@ class Project(Client):
728
731
  view_id=None,
729
732
  selected_ids=None,
730
733
  page: int = 1,
731
- page_size: int = -1,
734
+ page_size: int = 100,
732
735
  only_ids: bool = False,
733
736
  resolve_uri: bool = True,
734
737
  ):
@@ -766,7 +769,7 @@ class Project(Client):
766
769
  page: int
767
770
  Page. Default is 1.
768
771
  page_size: int
769
- Page size. Default is -1, to retrieve all tasks in the project.
772
+ Page size. Default is 100, to retrieve all tasks in the project you can use get_tasks().
770
773
  only_ids: bool
771
774
  If true, return only task IDs
772
775
  resolve_uri: bool
@@ -796,46 +799,56 @@ class Project(Client):
796
799
 
797
800
  """
798
801
  query = {
799
- 'filters': filters,
800
- 'ordering': ordering or [],
801
- 'selectedItems': {'all': False, 'included': selected_ids}
802
- if selected_ids
803
- else {'all': True, "excluded": []},
802
+ "filters": filters,
803
+ "ordering": ordering or [],
804
+ "selectedItems": (
805
+ {"all": False, "included": selected_ids}
806
+ if selected_ids
807
+ else {"all": True, "excluded": []}
808
+ ),
804
809
  }
805
810
  params = {
806
- 'project': self.id,
807
- 'page': page,
808
- 'page_size': page_size,
809
- 'view': view_id,
810
- 'query': json.dumps(query),
811
- 'fields': 'all',
812
- 'resolve_uri': resolve_uri,
811
+ "project": self.id,
812
+ "page": page,
813
+ "page_size": page_size,
814
+ "view": view_id,
815
+ "query": json.dumps(query),
816
+ "fields": "all",
817
+ "resolve_uri": resolve_uri,
813
818
  }
814
819
  if only_ids:
815
- params['include'] = 'id'
820
+ params["include"] = "id"
816
821
 
817
- try:
818
- response = self.make_request('GET', '/api/tasks', params)
819
- except HTTPError as e:
820
- raise LabelStudioException(f'Error loading tasks: {e}')
822
+ response = self.make_request(
823
+ "GET", "/api/tasks", params, raise_exceptions=False
824
+ )
825
+ # we'll get 404 from API on empty page
826
+ if response.status_code == 404:
827
+ return {"tasks": [], "end_pagination": True}
828
+ elif response.status_code != 200:
829
+ self.log_response_error(response)
830
+ try:
831
+ response.raise_for_status()
832
+ except HTTPError as e:
833
+ raise LabelStudioException(f"Error loading tasks: {e}")
821
834
 
822
835
  data = response.json()
823
- tasks = data['tasks']
836
+ tasks = data["tasks"]
824
837
  if only_ids:
825
- data['tasks'] = [task['id'] for task in tasks]
838
+ data["tasks"] = [task["id"] for task in tasks]
826
839
 
827
840
  return data
828
841
 
829
842
  def get_tasks_ids(self, *args, **kwargs):
830
843
  """Same as `label_studio_sdk.project.Project.get_tasks()` but returns only task IDs."""
831
- kwargs['only_ids'] = True
844
+ kwargs["only_ids"] = True
832
845
  return self.get_tasks(*args, **kwargs)
833
846
 
834
847
  def get_paginated_tasks_ids(self, *args, **kwargs):
835
848
  """Same as `label_studio_sdk.project.Project.get_paginated_tasks()` but returns
836
849
  only task IDs.
837
850
  """
838
- kwargs['only_ids'] = True
851
+ kwargs["only_ids"] = True
839
852
  return self.get_paginated_tasks(*args, **kwargs)
840
853
 
841
854
  def get_views(self):
@@ -856,10 +869,10 @@ class Project(Client):
856
869
  data: dict
857
870
  Filters, orderings and other visual settings
858
871
  """
859
- response = self.make_request('GET', f'/api/dm/views?project={self.id}')
872
+ response = self.make_request("GET", f"/api/dm/views?project={self.id}")
860
873
  return response.json()
861
874
 
862
- def create_view(self, filters, ordering=None, title='Tasks'):
875
+ def create_view(self, filters, ordering=None, title="Tasks"):
863
876
  """Create view
864
877
 
865
878
  Parameters
@@ -880,12 +893,29 @@ class Project(Client):
880
893
 
881
894
  """
882
895
  data = {
883
- 'project': self.id,
884
- 'data': {'title': title, 'ordering': ordering, 'filters': filters},
896
+ "project": self.id,
897
+ "data": {"title": title, "ordering": ordering, "filters": filters},
885
898
  }
886
- response = self.make_request('POST', '/api/dm/views', json=data)
899
+ response = self.make_request("POST", "/api/dm/views", json=data)
887
900
  return response.json()
888
901
 
902
+ def delete_view(self, view_id):
903
+ """Delete view
904
+
905
+ Parameters
906
+ ----------
907
+ view_id: int
908
+ View ID
909
+
910
+ Returns
911
+ -------
912
+ dict:
913
+ dict with deleted view
914
+
915
+ """
916
+ response = self.make_request("DELETE", f"/api/dm/views/{view_id}")
917
+ return
918
+
889
919
  @property
890
920
  def tasks(self):
891
921
  """Retrieve all tasks from the project. This call can be very slow if the project has a lot of tasks."""
@@ -912,13 +942,13 @@ class Project(Client):
912
942
  """
913
943
  return self.get_tasks(
914
944
  filters={
915
- 'conjunction': 'and',
916
- 'items': [
945
+ "conjunction": "and",
946
+ "items": [
917
947
  {
918
- 'filter': 'filter:tasks:completed_at',
919
- 'operator': 'empty',
920
- 'value': False,
921
- 'type': 'Datetime',
948
+ "filter": "filter:tasks:completed_at",
949
+ "operator": "empty",
950
+ "value": False,
951
+ "type": "Datetime",
922
952
  }
923
953
  ],
924
954
  },
@@ -953,13 +983,13 @@ class Project(Client):
953
983
  """
954
984
  return self.get_tasks(
955
985
  filters={
956
- 'conjunction': 'and',
957
- 'items': [
986
+ "conjunction": "and",
987
+ "items": [
958
988
  {
959
- 'filter': 'filter:tasks:completed_at',
960
- 'operator': 'empty',
961
- 'value': True,
962
- 'type': 'Datetime',
989
+ "filter": "filter:tasks:completed_at",
990
+ "operator": "empty",
991
+ "value": True,
992
+ "type": "Datetime",
963
993
  }
964
994
  ],
965
995
  },
@@ -1019,7 +1049,7 @@ class Project(Client):
1019
1049
  Uploaded file used as data source for this task
1020
1050
  ```
1021
1051
  """
1022
- response = self.make_request('GET', f'/api/tasks/{task_id}')
1052
+ response = self.make_request("GET", f"/api/tasks/{task_id}")
1023
1053
  return response.json()
1024
1054
 
1025
1055
  def update_task(self, task_id, **kwargs):
@@ -1038,7 +1068,7 @@ class Project(Client):
1038
1068
  Dict with updated task
1039
1069
 
1040
1070
  """
1041
- response = self.make_request('PATCH', f'/api/tasks/{task_id}', json=kwargs)
1071
+ response = self.make_request("PATCH", f"/api/tasks/{task_id}", json=kwargs)
1042
1072
  response.raise_for_status()
1043
1073
  return response.json()
1044
1074
 
@@ -1092,11 +1122,13 @@ class Project(Client):
1092
1122
  model_version: str
1093
1123
  Any string identifying your model
1094
1124
  """
1095
- data = {'task': task_id, 'result': result, 'score': score}
1125
+ data = {"task": task_id, "result": result, "score": score}
1096
1126
  if model_version is not None:
1097
- data['model_version'] = model_version
1098
- response = self.make_request('POST', '/api/predictions', json=data)
1099
- return response.json()
1127
+ data["model_version"] = model_version
1128
+ response = self.make_request("POST", "/api/predictions", json=data)
1129
+ json = response.json()
1130
+ logger.debug(f"Response: {json}")
1131
+ return json
1100
1132
 
1101
1133
  def create_predictions(self, predictions):
1102
1134
  """Bulk create predictions for tasks. See <a href="https://labelstud.io/guide/predictions.html">more
@@ -1109,7 +1141,7 @@ class Project(Client):
1109
1141
  Label Studio JSON format as for annotations</a>.
1110
1142
  """
1111
1143
  response = self.make_request(
1112
- 'POST', f'/api/projects/{self.id}/import/predictions', json=predictions
1144
+ "POST", f"/api/projects/{self.id}/import/predictions", json=predictions
1113
1145
  )
1114
1146
  return response.json()
1115
1147
 
@@ -1128,20 +1160,37 @@ class Project(Client):
1128
1160
 
1129
1161
  """
1130
1162
  payload = {
1131
- 'filters': {'conjunction': 'and', 'items': []},
1132
- 'model_version': model_versions,
1133
- 'ordering': [],
1134
- 'project': self.id,
1135
- 'selectedItems': {'all': True, 'excluded': []},
1163
+ "filters": {"conjunction": "and", "items": []},
1164
+ "model_version": model_versions,
1165
+ "ordering": [],
1166
+ "project": self.id,
1167
+ "selectedItems": {"all": True, "excluded": []},
1136
1168
  }
1137
1169
  response = self.make_request(
1138
- 'POST',
1139
- '/api/dm/actions',
1140
- params={'id': 'predictions_to_annotations', 'project': self.id},
1170
+ "POST",
1171
+ "/api/dm/actions",
1172
+ params={"id": "predictions_to_annotations", "project": self.id},
1141
1173
  json=payload,
1142
1174
  )
1143
1175
  return response.json()
1144
1176
 
1177
+ def list_annotations(self, task_id: int) -> List:
1178
+ """List all annotations for a task.
1179
+
1180
+ Parameters
1181
+ ----------
1182
+ task_id: int
1183
+ Task ID
1184
+
1185
+ Returns
1186
+ -------
1187
+ list of dict:
1188
+ List of annotations objects
1189
+ """
1190
+ response = self.make_request("GET", f"/api/tasks/{task_id}/annotations")
1191
+ response.raise_for_status()
1192
+ return response.json()
1193
+
1145
1194
  def create_annotation(self, task_id: int, **kwargs) -> Dict:
1146
1195
  """Add annotations to a task like an annotator does.
1147
1196
 
@@ -1160,11 +1209,28 @@ class Project(Client):
1160
1209
 
1161
1210
  """
1162
1211
  response = self.make_request(
1163
- 'POST', f'/api/tasks/{task_id}/annotations/', json=kwargs
1212
+ "POST", f"/api/tasks/{task_id}/annotations/", json=kwargs
1164
1213
  )
1165
1214
  response.raise_for_status()
1166
1215
  return response.json()
1167
1216
 
1217
+ def get_annotation(self, annotation_id: int) -> dict:
1218
+ """Retrieve a specific annotation for a task using the annotation ID.
1219
+
1220
+ Parameters
1221
+ ----------
1222
+ annotation_id: int
1223
+ A unique integer value identifying this annotation.
1224
+
1225
+ Returns
1226
+ ----------
1227
+ dict
1228
+ Retreived annotation object
1229
+ """
1230
+ response = self.make_request("GET", f"/api/annotations/{annotation_id}")
1231
+ response.raise_for_status()
1232
+ return response.json()
1233
+
1168
1234
  def update_annotation(self, annotation_id, **kwargs):
1169
1235
  """Update specific annotation with new annotation parameters, e.g.
1170
1236
  ```
@@ -1185,11 +1251,29 @@ class Project(Client):
1185
1251
 
1186
1252
  """
1187
1253
  response = self.make_request(
1188
- 'PATCH', f'/api/annotations/{annotation_id}', json=kwargs
1254
+ "PATCH", f"/api/annotations/{annotation_id}", json=kwargs
1189
1255
  )
1190
1256
  response.raise_for_status()
1191
1257
  return response.json()
1192
1258
 
1259
+ def delete_annotation(self, annotation_id: int) -> int:
1260
+ """Delete an annotation using the annotation ID. This action can't be undone!
1261
+
1262
+ Parameters
1263
+ ----------
1264
+ annotation_id: int
1265
+ A unique integer value identifying this annotation.
1266
+
1267
+ Returns
1268
+ ----------
1269
+ int
1270
+ Status code for operation
1271
+
1272
+ """
1273
+ response = self.make_request("DELETE", f"/api/annotations/{annotation_id}")
1274
+ response.raise_for_status()
1275
+ return response.status_code
1276
+
1193
1277
  def get_predictions_coverage(self):
1194
1278
  """Prediction coverage stats for all model versions for the project.
1195
1279
 
@@ -1208,7 +1292,7 @@ class Project(Client):
1208
1292
  """
1209
1293
  model_versions = self.get_model_versions()
1210
1294
  params = self.get_params()
1211
- tasks_number = params['task_number']
1295
+ tasks_number = params["task_number"]
1212
1296
  coverage = {
1213
1297
  model_version: count / tasks_number
1214
1298
  for model_version, count in model_versions.items()
@@ -1230,8 +1314,8 @@ class Project(Client):
1230
1314
  google_application_credentials: Optional[str] = None,
1231
1315
  presign: Optional[bool] = True,
1232
1316
  presign_ttl: Optional[int] = 1,
1233
- title: Optional[str] = '',
1234
- description: Optional[str] = '',
1317
+ title: Optional[str] = "",
1318
+ description: Optional[str] = "",
1235
1319
  ):
1236
1320
  """Connect a Google Cloud Storage (GCS) bucket to Label Studio to use as source storage and import tasks.
1237
1321
 
@@ -1273,23 +1357,25 @@ class Project(Client):
1273
1357
  Number of tasks synced in the last sync
1274
1358
 
1275
1359
  """
1276
- if os.path.isfile(google_application_credentials):
1360
+ if google_application_credentials and os.path.isfile(
1361
+ google_application_credentials
1362
+ ):
1277
1363
  with open(google_application_credentials) as f:
1278
1364
  google_application_credentials = f.read()
1279
1365
 
1280
1366
  payload = {
1281
- 'bucket': bucket,
1282
- 'project': self.id,
1283
- 'prefix': prefix,
1284
- 'regex_filter': regex_filter,
1285
- 'use_blob_urls': use_blob_urls,
1286
- 'google_application_credentials': google_application_credentials,
1287
- 'presign': presign,
1288
- 'presign_ttl': presign_ttl,
1289
- 'title': title,
1290
- 'description': description,
1367
+ "bucket": bucket,
1368
+ "project": self.id,
1369
+ "prefix": prefix,
1370
+ "regex_filter": regex_filter,
1371
+ "use_blob_urls": use_blob_urls,
1372
+ "google_application_credentials": google_application_credentials,
1373
+ "presign": presign,
1374
+ "presign_ttl": presign_ttl,
1375
+ "title": title,
1376
+ "description": description,
1291
1377
  }
1292
- response = self.make_request('POST', '/api/storages/gcs', json=payload)
1378
+ response = self.make_request("POST", "/api/storages/gcs", json=payload)
1293
1379
  return response.json()
1294
1380
 
1295
1381
  def connect_google_export_storage(
@@ -1297,8 +1383,8 @@ class Project(Client):
1297
1383
  bucket: str,
1298
1384
  prefix: Optional[str] = None,
1299
1385
  google_application_credentials: Optional[str] = None,
1300
- title: Optional[str] = '',
1301
- description: Optional[str] = '',
1386
+ title: Optional[str] = "",
1387
+ description: Optional[str] = "",
1302
1388
  can_delete_objects: bool = False,
1303
1389
  ):
1304
1390
  """Connect a Google Cloud Storage (GCS) bucket to Label Studio to use as target storage and export tasks.
@@ -1340,15 +1426,15 @@ class Project(Client):
1340
1426
  google_application_credentials = f.read()
1341
1427
 
1342
1428
  payload = {
1343
- 'bucket': bucket,
1344
- 'prefix': prefix,
1345
- 'google_application_credentials': google_application_credentials,
1346
- 'title': title,
1347
- 'description': description,
1348
- 'can_delete_objects': can_delete_objects,
1349
- 'project': self.id,
1429
+ "bucket": bucket,
1430
+ "prefix": prefix,
1431
+ "google_application_credentials": google_application_credentials,
1432
+ "title": title,
1433
+ "description": description,
1434
+ "can_delete_objects": can_delete_objects,
1435
+ "project": self.id,
1350
1436
  }
1351
- response = self.make_request('POST', '/api/storages/export/gcs', json=payload)
1437
+ response = self.make_request("POST", "/api/storages/export/gcs", json=payload)
1352
1438
  return response.json()
1353
1439
 
1354
1440
  def connect_s3_import_storage(
@@ -1359,13 +1445,14 @@ class Project(Client):
1359
1445
  use_blob_urls: Optional[bool] = True,
1360
1446
  presign: Optional[bool] = True,
1361
1447
  presign_ttl: Optional[int] = 1,
1362
- title: Optional[str] = '',
1363
- description: Optional[str] = '',
1448
+ title: Optional[str] = "",
1449
+ description: Optional[str] = "",
1364
1450
  aws_access_key_id: Optional[str] = None,
1365
1451
  aws_secret_access_key: Optional[str] = None,
1366
1452
  aws_session_token: Optional[str] = None,
1367
1453
  region_name: Optional[str] = None,
1368
1454
  s3_endpoint: Optional[str] = None,
1455
+ recursive_scan: Optional[bool] = False,
1369
1456
  ):
1370
1457
  """Connect an Amazon S3 bucket to Label Studio to use as source storage and import tasks.
1371
1458
 
@@ -1397,6 +1484,8 @@ class Project(Client):
1397
1484
  Optional, specify the AWS region of your S3 bucket.
1398
1485
  s3_endpoint: string
1399
1486
  Optional, specify an S3 endpoint URL to use to access your bucket instead of the standard access method.
1487
+ recursive_scan: bool
1488
+ Optional, specify whether to perform recursive scan over the bucket content.
1400
1489
 
1401
1490
  Returns
1402
1491
  -------
@@ -1415,30 +1504,113 @@ class Project(Client):
1415
1504
  Number of tasks synced in the last sync
1416
1505
  """
1417
1506
  payload = {
1418
- 'bucket': bucket,
1419
- 'prefix': prefix,
1420
- 'regex_filter': regex_filter,
1421
- 'use_blob_urls': use_blob_urls,
1422
- 'aws_access_key_id': aws_access_key_id,
1423
- 'aws_secret_access_key': aws_secret_access_key,
1424
- 'aws_session_token': aws_session_token,
1425
- 'region_name': region_name,
1426
- 's3_endpoint': s3_endpoint,
1427
- 'presign': presign,
1428
- 'presign_ttl': presign_ttl,
1429
- 'title': title,
1430
- 'description': description,
1431
- 'project': self.id,
1507
+ "bucket": bucket,
1508
+ "prefix": prefix,
1509
+ "regex_filter": regex_filter,
1510
+ "use_blob_urls": use_blob_urls,
1511
+ "aws_access_key_id": aws_access_key_id,
1512
+ "aws_secret_access_key": aws_secret_access_key,
1513
+ "aws_session_token": aws_session_token,
1514
+ "region_name": region_name,
1515
+ "s3_endpoint": s3_endpoint,
1516
+ "presign": presign,
1517
+ "presign_ttl": presign_ttl,
1518
+ "title": title,
1519
+ "description": description,
1520
+ "project": self.id,
1521
+ "recursive_scan": recursive_scan,
1432
1522
  }
1433
- response = self.make_request('POST', '/api/storages/s3', json=payload)
1523
+ response = self.make_request("POST", "/api/storages/s3", json=payload)
1524
+ return response.json()
1525
+
1526
+ def connect_s3s_iam_import_storage(
1527
+ self,
1528
+ role_arn: str,
1529
+ external_id: Optional[str] = None,
1530
+ bucket: Optional[str] = None,
1531
+ prefix: Optional[str] = None,
1532
+ regex_filter: Optional[str] = None,
1533
+ use_blob_urls: Optional[bool] = True,
1534
+ presign: Optional[bool] = True,
1535
+ presign_ttl: Optional[int] = 1,
1536
+ title: Optional[str] = "",
1537
+ description: Optional[str] = "",
1538
+ region_name: Optional[str] = None,
1539
+ s3_endpoint: Optional[str] = None,
1540
+ recursive_scan: Optional[bool] = False,
1541
+ aws_sse_kms_key_id: Optional[str] = None,
1542
+ ):
1543
+ """Create S3 secured import storage with IAM role access. Enterprise only.
1544
+
1545
+ Parameters
1546
+ ----------
1547
+ role_arn: string
1548
+ Required, specify the AWS Role ARN to assume.
1549
+ external_id: string or None
1550
+ Optional, specify the external ID to use to assume the role. If None, SDK will call api/organizations/<id>
1551
+ and use external_id from the response. You can find this ID on the organization page in the Label Studio UI.
1552
+ bucket: string
1553
+ Specify the name of the S3 bucket.
1554
+ prefix: string
1555
+ Optional, specify the prefix within the S3 bucket to import your data from.
1556
+ regex_filter: string
1557
+ Optional, specify a regex filter to use to match the file types of your data.
1558
+ use_blob_urls: bool
1559
+ Optional, true by default. Specify whether your data is raw image or video data, or JSON tasks.
1560
+ presign: bool
1561
+ Optional, true by default. Specify whether or not to create presigned URLs.
1562
+ presign_ttl: int
1563
+ Optional, 1 by default. Specify how long to keep presigned URLs active.
1564
+ title: string
1565
+ Optional, specify a title for your S3 import storage that appears in Label Studio.
1566
+ description: string
1567
+ Optional, specify a description for your S3 import storage.
1568
+ region_name: string
1569
+ Optional, specify the AWS region of your S3 bucket.
1570
+ s3_endpoint: string
1571
+ Optional, specify an S3 endpoint URL to use to access your bucket instead of the standard access method.
1572
+ recursive_scan: bool
1573
+ Optional, specify whether to perform recursive scan over the bucket content.
1574
+ aws_sse_kms_key_id: string
1575
+ Optional, specify an AWS SSE KMS Key ID for server-side encryption.
1576
+ synchronizable, last_sync, last_sync_count, last_sync_job, status, traceback, meta:
1577
+ Parameters for synchronization details and storage status.
1578
+
1579
+ Returns
1580
+ -------
1581
+ dict:
1582
+ containing the response from the API including storage ID and type, among other details.
1583
+ """
1584
+ if external_id is None:
1585
+ organization = self.get_organization()
1586
+ external_id = organization["external_id"]
1587
+
1588
+ payload = {
1589
+ "bucket": bucket,
1590
+ "prefix": prefix,
1591
+ "regex_filter": regex_filter,
1592
+ "use_blob_urls": use_blob_urls,
1593
+ "presign": presign,
1594
+ "presign_ttl": presign_ttl,
1595
+ "title": title,
1596
+ "description": description,
1597
+ "recursive_scan": recursive_scan,
1598
+ "role_arn": role_arn,
1599
+ "region_name": region_name,
1600
+ "s3_endpoint": s3_endpoint,
1601
+ "aws_sse_kms_key_id": aws_sse_kms_key_id,
1602
+ "project": self.id,
1603
+ "external_id": external_id,
1604
+ }
1605
+ response = self.make_request("POST", "/api/storages/s3s/", json=payload)
1434
1606
  return response.json()
1435
1607
 
1436
1608
  def connect_s3_export_storage(
1437
1609
  self,
1438
1610
  bucket: str,
1439
1611
  prefix: Optional[str] = None,
1440
- title: Optional[str] = '',
1441
- description: Optional[str] = '',
1612
+ title: Optional[str] = "",
1613
+ description: Optional[str] = "",
1442
1614
  aws_access_key_id: Optional[str] = None,
1443
1615
  aws_secret_access_key: Optional[str] = None,
1444
1616
  aws_session_token: Optional[str] = None,
@@ -1489,19 +1661,19 @@ class Project(Client):
1489
1661
  """
1490
1662
 
1491
1663
  payload = {
1492
- 'bucket': bucket,
1493
- 'prefix': prefix,
1494
- 'aws_access_key_id': aws_access_key_id,
1495
- 'aws_secret_access_key': aws_secret_access_key,
1496
- 'aws_session_token': aws_session_token,
1497
- 'region_name': region_name,
1498
- 's3_endpoint': s3_endpoint,
1499
- 'title': title,
1500
- 'description': description,
1501
- 'can_delete_objects': can_delete_objects,
1502
- 'project': self.id,
1664
+ "bucket": bucket,
1665
+ "prefix": prefix,
1666
+ "aws_access_key_id": aws_access_key_id,
1667
+ "aws_secret_access_key": aws_secret_access_key,
1668
+ "aws_session_token": aws_session_token,
1669
+ "region_name": region_name,
1670
+ "s3_endpoint": s3_endpoint,
1671
+ "title": title,
1672
+ "description": description,
1673
+ "can_delete_objects": can_delete_objects,
1674
+ "project": self.id,
1503
1675
  }
1504
- response = self.make_request('POST', '/api/storages/export/s3', json=payload)
1676
+ response = self.make_request("POST", "/api/storages/export/s3", json=payload)
1505
1677
  return response.json()
1506
1678
 
1507
1679
  def connect_azure_import_storage(
@@ -1512,8 +1684,8 @@ class Project(Client):
1512
1684
  use_blob_urls: Optional[bool] = True,
1513
1685
  presign: Optional[bool] = True,
1514
1686
  presign_ttl: Optional[int] = 1,
1515
- title: Optional[str] = '',
1516
- description: Optional[str] = '',
1687
+ title: Optional[str] = "",
1688
+ description: Optional[str] = "",
1517
1689
  account_name: Optional[str] = None,
1518
1690
  account_key: Optional[str] = None,
1519
1691
  ):
@@ -1559,27 +1731,27 @@ class Project(Client):
1559
1731
  Number of tasks synced in the last sync
1560
1732
  """
1561
1733
  payload = {
1562
- 'container': container,
1563
- 'prefix': prefix,
1564
- 'regex_filter': regex_filter,
1565
- 'use_blob_urls': use_blob_urls,
1566
- 'account_name': account_name,
1567
- 'account_key': account_key,
1568
- 'presign': presign,
1569
- 'presign_ttl': presign_ttl,
1570
- 'title': title,
1571
- 'description': description,
1572
- 'project': self.id,
1734
+ "container": container,
1735
+ "prefix": prefix,
1736
+ "regex_filter": regex_filter,
1737
+ "use_blob_urls": use_blob_urls,
1738
+ "account_name": account_name,
1739
+ "account_key": account_key,
1740
+ "presign": presign,
1741
+ "presign_ttl": presign_ttl,
1742
+ "title": title,
1743
+ "description": description,
1744
+ "project": self.id,
1573
1745
  }
1574
- response = self.make_request('POST', '/api/storages/azure', json=payload)
1746
+ response = self.make_request("POST", "/api/storages/azure", json=payload)
1575
1747
  return response.json()
1576
1748
 
1577
1749
  def connect_azure_export_storage(
1578
1750
  self,
1579
1751
  container: str,
1580
1752
  prefix: Optional[str] = None,
1581
- title: Optional[str] = '',
1582
- description: Optional[str] = '',
1753
+ title: Optional[str] = "",
1754
+ description: Optional[str] = "",
1583
1755
  account_name: Optional[str] = None,
1584
1756
  account_key: Optional[str] = None,
1585
1757
  can_delete_objects: bool = False,
@@ -1620,16 +1792,16 @@ class Project(Client):
1620
1792
  Number of tasks synced in the last sync
1621
1793
  """
1622
1794
  payload = {
1623
- 'container': container,
1624
- 'prefix': prefix,
1625
- 'account_name': account_name,
1626
- 'account_key': account_key,
1627
- 'title': title,
1628
- 'description': description,
1629
- 'can_delete_objects': can_delete_objects,
1630
- 'project': self.id,
1795
+ "container": container,
1796
+ "prefix": prefix,
1797
+ "account_name": account_name,
1798
+ "account_key": account_key,
1799
+ "title": title,
1800
+ "description": description,
1801
+ "can_delete_objects": can_delete_objects,
1802
+ "project": self.id,
1631
1803
  }
1632
- response = self.make_request('POST', '/api/storages/export/azure', json=payload)
1804
+ response = self.make_request("POST", "/api/storages/export/azure", json=payload)
1633
1805
  return response.json()
1634
1806
 
1635
1807
  def connect_local_import_storage(
@@ -1637,8 +1809,8 @@ class Project(Client):
1637
1809
  local_store_path: [str],
1638
1810
  regex_filter: Optional[str] = None,
1639
1811
  use_blob_urls: Optional[bool] = True,
1640
- title: Optional[str] = '',
1641
- description: Optional[str] = '',
1812
+ title: Optional[str] = "",
1813
+ description: Optional[str] = "",
1642
1814
  ):
1643
1815
  """Connect a Local storage to Label Studio to use as source storage and import tasks.
1644
1816
  Parameters
@@ -1668,37 +1840,233 @@ class Project(Client):
1668
1840
  last_sync_count: int
1669
1841
  Number of tasks synced in the last sync
1670
1842
  """
1671
- if 'LABEL_STUDIO_LOCAL_FILES_DOCUMENT_ROOT' not in os.environ:
1843
+ if "LABEL_STUDIO_LOCAL_FILES_DOCUMENT_ROOT" not in os.environ:
1672
1844
  raise ValueError(
1673
- 'To use connect_local_import_storage() you should set '
1674
- 'LABEL_STUDIO_LOCAL_FILES_DOCUMENT_ROOT environment variable, '
1675
- 'read more: https://labelstud.io/guide/storage.html#Prerequisites-2'
1845
+ "To use connect_local_import_storage() you should set "
1846
+ "LABEL_STUDIO_LOCAL_FILES_DOCUMENT_ROOT environment variable, "
1847
+ "read more: https://labelstud.io/guide/storage.html#Prerequisites-2"
1676
1848
  )
1677
- root = os.environ['LABEL_STUDIO_LOCAL_FILES_DOCUMENT_ROOT']
1849
+ root = os.environ["LABEL_STUDIO_LOCAL_FILES_DOCUMENT_ROOT"]
1678
1850
 
1679
1851
  if not os.path.isdir(local_store_path):
1680
- raise ValueError(f'{local_store_path} is not a directory')
1852
+ raise ValueError(f"{local_store_path} is not a directory")
1681
1853
  if (Path(root) in Path(local_store_path).parents) is False:
1682
1854
  raise ValueError(
1683
- f'{str(Path(root))} is not presented in local_store_path parents: '
1684
- f'{str(Path(local_store_path).parents)}'
1855
+ f"{str(Path(root))} is not presented in local_store_path parents: "
1856
+ f"{str(Path(local_store_path).parents)}"
1685
1857
  )
1686
1858
 
1687
1859
  payload = {
1688
- 'regex_filter': regex_filter,
1689
- 'use_blob_urls': use_blob_urls,
1690
- 'path': local_store_path,
1691
- 'presign': False,
1692
- 'presign_ttl': 1,
1693
- 'title': title,
1694
- 'description': description,
1695
- 'project': self.id,
1860
+ "regex_filter": regex_filter,
1861
+ "use_blob_urls": use_blob_urls,
1862
+ "path": local_store_path,
1863
+ "presign": False,
1864
+ "presign_ttl": 1,
1865
+ "title": title,
1866
+ "description": description,
1867
+ "project": self.id,
1696
1868
  }
1697
1869
  response = self.make_request(
1698
- 'POST', f'/api/storages/localfiles?project={self.id}', json=payload
1870
+ "POST", f"/api/storages/localfiles?project={self.id}", json=payload
1871
+ )
1872
+ return response.json()
1873
+
1874
+ def sync_import_storage(self, storage_type, storage_id):
1875
+ """Synchronize Import (Source) Cloud Storage.
1876
+
1877
+ Parameters
1878
+ ----------
1879
+ storage_type: string
1880
+ Specify the type of the storage container. See ProjectStorage for available types.
1881
+ storage_id: int
1882
+ Specify the storage ID of the storage container. See get_import_storages() to get ids.
1883
+
1884
+ Returns
1885
+ -------
1886
+ dict:
1887
+ containing the same fields as in the original storage request and:
1888
+
1889
+ id: int
1890
+ Storage ID
1891
+ type: str
1892
+ Type of storage
1893
+ created_at: str
1894
+ Creation time
1895
+ last_sync: str
1896
+ Time last sync finished, can be empty.
1897
+ last_sync_count: int
1898
+ Number of tasks synced in the last sync
1899
+ """
1900
+ # originally sync was implemented in Client class, keep it for compatibility
1901
+ response = self.make_request(
1902
+ "POST", f"/api/storages/{storage_type}/{str(storage_id)}/sync"
1903
+ )
1904
+ return response.json()
1905
+
1906
+ # write func for syn export storage
1907
+ def sync_export_storage(self, storage_type, storage_id):
1908
+ """Synchronize Export (Target) Cloud Storage.
1909
+
1910
+ Parameters
1911
+ ----------
1912
+ storage_type: string
1913
+ Specify the type of the storage container. See ProjectStorage for available types.
1914
+ storage_id: int
1915
+ Specify the storage ID of the storage container. See get_export_storages() to get ids.
1916
+
1917
+ Returns
1918
+ -------
1919
+ dict:
1920
+ containing the same fields as in the original storage request and:
1921
+
1922
+ id: int
1923
+ Storage ID
1924
+ type: str
1925
+ Type of storage
1926
+ created_at: str
1927
+ Creation time
1928
+ other fields:
1929
+ See more https://api.labelstud.io/#tag/Storage:S3/operation/api_storages_export_s3_sync_create
1930
+ """
1931
+ response = self.make_request(
1932
+ "POST", f"/api/storages/export/{storage_type}/{str(storage_id)}/sync"
1699
1933
  )
1700
1934
  return response.json()
1701
1935
 
1936
+ # write code for get_import_storages()
1937
+ def get_import_storages(self):
1938
+ """Get Import (Source) Cloud Storage.
1939
+
1940
+ Returns
1941
+ -------
1942
+ list of dicts:
1943
+ List of dicts with source storages, each dict consists of these fields:
1944
+
1945
+ -------
1946
+ Each dict consists of these fields:
1947
+
1948
+ id : int
1949
+ A unique integer value identifying this storage.
1950
+ type : str
1951
+ The type of the storage. Default is "s3".
1952
+ synchronizable : bool
1953
+ Indicates if the storage is synchronizable. Default is True.
1954
+ presign : bool
1955
+ Indicates if the storage is presign. Default is True.
1956
+ last_sync : str or None
1957
+ The last sync finished time. Can be None.
1958
+ last_sync_count : int or None
1959
+ The count of tasks synced last time. Can be None.
1960
+ last_sync_job : str or None
1961
+ The last sync job ID. Can be None.
1962
+ status : str
1963
+ The status of the storage. Can be one of "initialized", "queued", "in_progress", "failed", "completed".
1964
+ traceback : str or None
1965
+ The traceback report for the last failed sync. Can be None.
1966
+ meta : dict or None
1967
+ Meta and debug information about storage processes. Can be None.
1968
+ title : str or None
1969
+ The title of the cloud storage. Can be None.
1970
+ description : str or None
1971
+ The description of the cloud storage. Can be None.
1972
+ created_at : str
1973
+ The creation time of the storage.
1974
+ bucket : str or None
1975
+ The S3 bucket name. Can be None.
1976
+ prefix : str or None
1977
+ The S3 bucket prefix. Can be None.
1978
+ regex_filter : str or None
1979
+ The cloud storage regex for filtering objects. Can be None.
1980
+ use_blob_urls : bool
1981
+ Indicates if objects are interpreted as BLOBs and generate URLs.
1982
+ aws_access_key_id : str or None
1983
+ The AWS_ACCESS_KEY_ID. Can be None.
1984
+ aws_secret_access_key : str or None
1985
+ The AWS_SECRET_ACCESS_KEY. Can be None.
1986
+ aws_session_token : str or None
1987
+ The AWS_SESSION_TOKEN. Can be None.
1988
+ aws_sse_kms_key_id : str or None
1989
+ The AWS SSE KMS Key ID. Can be None.
1990
+ region_name : str or None
1991
+ The AWS Region. Can be None.
1992
+ s3_endpoint : str or None
1993
+ The S3 Endpoint. Can be None.
1994
+ presign_ttl : int
1995
+ The presigned URLs TTL (in minutes).
1996
+ recursive_scan : bool
1997
+ Indicates if a recursive scan over the bucket content is performed.
1998
+ glob_pattern : str or None
1999
+ The glob pattern for syncing from bucket. Can be None.
2000
+ synced : bool
2001
+ Flag indicating if the dataset has been previously synced or not.
2002
+
2003
+ """
2004
+ response = self.make_request("GET", f"/api/storages/?project={self.id}")
2005
+ return response.json()
2006
+
2007
+ def get_export_storages(self):
2008
+ """Get Export (Target) Cloud Storage.
2009
+
2010
+ Returns
2011
+ -------
2012
+ list of dicts:
2013
+ List of dicts with target storages
2014
+
2015
+ -------
2016
+ Each dict consists of these fields:
2017
+
2018
+ id : int
2019
+ A unique integer value identifying this storage.
2020
+ type : str
2021
+ The type of the storage. Default is "s3".
2022
+ synchronizable : bool
2023
+ Indicates if the storage is synchronizable. Default is True.
2024
+ last_sync : str or None
2025
+ The last sync finished time. Can be None.
2026
+ last_sync_count : int or None
2027
+ The count of tasks synced last time. Can be None.
2028
+ last_sync_job : str or None
2029
+ The last sync job ID. Can be None.
2030
+ status : str
2031
+ The status of the storage. Can be one of "initialized", "queued", "in_progress", "failed", "completed".
2032
+ traceback : str or None
2033
+ The traceback report for the last failed sync. Can be None.
2034
+ meta : dict or None
2035
+ Meta and debug information about storage processes. Can be None.
2036
+ title : str or None
2037
+ The title of the cloud storage. Can be None.
2038
+ description : str or None
2039
+ The description of the cloud storage. Can be None.
2040
+ created_at : str
2041
+ The creation time of the storage.
2042
+ can_delete_objects : bool or None
2043
+ Deletion from storage enabled. Can be None.
2044
+ bucket : str or None
2045
+ The S3 bucket name. Can be None.
2046
+ prefix : str or None
2047
+ The S3 bucket prefix. Can be None.
2048
+ regex_filter : str or None
2049
+ The cloud storage regex for filtering objects. Can be None.
2050
+ use_blob_urls : bool
2051
+ Indicates if objects are interpreted as BLOBs and generate URLs.
2052
+ aws_access_key_id : str or None
2053
+ The AWS_ACCESS_KEY_ID. Can be None.
2054
+ aws_secret_access_key : str or None
2055
+ The AWS_SECRET_ACCESS_KEY. Can be None.
2056
+ aws_session_token : str or None
2057
+ The AWS_SESSION_TOKEN. Can be None.
2058
+ aws_sse_kms_key_id : str or None
2059
+ The AWS SSE KMS Key ID. Can be None.
2060
+ region_name : str or None
2061
+ The AWS Region. Can be None.
2062
+ s3_endpoint : str or None
2063
+ The S3 Endpoint. Can be None.
2064
+ project : int
2065
+ A unique integer value identifying this project.
2066
+ """
2067
+ response = self.make_request("GET", f"/api/storages/export?project={self.id}")
2068
+ return response.json()
2069
+
1702
2070
  def _assign_by_sampling(
1703
2071
  self,
1704
2072
  users: List[int],
@@ -1729,8 +2097,8 @@ class Project(Client):
1729
2097
  list[dict]
1730
2098
  List of dicts with counter of created assignments
1731
2099
  """
1732
- assert len(users) > 0, 'Users list is empty.'
1733
- assert len(users) >= overlap, 'Overlap is more than number of users.'
2100
+ assert len(users) > 0, "Users list is empty."
2101
+ assert len(users) >= overlap, "Overlap is more than number of users."
1734
2102
  # check if users are int and not User objects
1735
2103
  if isinstance(users[0], int):
1736
2104
  # get users from project
@@ -1740,7 +2108,7 @@ class Project(Client):
1740
2108
  final_results = []
1741
2109
  # Get tasks to assign
1742
2110
  tasks = self.get_tasks(view_id=view_id, only_ids=True)
1743
- assert len(tasks) > 0, 'Tasks list is empty.'
2111
+ assert len(tasks) > 0, "Tasks list is empty."
1744
2112
  # Choice fraction of tasks
1745
2113
  if fraction != 1.0:
1746
2114
  k = int(len(tasks) * fraction)
@@ -1829,7 +2197,7 @@ class Project(Client):
1829
2197
  overlap: int = 1,
1830
2198
  ):
1831
2199
  """
1832
- Behaves similarly like `assign_annotators()` but instead of specify tasks_ids explicitely,
2200
+ Behaves similarly to `assign_annotators()`, but instead of specifying tasks_ids explicitly,
1833
2201
  it gets users' IDs list and optional view ID and splits all tasks across annotators.
1834
2202
  Fraction expresses the size of dataset to be assigned.
1835
2203
  Parameters
@@ -1878,7 +2246,7 @@ class Project(Client):
1878
2246
  finished_at: str
1879
2247
  Finished time
1880
2248
  """
1881
- response = self.make_request('GET', f'/api/projects/{self.id}/exports')
2249
+ response = self.make_request("GET", f"/api/projects/{self.id}/exports")
1882
2250
  return response.json()
1883
2251
 
1884
2252
  def export_snapshot_create(
@@ -1904,9 +2272,9 @@ class Project(Client):
1904
2272
  Task filter options, use {"view": tab_id} to apply filter from this tab,
1905
2273
  <a href="https://api.labelstud.io/#operation/api_projects_exports_create">check the API parameters for more details</a>
1906
2274
  serialization_options_drafts: bool
1907
- Expand drafts or include only ID
2275
+ Expand drafts (False) or include only ID (True)
1908
2276
  serialization_options_predictions: bool
1909
- Expand predictions or include only ID
2277
+ Expand predictions (False) or include only ID (True)
1910
2278
  serialization_options_annotations__completed_by: bool
1911
2279
  Expand user that completed_by (False) or include only ID (True)
1912
2280
  annotation_filter_options_usual: bool
@@ -1955,12 +2323,107 @@ class Project(Client):
1955
2323
  },
1956
2324
  }
1957
2325
  response = self.make_request(
1958
- 'POST',
1959
- f'/api/projects/{self.id}/exports?interpolate_key_frames={interpolate_key_frames}',
2326
+ "POST",
2327
+ f"/api/projects/{self.id}/exports?interpolate_key_frames={interpolate_key_frames}",
1960
2328
  json=payload,
1961
2329
  )
1962
2330
  return response.json()
1963
2331
 
2332
+ def export(
2333
+ self,
2334
+ filters=None,
2335
+ title="SDK Export",
2336
+ export_type="JSON",
2337
+ output_dir=".",
2338
+ **kwargs,
2339
+ ):
2340
+ """
2341
+ Export tasks from the project with optional filters,
2342
+ and save the exported data to a specified directory.
2343
+
2344
+ This method:
2345
+ (1) creates a temporary view with the specified filters if they are not None,
2346
+ (2) creates a new export snapshot using the view ID,
2347
+ (3) checks the status of the snapshot creation while it's in progress,
2348
+ (4) and downloads the snapshot file in the specified export format.
2349
+ (5) After the export, it cleans up and remove the temporary view.
2350
+
2351
+ Parameters
2352
+ ----------
2353
+ filters : data_manager.Filters, dict, optional
2354
+ Filters to apply when exporting tasks.
2355
+ If provided, a temporary view is created with these filters.
2356
+ The format of the filters should match the Label Studio filter options.
2357
+ Default is None, which means all tasks are exported.
2358
+ Use label_studio_sdk.data_manager.Filters.create() to create filters,
2359
+ Example of the filters JSON format:
2360
+ ```json
2361
+ {
2362
+ "conjunction": "and",
2363
+ "items": [
2364
+ {
2365
+ "filter": "filter:tasks:id",
2366
+ "operator": "equal",
2367
+ "type": "Number",
2368
+ "value": 1
2369
+ }
2370
+ ]
2371
+ }
2372
+ ```
2373
+ title : str, optional
2374
+ The title of the export snapshot. Default is 'SDK Export'.
2375
+ export_type : str, optional
2376
+ The format of the exported data. It should be one of the formats supported by Label Studio ('JSON', 'CSV', etc.). Default is 'JSON'.
2377
+ output_dir : str, optional
2378
+ The directory where the exported file will be saved. Default is the current directory.
2379
+ kwargs : kwargs, optional
2380
+ The same parameters as in the export_snapshot_create method.
2381
+
2382
+ Returns
2383
+ -------
2384
+ dict
2385
+ containing the status of the export, the filename of the exported file, and the export ID.
2386
+
2387
+ filename : str
2388
+ Path to the downloaded export file
2389
+ status : int
2390
+ 200 is ok
2391
+ export_id : int
2392
+ Export ID, you can retrieve more details about this export using this ID
2393
+ """
2394
+
2395
+ # Create a temporary view with the specified filters
2396
+ if filters:
2397
+ view = self.create_view(title="Temp SDK export", filters=filters)
2398
+ task_filter_options = {"view": view["id"]}
2399
+ else:
2400
+ task_filter_options = None
2401
+ view = None
2402
+
2403
+ # Create a new export snapshot using the view ID
2404
+ export_result = self.export_snapshot_create(
2405
+ title=title,
2406
+ task_filter_options=task_filter_options,
2407
+ **kwargs,
2408
+ )
2409
+
2410
+ # Check the status of the snapshot creation
2411
+ export_id = export_result["id"]
2412
+ while self.export_snapshot_status(export_id).is_in_progress():
2413
+ time.sleep(1.0) # Wait until the snapshot is ready
2414
+
2415
+ os.makedirs(output_dir, exist_ok=True)
2416
+
2417
+ # Download the snapshot file once it's ready
2418
+ status, filename = self.export_snapshot_download(
2419
+ export_id, export_type=export_type, path=output_dir
2420
+ )
2421
+
2422
+ # Clean up the view
2423
+ if view:
2424
+ self.delete_view(view["id"])
2425
+ return {"status": status, "filename": filename, "export_id": export_id}
2426
+
1964
2427
  def export_snapshot_status(self, export_id: int) -> ExportSnapshotStatus:
1965
2428
  """
1966
2429
  Get export snapshot status by Export ID
@@ -1987,12 +2450,12 @@ class Project(Client):
1987
2450
  Finished time
1988
2451
  """
1989
2452
  response = self.make_request(
1990
- 'GET', f'/api/projects/{self.id}/exports/{export_id}'
2453
+ "GET", f"/api/projects/{self.id}/exports/{export_id}"
1991
2454
  )
1992
2455
  return ExportSnapshotStatus(response.json())
1993
2456
 
1994
2457
  def export_snapshot_download(
1995
- self, export_id: int, export_type: str = 'JSON', path: str = "."
2458
+ self, export_id: int, export_type: str = "JSON", path: str = "."
1996
2459
  ) -> (int, str):
1997
2460
  """
1998
2461
  Download file with export snapshot in provided format
@@ -2012,8 +2475,8 @@ class Project(Client):
2012
2475
  Status code for operation and downloaded filename
2013
2476
  """
2014
2477
  response = self.make_request(
2015
- 'GET',
2016
- f'/api/projects/{self.id}/exports/{export_id}/download?exportType={export_type}',
2478
+ "GET",
2479
+ f"/api/projects/{self.id}/exports/{export_id}/download?exportType={export_type}",
2017
2480
  )
2018
2481
  filename = None
2019
2482
  if response.status_code == 200:
@@ -2022,8 +2485,8 @@ class Project(Client):
2022
2485
  filename = content_disposition.split("filename=")[-1].strip("\"'")
2023
2486
  filename = os.path.basename(filename)
2024
2487
  else:
2025
- raise LabelStudioException('No filename in response')
2026
- with open(os.path.join(path, filename), 'wb') as f:
2488
+ raise LabelStudioException("No filename in response")
2489
+ with open(os.path.join(path, filename), "wb") as f:
2027
2490
  for chk in response:
2028
2491
  f.write(chk)
2029
2492
  return response.status_code, filename
@@ -2041,7 +2504,7 @@ class Project(Client):
2041
2504
  Status code for operation
2042
2505
  """
2043
2506
  response = self.make_request(
2044
- 'DELETE', f'/api/projects/{self.id}/exports/{export_id}'
2507
+ "DELETE", f"/api/projects/{self.id}/exports/{export_id}"
2045
2508
  )
2046
2509
  return response.status_code
2047
2510
 
@@ -2065,10 +2528,10 @@ class Project(Client):
2065
2528
  filenames = []
2066
2529
  if tasks:
2067
2530
  for task in tasks:
2068
- for key in task['data']:
2531
+ for key in task["data"]:
2069
2532
  try:
2070
2533
  filename = get_local_path(
2071
- task['data'][key],
2534
+ task["data"][key],
2072
2535
  access_token=self.api_key,
2073
2536
  hostname=self.url,
2074
2537
  )
@@ -2085,7 +2548,7 @@ class Project(Client):
2085
2548
  task_id: int
2086
2549
  Task id.
2087
2550
  """
2088
- assert isinstance(task_id, int), 'task_id should be int'
2551
+ assert isinstance(task_id, int), "task_id should be int"
2089
2552
  return self.make_request("DELETE", f"/api/tasks/{task_id}")
2090
2553
 
2091
2554
  def delete_tasks(self, task_ids: list) -> Response:
@@ -2096,7 +2559,7 @@ class Project(Client):
2096
2559
  task_ids: list of int
2097
2560
  Task ids.
2098
2561
  """
2099
- assert isinstance(task_ids, list), 'task_ids should be list of int'
2562
+ assert isinstance(task_ids, list), "task_ids should be list of int"
2100
2563
  if not task_ids: # avoid deletion of all tasks when task_ids = []
2101
2564
  return Response()
2102
2565
  payload = {
@@ -2117,7 +2580,7 @@ class Project(Client):
2117
2580
  """
2118
2581
  assert (
2119
2582
  isinstance(excluded_ids, list) or excluded_ids is None
2120
- ), 'excluded_ids should be list of int or None'
2583
+ ), "excluded_ids should be list of int or None"
2121
2584
  if excluded_ids is None:
2122
2585
  excluded_ids = []
2123
2586
  payload = {