dtlpy 1.113.10__py3-none-any.whl → 1.114.13__py3-none-any.whl

This diff shows the content of publicly available package versions as released to their respective public registries. It is provided for informational purposes only and reflects the changes between those versions.
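As a quick sanity check when moving between these versions (for example after running pip install dtlpy==1.114.13), the installed version can be read back from the package itself. This is a minimal sketch; it assumes dtlpy exposes the version string defined in dtlpy/__version__.py, which is listed among the changed files below.

import dtlpy as dl

# The version string is defined in dtlpy/__version__.py (see the changed-files list below).
print(dl.__version__)  # expected to print '1.114.13' after upgrading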
Files changed (243)
  1. dtlpy/__init__.py +488 -488
  2. dtlpy/__version__.py +1 -1
  3. dtlpy/assets/__init__.py +26 -26
  4. dtlpy/assets/__pycache__/__init__.cpython-38.pyc +0 -0
  5. dtlpy/assets/code_server/config.yaml +2 -2
  6. dtlpy/assets/code_server/installation.sh +24 -24
  7. dtlpy/assets/code_server/launch.json +13 -13
  8. dtlpy/assets/code_server/settings.json +2 -2
  9. dtlpy/assets/main.py +53 -53
  10. dtlpy/assets/main_partial.py +18 -18
  11. dtlpy/assets/mock.json +11 -11
  12. dtlpy/assets/model_adapter.py +83 -83
  13. dtlpy/assets/package.json +61 -61
  14. dtlpy/assets/package_catalog.json +29 -29
  15. dtlpy/assets/package_gitignore +307 -307
  16. dtlpy/assets/service_runners/__init__.py +33 -33
  17. dtlpy/assets/service_runners/converter.py +96 -96
  18. dtlpy/assets/service_runners/multi_method.py +49 -49
  19. dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
  20. dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
  21. dtlpy/assets/service_runners/multi_method_item.py +52 -52
  22. dtlpy/assets/service_runners/multi_method_json.py +52 -52
  23. dtlpy/assets/service_runners/single_method.py +37 -37
  24. dtlpy/assets/service_runners/single_method_annotation.py +43 -43
  25. dtlpy/assets/service_runners/single_method_dataset.py +43 -43
  26. dtlpy/assets/service_runners/single_method_item.py +41 -41
  27. dtlpy/assets/service_runners/single_method_json.py +42 -42
  28. dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
  29. dtlpy/assets/voc_annotation_template.xml +23 -23
  30. dtlpy/caches/base_cache.py +32 -32
  31. dtlpy/caches/cache.py +473 -473
  32. dtlpy/caches/dl_cache.py +201 -201
  33. dtlpy/caches/filesystem_cache.py +89 -89
  34. dtlpy/caches/redis_cache.py +84 -84
  35. dtlpy/dlp/__init__.py +20 -20
  36. dtlpy/dlp/cli_utilities.py +367 -367
  37. dtlpy/dlp/command_executor.py +764 -764
  38. dtlpy/dlp/dlp +1 -1
  39. dtlpy/dlp/dlp.bat +1 -1
  40. dtlpy/dlp/dlp.py +128 -128
  41. dtlpy/dlp/parser.py +651 -651
  42. dtlpy/entities/__init__.py +83 -83
  43. dtlpy/entities/analytic.py +311 -311
  44. dtlpy/entities/annotation.py +1879 -1879
  45. dtlpy/entities/annotation_collection.py +699 -699
  46. dtlpy/entities/annotation_definitions/__init__.py +20 -20
  47. dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
  48. dtlpy/entities/annotation_definitions/box.py +195 -195
  49. dtlpy/entities/annotation_definitions/classification.py +67 -67
  50. dtlpy/entities/annotation_definitions/comparison.py +72 -72
  51. dtlpy/entities/annotation_definitions/cube.py +204 -204
  52. dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
  53. dtlpy/entities/annotation_definitions/description.py +32 -32
  54. dtlpy/entities/annotation_definitions/ellipse.py +124 -124
  55. dtlpy/entities/annotation_definitions/free_text.py +62 -62
  56. dtlpy/entities/annotation_definitions/gis.py +69 -69
  57. dtlpy/entities/annotation_definitions/note.py +139 -139
  58. dtlpy/entities/annotation_definitions/point.py +117 -117
  59. dtlpy/entities/annotation_definitions/polygon.py +182 -182
  60. dtlpy/entities/annotation_definitions/polyline.py +111 -111
  61. dtlpy/entities/annotation_definitions/pose.py +92 -92
  62. dtlpy/entities/annotation_definitions/ref_image.py +86 -86
  63. dtlpy/entities/annotation_definitions/segmentation.py +240 -240
  64. dtlpy/entities/annotation_definitions/subtitle.py +34 -34
  65. dtlpy/entities/annotation_definitions/text.py +85 -85
  66. dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
  67. dtlpy/entities/app.py +220 -220
  68. dtlpy/entities/app_module.py +107 -107
  69. dtlpy/entities/artifact.py +174 -174
  70. dtlpy/entities/assignment.py +399 -399
  71. dtlpy/entities/base_entity.py +214 -214
  72. dtlpy/entities/bot.py +113 -113
  73. dtlpy/entities/codebase.py +296 -296
  74. dtlpy/entities/collection.py +38 -38
  75. dtlpy/entities/command.py +169 -169
  76. dtlpy/entities/compute.py +442 -442
  77. dtlpy/entities/dataset.py +1285 -1285
  78. dtlpy/entities/directory_tree.py +44 -44
  79. dtlpy/entities/dpk.py +470 -470
  80. dtlpy/entities/driver.py +222 -222
  81. dtlpy/entities/execution.py +397 -397
  82. dtlpy/entities/feature.py +124 -124
  83. dtlpy/entities/feature_set.py +145 -145
  84. dtlpy/entities/filters.py +641 -641
  85. dtlpy/entities/gis_item.py +107 -107
  86. dtlpy/entities/integration.py +184 -184
  87. dtlpy/entities/item.py +953 -953
  88. dtlpy/entities/label.py +123 -123
  89. dtlpy/entities/links.py +85 -85
  90. dtlpy/entities/message.py +175 -175
  91. dtlpy/entities/model.py +694 -691
  92. dtlpy/entities/node.py +1005 -1005
  93. dtlpy/entities/ontology.py +803 -803
  94. dtlpy/entities/organization.py +287 -287
  95. dtlpy/entities/package.py +657 -657
  96. dtlpy/entities/package_defaults.py +5 -5
  97. dtlpy/entities/package_function.py +185 -185
  98. dtlpy/entities/package_module.py +113 -113
  99. dtlpy/entities/package_slot.py +118 -118
  100. dtlpy/entities/paged_entities.py +290 -267
  101. dtlpy/entities/pipeline.py +593 -593
  102. dtlpy/entities/pipeline_execution.py +279 -279
  103. dtlpy/entities/project.py +394 -394
  104. dtlpy/entities/prompt_item.py +499 -499
  105. dtlpy/entities/recipe.py +301 -301
  106. dtlpy/entities/reflect_dict.py +102 -102
  107. dtlpy/entities/resource_execution.py +138 -138
  108. dtlpy/entities/service.py +958 -958
  109. dtlpy/entities/service_driver.py +117 -117
  110. dtlpy/entities/setting.py +294 -294
  111. dtlpy/entities/task.py +491 -491
  112. dtlpy/entities/time_series.py +143 -143
  113. dtlpy/entities/trigger.py +426 -426
  114. dtlpy/entities/user.py +118 -118
  115. dtlpy/entities/webhook.py +124 -124
  116. dtlpy/examples/__init__.py +19 -19
  117. dtlpy/examples/add_labels.py +135 -135
  118. dtlpy/examples/add_metadata_to_item.py +21 -21
  119. dtlpy/examples/annotate_items_using_model.py +65 -65
  120. dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
  121. dtlpy/examples/annotations_convert_to_voc.py +9 -9
  122. dtlpy/examples/annotations_convert_to_yolo.py +9 -9
  123. dtlpy/examples/convert_annotation_types.py +51 -51
  124. dtlpy/examples/converter.py +143 -143
  125. dtlpy/examples/copy_annotations.py +22 -22
  126. dtlpy/examples/copy_folder.py +31 -31
  127. dtlpy/examples/create_annotations.py +51 -51
  128. dtlpy/examples/create_video_annotations.py +83 -83
  129. dtlpy/examples/delete_annotations.py +26 -26
  130. dtlpy/examples/filters.py +113 -113
  131. dtlpy/examples/move_item.py +23 -23
  132. dtlpy/examples/play_video_annotation.py +13 -13
  133. dtlpy/examples/show_item_and_mask.py +53 -53
  134. dtlpy/examples/triggers.py +49 -49
  135. dtlpy/examples/upload_batch_of_items.py +20 -20
  136. dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
  137. dtlpy/examples/upload_items_with_modalities.py +43 -43
  138. dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
  139. dtlpy/examples/upload_yolo_format_annotations.py +70 -70
  140. dtlpy/exceptions.py +125 -125
  141. dtlpy/miscellaneous/__init__.py +20 -20
  142. dtlpy/miscellaneous/dict_differ.py +95 -95
  143. dtlpy/miscellaneous/git_utils.py +217 -217
  144. dtlpy/miscellaneous/json_utils.py +14 -14
  145. dtlpy/miscellaneous/list_print.py +105 -105
  146. dtlpy/miscellaneous/zipping.py +130 -130
  147. dtlpy/ml/__init__.py +20 -20
  148. dtlpy/ml/base_feature_extractor_adapter.py +27 -27
  149. dtlpy/ml/base_model_adapter.py +945 -940
  150. dtlpy/ml/metrics.py +461 -461
  151. dtlpy/ml/predictions_utils.py +274 -274
  152. dtlpy/ml/summary_writer.py +57 -57
  153. dtlpy/ml/train_utils.py +60 -60
  154. dtlpy/new_instance.py +252 -252
  155. dtlpy/repositories/__init__.py +56 -56
  156. dtlpy/repositories/analytics.py +85 -85
  157. dtlpy/repositories/annotations.py +916 -916
  158. dtlpy/repositories/apps.py +383 -383
  159. dtlpy/repositories/artifacts.py +452 -452
  160. dtlpy/repositories/assignments.py +599 -599
  161. dtlpy/repositories/bots.py +213 -213
  162. dtlpy/repositories/codebases.py +559 -559
  163. dtlpy/repositories/collections.py +332 -348
  164. dtlpy/repositories/commands.py +158 -158
  165. dtlpy/repositories/compositions.py +61 -61
  166. dtlpy/repositories/computes.py +434 -406
  167. dtlpy/repositories/datasets.py +1291 -1291
  168. dtlpy/repositories/downloader.py +895 -895
  169. dtlpy/repositories/dpks.py +433 -433
  170. dtlpy/repositories/drivers.py +266 -266
  171. dtlpy/repositories/executions.py +817 -817
  172. dtlpy/repositories/feature_sets.py +226 -226
  173. dtlpy/repositories/features.py +238 -238
  174. dtlpy/repositories/integrations.py +484 -484
  175. dtlpy/repositories/items.py +909 -915
  176. dtlpy/repositories/messages.py +94 -94
  177. dtlpy/repositories/models.py +877 -867
  178. dtlpy/repositories/nodes.py +80 -80
  179. dtlpy/repositories/ontologies.py +511 -511
  180. dtlpy/repositories/organizations.py +525 -525
  181. dtlpy/repositories/packages.py +1941 -1941
  182. dtlpy/repositories/pipeline_executions.py +448 -448
  183. dtlpy/repositories/pipelines.py +642 -642
  184. dtlpy/repositories/projects.py +539 -539
  185. dtlpy/repositories/recipes.py +399 -399
  186. dtlpy/repositories/resource_executions.py +137 -137
  187. dtlpy/repositories/schema.py +120 -120
  188. dtlpy/repositories/service_drivers.py +213 -213
  189. dtlpy/repositories/services.py +1704 -1704
  190. dtlpy/repositories/settings.py +339 -339
  191. dtlpy/repositories/tasks.py +1124 -1124
  192. dtlpy/repositories/times_series.py +278 -278
  193. dtlpy/repositories/triggers.py +536 -536
  194. dtlpy/repositories/upload_element.py +257 -257
  195. dtlpy/repositories/uploader.py +651 -651
  196. dtlpy/repositories/webhooks.py +249 -249
  197. dtlpy/services/__init__.py +22 -22
  198. dtlpy/services/aihttp_retry.py +131 -131
  199. dtlpy/services/api_client.py +1782 -1782
  200. dtlpy/services/api_reference.py +40 -40
  201. dtlpy/services/async_utils.py +133 -133
  202. dtlpy/services/calls_counter.py +44 -44
  203. dtlpy/services/check_sdk.py +68 -68
  204. dtlpy/services/cookie.py +115 -115
  205. dtlpy/services/create_logger.py +156 -156
  206. dtlpy/services/events.py +84 -84
  207. dtlpy/services/logins.py +235 -235
  208. dtlpy/services/reporter.py +256 -256
  209. dtlpy/services/service_defaults.py +91 -91
  210. dtlpy/utilities/__init__.py +20 -20
  211. dtlpy/utilities/annotations/__init__.py +16 -16
  212. dtlpy/utilities/annotations/annotation_converters.py +269 -269
  213. dtlpy/utilities/base_package_runner.py +264 -264
  214. dtlpy/utilities/converter.py +1650 -1650
  215. dtlpy/utilities/dataset_generators/__init__.py +1 -1
  216. dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
  217. dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
  218. dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
  219. dtlpy/utilities/local_development/__init__.py +1 -1
  220. dtlpy/utilities/local_development/local_session.py +179 -179
  221. dtlpy/utilities/reports/__init__.py +2 -2
  222. dtlpy/utilities/reports/figures.py +343 -343
  223. dtlpy/utilities/reports/report.py +71 -71
  224. dtlpy/utilities/videos/__init__.py +17 -17
  225. dtlpy/utilities/videos/video_player.py +598 -598
  226. dtlpy/utilities/videos/videos.py +470 -470
  227. {dtlpy-1.113.10.data → dtlpy-1.114.13.data}/scripts/dlp +1 -1
  228. dtlpy-1.114.13.data/scripts/dlp.bat +2 -0
  229. {dtlpy-1.113.10.data → dtlpy-1.114.13.data}/scripts/dlp.py +128 -128
  230. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/LICENSE +200 -200
  231. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/METADATA +172 -172
  232. dtlpy-1.114.13.dist-info/RECORD +240 -0
  233. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/WHEEL +1 -1
  234. tests/features/environment.py +551 -550
  235. dtlpy-1.113.10.data/scripts/dlp.bat +0 -2
  236. dtlpy-1.113.10.dist-info/RECORD +0 -244
  237. tests/assets/__init__.py +0 -0
  238. tests/assets/models_flow/__init__.py +0 -0
  239. tests/assets/models_flow/failedmain.py +0 -52
  240. tests/assets/models_flow/main.py +0 -62
  241. tests/assets/models_flow/main_model.py +0 -54
  242. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/entry_points.txt +0 -0
  243. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/top_level.txt +0 -0
dtlpy/repositories/datasets.py
@@ -1,1291 +1,1291 @@
1
- """
2
- Datasets Repository
3
- """
4
-
5
- import os
6
- import sys
7
- import time
8
- import copy
9
- import tqdm
10
- import logging
11
- import json
12
- from typing import Union
13
-
14
- from .. import entities, repositories, miscellaneous, exceptions, services, PlatformException, _api_reference
15
- from ..services.api_client import ApiClient
16
-
17
- logger = logging.getLogger(name='dtlpy')
18
-
19
-
20
- class Datasets:
21
- """
22
- Datasets Repository
23
-
24
- The Datasets class allows the user to manage datasets. Read more about datasets in our `documentation <https://dataloop.ai/docs/dataset>`_ and `SDK documentation <https://developers.dataloop.ai/tutorials/data_management/manage_datasets/chapter/>`_.
25
- """
26
-
27
- def __init__(self, client_api: ApiClient, project: entities.Project = None):
28
- self._client_api = client_api
29
- self._project = project
30
-
31
- ############
32
- # entities #
33
- ############
34
- @property
35
- def project(self) -> entities.Project:
36
- if self._project is None:
37
- # try get checkout
38
- project = self._client_api.state_io.get('project')
39
- if project is not None:
40
- self._project = entities.Project.from_json(_json=project, client_api=self._client_api)
41
- if self._project is None:
42
- raise exceptions.PlatformException(
43
- error='2001',
44
- message='Cannot perform action WITHOUT Project entity in Datasets repository.'
45
- ' Please checkout or set a project')
46
- assert isinstance(self._project, entities.Project)
47
- return self._project
48
-
49
- @project.setter
50
- def project(self, project: entities.Project):
51
- if not isinstance(project, entities.Project):
52
- raise ValueError('Must input a valid Project entity')
53
- self._project = project
54
-
55
- ###########
56
- # methods #
57
- ###########
58
- def __get_from_cache(self) -> entities.Dataset:
59
- dataset = self._client_api.state_io.get('dataset')
60
- if dataset is not None:
61
- dataset = entities.Dataset.from_json(_json=dataset,
62
- client_api=self._client_api,
63
- datasets=self,
64
- project=self._project)
65
- return dataset
66
-
67
- def __get_by_id(self, dataset_id) -> entities.Dataset:
68
- success, response = self._client_api.gen_request(req_type='get',
69
- path='/datasets/{}'.format(dataset_id))
70
- if dataset_id is None or dataset_id == '':
71
- raise exceptions.PlatformException('400', 'Please checkout a dataset')
72
-
73
- if success:
74
- dataset = entities.Dataset.from_json(client_api=self._client_api,
75
- _json=response.json(),
76
- datasets=self,
77
- project=self._project)
78
- else:
79
- raise exceptions.PlatformException(response)
80
- return dataset
81
-
82
- def __get_by_identifier(self, identifier=None) -> entities.Dataset:
83
- datasets = self.list()
84
- datasets_by_name = [dataset for dataset in datasets if identifier in dataset.name or identifier in dataset.id]
85
- if len(datasets_by_name) == 1:
86
- return datasets_by_name[0]
87
- elif len(datasets_by_name) > 1:
88
- raise Exception('Multiple datasets with this name exist')
89
- else:
90
- raise Exception("Dataset not found")
91
-
92
- def _bulid_folder_filter(self, folder_path, filters=None):
93
- if filters is None:
94
- filters = entities.Filters()
95
- filters._user_query = 'false'
96
- if not folder_path.startswith('/'):
97
- folder_path = '/' + folder_path
98
- filters.add(field='dir', values=folder_path, method=entities.FiltersMethod.OR)
99
- if not folder_path.endswith('*'):
100
- if not folder_path.endswith('/'):
101
- folder_path += '/'
102
- filters.add(field='dir', values=folder_path + '*', method=entities.FiltersMethod.OR)
103
- return filters
104
-
105
- def _get_binaries_dataset(self):
106
- filters = entities.Filters(resource=entities.FiltersResource.DATASET)
107
- filters.add(field='name', values='Binaries')
108
- filters.system_space = True
109
- datasets = self.list(filters=filters)
110
- if len(datasets) == 0:
111
- # empty list
112
- raise exceptions.PlatformException('404', 'Dataset not found. Name: "Binaries"')
113
- # dataset = None
114
- elif len(datasets) > 1:
115
- raise exceptions.PlatformException('400', 'More than one dataset with same name.')
116
- else:
117
- dataset = datasets[0]
118
- return dataset
119
-
120
- def _resolve_dataset_id(self, dataset, dataset_name, dataset_id):
121
- if dataset is None and dataset_name is None and dataset_id is None:
122
- raise ValueError('Must provide dataset, dataset name or dataset id')
123
- if dataset_id is None:
124
- if dataset is None:
125
- dataset = self.get(dataset_name=dataset_name)
126
- dataset_id = dataset.id
127
- return dataset_id
128
-
129
- @staticmethod
130
- def _build_payload(filters, include_feature_vectors, include_annotations,
131
- export_type, annotation_filters, feature_vector_filters, dataset_lock, lock_timeout_sec, export_summary):
132
- valid_list = [e.value for e in entities.ExportType]
133
- valid_types = ', '.join(valid_list)
134
- if export_type not in ['json', 'zip']:
135
- raise ValueError('export_type must be one of the following: {}'.format(valid_types))
136
- payload = {'exportType': export_type}
137
- if filters is None:
138
- filters = entities.Filters()
139
-
140
- if isinstance(filters, entities.Filters):
141
- payload['itemsQuery'] = {'filter': filters.prepare()['filter'], 'join': filters.prepare().get("join", {})}
142
- elif isinstance(filters, dict):
143
- payload['itemsQuery'] = filters
144
- else:
145
- raise exceptions.BadRequest(message='filters must be of type dict or Filters', status_code=500)
146
-
147
- payload['itemsVectorQuery'] = {}
148
- if include_feature_vectors:
149
- payload['includeItemVectors'] = True
150
- payload['itemsVectorQuery']['select'] = {"datasetId": 1, 'featureSetId': 1, 'value': 1}
151
-
152
- if feature_vector_filters is not None:
153
- payload['itemsVectorQuery']['filter'] = feature_vector_filters.prepare()['filter']
154
-
155
- payload['annotations'] = {"include": include_annotations, "convertSemantic": False}
156
-
157
- if annotation_filters is not None:
158
- payload['annotationsQuery'] = annotation_filters.prepare()['filter']
159
- payload['annotations']['filter'] = True
160
-
161
- if dataset_lock:
162
- payload['datasetLock'] = dataset_lock
163
-
164
- if export_summary:
165
- payload['summary'] = export_summary
166
-
167
- if lock_timeout_sec:
168
- payload['lockTimeoutSec'] = lock_timeout_sec
169
-
170
- return payload
171
-
172
- def _download_exported_item(self, item_id, export_type, local_path=None):
173
- export_item = repositories.Items(client_api=self._client_api).get(item_id=item_id)
174
- export_item_path = export_item.download(local_path=local_path)
175
-
176
- if export_type == entities.ExportType.ZIP:
177
- # unzipping annotations to directory
178
- if isinstance(export_item_path, list) or not os.path.isfile(export_item_path):
179
- raise exceptions.PlatformException(
180
- error='404',
181
- message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
182
- export_item.id))
183
- try:
184
- miscellaneous.Zipping.unzip_directory(zip_filename=export_item_path,
185
- to_directory=local_path)
186
- except Exception as e:
187
- logger.warning("Failed to extract zip file error: {}".format(e))
188
- finally:
189
- # cleanup
190
- if isinstance(export_item_path, str) and os.path.isfile(export_item_path):
191
- os.remove(export_item_path)
192
-
193
- @property
194
- def platform_url(self):
195
- return self._client_api._get_resource_url("projects/{}/datasets".format(self.project.id))
196
-
197
- def open_in_web(self,
198
- dataset_name: str = None,
199
- dataset_id: str = None,
200
- dataset: entities.Dataset = None):
201
- """
202
- Open the dataset in web platform.
203
-
204
- **Prerequisites**: You must be an *owner* or *developer* to use this method.
205
-
206
- :param str dataset_name: The Name of the dataset
207
- :param str dataset_id: The Id of the dataset
208
- :param dtlpy.entities.dataset.Dataset dataset: dataset object
209
-
210
- **Example**:
211
-
212
- .. code-block:: python
213
-
214
- project.datasets.open_in_web(dataset_id='dataset_id')
215
- """
216
- if dataset_name is not None:
217
- dataset = self.get(dataset_name=dataset_name)
218
- if dataset is not None:
219
- dataset.open_in_web()
220
- elif dataset_id is not None:
221
- self._client_api._open_in_web(url=f'{self.platform_url}/{dataset_id}/items')
222
- else:
223
- self._client_api._open_in_web(url=self.platform_url)
224
-
225
- def checkout(self,
226
- identifier: str = None,
227
- dataset_name: str = None,
228
- dataset_id: str = None,
229
- dataset: entities.Dataset = None):
230
- """
231
- Checkout (switch) to a dataset to work on it.
232
-
233
- **Prerequisites**: You must be an *owner* or *developer* to use this method.
234
-
235
- You must provide at least ONE of the following params: dataset_id, dataset_name.
236
-
237
- :param str identifier: dataset name or partial id that you wish to switch to
238
- :param str dataset_name: The Name of the dataset
239
- :param str dataset_id: The Id of the dataset
240
- :param dtlpy.entities.dataset.Dataset dataset: dataset object
241
-
242
- **Example**:
243
-
244
- .. code-block:: python
245
-
246
- project.datasets.checkout(dataset_id='dataset_id')
247
- """
248
- if dataset is None:
249
- if dataset_id is not None or dataset_name is not None:
250
- try:
251
- dataset = self.project.datasets.get(dataset_name=dataset_name, dataset_id=dataset_id)
252
- except exceptions.MissingEntity:
253
- dataset = self.get(dataset_id=dataset_id, dataset_name=dataset_name)
254
- elif identifier is not None:
255
- dataset = self.__get_by_identifier(identifier=identifier)
256
- else:
257
- raise exceptions.PlatformException(error='400',
258
- message='Must provide partial/full id/name to checkout')
259
- self._client_api.state_io.put('dataset', dataset.to_json())
260
- logger.info('Checked out to dataset {}'.format(dataset.name))
261
-
262
- @_api_reference.add(path='/datasets/query', method='post')
263
- def list(self, name=None, creator=None, filters: entities.Filters = None) -> miscellaneous.List[entities.Dataset]:
264
- """
265
- List all datasets.
266
-
267
- **Prerequisites**: You must be an *owner* or *developer* to use this method.
268
-
269
- :param str name: list by name
270
- :param str creator: list by creator
271
- :param dtlpy.entities.filters.Filters filters: Filters entity containing filters parameters
272
- :return: List of datasets
273
- :rtype: list
274
-
275
- **Example**:
276
-
277
- .. code-block:: python
278
- filters = dl.Filters(resource='datasets')
279
- filters.add(field='readonly', values=False)
280
- datasets = project.datasets.list(filters=filters)
281
- """
282
- if filters is None:
283
- filters = entities.Filters(resource=entities.FiltersResource.DATASET)
284
- # assert type filters
285
- elif not isinstance(filters, entities.Filters):
286
- raise exceptions.PlatformException(error='400',
287
- message='Unknown filters type: {!r}'.format(type(filters)))
288
- if filters.resource != entities.FiltersResource.DATASET:
289
- raise exceptions.PlatformException(
290
- error='400',
291
- message='Filters resource must to be FiltersResource.DATASET. Got: {!r}'.format(filters.resource))
292
-
293
- url = '/datasets/query'
294
-
295
- if name is not None:
296
- filters.add(field='name', values=name)
297
- if creator is not None:
298
- filters.add(field='creator', values=creator)
299
- if self._project is not None:
300
- filters.context = {"projects": [self._project.id]}
301
- filters.page_size = 1000
302
- filters.page = 0
303
- datasets = list()
304
- while True:
305
- success, response = self._client_api.gen_request(req_type='POST',
306
- json_req=filters.prepare(),
307
- path=url,
308
- headers={'user_query': filters._user_query})
309
- if success:
310
- pool = self._client_api.thread_pools('entity.create')
311
- datasets_json = response.json()['items']
312
- jobs = [None for _ in range(len(datasets_json))]
313
- # return triggers list
314
- for i_dataset, dataset in enumerate(datasets_json):
315
- jobs[i_dataset] = pool.submit(entities.Dataset._protected_from_json,
316
- **{'client_api': self._client_api,
317
- '_json': dataset,
318
- 'datasets': self,
319
- 'project': self.project})
320
-
321
- # get all results
322
- results = [j.result() for j in jobs]
323
- # log errors
324
- _ = [logger.warning(r[1]) for r in results if r[0] is False]
325
- # return good jobs
326
- datasets.extend([r[1] for r in results if r[0] is True])
327
- if response.json()['hasNextPage'] is True:
328
- filters.page += 1
329
- else:
330
- break
331
- else:
332
- raise exceptions.PlatformException(response)
333
- datasets = miscellaneous.List(datasets)
334
- return datasets
335
-
336
- @_api_reference.add(path='/datasets/{id}', method='get')
337
- def get(self,
338
- dataset_name: str = None,
339
- dataset_id: str = None,
340
- checkout: bool = False,
341
- fetch: bool = None
342
- ) -> entities.Dataset:
343
- """
344
- Get dataset by name or id.
345
-
346
- **Prerequisites**: You must be an *owner* or *developer* to use this method.
347
-
348
- You must provide at least ONE of the following params: dataset_id, dataset_name.
349
-
350
- :param str dataset_name: optional - search by name
351
- :param str dataset_id: optional - search by id
352
- :param bool checkout: set the dataset as a default dataset object (cookies)
353
- :param bool fetch: optional - fetch entity from platform (True), default taken from cookie
354
- :return: Dataset object
355
- :rtype: dtlpy.entities.dataset.Dataset
356
-
357
- **Example**:
358
-
359
- .. code-block:: python
360
-
361
- dataset = project.datasets.get(dataset_id='dataset_id')
362
- """
363
- if fetch is None:
364
- fetch = self._client_api.fetch_entities
365
-
366
- if dataset_id is None and dataset_name is None:
367
- dataset = self.__get_from_cache()
368
- if dataset is None:
369
- raise exceptions.PlatformException(
370
- error='400',
371
- message='No checked-out Dataset was found, must checkout or provide an identifier in inputs')
372
- elif fetch:
373
- if dataset_id is not None and dataset_id != '':
374
- dataset = self.__get_by_id(dataset_id)
375
- # verify input dataset name is same as the given id
376
- if dataset_name is not None and dataset.name != dataset_name:
377
- logger.warning(
378
- "Mismatch found in datasets.get: dataset_name is different then dataset.name: "
379
- "{!r} != {!r}".format(
380
- dataset_name,
381
- dataset.name))
382
- elif dataset_name is not None:
383
- datasets = self.list(name=dataset_name)
384
- if not datasets:
385
- # empty list
386
- raise exceptions.PlatformException('404', 'Dataset not found. Name: {!r}'.format(dataset_name))
387
- # dataset = None
388
- elif len(datasets) > 1:
389
- raise exceptions.PlatformException('400', 'More than one dataset with same name.')
390
- else:
391
- dataset = datasets[0]
392
- else:
393
- raise exceptions.PlatformException(
394
- error='404',
395
- message='No input and no checked-out found')
396
- else:
397
- dataset = entities.Dataset.from_json(_json={'id': dataset_id,
398
- 'name': dataset_id},
399
- client_api=self._client_api,
400
- datasets=self,
401
- project=self._project,
402
- is_fetched=False)
403
- assert isinstance(dataset, entities.Dataset)
404
- if checkout:
405
- self.checkout(dataset=dataset)
406
- return dataset
407
-
408
- @_api_reference.add(path='/datasets/{id}', method='delete')
409
- def delete(self,
410
- dataset_name: str = None,
411
- dataset_id: str = None,
412
- sure: bool = False,
413
- really: bool = False):
414
- """
415
- Delete a dataset forever!
416
-
417
- **Prerequisites**: You must be an *owner* or *developer* to use this method.
418
-
419
- **Example**:
420
-
421
- .. code-block:: python
422
-
423
- is_deleted = project.datasets.delete(dataset_id='dataset_id', sure=True, really=True)
424
-
425
- :param str dataset_name: optional - search by name
426
- :param str dataset_id: optional - search by id
427
- :param bool sure: Are you sure you want to delete?
428
- :param bool really: Really really sure?
429
- :return: True if success
430
- :rtype: bool
431
- """
432
- if sure and really:
433
- dataset = self.get(dataset_name=dataset_name, dataset_id=dataset_id)
434
- success, response = self._client_api.gen_request(req_type='delete',
435
- path='/datasets/{}'.format(dataset.id))
436
- if not success:
437
- raise exceptions.PlatformException(response)
438
- logger.info('Dataset {!r} was deleted successfully'.format(dataset.name))
439
- return True
440
- else:
441
- raise exceptions.PlatformException(
442
- error='403',
443
- message='Cant delete dataset from SDK. Please login to platform to delete')
444
-
445
- @_api_reference.add(path='/datasets/{id}', method='patch')
446
- def update(self,
447
- dataset: entities.Dataset,
448
- system_metadata: bool = False,
449
- patch: dict = None
450
- ) -> entities.Dataset:
451
- """
452
- Update dataset field.
453
-
454
- **Prerequisites**: You must be an *owner* or *developer* to use this method.
455
-
456
- :param dtlpy.entities.dataset.Dataset dataset: dataset object
457
- :param bool system_metadata: True, if you want to change metadata system
458
- :param dict patch: Specific patch request
459
- :return: Dataset object
460
- :rtype: dtlpy.entities.dataset.Dataset
461
-
462
- **Example**:
463
-
464
- .. code-block:: python
465
-
466
- dataset = project.datasets.update(dataset='dataset_entity')
467
- """
468
- url_path = '/datasets/{}'.format(dataset.id)
469
- if system_metadata:
470
- url_path += '?system=true'
471
-
472
- if patch is None:
473
- patch = dataset.to_json()
474
-
475
- success, response = self._client_api.gen_request(req_type='patch',
476
- path=url_path,
477
- json_req=patch)
478
- if success:
479
- logger.info('Dataset was updated successfully')
480
- return dataset
481
- else:
482
- raise exceptions.PlatformException(response)
483
-
484
- @_api_reference.add(path='/datasets/{id}/unlock', method='patch')
485
- def unlock(self, dataset: entities.Dataset) -> entities.Dataset:
486
- """
487
- Unlock dataset.
488
-
489
- **Prerequisites**: You must be an *owner* or *developer* to use this method.
490
-
491
- :param dtlpy.entities.dataset.Dataset dataset: dataset object
492
- :return: Dataset object
493
- :rtype: dtlpy.entities.dataset.Dataset
494
-
495
- **Example**:
496
-
497
- .. code-block:: python
498
-
499
- dataset = project.datasets.unlock(dataset='dataset_entity')
500
- """
501
- url_path = '/datasets/{}/unlock'.format(dataset.id)
502
-
503
- success, response = self._client_api.gen_request(req_type='patch', path=url_path)
504
- if success:
505
- logger.info('Dataset was unlocked successfully')
506
- return dataset
507
- else:
508
- raise exceptions.PlatformException(response)
509
-
510
- @_api_reference.add(path='/datasets/{id}/directoryTree', method='get')
511
- def directory_tree(self,
512
- dataset: entities.Dataset = None,
513
- dataset_name: str = None,
514
- dataset_id: str = None):
515
- """
516
- Get dataset's directory tree.
517
-
518
- **Prerequisites**: You must be an *owner* or *developer* to use this method.
519
-
520
- You must provide at least ONE of the following params: dataset, dataset_name, dataset_id.
521
-
522
- :param dtlpy.entities.dataset.Dataset dataset: dataset object
523
- :param str dataset_name: The Name of the dataset
524
- :param str dataset_id: The Id of the dataset
525
- :return: DirectoryTree
526
-
527
- **Example**:
528
-
529
- .. code-block:: python
530
- directory_tree = dataset.directory_tree
531
- directory_tree = project.datasets.directory_tree(dataset='dataset_entity')
532
- """
533
- dataset_id = self._resolve_dataset_id(dataset, dataset_name, dataset_id)
534
-
535
- url_path = '/datasets/{}/directoryTree'.format(dataset_id)
536
-
537
- success, response = self._client_api.gen_request(req_type='get',
538
- path=url_path)
539
-
540
- if success:
541
- return entities.DirectoryTree(_json=response.json())
542
- else:
543
- raise exceptions.PlatformException(response)
544
-
545
- @_api_reference.add(path='/datasets/{id}/clone', method='post')
546
- def clone(self,
547
- dataset_id: str,
548
- clone_name: str = None,
549
- filters: entities.Filters = None,
550
- with_items_annotations: bool = True,
551
- with_metadata: bool = True,
552
- with_task_annotations_status: bool = True,
553
- dst_dataset_id: str = None,
554
- target_directory: str = None):
555
- """
556
- Clone a dataset. Read more about cloning datasets and items in our `documentation <https://dataloop.ai/docs/clone-merge-dataset#cloned-dataset>`_ and `SDK documentation <https://developers.dataloop.ai/tutorials/data_management/data_versioning/chapter/>`_.
557
-
558
- **Prerequisites**: You must be in the role of an *owner* or *developer*.
559
-
560
- :param str dataset_id: id of the dataset you wish to clone
561
- :param str clone_name: new dataset name
562
- :param dtlpy.entities.filters.Filters filters: Filters entity or a query dict
563
- :param bool with_items_annotations: true to clone with items annotations
564
- :param bool with_metadata: true to clone with metadata
565
- :param bool with_task_annotations_status: true to clone with task annotations' status
566
- :param str dst_dataset_id: destination dataset id
567
- :param str target_directory: target directory
568
- :return: dataset object
569
- :rtype: dtlpy.entities.dataset.Dataset
570
-
571
- **Example**:
572
-
573
- .. code-block:: python
574
-
575
- dataset = project.datasets.clone(dataset_id='dataset_id',
576
- clone_name='dataset_clone_name',
577
- with_metadata=True,
578
- with_items_annotations=False,
579
- with_task_annotations_status=False)
580
- """
581
- if clone_name is None and dst_dataset_id is None:
582
- raise exceptions.PlatformException('400', 'Must provide clone name or destination dataset id')
583
- if filters is None:
584
- filters = entities.Filters()
585
- filters._user_query = 'false'
586
- elif not isinstance(filters, entities.Filters):
587
- raise exceptions.PlatformException(
588
- error='400',
589
- message='"filters" must be a dl.Filters entity. got: {!r}'.format(type(filters)))
590
-
591
- copy_filters = copy.deepcopy(filters)
592
- if copy_filters.has_field('hidden'):
593
- copy_filters.pop('hidden')
594
-
595
- if target_directory is not None and not target_directory.startswith('/'):
596
- target_directory = '/' + target_directory
597
-
598
- payload = {
599
- "name": clone_name,
600
- "filter": copy_filters.prepare(),
601
- "cloneDatasetParams": {
602
- "withItemsAnnotations": with_items_annotations,
603
- "withMetadata": with_metadata,
604
- "withTaskAnnotationsStatus": with_task_annotations_status,
605
- "targetDirectory": target_directory
606
- }
607
- }
608
- if dst_dataset_id is not None:
609
- payload['cloneDatasetParams']['targetDatasetId'] = dst_dataset_id
610
- success, response = self._client_api.gen_request(req_type='post',
611
- path='/datasets/{}/clone'.format(dataset_id),
612
- json_req=payload,
613
- headers={'user_query': filters._user_query})
614
-
615
- if not success:
616
- raise exceptions.PlatformException(response)
617
-
618
- command = entities.Command.from_json(_json=response.json(),
619
- client_api=self._client_api)
620
- command = command.wait()
621
-
622
- if 'returnedModelId' not in command.spec:
623
- raise exceptions.PlatformException(error='400',
624
- message="returnedModelId key is missing in command response: {!r}"
625
- .format(response))
626
- return self.get(dataset_id=command.spec['returnedModelId'])
627
-
628
- @_api_reference.add(path='/datasets/{id}/export', method='post')
629
- def export(self,
630
- dataset: entities.Dataset = None,
631
- dataset_name: str = None,
632
- dataset_id: str = None,
633
- local_path: str = None,
634
- filters: Union[dict, entities.Filters] = None,
635
- annotation_filters: entities.Filters = None,
636
- feature_vector_filters: entities.Filters = None,
637
- include_feature_vectors: bool = False,
638
- include_annotations: bool = False,
639
- export_type: entities.ExportType = entities.ExportType.JSON,
640
- timeout: int = 0,
641
- dataset_lock: bool = False,
642
- lock_timeout_sec: int = None,
643
- export_summary: bool = False):
644
- """
645
- Export dataset items and annotations.
646
-
647
- **Prerequisites**: You must be an *owner* or *developer* to use this method.
648
-
649
- You must provide at least ONE of the following params: dataset, dataset_name, dataset_id.
650
-
651
- :param dtlpy.entities.dataset.Dataset dataset: Dataset object
652
- :param str dataset_name: The name of the dataset
653
- :param str dataset_id: The ID of the dataset
654
- :param str local_path: Local path to save the exported dataset
655
- :param Union[dict, dtlpy.entities.filters.Filters] filters: Filters entity or a query dictionary
656
- :param dtlpy.entities.filters.Filters annotation_filters: Filters entity to filter annotations for export
657
- :param dtlpy.entities.filters.Filters feature_vector_filters: Filters entity to filter feature vectors for export
658
- :param bool include_feature_vectors: Include item feature vectors in the export
659
- :param bool include_annotations: Include item annotations in the export
660
- :param bool dataset_lock: Make dataset readonly during the export
661
- :param bool export_summary: Get Summary of the dataset export
662
- :param int lock_timeout_sec: Timeout for locking the dataset during export in seconds
663
- :param entities.ExportType export_type: Type of export ('json' or 'zip')
664
- :param int timeout: Maximum time in seconds to wait for the export to complete
665
- :return: local_path where the export was downloaded to
666
- :rtype: str
667
-
668
- **Example**:
669
-
670
- .. code-block:: python
671
-
672
- export_item = project.datasets.export(dataset_id='dataset_id',
673
- filters=filters,
674
- include_feature_vectors=True,
675
- include_annotations=True,
676
- export_type=dl.ExportType.JSON,
677
- dataset_lock=True,
678
- lock_timeout_sec=300,
679
- export_summary=False)
680
- """
681
- dataset_id = self._resolve_dataset_id(dataset, dataset_name, dataset_id)
682
- payload = self._build_payload(filters, include_feature_vectors, include_annotations,
683
- export_type, annotation_filters, feature_vector_filters,
684
- dataset_lock, lock_timeout_sec, export_summary)
685
-
686
- success, response = self._client_api.gen_request(req_type='post', path=f'/datasets/{dataset_id}/export',
687
- json_req=payload)
688
- if not success:
689
- raise exceptions.PlatformException(response)
690
-
691
- command = entities.Command.from_json(_json=response.json(),
692
- client_api=self._client_api)
693
-
694
- time.sleep(2)  # as the command has wrong progress in the beginning
695
- command = command.wait(timeout=timeout)
696
- if 'outputItemId' not in command.spec:
697
- raise exceptions.PlatformException(
698
- error='400',
699
- message="outputItemId key is missing in command response: {}".format(response))
700
- item_id = command.spec['outputItemId']
701
- self._download_exported_item(item_id=item_id, export_type=export_type, local_path=local_path)
702
- return local_path
703
-
704
- @_api_reference.add(path='/datasets/merge', method='post')
705
- def merge(self,
706
- merge_name: str,
707
- dataset_ids: list,
708
- project_ids: str,
709
- with_items_annotations: bool = True,
710
- with_metadata: bool = True,
711
- with_task_annotations_status: bool = True,
712
- wait: bool = True):
713
- """
714
- Merge a dataset. See our `SDK docs <https://developers.dataloop.ai/tutorials/data_management/data_versioning/chapter/>`_ for more information.
715
-
716
- **Prerequisites**: You must be an *owner* or *developer* to use this method.
717
-
718
- :param str merge_name: new dataset name
719
- :param list dataset_ids: list of ids of the datasets you wish to merge
720
- :param str project_ids: the project id that includes the datasets
721
- :param bool with_items_annotations: true to merge with items annotations
722
- :param bool with_metadata: true to merge with metadata
723
- :param bool with_task_annotations_status: true to merge with task annotations' status
724
- :param bool wait: wait for the command to finish
725
- :return: True if success
726
- :rtype: bool
727
-
728
- **Example**:
729
-
730
- .. code-block:: python
731
-
732
- success = project.datasets.merge(dataset_ids=['dataset_id1','dataset_id2'],
733
- merge_name='dataset_merge_name',
734
- with_metadata=True,
735
- with_items_annotations=False,
736
- with_task_annotations_status=False)
737
- """
738
- payload = {
739
- "name": merge_name,
740
- "datasetsIds": dataset_ids,
741
- "projectIds": project_ids,
742
- "mergeDatasetParams": {
743
- "withItemsAnnotations": with_items_annotations,
744
- "withMetadata": with_metadata,
745
- "withTaskAnnotationsStatus": with_task_annotations_status
746
- },
747
- 'asynced': wait
748
- }
749
- success, response = self._client_api.gen_request(req_type='post',
750
- path='/datasets/merge',
751
- json_req=payload)
752
-
753
- if success:
754
- command = entities.Command.from_json(_json=response.json(),
755
- client_api=self._client_api)
756
- if not wait:
757
- return command
758
- command = command.wait(timeout=0)
759
- if 'mergeDatasetsConfiguration' not in command.spec:
760
- raise exceptions.PlatformException(error='400',
761
- message="mergeDatasetsConfiguration key is missing in command response: {}"
762
- .format(response))
763
- return True
764
- else:
765
- raise exceptions.PlatformException(response)
766
-
767
- @_api_reference.add(path='/datasets/{id}/sync', method='post')
768
- def sync(self, dataset_id: str, wait: bool = True):
769
- """
770
- Sync dataset with external storage.
771
-
772
- **Prerequisites**: You must be in the role of an *owner* or *developer*.
773
-
774
- :param str dataset_id: The Id of the dataset to sync
775
- :param bool wait: wait for the command to finish
776
- :return: True if success
777
- :rtype: bool
778
-
779
- **Example**:
780
-
781
- .. code-block:: python
782
-
783
- success = project.datasets.sync(dataset_id='dataset_id')
784
- """
785
-
786
- success, response = self._client_api.gen_request(req_type='post',
787
- path='/datasets/{}/sync'.format(dataset_id))
788
-
789
- if success:
790
- command = entities.Command.from_json(_json=response.json(),
791
- client_api=self._client_api)
792
- if not wait:
793
- return command
794
- command = command.wait(timeout=0)
795
- if 'datasetId' not in command.spec:
796
- raise exceptions.PlatformException(error='400',
797
- message="datasetId key is missing in command response: {}"
798
- .format(response))
799
- return True
800
- else:
801
- raise exceptions.PlatformException(response)
802
-
803
- @_api_reference.add(path='/datasets', method='post')
804
- def create(self,
805
- dataset_name: str,
806
- labels=None,
807
- attributes=None,
808
- ontology_ids=None,
809
- driver: entities.Driver = None,
810
- driver_id: str = None,
811
- checkout: bool = False,
812
- expiration_options: entities.ExpirationOptions = None,
813
- index_driver: entities.IndexDriver = None,
814
- recipe_id: str = None
815
- ) -> entities.Dataset:
816
- """
817
- Create a new dataset
818
-
819
- **Prerequisites**: You must be in the role of an *owner* or *developer*.
820
-
821
- :param str dataset_name: The Name of the dataset
822
- :param list labels: dictionary of {tag: color} or list of label entities
823
- :param list attributes: dataset's ontology's attributes
824
- :param list ontology_ids: optional - dataset ontology
825
- :param dtlpy.entities.driver.Driver driver: optional - storage driver Driver object or driver name
826
- :param str driver_id: optional - driver id
827
- :param bool checkout: set the dataset as a default dataset object (cookies)
828
- :param ExpirationOptions expiration_options: dl.ExpirationOptions object that contains definitions for the dataset, like MaxItemDays
829
- :param str index_driver: dl.IndexDriver, dataset driver version
830
- :param str recipe_id: optional - recipe id
831
- :return: Dataset object
832
- :rtype: dtlpy.entities.dataset.Dataset
833
-
834
- **Example**:
835
-
836
- .. code-block:: python
837
-
838
- dataset = project.datasets.create(dataset_name='dataset_name', ontology_ids='ontology_ids')
839
- """
840
- create_default_recipe = True
841
- if any([labels, attributes, ontology_ids, recipe_id]):
842
- create_default_recipe = False
843
-
844
- # labels to list
845
- if labels is not None:
846
- if not isinstance(labels, list):
847
- labels = [labels]
848
- if not all(isinstance(label, entities.Label) for label in labels):
849
- labels = entities.Dataset.serialize_labels(labels)
850
- else:
851
- labels = list()
852
-
853
- # get creator from token
854
- payload = {'name': dataset_name,
855
- 'projects': [self.project.id],
856
- 'createDefaultRecipe': create_default_recipe
857
- }
858
-
859
- if driver_id is None and driver is not None:
860
- if isinstance(driver, entities.Driver):
861
- driver_id = driver.id
862
- elif isinstance(driver, str):
863
- driver_id = self.project.drivers.get(driver_name=driver).id
864
- else:
865
- raise exceptions.PlatformException(
866
- error=400,
867
- message='Input arg "driver" must be Driver object or a string driver name. got type: {!r}'.format(
868
- type(driver)))
869
- if driver_id is not None:
870
- payload['driver'] = driver_id
871
-
872
- if expiration_options:
873
- payload['expirationOptions'] = expiration_options.to_json()
874
- if index_driver is not None:
875
- payload['indexDriver'] = index_driver
876
-
877
- success, response = self._client_api.gen_request(req_type='post',
878
- path='/datasets',
879
- json_req=payload)
880
- if success:
881
- dataset = entities.Dataset.from_json(client_api=self._client_api,
882
- _json=response.json(),
883
- datasets=self,
884
- project=self.project)
885
- # create ontology and recipe
886
- if not create_default_recipe:
887
- if recipe_id is not None:
888
- dataset.switch_recipe(recipe_id=recipe_id)
889
- else:
890
- dataset = dataset.recipes.create(ontology_ids=ontology_ids,
891
- labels=labels,
892
- attributes=attributes).dataset
893
- else:
894
- raise exceptions.PlatformException(response)
895
- logger.info('Dataset was created successfully. Dataset id: {!r}'.format(dataset.id))
896
- assert isinstance(dataset, entities.Dataset)
897
- if checkout:
898
- self.checkout(dataset=dataset)
899
- return dataset
900
-
901
- @staticmethod
902
- def _convert_single(downloader,
903
- item,
904
- img_filepath,
905
- local_path,
906
- overwrite,
907
- annotation_options,
908
- annotation_filters,
909
- thickness,
910
- with_text,
911
- progress,
912
- alpha,
913
- export_version):
914
- # this is to convert the downloaded json files to any other annotation type
915
- try:
916
- if entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE in annotation_options:
917
- if img_filepath is None:
918
- img_filepath = item.download()
919
- downloader._download_img_annotations(item=item,
920
- img_filepath=img_filepath,
921
- local_path=local_path,
922
- overwrite=overwrite,
923
- annotation_options=annotation_options,
924
- annotation_filters=annotation_filters,
925
- thickness=thickness,
926
- alpha=alpha,
927
- with_text=with_text,
928
- export_version=export_version
929
- )
930
- except Exception:
931
- logger.error('Failed to download annotation for item: {!r}'.format(item.name))
932
- progress.update()
933
-
934
- @staticmethod
935
- def download_annotations(dataset: entities.Dataset,
936
- local_path: str = None,
937
- filters: entities.Filters = None,
938
- annotation_options: entities.ViewAnnotationOptions = None,
939
- annotation_filters: entities.Filters = None,
940
- overwrite: bool = False,
941
- thickness: int = 1,
942
- with_text: bool = False,
943
- remote_path: str = None,
944
- include_annotations_in_output: bool = True,
945
- export_png_files: bool = False,
946
- filter_output_annotations: bool = False,
947
- alpha: float = None,
948
- export_version=entities.ExportVersion.V1,
949
- dataset_lock: bool = False,
950
- lock_timeout_sec: int = None,
951
- export_summary: bool = False,
952
- ) -> str:
953
- """
954
- Download dataset's annotations by filters.
955
-
956
- You may filter the dataset both for items and for annotations and download annotations.
957
-
958
- Optional -- download annotations as: mask, instance, image mask of the item.
959
-
960
- **Prerequisites**: You must be in the role of an *owner* or *developer*.
961
-
962
- :param dtlpy.entities.dataset.Dataset dataset: dataset object
963
- :param str local_path: local folder or filename to save to.
964
- :param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
965
- :param list annotation_options: type of download annotations: list(dl.ViewAnnotationOptions)
966
- :param dtlpy.entities.filters.Filters annotation_filters: Filters entity to filter annotations for download
967
- :param bool overwrite: optional - default = False to overwrite the existing files
968
- :param bool dataset_lock: optional - default = False to make the dataset readonly
969
- :param int thickness: optional - line thickness, if -1 annotation will be filled, default =1
970
- :param bool with_text: optional - add text to annotations, default = False
971
- :param str remote_path: DEPRECATED and ignored
972
- :param bool include_annotations_in_output: default - True, whether the export should contain annotations
973
- :param bool export_png_files: default - False; if True, semantic annotations will be exported as png files
974
- :param bool filter_output_annotations: default - False; when exporting with a filter, determines whether to also filter the output annotations
975
- :param float alpha: opacity value [0 1], default 1
976
- :param str export_version: `V2` - exported items will have the original extension in the filename, `V1` - no original extension in filenames
977
- :return: local_path of the directory where all the downloaded items are saved
978
- :param bool dataset_lock: optional - default = False
979
- :param bool export_summary: optional - default = False
980
- :param int lock_timeout_sec: optional
981
- :rtype: str
982
-
983
- **Example**:
984
-
985
- .. code-block:: python
986
-
987
- file_path = project.datasets.download_annotations(dataset='dataset_entity',
988
- local_path='local_path',
989
- annotation_options=dl.ViewAnnotationOptions,
990
- overwrite=False,
991
- thickness=1,
992
- with_text=False,
993
- alpha=1,
994
- dataset_lock=False,
995
- lock_timeout_sec=300,
996
- export_summary=False
997
- )
998
- """
999
- if annotation_options is None:
1000
- annotation_options = list()
1001
- elif not isinstance(annotation_options, list):
1002
- annotation_options = [annotation_options]
1003
- for ann_option in annotation_options:
1004
- if not isinstance(ann_option, entities.ViewAnnotationOptions):
1005
- if ann_option not in list(entities.ViewAnnotationOptions):
1006
- raise PlatformException(
1007
- error='400',
1008
- message='Unknown annotation download option: {}, please choose from: {}'.format(
1009
- ann_option, list(entities.ViewAnnotationOptions)))
1010
-
1011
- if remote_path is not None:
1012
- logger.warning(
1013
- '"remote_path" is ignored. Use "filters=dl.Filters(field="dir, values={!r}"'.format(remote_path))
1014
- if local_path is None:
1015
- if dataset.project is None:
1016
- # by dataset name
1017
- local_path = os.path.join(
1018
- services.service_defaults.DATALOOP_PATH,
1019
- "datasets",
1020
- "{}_{}".format(dataset.name, dataset.id),
1021
- )
1022
- else:
1023
- # by dataset and project name
1024
- local_path = os.path.join(
1025
- services.service_defaults.DATALOOP_PATH,
1026
- "projects",
1027
- dataset.project.name,
1028
- "datasets",
1029
- dataset.name,
1030
- )
1031
-
1032
- if filters is None:
1033
- filters = entities.Filters()
1034
- filters._user_query = 'false'
1035
- if annotation_filters is not None:
1036
- for annotation_filter_and in annotation_filters.and_filter_list:
1037
- filters.add_join(field=annotation_filter_and.field,
1038
- values=annotation_filter_and.values,
1039
- operator=annotation_filter_and.operator,
1040
- method=entities.FiltersMethod.AND)
1041
- for annotation_filter_or in annotation_filters.or_filter_list:
1042
- filters.add_join(field=annotation_filter_or.field,
1043
- values=annotation_filter_or.values,
1044
- operator=annotation_filter_or.operator,
1045
- method=entities.FiltersMethod.OR)
1046
-
1047
- downloader = repositories.Downloader(items_repository=dataset.items)
1048
- downloader.download_annotations(dataset=dataset,
1049
- filters=filters,
1050
- annotation_filters=annotation_filters,
1051
- local_path=local_path,
1052
- overwrite=overwrite,
1053
- include_annotations_in_output=include_annotations_in_output,
1054
- export_png_files=export_png_files,
1055
- filter_output_annotations=filter_output_annotations,
1056
- export_version=export_version,
1057
- dataset_lock=dataset_lock,
1058
- lock_timeout_sec=lock_timeout_sec,
1059
- export_summary=export_summary
1060
- )
1061
- if annotation_options:
1062
- pages = dataset.items.list(filters=filters)
1063
- if not isinstance(annotation_options, list):
1064
- annotation_options = [annotation_options]
1065
- # convert all annotations to annotation_options
1066
- pool = dataset._client_api.thread_pools(pool_name='dataset.download')
1067
- jobs = [None for _ in range(pages.items_count)]
1068
- progress = tqdm.tqdm(total=pages.items_count,
1069
- disable=dataset._client_api.verbose.disable_progress_bar_download_annotations,
1070
- file=sys.stdout, desc='Download Annotations')
1071
- i_item = 0
1072
- for page in pages:
1073
- for item in page:
1074
- jobs[i_item] = pool.submit(
1075
- Datasets._convert_single,
1076
- **{
1077
- 'downloader': downloader,
1078
- 'item': item,
1079
- 'img_filepath': None,
1080
- 'local_path': local_path,
1081
- 'overwrite': overwrite,
1082
- 'annotation_options': annotation_options,
1083
- 'annotation_filters': annotation_filters,
1084
- 'thickness': thickness,
1085
- 'with_text': with_text,
1086
- 'progress': progress,
1087
- 'alpha': alpha,
1088
- 'export_version': export_version
1089
- }
1090
- )
1091
- i_item += 1
1092
- # get all results
1093
- _ = [j.result() for j in jobs]
1094
- progress.close()
1095
- return local_path
1096
-
1097
- def _upload_single_item_annotation(self, item, file, pbar):
1098
- try:
1099
- item.annotations.upload(file)
1100
- except Exception as err:
1101
- raise err
1102
- finally:
1103
- pbar.update()
1104
-
1105
- def upload_annotations(self,
1106
- dataset,
1107
- local_path,
1108
- filters: entities.Filters = None,
1109
- clean=False,
1110
- remote_root_path='/',
1111
- export_version=entities.ExportVersion.V1
1112
- ):
1113
- """
1114
- Upload annotations to dataset.
1115
-
1116
- Example for remote_root_path: If the item filepath is "/a/b/item" and remote_root_path is "/a" - the start folder will be b instead of a
1117
-
1118
- **Prerequisites**: You must have a dataset with items that correspond to the annotation files; items and annotation files are matched by filename. You must be in the role of an *owner* or *developer*.
1119
-
1120
- :param dtlpy.entities.dataset.Dataset dataset: dataset to upload to
1121
- :param str local_path: str - local folder where the annotations files are
1122
- :param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
1123
- :param bool clean: True to remove the old annotations
1124
- :param str remote_root_path: the remote root path to match remote and local items
1125
- :param str export_version: `V2` - exported items will have the original extension in the filename, `V1` - no original extension in filenames
1126
-
1127
- **Example**:
1128
-
1129
- .. code-block:: python
1130
-
1131
- project.datasets.upload_annotations(dataset='dataset_entity',
1132
- local_path='local_path',
1133
- clean=False,
1134
- export_version=dl.ExportVersion.V1
1135
- )
1136
- """
1137
- if filters is None:
1138
- filters = entities.Filters()
1139
- filters._user_query = 'false'
1140
- pages = dataset.items.list(filters=filters)
1141
- total_items = pages.items_count
1142
- pbar = tqdm.tqdm(total=total_items, disable=dataset._client_api.verbose.disable_progress_bar_upload_annotations,
1143
- file=sys.stdout, desc='Upload Annotations')
1144
- pool = self._client_api.thread_pools('annotation.upload')
1145
- annotations_uploaded_count = 0
1146
- for item in pages.all():
1147
- if export_version == entities.ExportVersion.V1:
1148
- _, ext = os.path.splitext(item.filename)
1149
- filepath = item.filename.replace(ext, '.json')
1150
- else:
1151
- filepath = item.filename + '.json'
1152
- # make the file path ignore the hierarchy of the files that in remote_root_path
1153
- filepath = os.path.relpath(filepath, remote_root_path)
1154
- json_file = os.path.join(local_path, filepath)
1155
- if not os.path.isfile(json_file):
1156
- pbar.update()
1157
- continue
1158
- annotations_uploaded_count += 1
1159
- if item.annotated and clean:
1160
- item.annotations.delete(filters=entities.Filters(resource=entities.FiltersResource.ANNOTATION))
1161
- pool.submit(self._upload_single_item_annotation, **{'item': item,
1162
- 'file': json_file,
1163
- 'pbar': pbar})
1164
- pool.shutdown()
1165
- if annotations_uploaded_count == 0:
1166
- logger.warning(msg="No annotations uploaded to dataset! ")
1167
- else:
1168
- logger.info(msg='Found and uploaded {} annotations.'.format(annotations_uploaded_count))
1169
-
1170
- def set_readonly(self, state: bool, dataset: entities.Dataset):
1171
- """
1172
- Set dataset readonly mode.
1173
-
1174
- **Prerequisites**: You must be in the role of an *owner* or *developer*.
1175
-
1176
- :param bool state: state to update readonly mode
1177
- :param dtlpy.entities.dataset.Dataset dataset: dataset object
1178
-
1179
- **Example**:
1180
-
1181
- .. code-block:: python
1182
-
1183
- project.datasets.set_readonly(dataset='dataset_entity', state=True)
1184
- """
1185
- import warnings
1186
- warnings.warn("`readonly` flag on dataset is deprecated, doing nothing.", DeprecationWarning)
1187
-
1188
-
1189
- @_api_reference.add(path='/datasets/{id}/split', method='post')
1190
- def split_ml_subsets(self,
1191
- dataset_id: str,
1192
- items_query: entities.filters,
1193
- ml_split_list: dict) -> bool:
1194
- """
1195
- Split dataset items into ML subsets.
1196
-
1197
- :param str dataset_id: The ID of the dataset.
1198
- :param dict items_query: Query to select items.
1199
- :param dict ml_split_list: Dictionary with 'train', 'validation', 'test' keys and integer percentages.
1200
- :return: True if the split operation was successful.
1201
- :rtype: bool
1202
- :raises: PlatformException on failure and ValueError if percentages do not sum to 100 or invalid keys/values.
1203
- """
1204
- # Validate percentages
1205
- if not ml_split_list:
1206
- ml_split_list = {'train': 80, 'validation': 10, 'test': 10}
1207
-
1208
- if not items_query:
1209
- items_query = entities.Filters()
1210
-
1211
- items_query_dict = items_query.prepare()
1212
- required_keys = {'train', 'validation', 'test'}
1213
- if set(ml_split_list.keys()) != required_keys:
1214
- raise ValueError("MLSplitList must have exactly the keys 'train', 'validation', 'test'.")
1215
- total = sum(ml_split_list.values())
1216
- if total != 100:
1217
- raise ValueError(
1218
- "Please set the Train, Validation, and Test subsets percentages to add up to 100%. "
1219
- "For example: 70, 15, 15."
1220
- )
1221
- for key, value in ml_split_list.items():
1222
- if not isinstance(value, int) or value < 0:
1223
- raise ValueError("Percentages must be integers >= 0.")
1224
- payload = {
1225
- 'itemsQuery': items_query_dict,
1226
- 'MLSplitList': ml_split_list
1227
- }
1228
- path = f'/datasets/{dataset_id}/split'
1229
- success, response = self._client_api.gen_request(req_type='post',
1230
- path=path,
1231
- json_req=payload)
1232
- if success:
1233
- # Wait for the split operation to complete
1234
- command = entities.Command.from_json(_json=response.json(),
1235
- client_api=self._client_api)
1236
- command.wait()
1237
- return True
1238
- else:
1239
- raise exceptions.PlatformException(response)
1240
-
1241
-
1242
- @_api_reference.add(path='/datasets/{id}/items/bulk-update-metadata', method='post')
1243
- def bulk_update_ml_subset(self, dataset_id: str, items_query: dict, subset: str = None, deleteTag: bool = False) -> bool:
1244
- """
1245
- Bulk update ML subset assignment for selected items.
1246
- If subset is None, remove subsets. Otherwise, assign the specified subset.
1247
-
1248
- :param str dataset_id: ID of the dataset
1249
- :param dict items_query: DQLResourceQuery (filters) for selecting items
1250
- :param str subset: 'train', 'validation', 'test' or None to remove all
1251
- :return: True if success
1252
- :rtype: bool
1253
- """
1254
- if items_query is None:
1255
- items_query = entities.Filters()
1256
- items_query_dict = items_query.prepare()
1257
- if not deleteTag and subset not in ['train', 'validation', 'test']:
1258
- raise ValueError("subset must be one of: 'train', 'validation', 'test'")
1259
- # Determine tag values based on subset
1260
- tags = {
1261
- 'train': True if subset == 'train' else None,
1262
- 'validation': True if subset == 'validation' else None,
1263
- 'test': True if subset == 'test' else None
1264
- }
1265
-
1266
- payload = {
1267
- "query": items_query_dict,
1268
- "updateQuery": {
1269
- "update": {
1270
- "metadata": {
1271
- "system": {
1272
- "tags": tags
1273
- }
1274
- }
1275
- },
1276
- "systemSpace": True
1277
- }
1278
- }
1279
-
1280
- success, response = self._client_api.gen_request(
1281
- req_type='post',
1282
- path=f'/datasets/{dataset_id}/items/bulk-update-metadata',
1283
- json_req=payload
1284
- )
1285
- if success:
1286
- # Similar to split operation, a command is returned
1287
- command = entities.Command.from_json(_json=response.json(), client_api=self._client_api)
1288
- command.wait()
1289
- return True
1290
- else:
1291
- raise exceptions.PlatformException(response)
1
+ """
2
+ Datasets Repository
3
+ """
4
+
5
+ import os
6
+ import sys
7
+ import time
8
+ import copy
9
+ import tqdm
10
+ import logging
11
+ import json
12
+ from typing import Union
13
+
14
+ from .. import entities, repositories, miscellaneous, exceptions, services, PlatformException, _api_reference
15
+ from ..services.api_client import ApiClient
16
+
17
+ logger = logging.getLogger(name='dtlpy')
18
+
19
+
20
+ class Datasets:
21
+ """
22
+ Datasets Repository
23
+
24
+ The Datasets class allows the user to manage datasets. Read more about datasets in our `documentation <https://dataloop.ai/docs/dataset>`_ and `SDK documentation <https://developers.dataloop.ai/tutorials/data_management/manage_datasets/chapter/>`_.
25
+ """
26
+
27
+ def __init__(self, client_api: ApiClient, project: entities.Project = None):
28
+ self._client_api = client_api
29
+ self._project = project
30
+
31
+ ############
32
+ # entities #
33
+ ############
34
+ @property
35
+ def project(self) -> entities.Project:
36
+ if self._project is None:
37
+ # try get checkout
38
+ project = self._client_api.state_io.get('project')
39
+ if project is not None:
40
+ self._project = entities.Project.from_json(_json=project, client_api=self._client_api)
41
+ if self._project is None:
42
+ raise exceptions.PlatformException(
43
+ error='2001',
44
+ message='Cannot perform action WITHOUT Project entity in Datasets repository.'
45
+ ' Please checkout or set a project')
46
+ assert isinstance(self._project, entities.Project)
47
+ return self._project
48
+
49
+ @project.setter
50
+ def project(self, project: entities.Project):
51
+ if not isinstance(project, entities.Project):
52
+ raise ValueError('Must input a valid Project entity')
53
+ self._project = project
54
+
55
+ ###########
56
+ # methods #
57
+ ###########
58
+ def __get_from_cache(self) -> entities.Dataset:
59
+ dataset = self._client_api.state_io.get('dataset')
60
+ if dataset is not None:
61
+ dataset = entities.Dataset.from_json(_json=dataset,
62
+ client_api=self._client_api,
63
+ datasets=self,
64
+ project=self._project)
65
+ return dataset
66
+
67
+ def __get_by_id(self, dataset_id) -> entities.Dataset:
68
+ if dataset_id is None or dataset_id == '':
69
+ raise exceptions.PlatformException('400', 'Please checkout a dataset')
70
+ success, response = self._client_api.gen_request(req_type='get',
71
+ path='/datasets/{}'.format(dataset_id))
72
+
73
+ if success:
74
+ dataset = entities.Dataset.from_json(client_api=self._client_api,
75
+ _json=response.json(),
76
+ datasets=self,
77
+ project=self._project)
78
+ else:
79
+ raise exceptions.PlatformException(response)
80
+ return dataset
81
+
82
+ def __get_by_identifier(self, identifier=None) -> entities.Dataset:
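+ # match by substring: return the dataset whose name or id contains the identifier; fail on zero or multiple matches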
83
+ datasets = self.list()
84
+ datasets_by_name = [dataset for dataset in datasets if identifier in dataset.name or identifier in dataset.id]
85
+ if len(datasets_by_name) == 1:
86
+ return datasets_by_name[0]
87
+ elif len(datasets_by_name) > 1:
88
+ raise Exception('Multiple datasets with this name exist')
89
+ else:
90
+ raise Exception("Dataset not found")
91
+
92
+ def _bulid_folder_filter(self, folder_path, filters=None):
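+ # build an items filter that matches the folder itself and everything under it (dir == path OR dir == path/*)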
93
+ if filters is None:
94
+ filters = entities.Filters()
95
+ filters._user_query = 'false'
96
+ if not folder_path.startswith('/'):
97
+ folder_path = '/' + folder_path
98
+ filters.add(field='dir', values=folder_path, method=entities.FiltersMethod.OR)
99
+ if not folder_path.endswith('*'):
100
+ if not folder_path.endswith('/'):
101
+ folder_path += '/'
102
+ filters.add(field='dir', values=folder_path + '*', method=entities.FiltersMethod.OR)
103
+ return filters
104
+
105
+ def _get_binaries_dataset(self):
106
+ filters = entities.Filters(resource=entities.FiltersResource.DATASET)
107
+ filters.add(field='name', values='Binaries')
108
+ filters.system_space = True
109
+ datasets = self.list(filters=filters)
110
+ if len(datasets) == 0:
111
+ # empty list
112
+ raise exceptions.PlatformException('404', 'Dataset not found. Name: "Binaries"')
113
+ # dataset = None
114
+ elif len(datasets) > 1:
115
+ raise exceptions.PlatformException('400', 'More than one dataset with same name.')
116
+ else:
117
+ dataset = datasets[0]
118
+ return dataset
119
+
120
+ def _resolve_dataset_id(self, dataset, dataset_name, dataset_id):
121
+ if dataset is None and dataset_name is None and dataset_id is None:
122
+ raise ValueError('Must provide dataset, dataset name or dataset id')
123
+ if dataset_id is None:
124
+ if dataset is None:
125
+ dataset = self.get(dataset_name=dataset_name)
126
+ dataset_id = dataset.id
127
+ return dataset_id
128
+
129
+ @staticmethod
130
+ def _build_payload(filters, include_feature_vectors, include_annotations,
131
+ export_type, annotation_filters, feature_vector_filters, dataset_lock, lock_timeout_sec, export_summary):
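+ # assemble the export request body: export type, items query, optional feature-vector/annotation queries and lock/summary flags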
132
+ valid_list = [e.value for e in entities.ExportType]
133
+ valid_types = ', '.join(valid_list)
134
+ if export_type not in valid_list:
135
+ raise ValueError('export_type must be one of the following: {}'.format(valid_types))
136
+ payload = {'exportType': export_type}
137
+ if filters is None:
138
+ filters = entities.Filters()
139
+
140
+ if isinstance(filters, entities.Filters):
141
+ payload['itemsQuery'] = {'filter': filters.prepare()['filter'], 'join': filters.prepare().get("join", {})}
142
+ elif isinstance(filters, dict):
143
+ payload['itemsQuery'] = filters
144
+ else:
145
+ raise exceptions.BadRequest(message='filters must be of type dict or Filters', status_code=500)
146
+
147
+ payload['itemsVectorQuery'] = {}
148
+ if include_feature_vectors:
149
+ payload['includeItemVectors'] = True
150
+ payload['itemsVectorQuery']['select'] = {"datasetId": 1, 'featureSetId': 1, 'value': 1}
151
+
152
+ if feature_vector_filters is not None:
153
+ payload['itemsVectorQuery']['filter'] = feature_vector_filters.prepare()['filter']
154
+
155
+ payload['annotations'] = {"include": include_annotations, "convertSemantic": False}
156
+
157
+ if annotation_filters is not None:
158
+ payload['annotationsQuery'] = annotation_filters.prepare()['filter']
159
+ payload['annotations']['filter'] = True
160
+
161
+ if dataset_lock:
162
+ payload['datasetLock'] = dataset_lock
163
+
164
+ if export_summary:
165
+ payload['summary'] = export_summary
166
+
167
+ if lock_timeout_sec:
168
+ payload['lockTimeoutSec'] = lock_timeout_sec
169
+
170
+ return payload
171
+
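For reference, a minimal sketch (not part of the package) of the request body that _build_payload produces for a default call, i.e. no filters, no feature vectors, no annotations and export_type='json'; the itemsQuery content comes from the default Filters entity:

.. code-block:: python

    {
        'exportType': 'json',
        'itemsQuery': {'filter': {...}, 'join': {}},   # taken from Filters().prepare()
        'itemsVectorQuery': {},
        'annotations': {'include': False, 'convertSemantic': False}
    }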
172
+ def _download_exported_item(self, item_id, export_type, local_path=None):
173
+ export_item = repositories.Items(client_api=self._client_api).get(item_id=item_id)
174
+ export_item_path = export_item.download(local_path=local_path)
175
+
176
+ if export_type == entities.ExportType.ZIP:
177
+ # unzipping annotations to directory
178
+ if isinstance(export_item_path, list) or not os.path.isfile(export_item_path):
179
+ raise exceptions.PlatformException(
180
+ error='404',
181
+ message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
182
+ export_item.id))
183
+ try:
184
+ miscellaneous.Zipping.unzip_directory(zip_filename=export_item_path,
185
+ to_directory=local_path)
186
+ except Exception as e:
187
+ logger.warning("Failed to extract zip file error: {}".format(e))
188
+ finally:
189
+ # cleanup
190
+ if isinstance(export_item_path, str) and os.path.isfile(export_item_path):
191
+ os.remove(export_item_path)
192
+
193
+ @property
194
+ def platform_url(self):
195
+ return self._client_api._get_resource_url("projects/{}/datasets".format(self.project.id))
196
+
197
+ def open_in_web(self,
198
+ dataset_name: str = None,
199
+ dataset_id: str = None,
200
+ dataset: entities.Dataset = None):
201
+ """
202
+ Open the dataset in web platform.
203
+
204
+ **Prerequisites**: You must be an *owner* or *developer* to use this method.
205
+
206
+ :param str dataset_name: The Name of the dataset
207
+ :param str dataset_id: The Id of the dataset
208
+ :param dtlpy.entities.dataset.Dataset dataset: dataset object
209
+
210
+ **Example**:
211
+
212
+ .. code-block:: python
213
+
214
+ project.datasets.open_in_web(dataset_id='dataset_id')
215
+ """
216
+ if dataset_name is not None:
217
+ dataset = self.get(dataset_name=dataset_name)
218
+ if dataset is not None:
219
+ dataset.open_in_web()
220
+ elif dataset_id is not None:
221
+ self._client_api._open_in_web(url=f'{self.platform_url}/{dataset_id}/items')
222
+ else:
223
+ self._client_api._open_in_web(url=self.platform_url)
224
+
225
+ def checkout(self,
226
+ identifier: str = None,
227
+ dataset_name: str = None,
228
+ dataset_id: str = None,
229
+ dataset: entities.Dataset = None):
230
+ """
231
+ Checkout (switch) to a dataset to work on it.
232
+
233
+ **Prerequisites**: You must be an *owner* or *developer* to use this method.
234
+
235
+ You must provide at least ONE of the following params: dataset_id, dataset_name.
236
+
237
+ :param str identifier: project name or partial id that you wish to switch
238
+ :param str dataset_name: The Name of the dataset
239
+ :param str dataset_id: The Id of the dataset
240
+ :param dtlpy.entities.dataset.Dataset dataset: dataset object
241
+
242
+ **Example**:
243
+
244
+ .. code-block:: python
245
+
246
+ project.datasets.checkout(dataset_id='dataset_id')
247
+ """
248
+ if dataset is None:
249
+ if dataset_id is not None or dataset_name is not None:
250
+ try:
251
+ dataset = self.project.datasets.get(dataset_name=dataset_name, dataset_id=dataset_id)
252
+ except exceptions.MissingEntity:
253
+ dataset = self.get(dataset_id=dataset_id, dataset_name=dataset_name)
254
+ elif identifier is not None:
255
+ dataset = self.__get_by_identifier(identifier=identifier)
256
+ else:
257
+ raise exceptions.PlatformException(error='400',
258
+ message='Must provide partial/full id/name to checkout')
259
+ self._client_api.state_io.put('dataset', dataset.to_json())
260
+ logger.info('Checked out to dataset {}'.format(dataset.name))
261
+
262
+ @_api_reference.add(path='/datasets/query', method='post')
263
+ def list(self, name=None, creator=None, filters: entities.Filters = None) -> miscellaneous.List[entities.Dataset]:
264
+ """
265
+ List all datasets.
266
+
267
+ **Prerequisites**: You must be an *owner* or *developer* to use this method.
268
+
269
+ :param str name: list by name
270
+ :param str creator: list by creator
271
+ :param dtlpy.entities.filters.Filters filters: Filters entity containing filters parameters
272
+ :return: List of datasets
273
+ :rtype: list
274
+
275
+ **Example**:
276
+
277
+ .. code-block:: python
278
+
+ filters = dl.Filters(resource='datasets')
279
+ filters.add(field='readonly', values=False)
280
+ datasets = project.datasets.list(filters=filters)
281
+ """
282
+ if filters is None:
283
+ filters = entities.Filters(resource=entities.FiltersResource.DATASET)
284
+ # assert type filters
285
+ elif not isinstance(filters, entities.Filters):
286
+ raise exceptions.PlatformException(error='400',
287
+ message='Unknown filters type: {!r}'.format(type(filters)))
288
+ if filters.resource != entities.FiltersResource.DATASET:
289
+ raise exceptions.PlatformException(
290
+ error='400',
291
+ message='Filters resource must to be FiltersResource.DATASET. Got: {!r}'.format(filters.resource))
292
+
293
+ url = '/datasets/query'
294
+
295
+ if name is not None:
296
+ filters.add(field='name', values=name)
297
+ if creator is not None:
298
+ filters.add(field='creator', values=creator)
299
+ if self._project is not None:
300
+ filters.context = {"projects": [self._project.id]}
301
+ filters.page_size = 1000
302
+ filters.page = 0
303
+ datasets = list()
304
+ while True:
305
+ success, response = self._client_api.gen_request(req_type='POST',
306
+ json_req=filters.prepare(),
307
+ path=url,
308
+ headers={'user_query': filters._user_query})
309
+ if success:
310
+ pool = self._client_api.thread_pools('entity.create')
311
+ datasets_json = response.json()['items']
312
+ jobs = [None for _ in range(len(datasets_json))]
313
+ # return triggers list
314
+ for i_dataset, dataset in enumerate(datasets_json):
315
+ jobs[i_dataset] = pool.submit(entities.Dataset._protected_from_json,
316
+ **{'client_api': self._client_api,
317
+ '_json': dataset,
318
+ 'datasets': self,
319
+ 'project': self.project})
320
+
321
+ # get all results
322
+ results = [j.result() for j in jobs]
323
+ # log errors
324
+ _ = [logger.warning(r[1]) for r in results if r[0] is False]
325
+ # return good jobs
326
+ datasets.extend([r[1] for r in results if r[0] is True])
327
+ if response.json()['hasNextPage'] is True:
328
+ filters.page += 1
329
+ else:
330
+ break
331
+ else:
332
+ raise exceptions.PlatformException(response)
333
+ datasets = miscellaneous.List(datasets)
334
+ return datasets
335
+
336
+ @_api_reference.add(path='/datasets/{id}', method='get')
337
+ def get(self,
338
+ dataset_name: str = None,
339
+ dataset_id: str = None,
340
+ checkout: bool = False,
341
+ fetch: bool = None
342
+ ) -> entities.Dataset:
343
+ """
344
+ Get dataset by name or id.
345
+
346
+ **Prerequisites**: You must be an *owner* or *developer* to use this method.
347
+
348
+ You must provide at least ONE of the following params: dataset_id, dataset_name.
349
+
350
+ :param str dataset_name: optional - search by name
351
+ :param str dataset_id: optional - search by id
352
+ :param bool checkout: set the dataset as a default dataset object (cookies)
353
+ :param bool fetch: optional - fetch entity from platform (True), default taken from cookie
354
+ :return: Dataset object
355
+ :rtype: dtlpy.entities.dataset.Dataset
356
+
357
+ **Example**:
358
+
359
+ .. code-block:: python
360
+
361
+ dataset = project.datasets.get(dataset_id='dataset_id')
362
+ """
363
+ if fetch is None:
364
+ fetch = self._client_api.fetch_entities
365
+
366
+ if dataset_id is None and dataset_name is None:
367
+ dataset = self.__get_from_cache()
368
+ if dataset is None:
369
+ raise exceptions.PlatformException(
370
+ error='400',
371
+ message='No checked-out Dataset was found, must checkout or provide an identifier in inputs')
372
+ elif fetch:
373
+ if dataset_id is not None and dataset_id != '':
374
+ dataset = self.__get_by_id(dataset_id)
375
+ # verify input dataset name is same as the given id
376
+ if dataset_name is not None and dataset.name != dataset_name:
377
+ logger.warning(
378
+ "Mismatch found in datasets.get: dataset_name is different then dataset.name: "
379
+ "{!r} != {!r}".format(
380
+ dataset_name,
381
+ dataset.name))
382
+ elif dataset_name is not None:
383
+ datasets = self.list(name=dataset_name)
384
+ if not datasets:
385
+ # empty list
386
+ raise exceptions.PlatformException('404', 'Dataset not found. Name: {!r}'.format(dataset_name))
387
+ # dataset = None
388
+ elif len(datasets) > 1:
389
+ raise exceptions.PlatformException('400', 'More than one dataset with same name.')
390
+ else:
391
+ dataset = datasets[0]
392
+ else:
393
+ raise exceptions.PlatformException(
394
+ error='404',
395
+ message='No input and no checked-out dataset found')
396
+ else:
397
+ dataset = entities.Dataset.from_json(_json={'id': dataset_id,
398
+ 'name': dataset_id},
399
+ client_api=self._client_api,
400
+ datasets=self,
401
+ project=self._project,
402
+ is_fetched=False)
403
+ assert isinstance(dataset, entities.Dataset)
404
+ if checkout:
405
+ self.checkout(dataset=dataset)
406
+ return dataset
407
+
408
+ @_api_reference.add(path='/datasets/{id}', method='delete')
409
+ def delete(self,
410
+ dataset_name: str = None,
411
+ dataset_id: str = None,
412
+ sure: bool = False,
413
+ really: bool = False):
414
+ """
415
+ Delete a dataset forever!
416
+
417
+ **Prerequisites**: You must be an *owner* or *developer* to use this method.
418
+
419
+ **Example**:
420
+
421
+ .. code-block:: python
422
+
423
+ is_deleted = project.datasets.delete(dataset_id='dataset_id', sure=True, really=True)
424
+
425
+ :param str dataset_name: optional - search by name
426
+ :param str dataset_id: optional - search by id
427
+ :param bool sure: Are you sure you want to delete?
428
+ :param bool really: Really really sure?
429
+ :return: True if success
430
+ :rtype: bool
431
+ """
432
+ if sure and really:
433
+ dataset = self.get(dataset_name=dataset_name, dataset_id=dataset_id)
434
+ success, response = self._client_api.gen_request(req_type='delete',
435
+ path='/datasets/{}'.format(dataset.id))
436
+ if not success:
437
+ raise exceptions.PlatformException(response)
438
+ logger.info('Dataset {!r} was deleted successfully'.format(dataset.name))
439
+ return True
440
+ else:
441
+ raise exceptions.PlatformException(
442
+ error='403',
443
+ message='Cannot delete dataset from SDK. Please log in to the platform to delete it')
444
+
445
+ @_api_reference.add(path='/datasets/{id}', method='patch')
446
+ def update(self,
447
+ dataset: entities.Dataset,
448
+ system_metadata: bool = False,
449
+ patch: dict = None
450
+ ) -> entities.Dataset:
451
+ """
452
+ Update dataset field.
453
+
454
+ **Prerequisites**: You must be an *owner* or *developer* to use this method.
455
+
456
+ :param dtlpy.entities.dataset.Dataset dataset: dataset object
457
+ :param bool system_metadata: True, if you want to change metadata system
458
+ :param dict patch: Specific patch request
459
+ :return: Dataset object
460
+ :rtype: dtlpy.entities.dataset.Dataset
461
+
462
+ **Example**:
463
+
464
+ .. code-block:: python
465
+
466
+ dataset = project.datasets.update(dataset='dataset_entity')
467
+ """
468
+ url_path = '/datasets/{}'.format(dataset.id)
469
+ if system_metadata:
470
+ url_path += '?system=true'
471
+
472
+ if patch is None:
473
+ patch = dataset.to_json()
474
+
475
+ success, response = self._client_api.gen_request(req_type='patch',
476
+ path=url_path,
477
+ json_req=patch)
478
+ if success:
479
+ logger.info('Dataset was updated successfully')
480
+ return dataset
481
+ else:
482
+ raise exceptions.PlatformException(response)
483
+
484
+ @_api_reference.add(path='/datasets/{id}/unlock', method='patch')
485
+ def unlock(self, dataset: entities.Dataset) -> entities.Dataset:
486
+ """
487
+ Unlock dataset.
488
+
489
+ **Prerequisites**: You must be an *owner* or *developer* to use this method.
490
+
491
+ :param dtlpy.entities.dataset.Dataset dataset: dataset object
492
+ :return: Dataset object
493
+ :rtype: dtlpy.entities.dataset.Dataset
494
+
495
+ **Example**:
496
+
497
+ .. code-block:: python
498
+
499
+ dataset = project.datasets.unlock(dataset='dataset_entity')
500
+ """
501
+ url_path = '/datasets/{}/unlock'.format(dataset.id)
502
+
503
+ success, response = self._client_api.gen_request(req_type='patch', path=url_path)
504
+ if success:
505
+ logger.info('Dataset was unlocked successfully')
506
+ return dataset
507
+ else:
508
+ raise exceptions.PlatformException(response)
509
+
510
+ @_api_reference.add(path='/datasets/{id}/directoryTree', method='get')
511
+ def directory_tree(self,
512
+ dataset: entities.Dataset = None,
513
+ dataset_name: str = None,
514
+ dataset_id: str = None):
515
+ """
516
+ Get dataset's directory tree.
517
+
518
+ **Prerequisites**: You must be an *owner* or *developer* to use this method.
519
+
520
+ You must provide at least ONE of the following params: dataset, dataset_name, dataset_id.
521
+
522
+ :param dtlpy.entities.dataset.Dataset dataset: dataset object
523
+ :param str dataset_name: The Name of the dataset
524
+ :param str dataset_id: The Id of the dataset
525
+ :return: DirectoryTree
526
+
527
+ **Example**:
528
+
529
+ .. code-block:: python
530
+
+ directory_tree = dataset.directory_tree
531
+ directory_tree = project.datasets.directory_tree(dataset='dataset_entity')
532
+ """
533
+ dataset_id = self._resolve_dataset_id(dataset, dataset_name, dataset_id)
534
+
535
+ url_path = '/datasets/{}/directoryTree'.format(dataset_id)
536
+
537
+ success, response = self._client_api.gen_request(req_type='get',
538
+ path=url_path)
539
+
540
+ if success:
541
+ return entities.DirectoryTree(_json=response.json())
542
+ else:
543
+ raise exceptions.PlatformException(response)
544
+
545
+ @_api_reference.add(path='/datasets/{id}/clone', method='post')
546
+ def clone(self,
547
+ dataset_id: str,
548
+ clone_name: str = None,
549
+ filters: entities.Filters = None,
550
+ with_items_annotations: bool = True,
551
+ with_metadata: bool = True,
552
+ with_task_annotations_status: bool = True,
553
+ dst_dataset_id: str = None,
554
+ target_directory: str = None):
555
+ """
556
+ Clone a dataset. Read more about cloning datasets and items in our `documentation <https://dataloop.ai/docs/clone-merge-dataset#cloned-dataset>`_ and `SDK documentation <https://developers.dataloop.ai/tutorials/data_management/data_versioning/chapter/>`_.
557
+
558
+ **Prerequisites**: You must be in the role of an *owner* or *developer*.
559
+
560
+ :param str dataset_id: id of the dataset you wish to clone
561
+ :param str clone_name: new dataset name
562
+ :param dtlpy.entities.filters.Filters filters: Filters entity or a query dict
563
+ :param bool with_items_annotations: true to clone with items annotations
564
+ :param bool with_metadata: true to clone with metadata
565
+ :param bool with_task_annotations_status: true to clone with task annotations' status
566
+ :param str dst_dataset_id: destination dataset id
567
+ :param str target_directory: target directory
568
+ :return: dataset object
569
+ :rtype: dtlpy.entities.dataset.Dataset
570
+
571
+ **Example**:
572
+
573
+ .. code-block:: python
574
+
575
+ dataset = project.datasets.clone(dataset_id='dataset_id',
576
+ clone_name='dataset_clone_name',
577
+ with_metadata=True,
578
+ with_items_annotations=False,
579
+ with_task_annotations_status=False)
580
+ """
581
+ if clone_name is None and dst_dataset_id is None:
582
+ raise exceptions.PlatformException('400', 'Must provide clone name or destination dataset id')
583
+ if filters is None:
584
+ filters = entities.Filters()
585
+ filters._user_query = 'false'
586
+ elif not isinstance(filters, entities.Filters):
587
+ raise exceptions.PlatformException(
588
+ error='400',
589
+ message='"filters" must be a dl.Filters entity. got: {!r}'.format(type(filters)))
590
+
591
+ copy_filters = copy.deepcopy(filters)
592
+ if copy_filters.has_field('hidden'):
593
+ copy_filters.pop('hidden')
594
+
595
+ if target_directory is not None and not target_directory.startswith('/'):
596
+ target_directory = '/' + target_directory
597
+
598
+ payload = {
599
+ "name": clone_name,
600
+ "filter": copy_filters.prepare(),
601
+ "cloneDatasetParams": {
602
+ "withItemsAnnotations": with_items_annotations,
603
+ "withMetadata": with_metadata,
604
+ "withTaskAnnotationsStatus": with_task_annotations_status,
605
+ "targetDirectory": target_directory
606
+ }
607
+ }
608
+ if dst_dataset_id is not None:
609
+ payload['cloneDatasetParams']['targetDatasetId'] = dst_dataset_id
610
+ success, response = self._client_api.gen_request(req_type='post',
611
+ path='/datasets/{}/clone'.format(dataset_id),
612
+ json_req=payload,
613
+ headers={'user_query': filters._user_query})
614
+
615
+ if not success:
616
+ raise exceptions.PlatformException(response)
617
+
618
+ command = entities.Command.from_json(_json=response.json(),
619
+ client_api=self._client_api)
620
+ command = command.wait()
621
+
622
+ if 'returnedModelId' not in command.spec:
623
+ raise exceptions.PlatformException(error='400',
624
+ message="returnedModelId key is missing in command response: {!r}"
625
+ .format(response))
626
+ return self.get(dataset_id=command.spec['returnedModelId'])
627
+
628
+ @_api_reference.add(path='/datasets/{id}/export', method='post')
629
+ def export(self,
630
+ dataset: entities.Dataset = None,
631
+ dataset_name: str = None,
632
+ dataset_id: str = None,
633
+ local_path: str = None,
634
+ filters: Union[dict, entities.Filters] = None,
635
+ annotation_filters: entities.Filters = None,
636
+ feature_vector_filters: entities.Filters = None,
637
+ include_feature_vectors: bool = False,
638
+ include_annotations: bool = False,
639
+ export_type: entities.ExportType = entities.ExportType.JSON,
640
+ timeout: int = 0,
641
+ dataset_lock: bool = False,
642
+ lock_timeout_sec: int = None,
643
+ export_summary: bool = False):
644
+ """
645
+ Export dataset items and annotations.
646
+
647
+ **Prerequisites**: You must be an *owner* or *developer* to use this method.
648
+
649
+ You must provide at least ONE of the following params: dataset, dataset_name, dataset_id.
650
+
651
+ :param dtlpy.entities.dataset.Dataset dataset: Dataset object
652
+ :param str dataset_name: The name of the dataset
653
+ :param str dataset_id: The ID of the dataset
654
+ :param str local_path: Local path to save the exported dataset
655
+ :param Union[dict, dtlpy.entities.filters.Filters] filters: Filters entity or a query dictionary
656
+ :param dtlpy.entities.filters.Filters annotation_filters: Filters entity to filter annotations for export
657
+ :param dtlpy.entities.filters.Filters feature_vector_filters: Filters entity to filter feature vectors for export
658
+ :param bool include_feature_vectors: Include item feature vectors in the export
659
+ :param bool include_annotations: Include item annotations in the export
660
+ :param bool dataset_lock: Make dataset readonly during the export
661
+ :param bool export_summary: Get Summary of the dataset export
662
+ :param int lock_timeout_sec: Timeout for locking the dataset during export in seconds
663
+ :param entities.ExportType export_type: Type of export ('json' or 'zip')
664
+ :param int timeout: Maximum time in seconds to wait for the export to complete
665
+ :return: the local_path where the export was downloaded
666
+ :rtype: str
667
+
668
+ **Example**:
669
+
670
+ .. code-block:: python
671
+
672
+ export_item = project.datasets.export(dataset_id='dataset_id',
673
+ filters=filters,
674
+ include_feature_vectors=True,
675
+ include_annotations=True,
676
+ export_type=dl.ExportType.JSON,
677
+ dataset_lock=True,
678
+ lock_timeout_sec=300,
679
+ export_summary=False)
680
+ """
681
+ dataset_id = self._resolve_dataset_id(dataset, dataset_name, dataset_id)
682
+ payload = self._build_payload(filters, include_feature_vectors, include_annotations,
683
+ export_type, annotation_filters, feature_vector_filters,
684
+ dataset_lock, lock_timeout_sec, export_summary)
685
+
686
+ success, response = self._client_api.gen_request(req_type='post', path=f'/datasets/{dataset_id}/export',
687
+ json_req=payload)
688
+ if not success:
689
+ raise exceptions.PlatformException(response)
690
+
691
+ command = entities.Command.from_json(_json=response.json(),
692
+ client_api=self._client_api)
693
+
694
+ time.sleep(2) # the command reports an incorrect progress value at the beginning
695
+ command = command.wait(timeout=timeout)
696
+ if 'outputItemId' not in command.spec:
697
+ raise exceptions.PlatformException(
698
+ error='400',
699
+ message="outputItemId key is missing in command response: {}".format(response))
700
+ item_id = command.spec['outputItemId']
701
+ self._download_exported_item(item_id=item_id, export_type=export_type, local_path=local_path)
702
+ return local_path
703
+
704
+ @_api_reference.add(path='/datasets/merge', method='post')
705
+ def merge(self,
706
+ merge_name: str,
707
+ dataset_ids: list,
708
+ project_ids: str,
709
+ with_items_annotations: bool = True,
710
+ with_metadata: bool = True,
711
+ with_task_annotations_status: bool = True,
712
+ wait: bool = True):
713
+ """
714
+ Merge a dataset. See our `SDK docs <https://developers.dataloop.ai/tutorials/data_management/data_versioning/chapter/>`_ for more information.
715
+
716
+ **Prerequisites**: You must be an *owner* or *developer* to use this method.
717
+
718
+ :param str merge_name: new dataset name
719
+ :param list dataset_ids: list of IDs of the datasets you wish to merge
720
+ :param str project_ids: the id of the project that contains the datasets
721
+ :param bool with_items_annotations: true to merge with items annotations
722
+ :param bool with_metadata: true to merge with metadata
723
+ :param bool with_task_annotations_status: true to merge with task annotations' status
724
+ :param bool wait: wait for the command to finish
725
+ :return: True if success
726
+ :rtype: bool
727
+
728
+ **Example**:
729
+
730
+ .. code-block:: python
731
+
732
+ success = project.datasets.merge(dataset_ids=['dataset_id1','dataset_id2'],
733
+ merge_name='dataset_merge_name',
734
+ with_metadata=True,
735
+ with_items_annotations=False,
736
+ with_task_annotations_status=False)
737
+ """
738
+ payload = {
739
+ "name": merge_name,
740
+ "datasetsIds": dataset_ids,
741
+ "projectIds": project_ids,
742
+ "mergeDatasetParams": {
743
+ "withItemsAnnotations": with_items_annotations,
744
+ "withMetadata": with_metadata,
745
+ "withTaskAnnotationsStatus": with_task_annotations_status
746
+ },
747
+ 'asynced': wait
748
+ }
749
+ success, response = self._client_api.gen_request(req_type='post',
750
+ path='/datasets/merge',
751
+ json_req=payload)
752
+
753
+ if success:
754
+ command = entities.Command.from_json(_json=response.json(),
755
+ client_api=self._client_api)
756
+ if not wait:
757
+ return command
758
+ command = command.wait(timeout=0)
759
+ if 'mergeDatasetsConfiguration' not in command.spec:
760
+ raise exceptions.PlatformException(error='400',
761
+ message="mergeDatasetsConfiguration key is missing in command response: {}"
762
+ .format(response))
763
+ return True
764
+ else:
765
+ raise exceptions.PlatformException(response)
766
+
767
+ @_api_reference.add(path='/datasets/{id}/sync', method='post')
768
+ def sync(self, dataset_id: str, wait: bool = True):
769
+ """
770
+ Sync dataset with external storage.
771
+
772
+ **Prerequisites**: You must be in the role of an *owner* or *developer*.
773
+
774
+ :param str dataset_id: The Id of the dataset to sync
775
+ :param bool wait: wait for the command to finish
776
+ :return: True if success
777
+ :rtype: bool
778
+
779
+ **Example**:
780
+
781
+ .. code-block:: python
782
+
783
+ success = project.datasets.sync(dataset_id='dataset_id')
784
+ """
785
+
786
+ success, response = self._client_api.gen_request(req_type='post',
787
+ path='/datasets/{}/sync'.format(dataset_id))
788
+
789
+ if success:
790
+ command = entities.Command.from_json(_json=response.json(),
791
+ client_api=self._client_api)
792
+ if not wait:
793
+ return command
794
+ command = command.wait(timeout=0)
795
+ if 'datasetId' not in command.spec:
796
+ raise exceptions.PlatformException(error='400',
797
+ message="datasetId key is missing in command response: {}"
798
+ .format(response))
799
+ return True
800
+ else:
801
+ raise exceptions.PlatformException(response)
802
+
803
+ @_api_reference.add(path='/datasets', method='post')
804
+ def create(self,
805
+ dataset_name: str,
806
+ labels=None,
807
+ attributes=None,
808
+ ontology_ids=None,
809
+ driver: entities.Driver = None,
810
+ driver_id: str = None,
811
+ checkout: bool = False,
812
+ expiration_options: entities.ExpirationOptions = None,
813
+ index_driver: entities.IndexDriver = None,
814
+ recipe_id: str = None
815
+ ) -> entities.Dataset:
816
+ """
817
+ Create a new dataset
818
+
819
+ **Prerequisites**: You must be in the role of an *owner* or *developer*.
820
+
821
+ :param str dataset_name: The Name of the dataset
822
+ :param list labels: dictionary of {tag: color} or list of label entities
823
+ :param list attributes: dataset's ontology's attributes
824
+ :param list ontology_ids: optional - dataset ontology
825
+ :param dtlpy.entities.driver.Driver driver: optional - storage driver Driver object or driver name
826
+ :param str driver_id: optional - driver id
827
+ :param bool checkout: set the dataset as a default dataset object (cookies)
828
+ :param ExpirationOptions expiration_options: dl.ExpirationOptions object that contains definitions for the dataset, such as MaxItemDays
829
+ :param str index_driver: dl.IndexDriver, dataset driver version
830
+ :param str recipe_id: optional - recipe id
831
+ :return: Dataset object
832
+ :rtype: dtlpy.entities.dataset.Dataset
833
+
834
+ **Example**:
835
+
836
+ .. code-block:: python
837
+
838
+ dataset = project.datasets.create(dataset_name='dataset_name', ontology_ids='ontology_ids')
839
+ """
840
+ create_default_recipe = True
841
+ if any([labels, attributes, ontology_ids, recipe_id]):
842
+ create_default_recipe = False
843
+
844
+ # labels to list
845
+ if labels is not None:
846
+ if not isinstance(labels, list):
847
+ labels = [labels]
848
+ if not all(isinstance(label, entities.Label) for label in labels):
849
+ labels = entities.Dataset.serialize_labels(labels)
850
+ else:
851
+ labels = list()
852
+
853
+ # get creator from token
854
+ payload = {'name': dataset_name,
855
+ 'projects': [self.project.id],
856
+ 'createDefaultRecipe': create_default_recipe
857
+ }
858
+
859
+ if driver_id is None and driver is not None:
860
+ if isinstance(driver, entities.Driver):
861
+ driver_id = driver.id
862
+ elif isinstance(driver, str):
863
+ driver_id = self.project.drivers.get(driver_name=driver).id
864
+ else:
865
+ raise exceptions.PlatformException(
866
+ error=400,
867
+ message='Input arg "driver" must be Driver object or a string driver name. got type: {!r}'.format(
868
+ type(driver)))
869
+ if driver_id is not None:
870
+ payload['driver'] = driver_id
871
+
872
+ if expiration_options:
873
+ payload['expirationOptions'] = expiration_options.to_json()
874
+ if index_driver is not None:
875
+ payload['indexDriver'] = index_driver
876
+
877
+ success, response = self._client_api.gen_request(req_type='post',
878
+ path='/datasets',
879
+ json_req=payload)
880
+ if success:
881
+ dataset = entities.Dataset.from_json(client_api=self._client_api,
882
+ _json=response.json(),
883
+ datasets=self,
884
+ project=self.project)
885
+ # create ontology and recipe
886
+ if not create_default_recipe:
887
+ if recipe_id is not None:
888
+ dataset.switch_recipe(recipe_id=recipe_id)
889
+ else:
890
+ dataset = dataset.recipes.create(ontology_ids=ontology_ids,
891
+ labels=labels,
892
+ attributes=attributes).dataset
893
+ else:
894
+ raise exceptions.PlatformException(response)
895
+ logger.info('Dataset was created successfully. Dataset id: {!r}'.format(dataset.id))
896
+ assert isinstance(dataset, entities.Dataset)
897
+ if checkout:
898
+ self.checkout(dataset=dataset)
899
+ return dataset
900
+
901
+ @staticmethod
902
+ def _convert_single(downloader,
903
+ item,
904
+ img_filepath,
905
+ local_path,
906
+ overwrite,
907
+ annotation_options,
908
+ annotation_filters,
909
+ thickness,
910
+ with_text,
911
+ progress,
912
+ alpha,
913
+ export_version):
914
+ # this is to convert the downloaded json files to any other annotation type
915
+ try:
916
+ if entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE in annotation_options:
917
+ if img_filepath is None:
918
+ img_filepath = item.download()
919
+ downloader._download_img_annotations(item=item,
920
+ img_filepath=img_filepath,
921
+ local_path=local_path,
922
+ overwrite=overwrite,
923
+ annotation_options=annotation_options,
924
+ annotation_filters=annotation_filters,
925
+ thickness=thickness,
926
+ alpha=alpha,
927
+ with_text=with_text,
928
+ export_version=export_version
929
+ )
930
+ except Exception:
931
+ logger.error('Failed to download annotation for item: {!r}'.format(item.name))
932
+ progress.update()
933
+
934
+ @staticmethod
935
+ def download_annotations(dataset: entities.Dataset,
936
+ local_path: str = None,
937
+ filters: entities.Filters = None,
938
+ annotation_options: entities.ViewAnnotationOptions = None,
939
+ annotation_filters: entities.Filters = None,
940
+ overwrite: bool = False,
941
+ thickness: int = 1,
942
+ with_text: bool = False,
943
+ remote_path: str = None,
944
+ include_annotations_in_output: bool = True,
945
+ export_png_files: bool = False,
946
+ filter_output_annotations: bool = False,
947
+ alpha: float = None,
948
+ export_version=entities.ExportVersion.V1,
949
+ dataset_lock: bool = False,
950
+ lock_timeout_sec: int = None,
951
+ export_summary: bool = False,
952
+ ) -> str:
953
+ """
954
+ Download dataset's annotations by filters.
955
+
956
+ You can filter both the items and their annotations, and download only the matching annotations.
957
+
958
+ Optional -- download annotations as: mask, instance, image mask of the item.
959
+
960
+ **Prerequisites**: You must be in the role of an *owner* or *developer*.
961
+
962
+ :param dtlpy.entities.dataset.Dataset dataset: dataset object
963
+ :param str local_path: local folder or filename to save to.
964
+ :param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
965
+ :param list annotation_options: type of download annotations: list(dl.ViewAnnotationOptions)
966
+ :param dtlpy.entities.filters.Filters annotation_filters: Filters entity to filter annotations for download
967
+ :param bool overwrite: optional - default = False to overwrite the existing files
968
+ :param bool dataset_lock: optional - default = False, lock the dataset (readonly) while the annotations are being exported
969
+ :param int thickness: optional - line thickness, if -1 annotation will be filled, default =1
970
+ :param bool with_text: optional - add text to annotations, default = False
971
+ :param str remote_path: DEPRECATED and ignored
972
+ :param bool include_annotations_in_output: default - True, whether the export should contain annotations
973
+ :param bool export_png_files: default - False; if True, semantic annotations will also be exported as PNG files
974
+ :param bool filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
975
+ :param float alpha: opacity value [0 1], default 1
976
+ :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
977
+ :param bool export_summary: optional - default = False, include a summary of the export
978
+ :param int lock_timeout_sec: optional - timeout in seconds for the dataset lock
979
+ :return: local_path of the directory where all the downloaded items and annotations are saved
981
+ :rtype: str
982
+
983
+ **Example**:
984
+
985
+ .. code-block:: python
986
+
987
+ file_path = project.datasets.download_annotations(dataset='dataset_entity',
988
+ local_path='local_path',
989
+ annotation_options=dl.ViewAnnotationOptions,
990
+ overwrite=False,
991
+ thickness=1,
992
+ with_text=False,
993
+ alpha=1,
994
+ dataset_lock=False,
995
+ lock_timeout_sec=300,
996
+ export_summary=False
997
+ )
998
+ """
999
+ if annotation_options is None:
1000
+ annotation_options = list()
1001
+ elif not isinstance(annotation_options, list):
1002
+ annotation_options = [annotation_options]
1003
+ for ann_option in annotation_options:
1004
+ if not isinstance(ann_option, entities.ViewAnnotationOptions):
1005
+ if ann_option not in list(entities.ViewAnnotationOptions):
1006
+ raise PlatformException(
1007
+ error='400',
1008
+ message='Unknown annotation download option: {}, please choose from: {}'.format(
1009
+ ann_option, list(entities.ViewAnnotationOptions)))
1010
+
1011
+ if remote_path is not None:
1012
+ logger.warning(
1013
+ '"remote_path" is ignored. Use "filters=dl.Filters(field="dir, values={!r}"'.format(remote_path))
1014
+ if local_path is None:
1015
+ if dataset.project is None:
1016
+ # by dataset name
1017
+ local_path = os.path.join(
1018
+ services.service_defaults.DATALOOP_PATH,
1019
+ "datasets",
1020
+ "{}_{}".format(dataset.name, dataset.id),
1021
+ )
1022
+ else:
1023
+ # by dataset and project name
1024
+ local_path = os.path.join(
1025
+ services.service_defaults.DATALOOP_PATH,
1026
+ "projects",
1027
+ dataset.project.name,
1028
+ "datasets",
1029
+ dataset.name,
1030
+ )
1031
+
1032
+ if filters is None:
1033
+ filters = entities.Filters()
1034
+ filters._user_query = 'false'
1035
+ if annotation_filters is not None:
1036
+ for annotation_filter_and in annotation_filters.and_filter_list:
1037
+ filters.add_join(field=annotation_filter_and.field,
1038
+ values=annotation_filter_and.values,
1039
+ operator=annotation_filter_and.operator,
1040
+ method=entities.FiltersMethod.AND)
1041
+ for annotation_filter_or in annotation_filters.or_filter_list:
1042
+ filters.add_join(field=annotation_filter_or.field,
1043
+ values=annotation_filter_or.values,
1044
+ operator=annotation_filter_or.operator,
1045
+ method=entities.FiltersMethod.OR)
1046
+
1047
+ downloader = repositories.Downloader(items_repository=dataset.items)
1048
+ downloader.download_annotations(dataset=dataset,
1049
+ filters=filters,
1050
+ annotation_filters=annotation_filters,
1051
+ local_path=local_path,
1052
+ overwrite=overwrite,
1053
+ include_annotations_in_output=include_annotations_in_output,
1054
+ export_png_files=export_png_files,
1055
+ filter_output_annotations=filter_output_annotations,
1056
+ export_version=export_version,
1057
+ dataset_lock=dataset_lock,
1058
+ lock_timeout_sec=lock_timeout_sec,
1059
+ export_summary=export_summary
1060
+ )
1061
+ if annotation_options:
1062
+ pages = dataset.items.list(filters=filters)
1063
+ if not isinstance(annotation_options, list):
1064
+ annotation_options = [annotation_options]
1065
+ # convert all annotations to annotation_options
1066
+ pool = dataset._client_api.thread_pools(pool_name='dataset.download')
1067
+ jobs = [None for _ in range(pages.items_count)]
1068
+ progress = tqdm.tqdm(total=pages.items_count,
1069
+ disable=dataset._client_api.verbose.disable_progress_bar_download_annotations,
1070
+ file=sys.stdout, desc='Download Annotations')
1071
+ i_item = 0
1072
+ for page in pages:
1073
+ for item in page:
1074
+ jobs[i_item] = pool.submit(
1075
+ Datasets._convert_single,
1076
+ **{
1077
+ 'downloader': downloader,
1078
+ 'item': item,
1079
+ 'img_filepath': None,
1080
+ 'local_path': local_path,
1081
+ 'overwrite': overwrite,
1082
+ 'annotation_options': annotation_options,
1083
+ 'annotation_filters': annotation_filters,
1084
+ 'thickness': thickness,
1085
+ 'with_text': with_text,
1086
+ 'progress': progress,
1087
+ 'alpha': alpha,
1088
+ 'export_version': export_version
1089
+ }
1090
+ )
1091
+ i_item += 1
1092
+ # get all results
1093
+ _ = [j.result() for j in jobs]
1094
+ progress.close()
1095
+ return local_path
1096
+
1097
+ def _upload_single_item_annotation(self, item, file, pbar):
1098
+ try:
1099
+ item.annotations.upload(file)
1100
+ except Exception as err:
1101
+ raise err
1102
+ finally:
1103
+ pbar.update()
1104
+
1105
+ def upload_annotations(self,
1106
+ dataset,
1107
+ local_path,
1108
+ filters: entities.Filters = None,
1109
+ clean=False,
1110
+ remote_root_path='/',
1111
+ export_version=entities.ExportVersion.V1
1112
+ ):
1113
+ """
1114
+ Upload annotations to dataset.
1115
+
1116
+ Example for remote_root_path: If the item filepath is "/a/b/item" and remote_root_path is "/a" - the start folder will be b instead of a
1117
+
1118
+ **Prerequisites**: You must have a dataset with items that are related to the annotations. Annotation JSON files are matched to items by filename. You must be in the role of an *owner* or *developer*.
1119
+
1120
+ :param dtlpy.entities.dataset.Dataset dataset: dataset to upload to
1121
+ :param str local_path: str - local folder where the annotations files are
1122
+ :param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
1123
+ :param bool clean: True to remove the old annotations
1124
+ :param str remote_root_path: the remote root path to match remote and local items
1125
+ :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
1126
+
1127
+ **Example**:
1128
+
1129
+ .. code-block:: python
1130
+
1131
+ project.datasets.upload_annotations(dataset='dataset_entity',
1132
+ local_path='local_path',
1133
+ clean=False,
1134
+ export_version=dl.ExportVersion.V1
1135
+ )
1136
+ """
1137
+ if filters is None:
1138
+ filters = entities.Filters()
1139
+ filters._user_query = 'false'
1140
+ pages = dataset.items.list(filters=filters)
1141
+ total_items = pages.items_count
1142
+ pbar = tqdm.tqdm(total=total_items, disable=dataset._client_api.verbose.disable_progress_bar_upload_annotations,
1143
+ file=sys.stdout, desc='Upload Annotations')
1144
+ pool = self._client_api.thread_pools('annotation.upload')
1145
+ annotations_uploaded_count = 0
1146
+ for item in pages.all():
1147
+ if export_version == entities.ExportVersion.V1:
1148
+ _, ext = os.path.splitext(item.filename)
1149
+ filepath = item.filename.replace(ext, '.json')
1150
+ else:
1151
+ filepath = item.filename + '.json'
1152
+ # make the file path ignore the hierarchy of the files that in remote_root_path
1153
+ filepath = os.path.relpath(filepath, remote_root_path)
1154
+ json_file = os.path.join(local_path, filepath)
1155
+ if not os.path.isfile(json_file):
1156
+ pbar.update()
1157
+ continue
1158
+ annotations_uploaded_count += 1
1159
+ if item.annotated and clean:
1160
+ item.annotations.delete(filters=entities.Filters(resource=entities.FiltersResource.ANNOTATION))
1161
+ pool.submit(self._upload_single_item_annotation, **{'item': item,
1162
+ 'file': json_file,
1163
+ 'pbar': pbar})
1164
+ pool.shutdown()
1165
+ if annotations_uploaded_count == 0:
1166
+ logger.warning(msg="No annotations uploaded to dataset! ")
1167
+ else:
1168
+ logger.info(msg='Found and uploaded {} annotations.'.format(annotations_uploaded_count))
1169
+
1170
+ def set_readonly(self, state: bool, dataset: entities.Dataset):
1171
+ """
1172
+ Set dataset readonly mode.
1173
+
1174
+ **Prerequisites**: You must be in the role of an *owner* or *developer*.
1175
+
1176
+ :param bool state: state to update readonly mode
1177
+ :param dtlpy.entities.dataset.Dataset dataset: dataset object
1178
+
1179
+ **Example**:
1180
+
1181
+ .. code-block:: python
1182
+
1183
+ project.datasets.set_readonly(dataset='dataset_entity', state=True)
1184
+ """
1185
+ import warnings
1186
+ warnings.warn("`readonly` flag on dataset is deprecated, doing nothing.", DeprecationWarning)
1187
+
1188
+
1189
+ @_api_reference.add(path='/datasets/{id}/split', method='post')
1190
+ def split_ml_subsets(self,
1191
+ dataset_id: str,
1192
+ items_query: entities.Filters,
1193
+ ml_split_list: dict) -> bool:
1194
+ """
1195
+ Split dataset items into ML subsets.
1196
+
1197
+ :param str dataset_id: The ID of the dataset.
1198
+ :param dtlpy.entities.filters.Filters items_query: Filters entity to select items.
1199
+ :param dict ml_split_list: Dictionary with 'train', 'validation', 'test' keys and integer percentages.
1200
+ :return: True if the split operation was successful.
1201
+ :rtype: bool
1202
+ :raises: PlatformException on failure; ValueError if percentages do not sum to 100 or keys/values are invalid.
1203
+ """
1204
+ # Validate percentages
1205
+ if not ml_split_list:
1206
+ ml_split_list = {'train': 80, 'validation': 10, 'test': 10}
1207
+
1208
+ if not items_query:
1209
+ items_query = entities.Filters()
1210
+
1211
+ items_query_dict = items_query.prepare()
1212
+ required_keys = {'train', 'validation', 'test'}
1213
+ if set(ml_split_list.keys()) != required_keys:
1214
+ raise ValueError("MLSplitList must have exactly the keys 'train', 'validation', 'test'.")
1215
+ total = sum(ml_split_list.values())
1216
+ if total != 100:
1217
+ raise ValueError(
1218
+ "Please set the Train, Validation, and Test subsets percentages to add up to 100%. "
1219
+ "For example: 70, 15, 15."
1220
+ )
1221
+ for key, value in ml_split_list.items():
1222
+ if not isinstance(value, int) or value < 0:
1223
+ raise ValueError("Percentages must be integers >= 0.")
1224
+ payload = {
1225
+ 'itemsQuery': items_query_dict,
1226
+ 'MLSplitList': ml_split_list
1227
+ }
1228
+ path = f'/datasets/{dataset_id}/split'
1229
+ success, response = self._client_api.gen_request(req_type='post',
1230
+ path=path,
1231
+ json_req=payload)
1232
+ if success:
1233
+ # Wait for the split operation to complete
1234
+ command = entities.Command.from_json(_json=response.json(),
1235
+ client_api=self._client_api)
1236
+ command.wait()
1237
+ return True
1238
+ else:
1239
+ raise exceptions.PlatformException(response)
1240
+
1241
+
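A minimal usage sketch for split_ml_subsets (the dataset id and percentages below are placeholders); percentages must be integers that sum to 100:

.. code-block:: python

    success = project.datasets.split_ml_subsets(
        dataset_id='dataset_id',
        items_query=dl.Filters(),
        ml_split_list={'train': 70, 'validation': 15, 'test': 15})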
1242
+ @_api_reference.add(path='/datasets/{id}/items/bulk-update-metadata', method='post')
1243
+ def bulk_update_ml_subset(self, dataset_id: str, items_query: entities.Filters, subset: str = None, deleteTag: bool = False) -> bool:
1244
+ """
1245
+ Bulk update ML subset assignment for selected items.
1246
+ If deleteTag is True, remove the ML subset assignment from the items. Otherwise, assign the specified subset.
1247
+
1248
+ :param str dataset_id: ID of the dataset
1249
+ :param dtlpy.entities.filters.Filters items_query: Filters entity for selecting items
1250
+ :param str subset: 'train', 'validation' or 'test'; pass None together with deleteTag=True to remove all subsets
+ :param bool deleteTag: True to remove the ML subset assignment instead of setting one
1251
+ :return: True if success
1252
+ :rtype: bool
1253
+ """
1254
+ if items_query is None:
1255
+ items_query = entities.Filters()
1256
+ items_query_dict = items_query.prepare()
1257
+ if not deleteTag and subset not in ['train', 'validation', 'test']:
1258
+ raise ValueError("subset must be one of: 'train', 'validation', 'test'")
1259
+ # Determine tag values based on subset
1260
+ tags = {
1261
+ 'train': True if subset == 'train' else None,
1262
+ 'validation': True if subset == 'validation' else None,
1263
+ 'test': True if subset == 'test' else None
1264
+ }
1265
+
1266
+ payload = {
1267
+ "query": items_query_dict,
1268
+ "updateQuery": {
1269
+ "update": {
1270
+ "metadata": {
1271
+ "system": {
1272
+ "tags": tags
1273
+ }
1274
+ }
1275
+ },
1276
+ "systemSpace": True
1277
+ }
1278
+ }
1279
+
1280
+ success, response = self._client_api.gen_request(
1281
+ req_type='post',
1282
+ path=f'/datasets/{dataset_id}/items/bulk-update-metadata',
1283
+ json_req=payload
1284
+ )
1285
+ if success:
1286
+ # Similar to split operation, a command is returned
1287
+ command = entities.Command.from_json(_json=response.json(), client_api=self._client_api)
1288
+ command.wait()
1289
+ return True
1290
+ else:
1291
+ raise exceptions.PlatformException(response)
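A minimal usage sketch for bulk_update_ml_subset (dataset id and filters are placeholders); the first call assigns the 'train' subset to the selected items, the second removes all subset tags:

.. code-block:: python

    project.datasets.bulk_update_ml_subset(dataset_id='dataset_id',
                                           items_query=dl.Filters(),
                                           subset='train')
    project.datasets.bulk_update_ml_subset(dataset_id='dataset_id',
                                           items_query=dl.Filters(),
                                           deleteTag=True)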