dtlpy 1.113.10__py3-none-any.whl → 1.114.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtlpy/__init__.py +488 -488
- dtlpy/__version__.py +1 -1
- dtlpy/assets/__init__.py +26 -26
- dtlpy/assets/__pycache__/__init__.cpython-38.pyc +0 -0
- dtlpy/assets/code_server/config.yaml +2 -2
- dtlpy/assets/code_server/installation.sh +24 -24
- dtlpy/assets/code_server/launch.json +13 -13
- dtlpy/assets/code_server/settings.json +2 -2
- dtlpy/assets/main.py +53 -53
- dtlpy/assets/main_partial.py +18 -18
- dtlpy/assets/mock.json +11 -11
- dtlpy/assets/model_adapter.py +83 -83
- dtlpy/assets/package.json +61 -61
- dtlpy/assets/package_catalog.json +29 -29
- dtlpy/assets/package_gitignore +307 -307
- dtlpy/assets/service_runners/__init__.py +33 -33
- dtlpy/assets/service_runners/converter.py +96 -96
- dtlpy/assets/service_runners/multi_method.py +49 -49
- dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
- dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
- dtlpy/assets/service_runners/multi_method_item.py +52 -52
- dtlpy/assets/service_runners/multi_method_json.py +52 -52
- dtlpy/assets/service_runners/single_method.py +37 -37
- dtlpy/assets/service_runners/single_method_annotation.py +43 -43
- dtlpy/assets/service_runners/single_method_dataset.py +43 -43
- dtlpy/assets/service_runners/single_method_item.py +41 -41
- dtlpy/assets/service_runners/single_method_json.py +42 -42
- dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
- dtlpy/assets/voc_annotation_template.xml +23 -23
- dtlpy/caches/base_cache.py +32 -32
- dtlpy/caches/cache.py +473 -473
- dtlpy/caches/dl_cache.py +201 -201
- dtlpy/caches/filesystem_cache.py +89 -89
- dtlpy/caches/redis_cache.py +84 -84
- dtlpy/dlp/__init__.py +20 -20
- dtlpy/dlp/cli_utilities.py +367 -367
- dtlpy/dlp/command_executor.py +764 -764
- dtlpy/dlp/dlp +1 -1
- dtlpy/dlp/dlp.bat +1 -1
- dtlpy/dlp/dlp.py +128 -128
- dtlpy/dlp/parser.py +651 -651
- dtlpy/entities/__init__.py +83 -83
- dtlpy/entities/analytic.py +311 -311
- dtlpy/entities/annotation.py +1879 -1879
- dtlpy/entities/annotation_collection.py +699 -699
- dtlpy/entities/annotation_definitions/__init__.py +20 -20
- dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
- dtlpy/entities/annotation_definitions/box.py +195 -195
- dtlpy/entities/annotation_definitions/classification.py +67 -67
- dtlpy/entities/annotation_definitions/comparison.py +72 -72
- dtlpy/entities/annotation_definitions/cube.py +204 -204
- dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
- dtlpy/entities/annotation_definitions/description.py +32 -32
- dtlpy/entities/annotation_definitions/ellipse.py +124 -124
- dtlpy/entities/annotation_definitions/free_text.py +62 -62
- dtlpy/entities/annotation_definitions/gis.py +69 -69
- dtlpy/entities/annotation_definitions/note.py +139 -139
- dtlpy/entities/annotation_definitions/point.py +117 -117
- dtlpy/entities/annotation_definitions/polygon.py +182 -182
- dtlpy/entities/annotation_definitions/polyline.py +111 -111
- dtlpy/entities/annotation_definitions/pose.py +92 -92
- dtlpy/entities/annotation_definitions/ref_image.py +86 -86
- dtlpy/entities/annotation_definitions/segmentation.py +240 -240
- dtlpy/entities/annotation_definitions/subtitle.py +34 -34
- dtlpy/entities/annotation_definitions/text.py +85 -85
- dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
- dtlpy/entities/app.py +220 -220
- dtlpy/entities/app_module.py +107 -107
- dtlpy/entities/artifact.py +174 -174
- dtlpy/entities/assignment.py +399 -399
- dtlpy/entities/base_entity.py +214 -214
- dtlpy/entities/bot.py +113 -113
- dtlpy/entities/codebase.py +296 -296
- dtlpy/entities/collection.py +38 -38
- dtlpy/entities/command.py +169 -169
- dtlpy/entities/compute.py +442 -442
- dtlpy/entities/dataset.py +1285 -1285
- dtlpy/entities/directory_tree.py +44 -44
- dtlpy/entities/dpk.py +470 -470
- dtlpy/entities/driver.py +222 -222
- dtlpy/entities/execution.py +397 -397
- dtlpy/entities/feature.py +124 -124
- dtlpy/entities/feature_set.py +145 -145
- dtlpy/entities/filters.py +641 -641
- dtlpy/entities/gis_item.py +107 -107
- dtlpy/entities/integration.py +184 -184
- dtlpy/entities/item.py +953 -953
- dtlpy/entities/label.py +123 -123
- dtlpy/entities/links.py +85 -85
- dtlpy/entities/message.py +175 -175
- dtlpy/entities/model.py +694 -691
- dtlpy/entities/node.py +1005 -1005
- dtlpy/entities/ontology.py +803 -803
- dtlpy/entities/organization.py +287 -287
- dtlpy/entities/package.py +657 -657
- dtlpy/entities/package_defaults.py +5 -5
- dtlpy/entities/package_function.py +185 -185
- dtlpy/entities/package_module.py +113 -113
- dtlpy/entities/package_slot.py +118 -118
- dtlpy/entities/paged_entities.py +290 -267
- dtlpy/entities/pipeline.py +593 -593
- dtlpy/entities/pipeline_execution.py +279 -279
- dtlpy/entities/project.py +394 -394
- dtlpy/entities/prompt_item.py +499 -499
- dtlpy/entities/recipe.py +301 -301
- dtlpy/entities/reflect_dict.py +102 -102
- dtlpy/entities/resource_execution.py +138 -138
- dtlpy/entities/service.py +958 -958
- dtlpy/entities/service_driver.py +117 -117
- dtlpy/entities/setting.py +294 -294
- dtlpy/entities/task.py +491 -491
- dtlpy/entities/time_series.py +143 -143
- dtlpy/entities/trigger.py +426 -426
- dtlpy/entities/user.py +118 -118
- dtlpy/entities/webhook.py +124 -124
- dtlpy/examples/__init__.py +19 -19
- dtlpy/examples/add_labels.py +135 -135
- dtlpy/examples/add_metadata_to_item.py +21 -21
- dtlpy/examples/annotate_items_using_model.py +65 -65
- dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
- dtlpy/examples/annotations_convert_to_voc.py +9 -9
- dtlpy/examples/annotations_convert_to_yolo.py +9 -9
- dtlpy/examples/convert_annotation_types.py +51 -51
- dtlpy/examples/converter.py +143 -143
- dtlpy/examples/copy_annotations.py +22 -22
- dtlpy/examples/copy_folder.py +31 -31
- dtlpy/examples/create_annotations.py +51 -51
- dtlpy/examples/create_video_annotations.py +83 -83
- dtlpy/examples/delete_annotations.py +26 -26
- dtlpy/examples/filters.py +113 -113
- dtlpy/examples/move_item.py +23 -23
- dtlpy/examples/play_video_annotation.py +13 -13
- dtlpy/examples/show_item_and_mask.py +53 -53
- dtlpy/examples/triggers.py +49 -49
- dtlpy/examples/upload_batch_of_items.py +20 -20
- dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
- dtlpy/examples/upload_items_with_modalities.py +43 -43
- dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
- dtlpy/examples/upload_yolo_format_annotations.py +70 -70
- dtlpy/exceptions.py +125 -125
- dtlpy/miscellaneous/__init__.py +20 -20
- dtlpy/miscellaneous/dict_differ.py +95 -95
- dtlpy/miscellaneous/git_utils.py +217 -217
- dtlpy/miscellaneous/json_utils.py +14 -14
- dtlpy/miscellaneous/list_print.py +105 -105
- dtlpy/miscellaneous/zipping.py +130 -130
- dtlpy/ml/__init__.py +20 -20
- dtlpy/ml/base_feature_extractor_adapter.py +27 -27
- dtlpy/ml/base_model_adapter.py +945 -940
- dtlpy/ml/metrics.py +461 -461
- dtlpy/ml/predictions_utils.py +274 -274
- dtlpy/ml/summary_writer.py +57 -57
- dtlpy/ml/train_utils.py +60 -60
- dtlpy/new_instance.py +252 -252
- dtlpy/repositories/__init__.py +56 -56
- dtlpy/repositories/analytics.py +85 -85
- dtlpy/repositories/annotations.py +916 -916
- dtlpy/repositories/apps.py +383 -383
- dtlpy/repositories/artifacts.py +452 -452
- dtlpy/repositories/assignments.py +599 -599
- dtlpy/repositories/bots.py +213 -213
- dtlpy/repositories/codebases.py +559 -559
- dtlpy/repositories/collections.py +332 -348
- dtlpy/repositories/commands.py +158 -158
- dtlpy/repositories/compositions.py +61 -61
- dtlpy/repositories/computes.py +434 -406
- dtlpy/repositories/datasets.py +1291 -1291
- dtlpy/repositories/downloader.py +895 -895
- dtlpy/repositories/dpks.py +433 -433
- dtlpy/repositories/drivers.py +266 -266
- dtlpy/repositories/executions.py +817 -817
- dtlpy/repositories/feature_sets.py +226 -226
- dtlpy/repositories/features.py +238 -238
- dtlpy/repositories/integrations.py +484 -484
- dtlpy/repositories/items.py +909 -915
- dtlpy/repositories/messages.py +94 -94
- dtlpy/repositories/models.py +877 -867
- dtlpy/repositories/nodes.py +80 -80
- dtlpy/repositories/ontologies.py +511 -511
- dtlpy/repositories/organizations.py +525 -525
- dtlpy/repositories/packages.py +1941 -1941
- dtlpy/repositories/pipeline_executions.py +448 -448
- dtlpy/repositories/pipelines.py +642 -642
- dtlpy/repositories/projects.py +539 -539
- dtlpy/repositories/recipes.py +399 -399
- dtlpy/repositories/resource_executions.py +137 -137
- dtlpy/repositories/schema.py +120 -120
- dtlpy/repositories/service_drivers.py +213 -213
- dtlpy/repositories/services.py +1704 -1704
- dtlpy/repositories/settings.py +339 -339
- dtlpy/repositories/tasks.py +1124 -1124
- dtlpy/repositories/times_series.py +278 -278
- dtlpy/repositories/triggers.py +536 -536
- dtlpy/repositories/upload_element.py +257 -257
- dtlpy/repositories/uploader.py +651 -651
- dtlpy/repositories/webhooks.py +249 -249
- dtlpy/services/__init__.py +22 -22
- dtlpy/services/aihttp_retry.py +131 -131
- dtlpy/services/api_client.py +1782 -1782
- dtlpy/services/api_reference.py +40 -40
- dtlpy/services/async_utils.py +133 -133
- dtlpy/services/calls_counter.py +44 -44
- dtlpy/services/check_sdk.py +68 -68
- dtlpy/services/cookie.py +115 -115
- dtlpy/services/create_logger.py +156 -156
- dtlpy/services/events.py +84 -84
- dtlpy/services/logins.py +235 -235
- dtlpy/services/reporter.py +256 -256
- dtlpy/services/service_defaults.py +91 -91
- dtlpy/utilities/__init__.py +20 -20
- dtlpy/utilities/annotations/__init__.py +16 -16
- dtlpy/utilities/annotations/annotation_converters.py +269 -269
- dtlpy/utilities/base_package_runner.py +264 -264
- dtlpy/utilities/converter.py +1650 -1650
- dtlpy/utilities/dataset_generators/__init__.py +1 -1
- dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
- dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
- dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
- dtlpy/utilities/local_development/__init__.py +1 -1
- dtlpy/utilities/local_development/local_session.py +179 -179
- dtlpy/utilities/reports/__init__.py +2 -2
- dtlpy/utilities/reports/figures.py +343 -343
- dtlpy/utilities/reports/report.py +71 -71
- dtlpy/utilities/videos/__init__.py +17 -17
- dtlpy/utilities/videos/video_player.py +598 -598
- dtlpy/utilities/videos/videos.py +470 -470
- {dtlpy-1.113.10.data → dtlpy-1.114.13.data}/scripts/dlp +1 -1
- dtlpy-1.114.13.data/scripts/dlp.bat +2 -0
- {dtlpy-1.113.10.data → dtlpy-1.114.13.data}/scripts/dlp.py +128 -128
- {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/LICENSE +200 -200
- {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/METADATA +172 -172
- dtlpy-1.114.13.dist-info/RECORD +240 -0
- {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/WHEEL +1 -1
- tests/features/environment.py +551 -550
- dtlpy-1.113.10.data/scripts/dlp.bat +0 -2
- dtlpy-1.113.10.dist-info/RECORD +0 -244
- tests/assets/__init__.py +0 -0
- tests/assets/models_flow/__init__.py +0 -0
- tests/assets/models_flow/failedmain.py +0 -52
- tests/assets/models_flow/main.py +0 -62
- tests/assets/models_flow/main_model.py +0 -54
- {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/entry_points.txt +0 -0
- {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/top_level.txt +0 -0
dtlpy/repositories/datasets.py
CHANGED
|
@@ -1,1291 +1,1291 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Datasets Repository
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import os
|
|
6
|
-
import sys
|
|
7
|
-
import time
|
|
8
|
-
import copy
|
|
9
|
-
import tqdm
|
|
10
|
-
import logging
|
|
11
|
-
import json
|
|
12
|
-
from typing import Union
|
|
13
|
-
|
|
14
|
-
from .. import entities, repositories, miscellaneous, exceptions, services, PlatformException, _api_reference
|
|
15
|
-
from ..services.api_client import ApiClient
|
|
16
|
-
|
|
17
|
-
logger = logging.getLogger(name='dtlpy')
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class Datasets:
|
|
21
|
-
"""
|
|
22
|
-
Datasets Repository
|
|
23
|
-
|
|
24
|
-
The Datasets class allows the user to manage datasets. Read more about datasets in our `documentation <https://dataloop.ai/docs/dataset>`_ and `SDK documentation <https://developers.dataloop.ai/tutorials/data_management/manage_datasets/chapter/>`_.
|
|
25
|
-
"""
|
|
26
|
-
|
|
27
|
-
def __init__(self, client_api: ApiClient, project: entities.Project = None):
|
|
28
|
-
self._client_api = client_api
|
|
29
|
-
self._project = project
|
|
30
|
-
|
|
31
|
-
############
|
|
32
|
-
# entities #
|
|
33
|
-
############
|
|
34
|
-
@property
|
|
35
|
-
def project(self) -> entities.Project:
|
|
36
|
-
if self._project is None:
|
|
37
|
-
# try get checkout
|
|
38
|
-
project = self._client_api.state_io.get('project')
|
|
39
|
-
if project is not None:
|
|
40
|
-
self._project = entities.Project.from_json(_json=project, client_api=self._client_api)
|
|
41
|
-
if self._project is None:
|
|
42
|
-
raise exceptions.PlatformException(
|
|
43
|
-
error='2001',
|
|
44
|
-
message='Cannot perform action WITHOUT Project entity in Datasets repository.'
|
|
45
|
-
' Please checkout or set a project')
|
|
46
|
-
assert isinstance(self._project, entities.Project)
|
|
47
|
-
return self._project
|
|
48
|
-
|
|
49
|
-
@project.setter
|
|
50
|
-
def project(self, project: entities.Project):
|
|
51
|
-
if not isinstance(project, entities.Project):
|
|
52
|
-
raise ValueError('Must input a valid Project entity')
|
|
53
|
-
self._project = project
|
|
54
|
-
|
|
55
|
-
###########
|
|
56
|
-
# methods #
|
|
57
|
-
###########
|
|
58
|
-
def __get_from_cache(self) -> entities.Dataset:
|
|
59
|
-
dataset = self._client_api.state_io.get('dataset')
|
|
60
|
-
if dataset is not None:
|
|
61
|
-
dataset = entities.Dataset.from_json(_json=dataset,
|
|
62
|
-
client_api=self._client_api,
|
|
63
|
-
datasets=self,
|
|
64
|
-
project=self._project)
|
|
65
|
-
return dataset
|
|
66
|
-
|
|
67
|
-
def __get_by_id(self, dataset_id) -> entities.Dataset:
|
|
68
|
-
success, response = self._client_api.gen_request(req_type='get',
|
|
69
|
-
path='/datasets/{}'.format(dataset_id))
|
|
70
|
-
if dataset_id is None or dataset_id == '':
|
|
71
|
-
raise exceptions.PlatformException('400', 'Please checkout a dataset')
|
|
72
|
-
|
|
73
|
-
if success:
|
|
74
|
-
dataset = entities.Dataset.from_json(client_api=self._client_api,
|
|
75
|
-
_json=response.json(),
|
|
76
|
-
datasets=self,
|
|
77
|
-
project=self._project)
|
|
78
|
-
else:
|
|
79
|
-
raise exceptions.PlatformException(response)
|
|
80
|
-
return dataset
|
|
81
|
-
|
|
82
|
-
def __get_by_identifier(self, identifier=None) -> entities.Dataset:
|
|
83
|
-
datasets = self.list()
|
|
84
|
-
datasets_by_name = [dataset for dataset in datasets if identifier in dataset.name or identifier in dataset.id]
|
|
85
|
-
if len(datasets_by_name) == 1:
|
|
86
|
-
return datasets_by_name[0]
|
|
87
|
-
elif len(datasets_by_name) > 1:
|
|
88
|
-
raise Exception('Multiple datasets with this name exist')
|
|
89
|
-
else:
|
|
90
|
-
raise Exception("Dataset not found")
|
|
91
|
-
|
|
92
|
-
def _bulid_folder_filter(self, folder_path, filters=None):
|
|
93
|
-
if filters is None:
|
|
94
|
-
filters = entities.Filters()
|
|
95
|
-
filters._user_query = 'false'
|
|
96
|
-
if not folder_path.startswith('/'):
|
|
97
|
-
folder_path = '/' + folder_path
|
|
98
|
-
filters.add(field='dir', values=folder_path, method=entities.FiltersMethod.OR)
|
|
99
|
-
if not folder_path.endswith('*'):
|
|
100
|
-
if not folder_path.endswith('/'):
|
|
101
|
-
folder_path += '/'
|
|
102
|
-
filters.add(field='dir', values=folder_path + '*', method=entities.FiltersMethod.OR)
|
|
103
|
-
return filters
|
|
104
|
-
|
|
105
|
-
def _get_binaries_dataset(self):
|
|
106
|
-
filters = entities.Filters(resource=entities.FiltersResource.DATASET)
|
|
107
|
-
filters.add(field='name', values='Binaries')
|
|
108
|
-
filters.system_space = True
|
|
109
|
-
datasets = self.list(filters=filters)
|
|
110
|
-
if len(datasets) == 0:
|
|
111
|
-
# empty list
|
|
112
|
-
raise exceptions.PlatformException('404', 'Dataset not found. Name: "Binaries"')
|
|
113
|
-
# dataset = None
|
|
114
|
-
elif len(datasets) > 1:
|
|
115
|
-
raise exceptions.PlatformException('400', 'More than one dataset with same name.')
|
|
116
|
-
else:
|
|
117
|
-
dataset = datasets[0]
|
|
118
|
-
return dataset
|
|
119
|
-
|
|
120
|
-
def _resolve_dataset_id(self, dataset, dataset_name, dataset_id):
|
|
121
|
-
if dataset is None and dataset_name is None and dataset_id is None:
|
|
122
|
-
raise ValueError('Must provide dataset, dataset name or dataset id')
|
|
123
|
-
if dataset_id is None:
|
|
124
|
-
if dataset is None:
|
|
125
|
-
dataset = self.get(dataset_name=dataset_name)
|
|
126
|
-
dataset_id = dataset.id
|
|
127
|
-
return dataset_id
|
|
128
|
-
|
|
129
|
-
@staticmethod
|
|
130
|
-
def _build_payload(filters, include_feature_vectors, include_annotations,
|
|
131
|
-
export_type, annotation_filters, feature_vector_filters, dataset_lock, lock_timeout_sec, export_summary):
|
|
132
|
-
valid_list = [e.value for e in entities.ExportType]
|
|
133
|
-
valid_types = ', '.join(valid_list)
|
|
134
|
-
if export_type not in ['json', 'zip']:
|
|
135
|
-
raise ValueError('export_type must be one of the following: {}'.format(valid_types))
|
|
136
|
-
payload = {'exportType': export_type}
|
|
137
|
-
if filters is None:
|
|
138
|
-
filters = entities.Filters()
|
|
139
|
-
|
|
140
|
-
if isinstance(filters, entities.Filters):
|
|
141
|
-
payload['itemsQuery'] = {'filter': filters.prepare()['filter'], 'join': filters.prepare().get("join", {})}
|
|
142
|
-
elif isinstance(filters, dict):
|
|
143
|
-
payload['itemsQuery'] = filters
|
|
144
|
-
else:
|
|
145
|
-
raise exceptions.BadRequest(message='filters must be of type dict or Filters', status_code=500)
|
|
146
|
-
|
|
147
|
-
payload['itemsVectorQuery'] = {}
|
|
148
|
-
if include_feature_vectors:
|
|
149
|
-
payload['includeItemVectors'] = True
|
|
150
|
-
payload['itemsVectorQuery']['select'] = {"datasetId": 1, 'featureSetId': 1, 'value': 1}
|
|
151
|
-
|
|
152
|
-
if feature_vector_filters is not None:
|
|
153
|
-
payload['itemsVectorQuery']['filter'] = feature_vector_filters.prepare()['filter']
|
|
154
|
-
|
|
155
|
-
payload['annotations'] = {"include": include_annotations, "convertSemantic": False}
|
|
156
|
-
|
|
157
|
-
if annotation_filters is not None:
|
|
158
|
-
payload['annotationsQuery'] = annotation_filters.prepare()['filter']
|
|
159
|
-
payload['annotations']['filter'] = True
|
|
160
|
-
|
|
161
|
-
if dataset_lock:
|
|
162
|
-
payload['datasetLock'] = dataset_lock
|
|
163
|
-
|
|
164
|
-
if export_summary:
|
|
165
|
-
payload['summary'] = export_summary
|
|
166
|
-
|
|
167
|
-
if lock_timeout_sec:
|
|
168
|
-
payload['lockTimeoutSec'] = lock_timeout_sec
|
|
169
|
-
|
|
170
|
-
return payload
|
|
171
|
-
|
|
172
|
-
def _download_exported_item(self, item_id, export_type, local_path=None):
|
|
173
|
-
export_item = repositories.Items(client_api=self._client_api).get(item_id=item_id)
|
|
174
|
-
export_item_path = export_item.download(local_path=local_path)
|
|
175
|
-
|
|
176
|
-
if export_type == entities.ExportType.ZIP:
|
|
177
|
-
# unzipping annotations to directory
|
|
178
|
-
if isinstance(export_item_path, list) or not os.path.isfile(export_item_path):
|
|
179
|
-
raise exceptions.PlatformException(
|
|
180
|
-
error='404',
|
|
181
|
-
message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
|
|
182
|
-
export_item.id))
|
|
183
|
-
try:
|
|
184
|
-
miscellaneous.Zipping.unzip_directory(zip_filename=export_item_path,
|
|
185
|
-
to_directory=local_path)
|
|
186
|
-
except Exception as e:
|
|
187
|
-
logger.warning("Failed to extract zip file error: {}".format(e))
|
|
188
|
-
finally:
|
|
189
|
-
# cleanup
|
|
190
|
-
if isinstance(export_item_path, str) and os.path.isfile(export_item_path):
|
|
191
|
-
os.remove(export_item_path)
|
|
192
|
-
|
|
193
|
-
@property
|
|
194
|
-
def platform_url(self):
|
|
195
|
-
return self._client_api._get_resource_url("projects/{}/datasets".format(self.project.id))
|
|
196
|
-
|
|
197
|
-
def open_in_web(self,
|
|
198
|
-
dataset_name: str = None,
|
|
199
|
-
dataset_id: str = None,
|
|
200
|
-
dataset: entities.Dataset = None):
|
|
201
|
-
"""
|
|
202
|
-
Open the dataset in web platform.
|
|
203
|
-
|
|
204
|
-
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
205
|
-
|
|
206
|
-
:param str dataset_name: The Name of the dataset
|
|
207
|
-
:param str dataset_id: The Id of the dataset
|
|
208
|
-
:param dtlpy.entities.dataset.Dataset dataset: dataset object
|
|
209
|
-
|
|
210
|
-
**Example**:
|
|
211
|
-
|
|
212
|
-
.. code-block:: python
|
|
213
|
-
|
|
214
|
-
project.datasets.open_in_web(dataset_id='dataset_id')
|
|
215
|
-
"""
|
|
216
|
-
if dataset_name is not None:
|
|
217
|
-
dataset = self.get(dataset_name=dataset_name)
|
|
218
|
-
if dataset is not None:
|
|
219
|
-
dataset.open_in_web()
|
|
220
|
-
elif dataset_id is not None:
|
|
221
|
-
self._client_api._open_in_web(url=f'{self.platform_url}/{dataset_id}/items')
|
|
222
|
-
else:
|
|
223
|
-
self._client_api._open_in_web(url=self.platform_url)
|
|
224
|
-
|
|
225
|
-
def checkout(self,
|
|
226
|
-
identifier: str = None,
|
|
227
|
-
dataset_name: str = None,
|
|
228
|
-
dataset_id: str = None,
|
|
229
|
-
dataset: entities.Dataset = None):
|
|
230
|
-
"""
|
|
231
|
-
Checkout (switch) to a dataset to work on it.
|
|
232
|
-
|
|
233
|
-
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
234
|
-
|
|
235
|
-
You must provide at least ONE of the following params: dataset_id, dataset_name.
|
|
236
|
-
|
|
237
|
-
:param str identifier: project name or partial id that you wish to switch
|
|
238
|
-
:param str dataset_name: The Name of the dataset
|
|
239
|
-
:param str dataset_id: The Id of the dataset
|
|
240
|
-
:param dtlpy.entities.dataset.Dataset dataset: dataset object
|
|
241
|
-
|
|
242
|
-
**Example**:
|
|
243
|
-
|
|
244
|
-
.. code-block:: python
|
|
245
|
-
|
|
246
|
-
project.datasets.checkout(dataset_id='dataset_id')
|
|
247
|
-
"""
|
|
248
|
-
if dataset is None:
|
|
249
|
-
if dataset_id is not None or dataset_name is not None:
|
|
250
|
-
try:
|
|
251
|
-
dataset = self.project.datasets.get(dataset_name=dataset_name, dataset_id=dataset_id)
|
|
252
|
-
except exceptions.MissingEntity:
|
|
253
|
-
dataset = self.get(dataset_id=dataset_id, dataset_name=dataset_name)
|
|
254
|
-
elif identifier is not None:
|
|
255
|
-
dataset = self.__get_by_identifier(identifier=identifier)
|
|
256
|
-
else:
|
|
257
|
-
raise exceptions.PlatformException(error='400',
|
|
258
|
-
message='Must provide partial/full id/name to checkout')
|
|
259
|
-
self._client_api.state_io.put('dataset', dataset.to_json())
|
|
260
|
-
logger.info('Checked out to dataset {}'.format(dataset.name))
|
|
261
|
-
|
|
262
|
-
@_api_reference.add(path='/datasets/query', method='post')
|
|
263
|
-
def list(self, name=None, creator=None, filters: entities.Filters = None) -> miscellaneous.List[entities.Dataset]:
|
|
264
|
-
"""
|
|
265
|
-
List all datasets.
|
|
266
|
-
|
|
267
|
-
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
268
|
-
|
|
269
|
-
:param str name: list by name
|
|
270
|
-
:param str creator: list by
|
|
271
|
-
:param dtlpy.entities.filters.Filters filters: Filters entity containing filters parameters
|
|
272
|
-
:return: List of datasets
|
|
273
|
-
:rtype: list
|
|
274
|
-
|
|
275
|
-
**Example**:
|
|
276
|
-
|
|
277
|
-
.. code-block:: python
|
|
278
|
-
filters = dl.Filters(resource='datasets')
|
|
279
|
-
filters.add(field='readonly', values=False)
|
|
280
|
-
datasets = project.datasets.list(filters=filters)
|
|
281
|
-
"""
|
|
282
|
-
if filters is None:
|
|
283
|
-
filters = entities.Filters(resource=entities.FiltersResource.DATASET)
|
|
284
|
-
# assert type filters
|
|
285
|
-
elif not isinstance(filters, entities.Filters):
|
|
286
|
-
raise exceptions.PlatformException(error='400',
|
|
287
|
-
message='Unknown filters type: {!r}'.format(type(filters)))
|
|
288
|
-
if filters.resource != entities.FiltersResource.DATASET:
|
|
289
|
-
raise exceptions.PlatformException(
|
|
290
|
-
error='400',
|
|
291
|
-
message='Filters resource must to be FiltersResource.DATASET. Got: {!r}'.format(filters.resource))
|
|
292
|
-
|
|
293
|
-
url = '/datasets/query'
|
|
294
|
-
|
|
295
|
-
if name is not None:
|
|
296
|
-
filters.add(field='name', values=name)
|
|
297
|
-
if creator is not None:
|
|
298
|
-
filters.add(field='creator', values=creator)
|
|
299
|
-
if self._project is not None:
|
|
300
|
-
filters.context = {"projects": [self._project.id]}
|
|
301
|
-
filters.page_size = 1000
|
|
302
|
-
filters.page = 0
|
|
303
|
-
datasets = list()
|
|
304
|
-
while True:
|
|
305
|
-
success, response = self._client_api.gen_request(req_type='POST',
|
|
306
|
-
json_req=filters.prepare(),
|
|
307
|
-
path=url,
|
|
308
|
-
headers={'user_query': filters._user_query})
|
|
309
|
-
if success:
|
|
310
|
-
pool = self._client_api.thread_pools('entity.create')
|
|
311
|
-
datasets_json = response.json()['items']
|
|
312
|
-
jobs = [None for _ in range(len(datasets_json))]
|
|
313
|
-
# return triggers list
|
|
314
|
-
for i_dataset, dataset in enumerate(datasets_json):
|
|
315
|
-
jobs[i_dataset] = pool.submit(entities.Dataset._protected_from_json,
|
|
316
|
-
**{'client_api': self._client_api,
|
|
317
|
-
'_json': dataset,
|
|
318
|
-
'datasets': self,
|
|
319
|
-
'project': self.project})
|
|
320
|
-
|
|
321
|
-
# get all results
|
|
322
|
-
results = [j.result() for j in jobs]
|
|
323
|
-
# log errors
|
|
324
|
-
_ = [logger.warning(r[1]) for r in results if r[0] is False]
|
|
325
|
-
# return good jobs
|
|
326
|
-
datasets.extend([r[1] for r in results if r[0] is True])
|
|
327
|
-
if response.json()['hasNextPage'] is True:
|
|
328
|
-
filters.page += 1
|
|
329
|
-
else:
|
|
330
|
-
break
|
|
331
|
-
else:
|
|
332
|
-
raise exceptions.PlatformException(response)
|
|
333
|
-
datasets = miscellaneous.List(datasets)
|
|
334
|
-
return datasets
|
|
335
|
-
|
|
336
|
-
@_api_reference.add(path='/datasets/{id}', method='get')
|
|
337
|
-
def get(self,
|
|
338
|
-
dataset_name: str = None,
|
|
339
|
-
dataset_id: str = None,
|
|
340
|
-
checkout: bool = False,
|
|
341
|
-
fetch: bool = None
|
|
342
|
-
) -> entities.Dataset:
|
|
343
|
-
"""
|
|
344
|
-
Get dataset by name or id.
|
|
345
|
-
|
|
346
|
-
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
347
|
-
|
|
348
|
-
You must provide at least ONE of the following params: dataset_id, dataset_name.
|
|
349
|
-
|
|
350
|
-
:param str dataset_name: optional - search by name
|
|
351
|
-
:param str dataset_id: optional - search by id
|
|
352
|
-
:param bool checkout: set the dataset as a default dataset object (cookies)
|
|
353
|
-
:param bool fetch: optional - fetch entity from platform (True), default taken from cookie
|
|
354
|
-
:return: Dataset object
|
|
355
|
-
:rtype: dtlpy.entities.dataset.Dataset
|
|
356
|
-
|
|
357
|
-
**Example**:
|
|
358
|
-
|
|
359
|
-
.. code-block:: python
|
|
360
|
-
|
|
361
|
-
dataset = project.datasets.get(dataset_id='dataset_id')
|
|
362
|
-
"""
|
|
363
|
-
if fetch is None:
|
|
364
|
-
fetch = self._client_api.fetch_entities
|
|
365
|
-
|
|
366
|
-
if dataset_id is None and dataset_name is None:
|
|
367
|
-
dataset = self.__get_from_cache()
|
|
368
|
-
if dataset is None:
|
|
369
|
-
raise exceptions.PlatformException(
|
|
370
|
-
error='400',
|
|
371
|
-
message='No checked-out Dataset was found, must checkout or provide an identifier in inputs')
|
|
372
|
-
elif fetch:
|
|
373
|
-
if dataset_id is not None and dataset_id != '':
|
|
374
|
-
dataset = self.__get_by_id(dataset_id)
|
|
375
|
-
# verify input dataset name is same as the given id
|
|
376
|
-
if dataset_name is not None and dataset.name != dataset_name:
|
|
377
|
-
logger.warning(
|
|
378
|
-
"Mismatch found in datasets.get: dataset_name is different then dataset.name: "
|
|
379
|
-
"{!r} != {!r}".format(
|
|
380
|
-
dataset_name,
|
|
381
|
-
dataset.name))
|
|
382
|
-
elif dataset_name is not None:
|
|
383
|
-
datasets = self.list(name=dataset_name)
|
|
384
|
-
if not datasets:
|
|
385
|
-
# empty list
|
|
386
|
-
raise exceptions.PlatformException('404', 'Dataset not found. Name: {!r}'.format(dataset_name))
|
|
387
|
-
# dataset = None
|
|
388
|
-
elif len(datasets) > 1:
|
|
389
|
-
raise exceptions.PlatformException('400', 'More than one dataset with same name.')
|
|
390
|
-
else:
|
|
391
|
-
dataset = datasets[0]
|
|
392
|
-
else:
|
|
393
|
-
raise exceptions.PlatformException(
|
|
394
|
-
error='404',
|
|
395
|
-
message='No input and no checked-out found')
|
|
396
|
-
else:
|
|
397
|
-
dataset = entities.Dataset.from_json(_json={'id': dataset_id,
|
|
398
|
-
'name': dataset_id},
|
|
399
|
-
client_api=self._client_api,
|
|
400
|
-
datasets=self,
|
|
401
|
-
project=self._project,
|
|
402
|
-
is_fetched=False)
|
|
403
|
-
assert isinstance(dataset, entities.Dataset)
|
|
404
|
-
if checkout:
|
|
405
|
-
self.checkout(dataset=dataset)
|
|
406
|
-
return dataset
|
|
407
|
-
|
|
408
|
-
@_api_reference.add(path='/datasets/{id}', method='delete')
|
|
409
|
-
def delete(self,
|
|
410
|
-
dataset_name: str = None,
|
|
411
|
-
dataset_id: str = None,
|
|
412
|
-
sure: bool = False,
|
|
413
|
-
really: bool = False):
|
|
414
|
-
"""
|
|
415
|
-
Delete a dataset forever!
|
|
416
|
-
|
|
417
|
-
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
418
|
-
|
|
419
|
-
**Example**:
|
|
420
|
-
|
|
421
|
-
.. code-block:: python
|
|
422
|
-
|
|
423
|
-
is_deleted = project.datasets.delete(dataset_id='dataset_id', sure=True, really=True)
|
|
424
|
-
|
|
425
|
-
:param str dataset_name: optional - search by name
|
|
426
|
-
:param str dataset_id: optional - search by id
|
|
427
|
-
:param bool sure: Are you sure you want to delete?
|
|
428
|
-
:param bool really: Really really sure?
|
|
429
|
-
:return: True is success
|
|
430
|
-
:rtype: bool
|
|
431
|
-
"""
|
|
432
|
-
if sure and really:
|
|
433
|
-
dataset = self.get(dataset_name=dataset_name, dataset_id=dataset_id)
|
|
434
|
-
success, response = self._client_api.gen_request(req_type='delete',
|
|
435
|
-
path='/datasets/{}'.format(dataset.id))
|
|
436
|
-
if not success:
|
|
437
|
-
raise exceptions.PlatformException(response)
|
|
438
|
-
logger.info('Dataset {!r} was deleted successfully'.format(dataset.name))
|
|
439
|
-
return True
|
|
440
|
-
else:
|
|
441
|
-
raise exceptions.PlatformException(
|
|
442
|
-
error='403',
|
|
443
|
-
message='Cant delete dataset from SDK. Please login to platform to delete')
|
|
444
|
-
|
|
445
|
-
@_api_reference.add(path='/datasets/{id}', method='patch')
|
|
446
|
-
def update(self,
|
|
447
|
-
dataset: entities.Dataset,
|
|
448
|
-
system_metadata: bool = False,
|
|
449
|
-
patch: dict = None
|
|
450
|
-
) -> entities.Dataset:
|
|
451
|
-
"""
|
|
452
|
-
Update dataset field.
|
|
453
|
-
|
|
454
|
-
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
455
|
-
|
|
456
|
-
:param dtlpy.entities.dataset.Dataset dataset: dataset object
|
|
457
|
-
:param bool system_metadata: True, if you want to change metadata system
|
|
458
|
-
:param dict patch: Specific patch request
|
|
459
|
-
:return: Dataset object
|
|
460
|
-
:rtype: dtlpy.entities.dataset.Dataset
|
|
461
|
-
|
|
462
|
-
**Example**:
|
|
463
|
-
|
|
464
|
-
.. code-block:: python
|
|
465
|
-
|
|
466
|
-
dataset = project.datasets.update(dataset='dataset_entity')
|
|
467
|
-
"""
|
|
468
|
-
url_path = '/datasets/{}'.format(dataset.id)
|
|
469
|
-
if system_metadata:
|
|
470
|
-
url_path += '?system=true'
|
|
471
|
-
|
|
472
|
-
if patch is None:
|
|
473
|
-
patch = dataset.to_json()
|
|
474
|
-
|
|
475
|
-
success, response = self._client_api.gen_request(req_type='patch',
|
|
476
|
-
path=url_path,
|
|
477
|
-
json_req=patch)
|
|
478
|
-
if success:
|
|
479
|
-
logger.info('Dataset was updated successfully')
|
|
480
|
-
return dataset
|
|
481
|
-
else:
|
|
482
|
-
raise exceptions.PlatformException(response)
|
|
483
|
-
|
|
484
|
-
@_api_reference.add(path='/datasets/{id}/unlock', method='patch')
|
|
485
|
-
def unlock(self, dataset: entities.Dataset ) -> entities.Dataset:
|
|
486
|
-
"""
|
|
487
|
-
Unlock dataset.
|
|
488
|
-
|
|
489
|
-
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
490
|
-
|
|
491
|
-
:param dtlpy.entities.dataset.Dataset dataset: dataset object
|
|
492
|
-
:return: Dataset object
|
|
493
|
-
:rtype: dtlpy.entities.dataset.Dataset
|
|
494
|
-
|
|
495
|
-
**Example**:
|
|
496
|
-
|
|
497
|
-
.. code-block:: python
|
|
498
|
-
|
|
499
|
-
dataset = project.datasets.unlock(dataset='dataset_entity')
|
|
500
|
-
"""
|
|
501
|
-
url_path = '/datasets/{}/unlock'.format(dataset.id)
|
|
502
|
-
|
|
503
|
-
success, response = self._client_api.gen_request(req_type='patch', path=url_path)
|
|
504
|
-
if success:
|
|
505
|
-
logger.info('Dataset was unlocked successfully')
|
|
506
|
-
return dataset
|
|
507
|
-
else:
|
|
508
|
-
raise exceptions.PlatformException(response)
|
|
509
|
-
|
|
510
|
-
@_api_reference.add(path='/datasets/{id}/directoryTree', method='get')
|
|
511
|
-
def directory_tree(self,
|
|
512
|
-
dataset: entities.Dataset = None,
|
|
513
|
-
dataset_name: str = None,
|
|
514
|
-
dataset_id: str = None):
|
|
515
|
-
"""
|
|
516
|
-
Get dataset's directory tree.
|
|
517
|
-
|
|
518
|
-
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
519
|
-
|
|
520
|
-
You must provide at least ONE of the following params: dataset, dataset_name, dataset_id.
|
|
521
|
-
|
|
522
|
-
:param dtlpy.entities.dataset.Dataset dataset: dataset object
|
|
523
|
-
:param str dataset_name: The Name of the dataset
|
|
524
|
-
:param str dataset_id: The Id of the dataset
|
|
525
|
-
:return: DirectoryTree
|
|
526
|
-
|
|
527
|
-
**Example**:
|
|
528
|
-
|
|
529
|
-
.. code-block:: python
|
|
530
|
-
directory_tree = dataset.directory_tree
|
|
531
|
-
directory_tree = project.datasets.directory_tree(dataset='dataset_entity')
|
|
532
|
-
"""
|
|
533
|
-
dataset_id = self._resolve_dataset_id(dataset, dataset_name, dataset_id)
|
|
534
|
-
|
|
535
|
-
url_path = '/datasets/{}/directoryTree'.format(dataset_id)
|
|
536
|
-
|
|
537
|
-
success, response = self._client_api.gen_request(req_type='get',
|
|
538
|
-
path=url_path)
|
|
539
|
-
|
|
540
|
-
if success:
|
|
541
|
-
return entities.DirectoryTree(_json=response.json())
|
|
542
|
-
else:
|
|
543
|
-
raise exceptions.PlatformException(response)
|
|
544
|
-
|
|
545
|
-
@_api_reference.add(path='/datasets/{id}/clone', method='post')
|
|
546
|
-
def clone(self,
|
|
547
|
-
dataset_id: str,
|
|
548
|
-
clone_name: str = None,
|
|
549
|
-
filters: entities.Filters = None,
|
|
550
|
-
with_items_annotations: bool = True,
|
|
551
|
-
with_metadata: bool = True,
|
|
552
|
-
with_task_annotations_status: bool = True,
|
|
553
|
-
dst_dataset_id: str = None,
|
|
554
|
-
target_directory: str = None):
|
|
555
|
-
"""
|
|
556
|
-
Clone a dataset. Read more about cloning datatsets and items in our `documentation <https://dataloop.ai/docs/clone-merge-dataset#cloned-dataset>`_ and `SDK documentation <https://developers.dataloop.ai/tutorials/data_management/data_versioning/chapter/>`_.
|
|
557
|
-
|
|
558
|
-
**Prerequisites**: You must be in the role of an *owner* or *developer*.
|
|
559
|
-
|
|
560
|
-
:param str dataset_id: id of the dataset you wish to clone
|
|
561
|
-
:param str clone_name: new dataset name
|
|
562
|
-
:param dtlpy.entities.filters.Filters filters: Filters entity or a query dict
|
|
563
|
-
:param bool with_items_annotations: true to clone with items annotations
|
|
564
|
-
:param bool with_metadata: true to clone with metadata
|
|
565
|
-
:param bool with_task_annotations_status: true to clone with task annotations' status
|
|
566
|
-
:param str dst_dataset_id: destination dataset id
|
|
567
|
-
:param str target_directory: target directory
|
|
568
|
-
:return: dataset object
|
|
569
|
-
:rtype: dtlpy.entities.dataset.Dataset
|
|
570
|
-
|
|
571
|
-
**Example**:
|
|
572
|
-
|
|
573
|
-
.. code-block:: python
|
|
574
|
-
|
|
575
|
-
dataset = project.datasets.clone(dataset_id='dataset_id',
|
|
576
|
-
clone_name='dataset_clone_name',
|
|
577
|
-
with_metadata=True,
|
|
578
|
-
with_items_annotations=False,
|
|
579
|
-
with_task_annotations_status=False)
|
|
580
|
-
"""
|
|
581
|
-
if clone_name is None and dst_dataset_id is None:
|
|
582
|
-
raise exceptions.PlatformException('400', 'Must provide clone name or destination dataset id')
|
|
583
|
-
if filters is None:
|
|
584
|
-
filters = entities.Filters()
|
|
585
|
-
filters._user_query = 'false'
|
|
586
|
-
elif not isinstance(filters, entities.Filters):
|
|
587
|
-
raise exceptions.PlatformException(
|
|
588
|
-
error='400',
|
|
589
|
-
message='"filters" must be a dl.Filters entity. got: {!r}'.format(type(filters)))
|
|
590
|
-
|
|
591
|
-
copy_filters = copy.deepcopy(filters)
|
|
592
|
-
if copy_filters.has_field('hidden'):
|
|
593
|
-
copy_filters.pop('hidden')
|
|
594
|
-
|
|
595
|
-
if target_directory is not None and not target_directory.startswith('/'):
|
|
596
|
-
target_directory = '/' + target_directory
|
|
597
|
-
|
|
598
|
-
payload = {
|
|
599
|
-
"name": clone_name,
|
|
600
|
-
"filter": copy_filters.prepare(),
|
|
601
|
-
"cloneDatasetParams": {
|
|
602
|
-
"withItemsAnnotations": with_items_annotations,
|
|
603
|
-
"withMetadata": with_metadata,
|
|
604
|
-
"withTaskAnnotationsStatus": with_task_annotations_status,
|
|
605
|
-
"targetDirectory": target_directory
|
|
606
|
-
}
|
|
607
|
-
}
|
|
608
|
-
if dst_dataset_id is not None:
|
|
609
|
-
payload['cloneDatasetParams']['targetDatasetId'] = dst_dataset_id
|
|
610
|
-
success, response = self._client_api.gen_request(req_type='post',
|
|
611
|
-
path='/datasets/{}/clone'.format(dataset_id),
|
|
612
|
-
json_req=payload,
|
|
613
|
-
headers={'user_query': filters._user_query})
|
|
614
|
-
|
|
615
|
-
if not success:
|
|
616
|
-
raise exceptions.PlatformException(response)
|
|
617
|
-
|
|
618
|
-
command = entities.Command.from_json(_json=response.json(),
|
|
619
|
-
client_api=self._client_api)
|
|
620
|
-
command = command.wait()
|
|
621
|
-
|
|
622
|
-
if 'returnedModelId' not in command.spec:
|
|
623
|
-
raise exceptions.PlatformException(error='400',
|
|
624
|
-
message="returnedModelId key is missing in command response: {!r}"
|
|
625
|
-
.format(response))
|
|
626
|
-
return self.get(dataset_id=command.spec['returnedModelId'])
|
|
627
|
-
|
|
628
|
-
@_api_reference.add(path='/datasets/{id}/export', method='post')
|
|
629
|
-
def export(self,
|
|
630
|
-
dataset: entities.Dataset = None,
|
|
631
|
-
dataset_name: str = None,
|
|
632
|
-
dataset_id: str = None,
|
|
633
|
-
local_path: str = None,
|
|
634
|
-
filters: Union[dict, entities.Filters] = None,
|
|
635
|
-
annotation_filters: entities.Filters = None,
|
|
636
|
-
feature_vector_filters: entities.Filters = None,
|
|
637
|
-
include_feature_vectors: bool = False,
|
|
638
|
-
include_annotations: bool = False,
|
|
639
|
-
export_type: entities.ExportType = entities.ExportType.JSON,
|
|
640
|
-
timeout: int = 0,
|
|
641
|
-
dataset_lock: bool = False,
|
|
642
|
-
lock_timeout_sec: int = None,
|
|
643
|
-
export_summary: bool = False):
|
|
644
|
-
"""
|
|
645
|
-
Export dataset items and annotations.
|
|
646
|
-
|
|
647
|
-
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
648
|
-
|
|
649
|
-
You must provide at least ONE of the following params: dataset, dataset_name, dataset_id.
|
|
650
|
-
|
|
651
|
-
:param dtlpy.entities.dataset.Dataset dataset: Dataset object
|
|
652
|
-
:param str dataset_name: The name of the dataset
|
|
653
|
-
:param str dataset_id: The ID of the dataset
|
|
654
|
-
:param str local_path: Local path to save the exported dataset
|
|
655
|
-
:param Union[dict, dtlpy.entities.filters.Filters] filters: Filters entity or a query dictionary
|
|
656
|
-
:param dtlpy.entities.filters.Filters annotation_filters: Filters entity to filter annotations for export
|
|
657
|
-
:param dtlpy.entities.filters.Filters feature_vector_filters: Filters entity to filter feature vectors for export
|
|
658
|
-
:param bool include_feature_vectors: Include item feature vectors in the export
|
|
659
|
-
:param bool include_annotations: Include item annotations in the export
|
|
660
|
-
:param bool dataset_lock: Make dataset readonly during the export
|
|
661
|
-
:param bool export_summary: Get Summary of the dataset export
|
|
662
|
-
:param int lock_timeout_sec: Timeout for locking the dataset during export in seconds
|
|
663
|
-
:param entities.ExportType export_type: Type of export ('json' or 'zip')
|
|
664
|
-
:param int timeout: Maximum time in seconds to wait for the export to complete
|
|
665
|
-
:return: Exported item
|
|
666
|
-
:rtype: dtlpy.entities.item.Item
|
|
667
|
-
|
|
668
|
-
**Example**:
|
|
669
|
-
|
|
670
|
-
.. code-block:: python
|
|
671
|
-
|
|
672
|
-
export_item = project.datasets.export(dataset_id='dataset_id',
|
|
673
|
-
filters=filters,
|
|
674
|
-
include_feature_vectors=True,
|
|
675
|
-
include_annotations=True,
|
|
676
|
-
export_type=dl.ExportType.JSON,
|
|
677
|
-
dataset_lock=True,
|
|
678
|
-
lock_timeout_sec=300,
|
|
679
|
-
export_summary=False)
|
|
680
|
-
"""
|
|
681
|
-
dataset_id = self._resolve_dataset_id(dataset, dataset_name, dataset_id)
|
|
682
|
-
payload = self._build_payload(filters, include_feature_vectors, include_annotations,
|
|
683
|
-
export_type, annotation_filters, feature_vector_filters,
|
|
684
|
-
dataset_lock, lock_timeout_sec, export_summary)
|
|
685
|
-
|
|
686
|
-
success, response = self._client_api.gen_request(req_type='post', path=f'/datasets/{dataset_id}/export',
|
|
687
|
-
json_req=payload)
|
|
688
|
-
if not success:
|
|
689
|
-
raise exceptions.PlatformException(response)
|
|
690
|
-
|
|
691
|
-
command = entities.Command.from_json(_json=response.json(),
|
|
692
|
-
client_api=self._client_api)
|
|
693
|
-
|
|
694
|
-
time.sleep(2) # as the command have wrong progress in the beginning
|
|
695
|
-
command = command.wait(timeout=timeout)
|
|
696
|
-
if 'outputItemId' not in command.spec:
|
|
697
|
-
raise exceptions.PlatformException(
|
|
698
|
-
error='400',
|
|
699
|
-
message="outputItemId key is missing in command response: {}".format(response))
|
|
700
|
-
item_id = command.spec['outputItemId']
|
|
701
|
-
self._download_exported_item(item_id=item_id, export_type=export_type, local_path=local_path)
|
|
702
|
-
return local_path
|
|
703
|
-
|
|
704
|
-
@_api_reference.add(path='/datasets/merge', method='post')
|
|
705
|
-
def merge(self,
|
|
706
|
-
merge_name: str,
|
|
707
|
-
dataset_ids: list,
|
|
708
|
-
project_ids: str,
|
|
709
|
-
with_items_annotations: bool = True,
|
|
710
|
-
with_metadata: bool = True,
|
|
711
|
-
with_task_annotations_status: bool = True,
|
|
712
|
-
wait: bool = True):
|
|
713
|
-
"""
|
|
714
|
-
Merge a dataset. See our `SDK docs <https://developers.dataloop.ai/tutorials/data_management/data_versioning/chapter/>`_ for more information.
|
|
715
|
-
|
|
716
|
-
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
717
|
-
|
|
718
|
-
:param str merge_name: new dataset name
|
|
719
|
-
:param list dataset_ids: list id's of the datatsets you wish to merge
|
|
720
|
-
:param str project_ids: the project id that include the datasets
|
|
721
|
-
:param bool with_items_annotations: true to merge with items annotations
|
|
722
|
-
:param bool with_metadata: true to merge with metadata
|
|
723
|
-
:param bool with_task_annotations_status: true to merge with task annotations' status
|
|
724
|
-
:param bool wait: wait for the command to finish
|
|
725
|
-
:return: True if success
|
|
726
|
-
:rtype: bool
|
|
727
|
-
|
|
728
|
-
**Example**:
|
|
729
|
-
|
|
730
|
-
.. code-block:: python
|
|
731
|
-
|
|
732
|
-
success = project.datasets.merge(dataset_ids=['dataset_id1','dataset_id2'],
|
|
733
|
-
merge_name='dataset_merge_name',
|
|
734
|
-
with_metadata=True,
|
|
735
|
-
with_items_annotations=False,
|
|
736
|
-
with_task_annotations_status=False)
|
|
737
|
-
"""
|
|
738
|
-
payload = {
|
|
739
|
-
"name": merge_name,
|
|
740
|
-
"datasetsIds": dataset_ids,
|
|
741
|
-
"projectIds": project_ids,
|
|
742
|
-
"mergeDatasetParams": {
|
|
743
|
-
"withItemsAnnotations": with_items_annotations,
|
|
744
|
-
"withMetadata": with_metadata,
|
|
745
|
-
"withTaskAnnotationsStatus": with_task_annotations_status
|
|
746
|
-
},
|
|
747
|
-
'asynced': wait
|
|
748
|
-
}
|
|
749
|
-
success, response = self._client_api.gen_request(req_type='post',
|
|
750
|
-
path='/datasets/merge',
|
|
751
|
-
json_req=payload)
|
|
752
|
-
|
|
753
|
-
if success:
|
|
754
|
-
command = entities.Command.from_json(_json=response.json(),
|
|
755
|
-
client_api=self._client_api)
|
|
756
|
-
if not wait:
|
|
757
|
-
return command
|
|
758
|
-
command = command.wait(timeout=0)
|
|
759
|
-
if 'mergeDatasetsConfiguration' not in command.spec:
|
|
760
|
-
raise exceptions.PlatformException(error='400',
|
|
761
|
-
message="mergeDatasetsConfiguration key is missing in command response: {}"
|
|
762
|
-
.format(response))
|
|
763
|
-
return True
|
|
764
|
-
else:
|
|
765
|
-
raise exceptions.PlatformException(response)
|
|
766
|
-
|
|
767
|
-
@_api_reference.add(path='/datasets/{id}/sync', method='post')
|
|
768
|
-
def sync(self, dataset_id: str, wait: bool = True):
|
|
769
|
-
"""
|
|
770
|
-
Sync dataset with external storage.
|
|
771
|
-
|
|
772
|
-
**Prerequisites**: You must be in the role of an *owner* or *developer*.
|
|
773
|
-
|
|
774
|
-
:param str dataset_id: The Id of the dataset to sync
|
|
775
|
-
:param bool wait: wait for the command to finish
|
|
776
|
-
:return: True if success
|
|
777
|
-
:rtype: bool
|
|
778
|
-
|
|
779
|
-
**Example**:
|
|
780
|
-
|
|
781
|
-
.. code-block:: python
|
|
782
|
-
|
|
783
|
-
success = project.datasets.sync(dataset_id='dataset_id')
|
|
784
|
-
"""
|
|
785
|
-
|
|
786
|
-
success, response = self._client_api.gen_request(req_type='post',
|
|
787
|
-
path='/datasets/{}/sync'.format(dataset_id))
|
|
788
|
-
|
|
789
|
-
if success:
|
|
790
|
-
command = entities.Command.from_json(_json=response.json(),
|
|
791
|
-
client_api=self._client_api)
|
|
792
|
-
if not wait:
|
|
793
|
-
return command
|
|
794
|
-
command = command.wait(timeout=0)
|
|
795
|
-
if 'datasetId' not in command.spec:
|
|
796
|
-
raise exceptions.PlatformException(error='400',
|
|
797
|
-
message="datasetId key is missing in command response: {}"
|
|
798
|
-
.format(response))
|
|
799
|
-
return True
|
|
800
|
-
else:
|
|
801
|
-
raise exceptions.PlatformException(response)
|
|
802
|
-
|
|
803
|
-
@_api_reference.add(path='/datasets', method='post')
|
|
804
|
-
def create(self,
|
|
805
|
-
dataset_name: str,
|
|
806
|
-
labels=None,
|
|
807
|
-
attributes=None,
|
|
808
|
-
ontology_ids=None,
|
|
809
|
-
driver: entities.Driver = None,
|
|
810
|
-
driver_id: str = None,
|
|
811
|
-
checkout: bool = False,
|
|
812
|
-
expiration_options: entities.ExpirationOptions = None,
|
|
813
|
-
index_driver: entities.IndexDriver = None,
|
|
814
|
-
recipe_id: str = None
|
|
815
|
-
) -> entities.Dataset:
|
|
816
|
-
"""
|
|
817
|
-
Create a new dataset
|
|
818
|
-
|
|
819
|
-
**Prerequisites**: You must be in the role of an *owner* or *developer*.
|
|
820
|
-
|
|
821
|
-
:param str dataset_name: The Name of the dataset
|
|
822
|
-
:param list labels: dictionary of {tag: color} or list of label entities
|
|
823
|
-
:param list attributes: dataset's ontology's attributes
|
|
824
|
-
:param list ontology_ids: optional - dataset ontology
|
|
825
|
-
:param dtlpy.entities.driver.Driver driver: optional - storage driver Driver object or driver name
|
|
826
|
-
:param str driver_id: optional - driver id
|
|
827
|
-
:param bool checkout: set the dataset as a default dataset object (cookies)
|
|
828
|
-
:param ExpirationOptions expiration_options: dl.ExpirationOptions object that contain definitions for dataset like MaxItemDays
|
|
829
|
-
:param str index_driver: dl.IndexDriver, dataset driver version
|
|
830
|
-
:param str recipe_id: optional - recipe id
|
|
831
|
-
:return: Dataset object
|
|
832
|
-
:rtype: dtlpy.entities.dataset.Dataset
|
|
833
|
-
|
|
834
|
-
**Example**:
|
|
835
|
-
|
|
836
|
-
.. code-block:: python
|
|
837
|
-
|
|
838
|
-
dataset = project.datasets.create(dataset_name='dataset_name', ontology_ids='ontology_ids')
|
|
839
|
-
"""
|
|
840
|
-
create_default_recipe = True
|
|
841
|
-
if any([labels, attributes, ontology_ids, recipe_id]):
|
|
842
|
-
create_default_recipe = False
|
|
843
|
-
|
|
844
|
-
# labels to list
|
|
845
|
-
if labels is not None:
|
|
846
|
-
if not isinstance(labels, list):
|
|
847
|
-
labels = [labels]
|
|
848
|
-
if not all(isinstance(label, entities.Label) for label in labels):
|
|
849
|
-
labels = entities.Dataset.serialize_labels(labels)
|
|
850
|
-
else:
|
|
851
|
-
labels = list()
|
|
852
|
-
|
|
853
|
-
# get creator from token
|
|
854
|
-
payload = {'name': dataset_name,
|
|
855
|
-
'projects': [self.project.id],
|
|
856
|
-
'createDefaultRecipe': create_default_recipe
|
|
857
|
-
}
|
|
858
|
-
|
|
859
|
-
if driver_id is None and driver is not None:
|
|
860
|
-
if isinstance(driver, entities.Driver):
|
|
861
|
-
driver_id = driver.id
|
|
862
|
-
elif isinstance(driver, str):
|
|
863
|
-
driver_id = self.project.drivers.get(driver_name=driver).id
|
|
864
|
-
else:
|
|
865
|
-
raise exceptions.PlatformException(
|
|
866
|
-
error=400,
|
|
867
|
-
message='Input arg "driver" must be Driver object or a string driver name. got type: {!r}'.format(
|
|
868
|
-
type(driver)))
|
|
869
|
-
if driver_id is not None:
|
|
870
|
-
payload['driver'] = driver_id
|
|
871
|
-
|
|
872
|
-
if expiration_options:
|
|
873
|
-
payload['expirationOptions'] = expiration_options.to_json()
|
|
874
|
-
if index_driver is not None:
|
|
875
|
-
payload['indexDriver'] = index_driver
|
|
876
|
-
|
|
877
|
-
success, response = self._client_api.gen_request(req_type='post',
|
|
878
|
-
path='/datasets',
|
|
879
|
-
json_req=payload)
|
|
880
|
-
if success:
|
|
881
|
-
dataset = entities.Dataset.from_json(client_api=self._client_api,
|
|
882
|
-
_json=response.json(),
|
|
883
|
-
datasets=self,
|
|
884
|
-
project=self.project)
|
|
885
|
-
# create ontology and recipe
|
|
886
|
-
if not create_default_recipe:
|
|
887
|
-
if recipe_id is not None:
|
|
888
|
-
dataset.switch_recipe(recipe_id=recipe_id)
|
|
889
|
-
else:
|
|
890
|
-
dataset = dataset.recipes.create(ontology_ids=ontology_ids,
|
|
891
|
-
labels=labels,
|
|
892
|
-
attributes=attributes).dataset
|
|
893
|
-
else:
|
|
894
|
-
raise exceptions.PlatformException(response)
|
|
895
|
-
logger.info('Dataset was created successfully. Dataset id: {!r}'.format(dataset.id))
|
|
896
|
-
assert isinstance(dataset, entities.Dataset)
|
|
897
|
-
if checkout:
|
|
898
|
-
self.checkout(dataset=dataset)
|
|
899
|
-
return dataset
|
|
900
|
-
|
|
901
|
-
@staticmethod
|
|
902
|
-
def _convert_single(downloader,
|
|
903
|
-
item,
|
|
904
|
-
img_filepath,
|
|
905
|
-
local_path,
|
|
906
|
-
overwrite,
|
|
907
|
-
annotation_options,
|
|
908
|
-
annotation_filters,
|
|
909
|
-
thickness,
|
|
910
|
-
with_text,
|
|
911
|
-
progress,
|
|
912
|
-
alpha,
|
|
913
|
-
export_version):
|
|
914
|
-
# this is to convert the downloaded json files to any other annotation type
|
|
915
|
-
try:
|
|
916
|
-
if entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE in annotation_options:
|
|
917
|
-
if img_filepath is None:
|
|
918
|
-
img_filepath = item.download()
|
|
919
|
-
downloader._download_img_annotations(item=item,
|
|
920
|
-
img_filepath=img_filepath,
|
|
921
|
-
local_path=local_path,
|
|
922
|
-
overwrite=overwrite,
|
|
923
|
-
annotation_options=annotation_options,
|
|
924
|
-
annotation_filters=annotation_filters,
|
|
925
|
-
thickness=thickness,
|
|
926
|
-
alpha=alpha,
|
|
927
|
-
with_text=with_text,
|
|
928
|
-
export_version=export_version
|
|
929
|
-
)
|
|
930
|
-
except Exception:
|
|
931
|
-
logger.error('Failed to download annotation for item: {!r}'.format(item.name))
|
|
932
|
-
progress.update()
|
|
933
|
-
|
|
934
|
-
@staticmethod
|
|
935
|
-
def download_annotations(dataset: entities.Dataset,
|
|
936
|
-
local_path: str = None,
|
|
937
|
-
filters: entities.Filters = None,
|
|
938
|
-
annotation_options: entities.ViewAnnotationOptions = None,
|
|
939
|
-
annotation_filters: entities.Filters = None,
|
|
940
|
-
overwrite: bool = False,
|
|
941
|
-
thickness: int = 1,
|
|
942
|
-
with_text: bool = False,
|
|
943
|
-
remote_path: str = None,
|
|
944
|
-
include_annotations_in_output: bool = True,
|
|
945
|
-
export_png_files: bool = False,
|
|
946
|
-
filter_output_annotations: bool = False,
|
|
947
|
-
alpha: float = None,
|
|
948
|
-
export_version=entities.ExportVersion.V1,
|
|
949
|
-
dataset_lock: bool = False,
|
|
950
|
-
lock_timeout_sec: int = None,
|
|
951
|
-
export_summary: bool = False,
|
|
952
|
-
) -> str:
|
|
953
|
-
"""
|
|
954
|
-
Download dataset's annotations by filters.
|
|
955
|
-
|
|
956
|
-
You may filter the dataset both for items and for annotations and download annotations.
|
|
957
|
-
|
|
958
|
-
Optional -- download annotations as: mask, instance, image mask of the item.
|
|
959
|
-
|
|
960
|
-
**Prerequisites**: You must be in the role of an *owner* or *developer*.
|
|
961
|
-
|
|
962
|
-
:param dtlpy.entities.dataset.Dataset dataset: dataset object
|
|
963
|
-
:param str local_path: local folder or filename to save to.
|
|
964
|
-
:param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
|
|
965
|
-
:param list annotation_options: type of download annotations: list(dl.ViewAnnotationOptions)
|
|
966
|
-
:param dtlpy.entities.filters.Filters annotation_filters: Filters entity to filter annotations for download
|
|
967
|
-
:param bool overwrite: optional - default = False to overwrite the existing files
|
|
968
|
-
:param bool dataset_loc: optional - default = False to make the dataset readonly
|
|
969
|
-
:param int thickness: optional - line thickness, if -1 annotation will be filled, default =1
|
|
970
|
-
:param bool with_text: optional - add text to annotations, default = False
|
|
971
|
-
:param str remote_path: DEPRECATED and ignored
|
|
972
|
-
:param bool include_annotations_in_output: default - False , if export should contain annotations
|
|
973
|
-
:param bool export_png_files: default - if True, semantic annotations should be exported as png files
|
|
974
|
-
:param bool filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
|
|
975
|
-
:param float alpha: opacity value [0 1], default 1
|
|
976
|
-
:param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
|
|
977
|
-
:return: local_path of the directory where all the downloaded item
|
|
978
|
-
:param bool dataset_lock: optional - default = False
|
|
979
|
-
:param bool export_summary: optional - default = False
|
|
980
|
-
:param int lock_timeout_sec: optional
|
|
981
|
-
:rtype: str
|
|
982
|
-
|
|
983
|
-
**Example**:
|
|
984
|
-
|
|
985
|
-
.. code-block:: python
|
|
986
|
-
|
|
987
|
-
file_path = project.datasets.download_annotations(dataset='dataset_entity',
|
|
988
|
-
local_path='local_path',
|
|
989
|
-
annotation_options=dl.ViewAnnotationOptions,
|
|
990
|
-
overwrite=False,
|
|
991
|
-
thickness=1,
|
|
992
|
-
with_text=False,
|
|
993
|
-
alpha=1,
|
|
994
|
-
dataset_lock=False,
|
|
995
|
-
lock_timeout_sec=300,
|
|
996
|
-
export_summary=False
|
|
997
|
-
)
|
|
998
|
-
"""
|
|
999
|
-
if annotation_options is None:
|
|
1000
|
-
annotation_options = list()
|
|
1001
|
-
elif not isinstance(annotation_options, list):
|
|
1002
|
-
annotation_options = [annotation_options]
|
|
1003
|
-
for ann_option in annotation_options:
|
|
1004
|
-
if not isinstance(ann_option, entities.ViewAnnotationOptions):
|
|
1005
|
-
if ann_option not in list(entities.ViewAnnotationOptions):
|
|
1006
|
-
raise PlatformException(
|
|
1007
|
-
error='400',
|
|
1008
|
-
message='Unknown annotation download option: {}, please choose from: {}'.format(
|
|
1009
|
-
ann_option, list(entities.ViewAnnotationOptions)))
|
|
1010
|
-
|
|
1011
|
-
if remote_path is not None:
|
|
1012
|
-
logger.warning(
|
|
1013
|
-
'"remote_path" is ignored. Use "filters=dl.Filters(field="dir, values={!r}"'.format(remote_path))
|
|
1014
|
-
if local_path is None:
|
|
1015
|
-
if dataset.project is None:
|
|
1016
|
-
# by dataset name
|
|
1017
|
-
local_path = os.path.join(
|
|
1018
|
-
services.service_defaults.DATALOOP_PATH,
|
|
1019
|
-
"datasets",
|
|
1020
|
-
"{}_{}".format(dataset.name, dataset.id),
|
|
1021
|
-
)
|
|
1022
|
-
else:
|
|
1023
|
-
# by dataset and project name
|
|
1024
|
-
local_path = os.path.join(
|
|
1025
|
-
services.service_defaults.DATALOOP_PATH,
|
|
1026
|
-
"projects",
|
|
1027
|
-
dataset.project.name,
|
|
1028
|
-
"datasets",
|
|
1029
|
-
dataset.name,
|
|
1030
|
-
)
|
|
1031
|
-
|
|
1032
|
-
if filters is None:
|
|
1033
|
-
filters = entities.Filters()
|
|
1034
|
-
filters._user_query = 'false'
|
|
1035
|
-
if annotation_filters is not None:
|
|
1036
|
-
for annotation_filter_and in annotation_filters.and_filter_list:
|
|
1037
|
-
filters.add_join(field=annotation_filter_and.field,
|
|
1038
|
-
values=annotation_filter_and.values,
|
|
1039
|
-
operator=annotation_filter_and.operator,
|
|
1040
|
-
method=entities.FiltersMethod.AND)
|
|
1041
|
-
for annotation_filter_or in annotation_filters.or_filter_list:
|
|
1042
|
-
filters.add_join(field=annotation_filter_or.field,
|
|
1043
|
-
values=annotation_filter_or.values,
|
|
1044
|
-
operator=annotation_filter_or.operator,
|
|
1045
|
-
method=entities.FiltersMethod.OR)
|
|
1046
|
-
|
|
1047
|
-
downloader = repositories.Downloader(items_repository=dataset.items)
|
|
1048
|
-
downloader.download_annotations(dataset=dataset,
|
|
1049
|
-
filters=filters,
|
|
1050
|
-
annotation_filters=annotation_filters,
|
|
1051
|
-
local_path=local_path,
|
|
1052
|
-
overwrite=overwrite,
|
|
1053
|
-
include_annotations_in_output=include_annotations_in_output,
|
|
1054
|
-
export_png_files=export_png_files,
|
|
1055
|
-
filter_output_annotations=filter_output_annotations,
|
|
1056
|
-
export_version=export_version,
|
|
1057
|
-
dataset_lock=dataset_lock,
|
|
1058
|
-
lock_timeout_sec=lock_timeout_sec,
|
|
1059
|
-
export_summary=export_summary
|
|
1060
|
-
)
|
|
1061
|
-
if annotation_options:
|
|
1062
|
-
pages = dataset.items.list(filters=filters)
|
|
1063
|
-
if not isinstance(annotation_options, list):
|
|
1064
|
-
annotation_options = [annotation_options]
|
|
1065
|
-
# convert all annotations to annotation_options
|
|
1066
|
-
pool = dataset._client_api.thread_pools(pool_name='dataset.download')
|
|
1067
|
-
jobs = [None for _ in range(pages.items_count)]
|
|
1068
|
-
progress = tqdm.tqdm(total=pages.items_count,
|
|
1069
|
-
disable=dataset._client_api.verbose.disable_progress_bar_download_annotations,
|
|
1070
|
-
file=sys.stdout, desc='Download Annotations')
|
|
1071
|
-
i_item = 0
|
|
1072
|
-
for page in pages:
|
|
1073
|
-
for item in page:
|
|
1074
|
-
jobs[i_item] = pool.submit(
|
|
1075
|
-
Datasets._convert_single,
|
|
1076
|
-
**{
|
|
1077
|
-
'downloader': downloader,
|
|
1078
|
-
'item': item,
|
|
1079
|
-
'img_filepath': None,
|
|
1080
|
-
'local_path': local_path,
|
|
1081
|
-
'overwrite': overwrite,
|
|
1082
|
-
'annotation_options': annotation_options,
|
|
1083
|
-
'annotation_filters': annotation_filters,
|
|
1084
|
-
'thickness': thickness,
|
|
1085
|
-
'with_text': with_text,
|
|
1086
|
-
'progress': progress,
|
|
1087
|
-
'alpha': alpha,
|
|
1088
|
-
'export_version': export_version
|
|
1089
|
-
}
|
|
1090
|
-
)
|
|
1091
|
-
i_item += 1
|
|
1092
|
-
# get all results
|
|
1093
|
-
_ = [j.result() for j in jobs]
|
|
1094
|
-
progress.close()
|
|
1095
|
-
return local_path
|
|
1096
|
-
|
|
1097
|
-
def _upload_single_item_annotation(self, item, file, pbar):
|
|
1098
|
-
try:
|
|
1099
|
-
item.annotations.upload(file)
|
|
1100
|
-
except Exception as err:
|
|
1101
|
-
raise err
|
|
1102
|
-
finally:
|
|
1103
|
-
pbar.update()
|
|
1104
|
-
|
|
1105
|
-
def upload_annotations(self,
|
|
1106
|
-
dataset,
|
|
1107
|
-
local_path,
|
|
1108
|
-
filters: entities.Filters = None,
|
|
1109
|
-
clean=False,
|
|
1110
|
-
remote_root_path='/',
|
|
1111
|
-
export_version=entities.ExportVersion.V1
|
|
1112
|
-
):
|
|
1113
|
-
"""
|
|
1114
|
-
Upload annotations to dataset.
|
|
1115
|
-
|
|
1116
|
-
Example for remote_root_path: If the item filepath is "/a/b/item" and remote_root_path is "/a" - the start folder will be b instead of a
|
|
1117
|
-
|
|
1118
|
-
**Prerequisites**: You must have a dataset with items that correspond to the annotation files. Annotation JSON files are matched to items by filename (the same relative path, with a .json extension). You must be in the role of an *owner* or *developer*.
|
|
1119
|
-
|
|
1120
|
-
:param dtlpy.entities.dataset.Dataset dataset: dataset to upload to
|
|
1121
|
-
:param str local_path: str - local folder where the annotations files are
|
|
1122
|
-
:param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
|
|
1123
|
-
:param bool clean: True to remove the old annotations
|
|
1124
|
-
:param str remote_root_path: the remote root path to match remote and local items
|
|
1125
|
-
:param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
|
|
1126
|
-
|
|
1127
|
-
**Example**:
|
|
1128
|
-
|
|
1129
|
-
.. code-block:: python
|
|
1130
|
-
|
|
1131
|
-
project.datasets.upload_annotations(dataset='dataset_entity',
|
|
1132
|
-
local_path='local_path',
|
|
1133
|
-
clean=False,
|
|
1134
|
-
export_version=dl.ExportVersion.V1
|
|
1135
|
-
)
|
|
1136
|
-
"""
|
|
1137
|
-
if filters is None:
|
|
1138
|
-
filters = entities.Filters()
|
|
1139
|
-
filters._user_query = 'false'
|
|
1140
|
-
pages = dataset.items.list(filters=filters)
|
|
1141
|
-
total_items = pages.items_count
|
|
1142
|
-
pbar = tqdm.tqdm(total=total_items, disable=dataset._client_api.verbose.disable_progress_bar_upload_annotations,
|
|
1143
|
-
file=sys.stdout, desc='Upload Annotations')
|
|
1144
|
-
pool = self._client_api.thread_pools('annotation.upload')
|
|
1145
|
-
annotations_uploaded_count = 0
|
|
1146
|
-
for item in pages.all():
|
|
1147
|
-
if export_version == entities.ExportVersion.V1:
|
|
1148
|
-
_, ext = os.path.splitext(item.filename)
|
|
1149
|
-
filepath = item.filename.replace(ext, '.json')
|
|
1150
|
-
else:
|
|
1151
|
-
filepath = item.filename + '.json'
|
|
1152
|
-
# make the file path ignore the hierarchy of the files that in remote_root_path
|
|
1153
|
-
filepath = os.path.relpath(filepath, remote_root_path)
|
|
1154
|
-
json_file = os.path.join(local_path, filepath)
|
|
1155
|
-
if not os.path.isfile(json_file):
|
|
1156
|
-
pbar.update()
|
|
1157
|
-
continue
|
|
1158
|
-
annotations_uploaded_count += 1
|
|
1159
|
-
if item.annotated and clean:
|
|
1160
|
-
item.annotations.delete(filters=entities.Filters(resource=entities.FiltersResource.ANNOTATION))
|
|
1161
|
-
pool.submit(self._upload_single_item_annotation, **{'item': item,
|
|
1162
|
-
'file': json_file,
|
|
1163
|
-
'pbar': pbar})
|
|
1164
|
-
pool.shutdown()
|
|
1165
|
-
if annotations_uploaded_count == 0:
|
|
1166
|
-
logger.warning(msg="No annotations uploaded to dataset! ")
|
|
1167
|
-
else:
|
|
1168
|
-
logger.info(msg='Found and uploaded {} annotations.'.format(annotations_uploaded_count))
|
|
1169
|
-
|
|
1170
|
-
def set_readonly(self, state: bool, dataset: entities.Dataset):
|
|
1171
|
-
"""
|
|
1172
|
-
Set dataset readonly mode.
|
|
1173
|
-
|
|
1174
|
-
**Prerequisites**: You must be in the role of an *owner* or *developer*.
|
|
1175
|
-
|
|
1176
|
-
:param bool state: state to update readonly mode
|
|
1177
|
-
:param dtlpy.entities.dataset.Dataset dataset: dataset object
|
|
1178
|
-
|
|
1179
|
-
**Example**:
|
|
1180
|
-
|
|
1181
|
-
.. code-block:: python
|
|
1182
|
-
|
|
1183
|
-
project.datasets.set_readonly(dataset='dataset_entity', state=True)
|
|
1184
|
-
"""
|
|
1185
|
-
import warnings
|
|
1186
|
-
warnings.warn("`readonly` flag on dataset is deprecated, doing nothing.", DeprecationWarning)
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
@_api_reference.add(path='/datasets/{id}/split', method='post')
|
|
1190
|
-
def split_ml_subsets(self,
|
|
1191
|
-
dataset_id: str,
|
|
1192
|
-
items_query: entities.Filters,
|
|
1193
|
-
ml_split_list: dict) -> bool:
|
|
1194
|
-
"""
|
|
1195
|
-
Split dataset items into ML subsets.
|
|
1196
|
-
|
|
1197
|
-
:param str dataset_id: The ID of the dataset.
|
|
1198
|
-
:param dtlpy.entities.filters.Filters items_query: Filters entity to select the items to split.
|
|
1199
|
-
:param dict ml_split_list: Dictionary with 'train', 'validation', 'test' keys and integer percentages.
|
|
1200
|
-
:return: True if the split operation was successful.
|
|
1201
|
-
:rtype: bool
|
|
1202
|
-
:raises: PlatformException on failure and ValueError if percentages do not sum to 100 or invalid keys/values.
|
|
1203
|
-
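**Example** (illustrative values; replace the dataset id and percentages with your own):

.. code-block:: python

    success = project.datasets.split_ml_subsets(dataset_id='dataset_id',
                                                items_query=dl.Filters(),
                                                ml_split_list={'train': 70, 'validation': 15, 'test': 15})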
"""
|
|
1204
|
-
# Validate percentages
|
|
1205
|
-
if not ml_split_list:
|
|
1206
|
-
ml_split_list = {'train': 80, 'validation': 10, 'test': 10}
|
|
1207
|
-
|
|
1208
|
-
if not items_query:
|
|
1209
|
-
items_query = entities.Filters()
|
|
1210
|
-
|
|
1211
|
-
items_query_dict = items_query.prepare()
|
|
1212
|
-
required_keys = {'train', 'validation', 'test'}
|
|
1213
|
-
if set(ml_split_list.keys()) != required_keys:
|
|
1214
|
-
raise ValueError("MLSplitList must have exactly the keys 'train', 'validation', 'test'.")
|
|
1215
|
-
total = sum(ml_split_list.values())
|
|
1216
|
-
if total != 100:
|
|
1217
|
-
raise ValueError(
|
|
1218
|
-
"Please set the Train, Validation, and Test subsets percentages to add up to 100%. "
|
|
1219
|
-
"For example: 70, 15, 15."
|
|
1220
|
-
)
|
|
1221
|
-
for key, value in ml_split_list.items():
|
|
1222
|
-
if not isinstance(value, int) or value < 0:
|
|
1223
|
-
raise ValueError("Percentages must be integers >= 0.")
|
|
1224
|
-
payload = {
|
|
1225
|
-
'itemsQuery': items_query_dict,
|
|
1226
|
-
'MLSplitList': ml_split_list
|
|
1227
|
-
}
|
|
1228
|
-
path = f'/datasets/{dataset_id}/split'
|
|
1229
|
-
success, response = self._client_api.gen_request(req_type='post',
|
|
1230
|
-
path=path,
|
|
1231
|
-
json_req=payload)
|
|
1232
|
-
if success:
|
|
1233
|
-
# Wait for the split operation to complete
|
|
1234
|
-
command = entities.Command.from_json(_json=response.json(),
|
|
1235
|
-
client_api=self._client_api)
|
|
1236
|
-
command.wait()
|
|
1237
|
-
return True
|
|
1238
|
-
else:
|
|
1239
|
-
raise exceptions.PlatformException(response)
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
@_api_reference.add(path='/datasets/{id}/items/bulk-update-metadata', method='post')
|
|
1243
|
-
def bulk_update_ml_subset(self, dataset_id: str, items_query: entities.Filters, subset: str = None, deleteTag: bool = False) -> bool:
|
|
1244
|
-
"""
|
|
1245
|
-
Bulk update ML subset assignment for selected items.
|
|
1246
|
-
If deleteTag is True, the ML subset tags are removed from the selected items. Otherwise, the specified subset is assigned.
|
|
1247
|
-
|
|
1248
|
-
:param str dataset_id: ID of the dataset
|
|
1249
|
-
:param dtlpy.entities.filters.Filters items_query: Filters entity (DQL query) for selecting items
:param str subset: 'train', 'validation' or 'test' to assign; ignored when deleteTag is True
:param bool deleteTag: optional - default = False, True to remove the ML subset tags from the selected items
|
|
1251
|
-
:return: True if success
|
|
1252
|
-
:rtype: bool
|
|
1253
|
-
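**Example** (illustrative values; the dataset id and query are placeholders):

.. code-block:: python

    # assign the 'train' subset to the selected items
    success = project.datasets.bulk_update_ml_subset(dataset_id='dataset_id',
                                                     items_query=dl.Filters(),
                                                     subset='train')
    # remove the ML subset tags from the selected items
    success = project.datasets.bulk_update_ml_subset(dataset_id='dataset_id',
                                                     items_query=dl.Filters(),
                                                     deleteTag=True)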
"""
|
|
1254
|
-
if items_query is None:
|
|
1255
|
-
items_query = entities.Filters()
|
|
1256
|
-
items_query_dict = items_query.prepare()
|
|
1257
|
-
if not deleteTag and subset not in ['train', 'validation', 'test']:
|
|
1258
|
-
raise ValueError("subset must be one of: 'train', 'validation', 'test'")
|
|
1259
|
-
# Determine tag values based on subset
|
|
1260
|
-
tags = {
|
|
1261
|
-
'train': True if subset == 'train' else None,
|
|
1262
|
-
'validation': True if subset == 'validation' else None,
|
|
1263
|
-
'test': True if subset == 'test' else None
|
|
1264
|
-
}
|
|
1265
|
-
|
|
1266
|
-
payload = {
|
|
1267
|
-
"query": items_query_dict,
|
|
1268
|
-
"updateQuery": {
|
|
1269
|
-
"update": {
|
|
1270
|
-
"metadata": {
|
|
1271
|
-
"system": {
|
|
1272
|
-
"tags": tags
|
|
1273
|
-
}
|
|
1274
|
-
}
|
|
1275
|
-
},
|
|
1276
|
-
"systemSpace": True
|
|
1277
|
-
}
|
|
1278
|
-
}
|
|
1279
|
-
|
|
1280
|
-
success, response = self._client_api.gen_request(
|
|
1281
|
-
req_type='post',
|
|
1282
|
-
path=f'/datasets/{dataset_id}/items/bulk-update-metadata',
|
|
1283
|
-
json_req=payload
|
|
1284
|
-
)
|
|
1285
|
-
if success:
|
|
1286
|
-
# Similar to split operation, a command is returned
|
|
1287
|
-
command = entities.Command.from_json(_json=response.json(), client_api=self._client_api)
|
|
1288
|
-
command.wait()
|
|
1289
|
-
return True
|
|
1290
|
-
else:
|
|
1291
|
-
raise exceptions.PlatformException(response)
|
|
1
|
+
"""
|
|
2
|
+
Datasets Repository
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
import time
|
|
8
|
+
import copy
|
|
9
|
+
import tqdm
|
|
10
|
+
import logging
|
|
11
|
+
import json
|
|
12
|
+
from typing import Union
|
|
13
|
+
|
|
14
|
+
from .. import entities, repositories, miscellaneous, exceptions, services, PlatformException, _api_reference
|
|
15
|
+
from ..services.api_client import ApiClient
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(name='dtlpy')
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Datasets:
|
|
21
|
+
"""
|
|
22
|
+
Datasets Repository
|
|
23
|
+
|
|
24
|
+
The Datasets class allows the user to manage datasets. Read more about datasets in our `documentation <https://dataloop.ai/docs/dataset>`_ and `SDK documentation <https://developers.dataloop.ai/tutorials/data_management/manage_datasets/chapter/>`_.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
def __init__(self, client_api: ApiClient, project: entities.Project = None):
|
|
28
|
+
self._client_api = client_api
|
|
29
|
+
self._project = project
|
|
30
|
+
|
|
31
|
+
############
|
|
32
|
+
# entities #
|
|
33
|
+
############
|
|
34
|
+
@property
|
|
35
|
+
def project(self) -> entities.Project:
|
|
36
|
+
if self._project is None:
|
|
37
|
+
# try get checkout
|
|
38
|
+
project = self._client_api.state_io.get('project')
|
|
39
|
+
if project is not None:
|
|
40
|
+
self._project = entities.Project.from_json(_json=project, client_api=self._client_api)
|
|
41
|
+
if self._project is None:
|
|
42
|
+
raise exceptions.PlatformException(
|
|
43
|
+
error='2001',
|
|
44
|
+
message='Cannot perform action WITHOUT Project entity in Datasets repository.'
|
|
45
|
+
' Please checkout or set a project')
|
|
46
|
+
assert isinstance(self._project, entities.Project)
|
|
47
|
+
return self._project
|
|
48
|
+
|
|
49
|
+
@project.setter
|
|
50
|
+
def project(self, project: entities.Project):
|
|
51
|
+
if not isinstance(project, entities.Project):
|
|
52
|
+
raise ValueError('Must input a valid Project entity')
|
|
53
|
+
self._project = project
|
|
54
|
+
|
|
55
|
+
###########
|
|
56
|
+
# methods #
|
|
57
|
+
###########
|
|
58
|
+
def __get_from_cache(self) -> entities.Dataset:
|
|
59
|
+
dataset = self._client_api.state_io.get('dataset')
|
|
60
|
+
if dataset is not None:
|
|
61
|
+
dataset = entities.Dataset.from_json(_json=dataset,
|
|
62
|
+
client_api=self._client_api,
|
|
63
|
+
datasets=self,
|
|
64
|
+
project=self._project)
|
|
65
|
+
return dataset
|
|
66
|
+
|
|
67
|
+
def __get_by_id(self, dataset_id) -> entities.Dataset:
|
|
68
|
+
success, response = self._client_api.gen_request(req_type='get',
|
|
69
|
+
path='/datasets/{}'.format(dataset_id))
|
|
70
|
+
if dataset_id is None or dataset_id == '':
|
|
71
|
+
raise exceptions.PlatformException('400', 'Please checkout a dataset')
|
|
72
|
+
|
|
73
|
+
if success:
|
|
74
|
+
dataset = entities.Dataset.from_json(client_api=self._client_api,
|
|
75
|
+
_json=response.json(),
|
|
76
|
+
datasets=self,
|
|
77
|
+
project=self._project)
|
|
78
|
+
else:
|
|
79
|
+
raise exceptions.PlatformException(response)
|
|
80
|
+
return dataset
|
|
81
|
+
|
|
82
|
+
def __get_by_identifier(self, identifier=None) -> entities.Dataset:
|
|
83
|
+
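# Resolve a dataset by a partial name or id; exactly one match must exist, otherwise an error is raised.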
datasets = self.list()
|
|
84
|
+
datasets_by_name = [dataset for dataset in datasets if identifier in dataset.name or identifier in dataset.id]
|
|
85
|
+
if len(datasets_by_name) == 1:
|
|
86
|
+
return datasets_by_name[0]
|
|
87
|
+
elif len(datasets_by_name) > 1:
|
|
88
|
+
raise Exception('Multiple datasets with this name exist')
|
|
89
|
+
else:
|
|
90
|
+
raise Exception("Dataset not found")
|
|
91
|
+
|
|
92
|
+
def _bulid_folder_filter(self, folder_path, filters=None):
|
|
93
|
+
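# Build an OR filter that matches the folder itself and everything under it ('/folder' and '/folder/*').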
if filters is None:
|
|
94
|
+
filters = entities.Filters()
|
|
95
|
+
filters._user_query = 'false'
|
|
96
|
+
if not folder_path.startswith('/'):
|
|
97
|
+
folder_path = '/' + folder_path
|
|
98
|
+
filters.add(field='dir', values=folder_path, method=entities.FiltersMethod.OR)
|
|
99
|
+
if not folder_path.endswith('*'):
|
|
100
|
+
if not folder_path.endswith('/'):
|
|
101
|
+
folder_path += '/'
|
|
102
|
+
filters.add(field='dir', values=folder_path + '*', method=entities.FiltersMethod.OR)
|
|
103
|
+
return filters
|
|
104
|
+
|
|
105
|
+
def _get_binaries_dataset(self):
|
|
106
|
+
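# Find the project's system "Binaries" dataset, searched by name in the system space.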
filters = entities.Filters(resource=entities.FiltersResource.DATASET)
|
|
107
|
+
filters.add(field='name', values='Binaries')
|
|
108
|
+
filters.system_space = True
|
|
109
|
+
datasets = self.list(filters=filters)
|
|
110
|
+
if len(datasets) == 0:
|
|
111
|
+
# empty list
|
|
112
|
+
raise exceptions.PlatformException('404', 'Dataset not found. Name: "Binaries"')
|
|
113
|
+
# dataset = None
|
|
114
|
+
elif len(datasets) > 1:
|
|
115
|
+
raise exceptions.PlatformException('400', 'More than one dataset with same name.')
|
|
116
|
+
else:
|
|
117
|
+
dataset = datasets[0]
|
|
118
|
+
return dataset
|
|
119
|
+
|
|
120
|
+
def _resolve_dataset_id(self, dataset, dataset_name, dataset_id):
|
|
121
|
+
if dataset is None and dataset_name is None and dataset_id is None:
|
|
122
|
+
raise ValueError('Must provide dataset, dataset name or dataset id')
|
|
123
|
+
if dataset_id is None:
|
|
124
|
+
if dataset is None:
|
|
125
|
+
dataset = self.get(dataset_name=dataset_name)
|
|
126
|
+
dataset_id = dataset.id
|
|
127
|
+
return dataset_id
|
|
128
|
+
|
|
129
|
+
@staticmethod
|
|
130
|
+
def _build_payload(filters, include_feature_vectors, include_annotations,
|
|
131
|
+
export_type, annotation_filters, feature_vector_filters, dataset_lock, lock_timeout_sec, export_summary):
|
|
132
|
+
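# Build the export request payload: items query, optional feature-vector and annotation queries, and lock/summary flags.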
valid_list = [e.value for e in entities.ExportType]
|
|
133
|
+
valid_types = ', '.join(valid_list)
|
|
134
|
+
if export_type not in ['json', 'zip']:
|
|
135
|
+
raise ValueError('export_type must be one of the following: {}'.format(valid_types))
|
|
136
|
+
payload = {'exportType': export_type}
|
|
137
|
+
if filters is None:
|
|
138
|
+
filters = entities.Filters()
|
|
139
|
+
|
|
140
|
+
if isinstance(filters, entities.Filters):
|
|
141
|
+
payload['itemsQuery'] = {'filter': filters.prepare()['filter'], 'join': filters.prepare().get("join", {})}
|
|
142
|
+
elif isinstance(filters, dict):
|
|
143
|
+
payload['itemsQuery'] = filters
|
|
144
|
+
else:
|
|
145
|
+
raise exceptions.BadRequest(message='filters must be of type dict or Filters', status_code=500)
|
|
146
|
+
|
|
147
|
+
payload['itemsVectorQuery'] = {}
|
|
148
|
+
if include_feature_vectors:
|
|
149
|
+
payload['includeItemVectors'] = True
|
|
150
|
+
payload['itemsVectorQuery']['select'] = {"datasetId": 1, 'featureSetId': 1, 'value': 1}
|
|
151
|
+
|
|
152
|
+
if feature_vector_filters is not None:
|
|
153
|
+
payload['itemsVectorQuery']['filter'] = feature_vector_filters.prepare()['filter']
|
|
154
|
+
|
|
155
|
+
payload['annotations'] = {"include": include_annotations, "convertSemantic": False}
|
|
156
|
+
|
|
157
|
+
if annotation_filters is not None:
|
|
158
|
+
payload['annotationsQuery'] = annotation_filters.prepare()['filter']
|
|
159
|
+
payload['annotations']['filter'] = True
|
|
160
|
+
|
|
161
|
+
if dataset_lock:
|
|
162
|
+
payload['datasetLock'] = dataset_lock
|
|
163
|
+
|
|
164
|
+
if export_summary:
|
|
165
|
+
payload['summary'] = export_summary
|
|
166
|
+
|
|
167
|
+
if lock_timeout_sec:
|
|
168
|
+
payload['lockTimeoutSec'] = lock_timeout_sec
|
|
169
|
+
|
|
170
|
+
return payload
|
|
171
|
+
|
|
172
|
+
def _download_exported_item(self, item_id, export_type, local_path=None):
|
|
173
|
+
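# Download the export artifact item; ZIP exports are extracted into local_path and the downloaded archive is removed.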
export_item = repositories.Items(client_api=self._client_api).get(item_id=item_id)
|
|
174
|
+
export_item_path = export_item.download(local_path=local_path)
|
|
175
|
+
|
|
176
|
+
if export_type == entities.ExportType.ZIP:
|
|
177
|
+
# unzipping annotations to directory
|
|
178
|
+
if isinstance(export_item_path, list) or not os.path.isfile(export_item_path):
|
|
179
|
+
raise exceptions.PlatformException(
|
|
180
|
+
error='404',
|
|
181
|
+
message='error downloading annotation zip file. see above for more information. item id: {!r}'.format(
|
|
182
|
+
export_item.id))
|
|
183
|
+
try:
|
|
184
|
+
miscellaneous.Zipping.unzip_directory(zip_filename=export_item_path,
|
|
185
|
+
to_directory=local_path)
|
|
186
|
+
except Exception as e:
|
|
187
|
+
logger.warning("Failed to extract zip file error: {}".format(e))
|
|
188
|
+
finally:
|
|
189
|
+
# cleanup
|
|
190
|
+
if isinstance(export_item_path, str) and os.path.isfile(export_item_path):
|
|
191
|
+
os.remove(export_item_path)
|
|
192
|
+
|
|
193
|
+
@property
|
|
194
|
+
def platform_url(self):
|
|
195
|
+
return self._client_api._get_resource_url("projects/{}/datasets".format(self.project.id))
|
|
196
|
+
|
|
197
|
+
def open_in_web(self,
|
|
198
|
+
dataset_name: str = None,
|
|
199
|
+
dataset_id: str = None,
|
|
200
|
+
dataset: entities.Dataset = None):
|
|
201
|
+
"""
|
|
202
|
+
Open the dataset in web platform.
|
|
203
|
+
|
|
204
|
+
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
205
|
+
|
|
206
|
+
:param str dataset_name: The Name of the dataset
|
|
207
|
+
:param str dataset_id: The Id of the dataset
|
|
208
|
+
:param dtlpy.entities.dataset.Dataset dataset: dataset object
|
|
209
|
+
|
|
210
|
+
**Example**:
|
|
211
|
+
|
|
212
|
+
.. code-block:: python
|
|
213
|
+
|
|
214
|
+
project.datasets.open_in_web(dataset_id='dataset_id')
|
|
215
|
+
"""
|
|
216
|
+
if dataset_name is not None:
|
|
217
|
+
dataset = self.get(dataset_name=dataset_name)
|
|
218
|
+
if dataset is not None:
|
|
219
|
+
dataset.open_in_web()
|
|
220
|
+
elif dataset_id is not None:
|
|
221
|
+
self._client_api._open_in_web(url=f'{self.platform_url}/{dataset_id}/items')
|
|
222
|
+
else:
|
|
223
|
+
self._client_api._open_in_web(url=self.platform_url)
|
|
224
|
+
|
|
225
|
+
def checkout(self,
|
|
226
|
+
identifier: str = None,
|
|
227
|
+
dataset_name: str = None,
|
|
228
|
+
dataset_id: str = None,
|
|
229
|
+
dataset: entities.Dataset = None):
|
|
230
|
+
"""
|
|
231
|
+
Checkout (switch) to a dataset to work on it.
|
|
232
|
+
|
|
233
|
+
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
234
|
+
|
|
235
|
+
You must provide at least ONE of the following params: dataset_id, dataset_name.
|
|
236
|
+
|
|
237
|
+
:param str identifier: dataset name or partial id that you wish to switch to
|
|
238
|
+
:param str dataset_name: The Name of the dataset
|
|
239
|
+
:param str dataset_id: The Id of the dataset
|
|
240
|
+
:param dtlpy.entities.dataset.Dataset dataset: dataset object
|
|
241
|
+
|
|
242
|
+
**Example**:
|
|
243
|
+
|
|
244
|
+
.. code-block:: python
|
|
245
|
+
|
|
246
|
+
project.datasets.checkout(dataset_id='dataset_id')
|
|
247
|
+
"""
|
|
248
|
+
if dataset is None:
|
|
249
|
+
if dataset_id is not None or dataset_name is not None:
|
|
250
|
+
try:
|
|
251
|
+
dataset = self.project.datasets.get(dataset_name=dataset_name, dataset_id=dataset_id)
|
|
252
|
+
except exceptions.MissingEntity:
|
|
253
|
+
dataset = self.get(dataset_id=dataset_id, dataset_name=dataset_name)
|
|
254
|
+
elif identifier is not None:
|
|
255
|
+
dataset = self.__get_by_identifier(identifier=identifier)
|
|
256
|
+
else:
|
|
257
|
+
raise exceptions.PlatformException(error='400',
|
|
258
|
+
message='Must provide partial/full id/name to checkout')
|
|
259
|
+
self._client_api.state_io.put('dataset', dataset.to_json())
|
|
260
|
+
logger.info('Checked out to dataset {}'.format(dataset.name))
|
|
261
|
+
|
|
262
|
+
@_api_reference.add(path='/datasets/query', method='post')
|
|
263
|
+
def list(self, name=None, creator=None, filters: entities.Filters = None) -> miscellaneous.List[entities.Dataset]:
|
|
264
|
+
"""
|
|
265
|
+
List all datasets.
|
|
266
|
+
|
|
267
|
+
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
268
|
+
|
|
269
|
+
:param str name: list by name
|
|
270
|
+
:param str creator: list by creator
|
|
271
|
+
:param dtlpy.entities.filters.Filters filters: Filters entity containing filters parameters
|
|
272
|
+
:return: List of datasets
|
|
273
|
+
:rtype: list
|
|
274
|
+
|
|
275
|
+
**Example**:
|
|
276
|
+
|
|
277
|
+
.. code-block:: python
|
|
278
|
+
filters = dl.Filters(resource='datasets')
|
|
279
|
+
filters.add(field='readonly', values=False)
|
|
280
|
+
datasets = project.datasets.list(filters=filters)
|
|
281
|
+
"""
|
|
282
|
+
if filters is None:
|
|
283
|
+
filters = entities.Filters(resource=entities.FiltersResource.DATASET)
|
|
284
|
+
# assert type filters
|
|
285
|
+
elif not isinstance(filters, entities.Filters):
|
|
286
|
+
raise exceptions.PlatformException(error='400',
|
|
287
|
+
message='Unknown filters type: {!r}'.format(type(filters)))
|
|
288
|
+
if filters.resource != entities.FiltersResource.DATASET:
|
|
289
|
+
raise exceptions.PlatformException(
|
|
290
|
+
error='400',
|
|
291
|
+
message='Filters resource must be FiltersResource.DATASET. Got: {!r}'.format(filters.resource))
|
|
292
|
+
|
|
293
|
+
url = '/datasets/query'
|
|
294
|
+
|
|
295
|
+
if name is not None:
|
|
296
|
+
filters.add(field='name', values=name)
|
|
297
|
+
if creator is not None:
|
|
298
|
+
filters.add(field='creator', values=creator)
|
|
299
|
+
if self._project is not None:
|
|
300
|
+
filters.context = {"projects": [self._project.id]}
|
|
301
|
+
filters.page_size = 1000
|
|
302
|
+
filters.page = 0
|
|
303
|
+
datasets = list()
|
|
304
|
+
while True:
|
|
305
|
+
success, response = self._client_api.gen_request(req_type='POST',
|
|
306
|
+
json_req=filters.prepare(),
|
|
307
|
+
path=url,
|
|
308
|
+
headers={'user_query': filters._user_query})
|
|
309
|
+
if success:
|
|
310
|
+
pool = self._client_api.thread_pools('entity.create')
|
|
311
|
+
datasets_json = response.json()['items']
|
|
312
|
+
jobs = [None for _ in range(len(datasets_json))]
|
|
313
|
+
# return triggers list
|
|
314
|
+
for i_dataset, dataset in enumerate(datasets_json):
|
|
315
|
+
jobs[i_dataset] = pool.submit(entities.Dataset._protected_from_json,
|
|
316
|
+
**{'client_api': self._client_api,
|
|
317
|
+
'_json': dataset,
|
|
318
|
+
'datasets': self,
|
|
319
|
+
'project': self.project})
|
|
320
|
+
|
|
321
|
+
# get all results
|
|
322
|
+
results = [j.result() for j in jobs]
|
|
323
|
+
# log errors
|
|
324
|
+
_ = [logger.warning(r[1]) for r in results if r[0] is False]
|
|
325
|
+
# return good jobs
|
|
326
|
+
datasets.extend([r[1] for r in results if r[0] is True])
|
|
327
|
+
if response.json()['hasNextPage'] is True:
|
|
328
|
+
filters.page += 1
|
|
329
|
+
else:
|
|
330
|
+
break
|
|
331
|
+
else:
|
|
332
|
+
raise exceptions.PlatformException(response)
|
|
333
|
+
datasets = miscellaneous.List(datasets)
|
|
334
|
+
return datasets
|
|
335
|
+
|
|
336
|
+
@_api_reference.add(path='/datasets/{id}', method='get')
|
|
337
|
+
def get(self,
|
|
338
|
+
dataset_name: str = None,
|
|
339
|
+
dataset_id: str = None,
|
|
340
|
+
checkout: bool = False,
|
|
341
|
+
fetch: bool = None
|
|
342
|
+
) -> entities.Dataset:
|
|
343
|
+
"""
|
|
344
|
+
Get dataset by name or id.
|
|
345
|
+
|
|
346
|
+
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
347
|
+
|
|
348
|
+
You must provide at least ONE of the following params: dataset_id, dataset_name.
|
|
349
|
+
|
|
350
|
+
:param str dataset_name: optional - search by name
|
|
351
|
+
:param str dataset_id: optional - search by id
|
|
352
|
+
:param bool checkout: set the dataset as a default dataset object (cookies)
|
|
353
|
+
:param bool fetch: optional - fetch entity from platform (True), default taken from cookie
|
|
354
|
+
:return: Dataset object
|
|
355
|
+
:rtype: dtlpy.entities.dataset.Dataset
|
|
356
|
+
|
|
357
|
+
**Example**:
|
|
358
|
+
|
|
359
|
+
.. code-block:: python
|
|
360
|
+
|
|
361
|
+
dataset = project.datasets.get(dataset_id='dataset_id')
|
|
362
|
+
"""
|
|
363
|
+
if fetch is None:
|
|
364
|
+
fetch = self._client_api.fetch_entities
|
|
365
|
+
|
|
366
|
+
if dataset_id is None and dataset_name is None:
|
|
367
|
+
dataset = self.__get_from_cache()
|
|
368
|
+
if dataset is None:
|
|
369
|
+
raise exceptions.PlatformException(
|
|
370
|
+
error='400',
|
|
371
|
+
message='No checked-out Dataset was found, must checkout or provide an identifier in inputs')
|
|
372
|
+
elif fetch:
|
|
373
|
+
if dataset_id is not None and dataset_id != '':
|
|
374
|
+
dataset = self.__get_by_id(dataset_id)
|
|
375
|
+
# verify input dataset name is same as the given id
|
|
376
|
+
if dataset_name is not None and dataset.name != dataset_name:
|
|
377
|
+
logger.warning(
|
|
378
|
+
"Mismatch found in datasets.get: dataset_name is different then dataset.name: "
|
|
379
|
+
"{!r} != {!r}".format(
|
|
380
|
+
dataset_name,
|
|
381
|
+
dataset.name))
|
|
382
|
+
elif dataset_name is not None:
|
|
383
|
+
datasets = self.list(name=dataset_name)
|
|
384
|
+
if not datasets:
|
|
385
|
+
# empty list
|
|
386
|
+
raise exceptions.PlatformException('404', 'Dataset not found. Name: {!r}'.format(dataset_name))
|
|
387
|
+
# dataset = None
|
|
388
|
+
elif len(datasets) > 1:
|
|
389
|
+
raise exceptions.PlatformException('400', 'More than one dataset with same name.')
|
|
390
|
+
else:
|
|
391
|
+
dataset = datasets[0]
|
|
392
|
+
else:
|
|
393
|
+
raise exceptions.PlatformException(
|
|
394
|
+
error='404',
|
|
395
|
+
message='No input and no checked-out found')
|
|
396
|
+
else:
|
|
397
|
+
dataset = entities.Dataset.from_json(_json={'id': dataset_id,
|
|
398
|
+
'name': dataset_id},
|
|
399
|
+
client_api=self._client_api,
|
|
400
|
+
datasets=self,
|
|
401
|
+
project=self._project,
|
|
402
|
+
is_fetched=False)
|
|
403
|
+
assert isinstance(dataset, entities.Dataset)
|
|
404
|
+
if checkout:
|
|
405
|
+
self.checkout(dataset=dataset)
|
|
406
|
+
return dataset
|
|
407
|
+
|
|
408
|
+
@_api_reference.add(path='/datasets/{id}', method='delete')
|
|
409
|
+
def delete(self,
|
|
410
|
+
dataset_name: str = None,
|
|
411
|
+
dataset_id: str = None,
|
|
412
|
+
sure: bool = False,
|
|
413
|
+
really: bool = False):
|
|
414
|
+
"""
|
|
415
|
+
Delete a dataset forever!
|
|
416
|
+
|
|
417
|
+
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
418
|
+
|
|
419
|
+
**Example**:
|
|
420
|
+
|
|
421
|
+
.. code-block:: python
|
|
422
|
+
|
|
423
|
+
is_deleted = project.datasets.delete(dataset_id='dataset_id', sure=True, really=True)
|
|
424
|
+
|
|
425
|
+
:param str dataset_name: optional - search by name
|
|
426
|
+
:param str dataset_id: optional - search by id
|
|
427
|
+
:param bool sure: Are you sure you want to delete?
|
|
428
|
+
:param bool really: Really really sure?
|
|
429
|
+
:return: True if success
|
|
430
|
+
:rtype: bool
|
|
431
|
+
"""
|
|
432
|
+
if sure and really:
|
|
433
|
+
dataset = self.get(dataset_name=dataset_name, dataset_id=dataset_id)
|
|
434
|
+
success, response = self._client_api.gen_request(req_type='delete',
|
|
435
|
+
path='/datasets/{}'.format(dataset.id))
|
|
436
|
+
if not success:
|
|
437
|
+
raise exceptions.PlatformException(response)
|
|
438
|
+
logger.info('Dataset {!r} was deleted successfully'.format(dataset.name))
|
|
439
|
+
return True
|
|
440
|
+
else:
|
|
441
|
+
raise exceptions.PlatformException(
|
|
442
|
+
error='403',
|
|
443
|
+
message='Cannot delete dataset from SDK. Please login to the platform to delete')
|
|
444
|
+
|
|
445
|
+
@_api_reference.add(path='/datasets/{id}', method='patch')
|
|
446
|
+
def update(self,
|
|
447
|
+
dataset: entities.Dataset,
|
|
448
|
+
system_metadata: bool = False,
|
|
449
|
+
patch: dict = None
|
|
450
|
+
) -> entities.Dataset:
|
|
451
|
+
"""
|
|
452
|
+
Update dataset field.
|
|
453
|
+
|
|
454
|
+
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
455
|
+
|
|
456
|
+
:param dtlpy.entities.dataset.Dataset dataset: dataset object
|
|
457
|
+
:param bool system_metadata: True, if you want to change metadata system
|
|
458
|
+
:param dict patch: Specific patch request
|
|
459
|
+
:return: Dataset object
|
|
460
|
+
:rtype: dtlpy.entities.dataset.Dataset
|
|
461
|
+
|
|
462
|
+
**Example**:
|
|
463
|
+
|
|
464
|
+
.. code-block:: python
|
|
465
|
+
|
|
466
|
+
dataset = project.datasets.update(dataset='dataset_entity')
|
|
467
|
+
"""
|
|
468
|
+
url_path = '/datasets/{}'.format(dataset.id)
|
|
469
|
+
if system_metadata:
|
|
470
|
+
url_path += '?system=true'
|
|
471
|
+
|
|
472
|
+
if patch is None:
|
|
473
|
+
patch = dataset.to_json()
|
|
474
|
+
|
|
475
|
+
success, response = self._client_api.gen_request(req_type='patch',
|
|
476
|
+
path=url_path,
|
|
477
|
+
json_req=patch)
|
|
478
|
+
if success:
|
|
479
|
+
logger.info('Dataset was updated successfully')
|
|
480
|
+
return dataset
|
|
481
|
+
else:
|
|
482
|
+
raise exceptions.PlatformException(response)
|
|
483
|
+
|
|
484
|
+
@_api_reference.add(path='/datasets/{id}/unlock', method='patch')
|
|
485
|
+
def unlock(self, dataset: entities.Dataset) -> entities.Dataset:
|
|
486
|
+
"""
|
|
487
|
+
Unlock dataset.
|
|
488
|
+
|
|
489
|
+
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
490
|
+
|
|
491
|
+
:param dtlpy.entities.dataset.Dataset dataset: dataset object
|
|
492
|
+
:return: Dataset object
|
|
493
|
+
:rtype: dtlpy.entities.dataset.Dataset
|
|
494
|
+
|
|
495
|
+
**Example**:
|
|
496
|
+
|
|
497
|
+
.. code-block:: python
|
|
498
|
+
|
|
499
|
+
dataset = project.datasets.unlock(dataset='dataset_entity')
|
|
500
|
+
"""
|
|
501
|
+
url_path = '/datasets/{}/unlock'.format(dataset.id)
|
|
502
|
+
|
|
503
|
+
success, response = self._client_api.gen_request(req_type='patch', path=url_path)
|
|
504
|
+
if success:
|
|
505
|
+
logger.info('Dataset was unlocked successfully')
|
|
506
|
+
return dataset
|
|
507
|
+
else:
|
|
508
|
+
raise exceptions.PlatformException(response)
|
|
509
|
+
|
|
510
|
+
@_api_reference.add(path='/datasets/{id}/directoryTree', method='get')
|
|
511
|
+
def directory_tree(self,
|
|
512
|
+
dataset: entities.Dataset = None,
|
|
513
|
+
dataset_name: str = None,
|
|
514
|
+
dataset_id: str = None):
|
|
515
|
+
"""
|
|
516
|
+
Get dataset's directory tree.
|
|
517
|
+
|
|
518
|
+
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
519
|
+
|
|
520
|
+
You must provide at least ONE of the following params: dataset, dataset_name, dataset_id.
|
|
521
|
+
|
|
522
|
+
:param dtlpy.entities.dataset.Dataset dataset: dataset object
|
|
523
|
+
:param str dataset_name: The Name of the dataset
|
|
524
|
+
:param str dataset_id: The Id of the dataset
|
|
525
|
+
:return: DirectoryTree
|
|
526
|
+
|
|
527
|
+
**Example**:
|
|
528
|
+
|
|
529
|
+
.. code-block:: python
|
|
530
|
+
directory_tree = dataset.directory_tree
|
|
531
|
+
directory_tree = project.datasets.directory_tree(dataset='dataset_entity')
|
|
532
|
+
"""
|
|
533
|
+
dataset_id = self._resolve_dataset_id(dataset, dataset_name, dataset_id)
|
|
534
|
+
|
|
535
|
+
url_path = '/datasets/{}/directoryTree'.format(dataset_id)
|
|
536
|
+
|
|
537
|
+
success, response = self._client_api.gen_request(req_type='get',
|
|
538
|
+
path=url_path)
|
|
539
|
+
|
|
540
|
+
if success:
|
|
541
|
+
return entities.DirectoryTree(_json=response.json())
|
|
542
|
+
else:
|
|
543
|
+
raise exceptions.PlatformException(response)
|
|
544
|
+
|
|
545
|
+
@_api_reference.add(path='/datasets/{id}/clone', method='post')
|
|
546
|
+
def clone(self,
|
|
547
|
+
dataset_id: str,
|
|
548
|
+
clone_name: str = None,
|
|
549
|
+
filters: entities.Filters = None,
|
|
550
|
+
with_items_annotations: bool = True,
|
|
551
|
+
with_metadata: bool = True,
|
|
552
|
+
with_task_annotations_status: bool = True,
|
|
553
|
+
dst_dataset_id: str = None,
|
|
554
|
+
target_directory: str = None):
|
|
555
|
+
"""
|
|
556
|
+
Clone a dataset. Read more about cloning datasets and items in our `documentation <https://dataloop.ai/docs/clone-merge-dataset#cloned-dataset>`_ and `SDK documentation <https://developers.dataloop.ai/tutorials/data_management/data_versioning/chapter/>`_.
|
|
557
|
+
|
|
558
|
+
**Prerequisites**: You must be in the role of an *owner* or *developer*.
|
|
559
|
+
|
|
560
|
+
:param str dataset_id: id of the dataset you wish to clone
|
|
561
|
+
:param str clone_name: new dataset name
|
|
562
|
+
:param dtlpy.entities.filters.Filters filters: Filters entity or a query dict
|
|
563
|
+
:param bool with_items_annotations: true to clone with items annotations
|
|
564
|
+
:param bool with_metadata: true to clone with metadata
|
|
565
|
+
:param bool with_task_annotations_status: true to clone with task annotations' status
|
|
566
|
+
:param str dst_dataset_id: destination dataset id
|
|
567
|
+
:param str target_directory: target directory
|
|
568
|
+
:return: dataset object
|
|
569
|
+
:rtype: dtlpy.entities.dataset.Dataset
|
|
570
|
+
|
|
571
|
+
**Example**:
|
|
572
|
+
|
|
573
|
+
.. code-block:: python
|
|
574
|
+
|
|
575
|
+
dataset = project.datasets.clone(dataset_id='dataset_id',
|
|
576
|
+
clone_name='dataset_clone_name',
|
|
577
|
+
with_metadata=True,
|
|
578
|
+
with_items_annotations=False,
|
|
579
|
+
with_task_annotations_status=False)
|
|
580
|
+
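Illustrative variant (ids and paths are placeholders): clone only filtered items into an existing destination dataset.

.. code-block:: python

    filters = dl.Filters(field='dir', values='/train')
    dataset = project.datasets.clone(dataset_id='dataset_id',
                                     dst_dataset_id='dst_dataset_id',
                                     filters=filters,
                                     target_directory='/cloned')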
"""
|
|
581
|
+
if clone_name is None and dst_dataset_id is None:
|
|
582
|
+
raise exceptions.PlatformException('400', 'Must provide clone name or destination dataset id')
|
|
583
|
+
if filters is None:
|
|
584
|
+
filters = entities.Filters()
|
|
585
|
+
filters._user_query = 'false'
|
|
586
|
+
elif not isinstance(filters, entities.Filters):
|
|
587
|
+
raise exceptions.PlatformException(
|
|
588
|
+
error='400',
|
|
589
|
+
message='"filters" must be a dl.Filters entity. got: {!r}'.format(type(filters)))
|
|
590
|
+
|
|
591
|
+
copy_filters = copy.deepcopy(filters)
|
|
592
|
+
if copy_filters.has_field('hidden'):
|
|
593
|
+
copy_filters.pop('hidden')
|
|
594
|
+
|
|
595
|
+
if target_directory is not None and not target_directory.startswith('/'):
|
|
596
|
+
target_directory = '/' + target_directory
|
|
597
|
+
|
|
598
|
+
payload = {
|
|
599
|
+
"name": clone_name,
|
|
600
|
+
"filter": copy_filters.prepare(),
|
|
601
|
+
"cloneDatasetParams": {
|
|
602
|
+
"withItemsAnnotations": with_items_annotations,
|
|
603
|
+
"withMetadata": with_metadata,
|
|
604
|
+
"withTaskAnnotationsStatus": with_task_annotations_status,
|
|
605
|
+
"targetDirectory": target_directory
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
if dst_dataset_id is not None:
|
|
609
|
+
payload['cloneDatasetParams']['targetDatasetId'] = dst_dataset_id
|
|
610
|
+
success, response = self._client_api.gen_request(req_type='post',
|
|
611
|
+
path='/datasets/{}/clone'.format(dataset_id),
|
|
612
|
+
json_req=payload,
|
|
613
|
+
headers={'user_query': filters._user_query})
|
|
614
|
+
|
|
615
|
+
if not success:
|
|
616
|
+
raise exceptions.PlatformException(response)
|
|
617
|
+
|
|
618
|
+
command = entities.Command.from_json(_json=response.json(),
|
|
619
|
+
client_api=self._client_api)
|
|
620
|
+
command = command.wait()
|
|
621
|
+
|
|
622
|
+
if 'returnedModelId' not in command.spec:
|
|
623
|
+
raise exceptions.PlatformException(error='400',
|
|
624
|
+
message="returnedModelId key is missing in command response: {!r}"
|
|
625
|
+
.format(response))
|
|
626
|
+
return self.get(dataset_id=command.spec['returnedModelId'])
|
|
627
|
+
|
|
628
|
+
@_api_reference.add(path='/datasets/{id}/export', method='post')
|
|
629
|
+
def export(self,
|
|
630
|
+
dataset: entities.Dataset = None,
|
|
631
|
+
dataset_name: str = None,
|
|
632
|
+
dataset_id: str = None,
|
|
633
|
+
local_path: str = None,
|
|
634
|
+
filters: Union[dict, entities.Filters] = None,
|
|
635
|
+
annotation_filters: entities.Filters = None,
|
|
636
|
+
feature_vector_filters: entities.Filters = None,
|
|
637
|
+
include_feature_vectors: bool = False,
|
|
638
|
+
include_annotations: bool = False,
|
|
639
|
+
export_type: entities.ExportType = entities.ExportType.JSON,
|
|
640
|
+
timeout: int = 0,
|
|
641
|
+
dataset_lock: bool = False,
|
|
642
|
+
lock_timeout_sec: int = None,
|
|
643
|
+
export_summary: bool = False):
|
|
644
|
+
"""
|
|
645
|
+
Export dataset items and annotations.
|
|
646
|
+
|
|
647
|
+
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
648
|
+
|
|
649
|
+
You must provide at least ONE of the following params: dataset, dataset_name, dataset_id.
|
|
650
|
+
|
|
651
|
+
:param dtlpy.entities.dataset.Dataset dataset: Dataset object
|
|
652
|
+
:param str dataset_name: The name of the dataset
|
|
653
|
+
:param str dataset_id: The ID of the dataset
|
|
654
|
+
:param str local_path: Local path to save the exported dataset
|
|
655
|
+
:param Union[dict, dtlpy.entities.filters.Filters] filters: Filters entity or a query dictionary
|
|
656
|
+
:param dtlpy.entities.filters.Filters annotation_filters: Filters entity to filter annotations for export
|
|
657
|
+
:param dtlpy.entities.filters.Filters feature_vector_filters: Filters entity to filter feature vectors for export
|
|
658
|
+
:param bool include_feature_vectors: Include item feature vectors in the export
|
|
659
|
+
:param bool include_annotations: Include item annotations in the export
|
|
660
|
+
:param bool dataset_lock: Make dataset readonly during the export
|
|
661
|
+
:param bool export_summary: Get Summary of the dataset export
|
|
662
|
+
:param int lock_timeout_sec: Timeout for locking the dataset during export in seconds
|
|
663
|
+
:param entities.ExportType export_type: Type of export ('json' or 'zip')
|
|
664
|
+
:param int timeout: Maximum time in seconds to wait for the export to complete
|
|
665
|
+
:return: local_path where the exported data was saved
:rtype: str
|
|
667
|
+
|
|
668
|
+
**Example**:
|
|
669
|
+
|
|
670
|
+
.. code-block:: python
|
|
671
|
+
|
|
672
|
+
export_item = project.datasets.export(dataset_id='dataset_id',
|
|
673
|
+
filters=filters,
|
|
674
|
+
include_feature_vectors=True,
|
|
675
|
+
include_annotations=True,
|
|
676
|
+
export_type=dl.ExportType.JSON,
|
|
677
|
+
dataset_lock=True,
|
|
678
|
+
lock_timeout_sec=300,
|
|
679
|
+
export_summary=False)
|
|
680
|
+
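Illustrative variant (paths are placeholders): export the dataset as a ZIP archive into a local folder.

.. code-block:: python

    local_path = project.datasets.export(dataset_id='dataset_id',
                                         local_path='/path/to/folder',
                                         export_type=dl.ExportType.ZIP)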
"""
|
|
681
|
+
dataset_id = self._resolve_dataset_id(dataset, dataset_name, dataset_id)
|
|
682
|
+
payload = self._build_payload(filters, include_feature_vectors, include_annotations,
|
|
683
|
+
export_type, annotation_filters, feature_vector_filters,
|
|
684
|
+
dataset_lock, lock_timeout_sec, export_summary)
|
|
685
|
+
|
|
686
|
+
success, response = self._client_api.gen_request(req_type='post', path=f'/datasets/{dataset_id}/export',
|
|
687
|
+
json_req=payload)
|
|
688
|
+
if not success:
|
|
689
|
+
raise exceptions.PlatformException(response)
|
|
690
|
+
|
|
691
|
+
command = entities.Command.from_json(_json=response.json(),
|
|
692
|
+
client_api=self._client_api)
|
|
693
|
+
|
|
694
|
+
time.sleep(2)  # the command reports incorrect progress at the beginning
|
|
695
|
+
command = command.wait(timeout=timeout)
|
|
696
|
+
if 'outputItemId' not in command.spec:
|
|
697
|
+
raise exceptions.PlatformException(
|
|
698
|
+
error='400',
|
|
699
|
+
message="outputItemId key is missing in command response: {}".format(response))
|
|
700
|
+
item_id = command.spec['outputItemId']
|
|
701
|
+
self._download_exported_item(item_id=item_id, export_type=export_type, local_path=local_path)
|
|
702
|
+
return local_path
|
|
703
|
+
|
|
704
|
+
@_api_reference.add(path='/datasets/merge', method='post')
|
|
705
|
+
def merge(self,
|
|
706
|
+
merge_name: str,
|
|
707
|
+
dataset_ids: list,
|
|
708
|
+
project_ids: str,
|
|
709
|
+
with_items_annotations: bool = True,
|
|
710
|
+
with_metadata: bool = True,
|
|
711
|
+
with_task_annotations_status: bool = True,
|
|
712
|
+
wait: bool = True):
|
|
713
|
+
"""
|
|
714
|
+
Merge datasets. See our `SDK docs <https://developers.dataloop.ai/tutorials/data_management/data_versioning/chapter/>`_ for more information.
|
|
715
|
+
|
|
716
|
+
**Prerequisites**: You must be an *owner* or *developer* to use this method.
|
|
717
|
+
|
|
718
|
+
:param str merge_name: new dataset name
|
|
719
|
+
:param list dataset_ids: list of ids of the datasets you wish to merge
|
|
720
|
+
:param str project_ids: the project id that includes the datasets
|
|
721
|
+
:param bool with_items_annotations: true to merge with items annotations
|
|
722
|
+
:param bool with_metadata: true to merge with metadata
|
|
723
|
+
:param bool with_task_annotations_status: true to merge with task annotations' status
|
|
724
|
+
:param bool wait: wait for the command to finish
|
|
725
|
+
:return: True if success
|
|
726
|
+
:rtype: bool
|
|
727
|
+
|
|
728
|
+
**Example**:
|
|
729
|
+
|
|
730
|
+
.. code-block:: python
|
|
731
|
+
|
|
732
|
+
success = project.datasets.merge(dataset_ids=['dataset_id1','dataset_id2'],
|
|
733
|
+
merge_name='dataset_merge_name',
|
|
734
|
+
with_metadata=True,
|
|
735
|
+
with_items_annotations=False,
|
|
736
|
+
with_task_annotations_status=False)
|
|
737
|
+
"""
|
|
738
|
+
payload = {
|
|
739
|
+
"name": merge_name,
|
|
740
|
+
"datasetsIds": dataset_ids,
|
|
741
|
+
"projectIds": project_ids,
|
|
742
|
+
"mergeDatasetParams": {
|
|
743
|
+
"withItemsAnnotations": with_items_annotations,
|
|
744
|
+
"withMetadata": with_metadata,
|
|
745
|
+
"withTaskAnnotationsStatus": with_task_annotations_status
|
|
746
|
+
},
|
|
747
|
+
'asynced': wait
|
|
748
|
+
}
|
|
749
|
+
success, response = self._client_api.gen_request(req_type='post',
|
|
750
|
+
path='/datasets/merge',
|
|
751
|
+
json_req=payload)
|
|
752
|
+
|
|
753
|
+
if success:
|
|
754
|
+
command = entities.Command.from_json(_json=response.json(),
|
|
755
|
+
client_api=self._client_api)
|
|
756
|
+
if not wait:
|
|
757
|
+
return command
|
|
758
|
+
command = command.wait(timeout=0)
|
|
759
|
+
if 'mergeDatasetsConfiguration' not in command.spec:
|
|
760
|
+
raise exceptions.PlatformException(error='400',
|
|
761
|
+
message="mergeDatasetsConfiguration key is missing in command response: {}"
|
|
762
|
+
.format(response))
|
|
763
|
+
return True
|
|
764
|
+
else:
|
|
765
|
+
raise exceptions.PlatformException(response)
|
|
766
|
+
|
|
767
|
+
@_api_reference.add(path='/datasets/{id}/sync', method='post')
|
|
768
|
+
def sync(self, dataset_id: str, wait: bool = True):
|
|
769
|
+
"""
|
|
770
|
+
Sync dataset with external storage.
|
|
771
|
+
|
|
772
|
+
**Prerequisites**: You must be in the role of an *owner* or *developer*.
|
|
773
|
+
|
|
774
|
+
:param str dataset_id: The Id of the dataset to sync
|
|
775
|
+
:param bool wait: wait for the command to finish
|
|
776
|
+
:return: True if success
|
|
777
|
+
:rtype: bool
|
|
778
|
+
|
|
779
|
+
**Example**:
|
|
780
|
+
|
|
781
|
+
.. code-block:: python
|
|
782
|
+
|
|
783
|
+
success = project.datasets.sync(dataset_id='dataset_id')
|
|
784
|
+
"""
|
|
785
|
+
|
|
786
|
+
success, response = self._client_api.gen_request(req_type='post',
|
|
787
|
+
path='/datasets/{}/sync'.format(dataset_id))
|
|
788
|
+
|
|
789
|
+
if success:
|
|
790
|
+
command = entities.Command.from_json(_json=response.json(),
|
|
791
|
+
client_api=self._client_api)
|
|
792
|
+
if not wait:
|
|
793
|
+
return command
|
|
794
|
+
command = command.wait(timeout=0)
|
|
795
|
+
if 'datasetId' not in command.spec:
|
|
796
|
+
raise exceptions.PlatformException(error='400',
|
|
797
|
+
message="datasetId key is missing in command response: {}"
|
|
798
|
+
.format(response))
|
|
799
|
+
return True
|
|
800
|
+
else:
|
|
801
|
+
raise exceptions.PlatformException(response)
|
|
802
|
+
|
|
803
|
+
@_api_reference.add(path='/datasets', method='post')
|
|
804
|
+
def create(self,
|
|
805
|
+
dataset_name: str,
|
|
806
|
+
labels=None,
|
|
807
|
+
attributes=None,
|
|
808
|
+
ontology_ids=None,
|
|
809
|
+
driver: entities.Driver = None,
|
|
810
|
+
driver_id: str = None,
|
|
811
|
+
checkout: bool = False,
|
|
812
|
+
expiration_options: entities.ExpirationOptions = None,
|
|
813
|
+
index_driver: entities.IndexDriver = None,
|
|
814
|
+
recipe_id: str = None
|
|
815
|
+
) -> entities.Dataset:
|
|
816
|
+
"""
|
|
817
|
+
Create a new dataset
|
|
818
|
+
|
|
819
|
+
**Prerequisites**: You must be in the role of an *owner* or *developer*.
|
|
820
|
+
|
|
821
|
+
:param str dataset_name: The Name of the dataset
|
|
822
|
+
:param list labels: dictionary of {tag: color} or list of label entities
|
|
823
|
+
:param list attributes: dataset's ontology's attributes
|
|
824
|
+
:param list ontology_ids: optional - dataset ontology
|
|
825
|
+
:param dtlpy.entities.driver.Driver driver: optional - storage driver Driver object or driver name
|
|
826
|
+
:param str driver_id: optional - driver id
|
|
827
|
+
:param bool checkout: set the dataset as a default dataset object (cookies)
|
|
828
|
+
:param ExpirationOptions expiration_options: dl.ExpirationOptions object that contains definitions for the dataset, like MaxItemDays
|
|
829
|
+
:param str index_driver: dl.IndexDriver, dataset driver version
|
|
830
|
+
:param str recipe_id: optional - recipe id
|
|
831
|
+
:return: Dataset object
|
|
832
|
+
:rtype: dtlpy.entities.dataset.Dataset
|
|
833
|
+
|
|
834
|
+
**Example**:
|
|
835
|
+
|
|
836
|
+
.. code-block:: python
|
|
837
|
+
|
|
838
|
+
dataset = project.datasets.create(dataset_name='dataset_name', ontology_ids='ontology_ids')
|
|
839
|
+
"""
|
|
840
|
+
create_default_recipe = True
|
|
841
|
+
if any([labels, attributes, ontology_ids, recipe_id]):
|
|
842
|
+
create_default_recipe = False
|
|
843
|
+
|
|
844
|
+
# labels to list
|
|
845
|
+
if labels is not None:
|
|
846
|
+
if not isinstance(labels, list):
|
|
847
|
+
labels = [labels]
|
|
848
|
+
if not all(isinstance(label, entities.Label) for label in labels):
|
|
849
|
+
labels = entities.Dataset.serialize_labels(labels)
|
|
850
|
+
else:
|
|
851
|
+
labels = list()
|
|
852
|
+
|
|
853
|
+
# get creator from token
|
|
854
|
+
payload = {'name': dataset_name,
|
|
855
|
+
'projects': [self.project.id],
|
|
856
|
+
'createDefaultRecipe': create_default_recipe
|
|
857
|
+
}
|
|
858
|
+
|
|
859
|
+
if driver_id is None and driver is not None:
|
|
860
|
+
if isinstance(driver, entities.Driver):
|
|
861
|
+
driver_id = driver.id
|
|
862
|
+
elif isinstance(driver, str):
|
|
863
|
+
driver_id = self.project.drivers.get(driver_name=driver).id
|
|
864
|
+
else:
|
|
865
|
+
raise exceptions.PlatformException(
|
|
866
|
+
error=400,
|
|
867
|
+
message='Input arg "driver" must be Driver object or a string driver name. got type: {!r}'.format(
|
|
868
|
+
type(driver)))
|
|
869
|
+
if driver_id is not None:
|
|
870
|
+
payload['driver'] = driver_id
|
|
871
|
+
|
|
872
|
+
if expiration_options:
|
|
873
|
+
payload['expirationOptions'] = expiration_options.to_json()
|
|
874
|
+
if index_driver is not None:
|
|
875
|
+
payload['indexDriver'] = index_driver
|
|
876
|
+
|
|
877
|
+
success, response = self._client_api.gen_request(req_type='post',
|
|
878
|
+
path='/datasets',
|
|
879
|
+
json_req=payload)
|
|
880
|
+
if success:
|
|
881
|
+
dataset = entities.Dataset.from_json(client_api=self._client_api,
|
|
882
|
+
_json=response.json(),
|
|
883
|
+
datasets=self,
|
|
884
|
+
project=self.project)
|
|
885
|
+
# create ontology and recipe
|
|
886
|
+
if not create_default_recipe:
|
|
887
|
+
if recipe_id is not None:
|
|
888
|
+
dataset.switch_recipe(recipe_id=recipe_id)
|
|
889
|
+
else:
|
|
890
|
+
dataset = dataset.recipes.create(ontology_ids=ontology_ids,
|
|
891
|
+
labels=labels,
|
|
892
|
+
attributes=attributes).dataset
|
|
893
|
+
else:
|
|
894
|
+
raise exceptions.PlatformException(response)
|
|
895
|
+
logger.info('Dataset was created successfully. Dataset id: {!r}'.format(dataset.id))
|
|
896
|
+
assert isinstance(dataset, entities.Dataset)
|
|
897
|
+
if checkout:
|
|
898
|
+
self.checkout(dataset=dataset)
|
|
899
|
+
return dataset
|
|
900
|
+
|
|
901
|
+
@staticmethod
|
|
902
|
+
def _convert_single(downloader,
|
|
903
|
+
item,
|
|
904
|
+
img_filepath,
|
|
905
|
+
local_path,
|
|
906
|
+
overwrite,
|
|
907
|
+
annotation_options,
|
|
908
|
+
annotation_filters,
|
|
909
|
+
thickness,
|
|
910
|
+
with_text,
|
|
911
|
+
progress,
|
|
912
|
+
alpha,
|
|
913
|
+
export_version):
|
|
914
|
+
# this is to convert the downloaded json files to any other annotation type
|
|
915
|
+
try:
|
|
916
|
+
if entities.ViewAnnotationOptions.ANNOTATION_ON_IMAGE in annotation_options:
|
|
917
|
+
if img_filepath is None:
|
|
918
|
+
img_filepath = item.download()
|
|
919
|
+
downloader._download_img_annotations(item=item,
|
|
920
|
+
img_filepath=img_filepath,
|
|
921
|
+
local_path=local_path,
|
|
922
|
+
overwrite=overwrite,
|
|
923
|
+
annotation_options=annotation_options,
|
|
924
|
+
annotation_filters=annotation_filters,
|
|
925
|
+
thickness=thickness,
|
|
926
|
+
alpha=alpha,
|
|
927
|
+
with_text=with_text,
|
|
928
|
+
export_version=export_version
|
|
929
|
+
)
|
|
930
|
+
except Exception:
|
|
931
|
+
logger.error('Failed to download annotation for item: {!r}'.format(item.name))
|
|
932
|
+
progress.update()
|
|
933
|
+
|
|
934
|
+
@staticmethod
|
|
935
|
+
def download_annotations(dataset: entities.Dataset,
|
|
936
|
+
local_path: str = None,
|
|
937
|
+
filters: entities.Filters = None,
|
|
938
|
+
annotation_options: entities.ViewAnnotationOptions = None,
|
|
939
|
+
annotation_filters: entities.Filters = None,
|
|
940
|
+
overwrite: bool = False,
|
|
941
|
+
thickness: int = 1,
|
|
942
|
+
with_text: bool = False,
|
|
943
|
+
remote_path: str = None,
|
|
944
|
+
include_annotations_in_output: bool = True,
|
|
945
|
+
export_png_files: bool = False,
|
|
946
|
+
filter_output_annotations: bool = False,
|
|
947
|
+
alpha: float = None,
|
|
948
|
+
export_version=entities.ExportVersion.V1,
|
|
949
|
+
dataset_lock: bool = False,
|
|
950
|
+
lock_timeout_sec: int = None,
|
|
951
|
+
export_summary: bool = False,
|
|
952
|
+
) -> str:
|
|
953
|
+
"""
|
|
954
|
+
Download dataset's annotations by filters.
|
|
955
|
+
|
|
956
|
+
You may filter the dataset both for items and for annotations and download annotations.
|
|
957
|
+
|
|
958
|
+
Optional -- download annotations as: mask, instance, image mask of the item.
|
|
959
|
+
|
|
960
|
+
**Prerequisites**: You must be in the role of an *owner* or *developer*.
|
|
961
|
+
|
|
962
|
+
:param dtlpy.entities.dataset.Dataset dataset: dataset object
|
|
963
|
+
:param str local_path: local folder or filename to save to.
|
|
964
|
+
:param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
|
|
965
|
+
:param list annotation_options: type of download annotations: list(dl.ViewAnnotationOptions)
|
|
966
|
+
:param dtlpy.entities.filters.Filters annotation_filters: Filters entity to filter annotations for download
|
|
967
|
+
:param bool overwrite: optional - default = False to overwrite the existing files
:param int thickness: optional - line thickness, if -1 annotation will be filled, default =1
:param bool with_text: optional - add text to annotations, default = False
:param str remote_path: DEPRECATED and ignored
:param bool include_annotations_in_output: default - True, if export should contain annotations
:param bool export_png_files: default - False; if True, semantic annotations will be exported as png files
:param bool filter_output_annotations: default - False, given an export by filter - determine if to filter out annotations
:param float alpha: opacity value [0 1], default 1
:param str export_version: `V2` - exported items will have original extension in filename, `V1` - no original extension in filenames
:param bool dataset_lock: optional - default = False, make the dataset readonly (locked) during the export
:param int lock_timeout_sec: optional - seconds to keep the dataset locked during the export
:param bool export_summary: optional - default = False, get a summary of the export
:return: local_path of the directory where all the downloaded items and annotations are saved
:rtype: str
|
|
982
|
+
|
|
983
|
+
**Example**:
|
|
984
|
+
|
|
985
|
+
.. code-block:: python
|
|
986
|
+
|
|
987
|
+
file_path = project.datasets.download_annotations(dataset='dataset_entity',
|
|
988
|
+
local_path='local_path',
|
|
989
|
+
annotation_options=dl.ViewAnnotationOptions,
|
|
990
|
+
overwrite=False,
|
|
991
|
+
thickness=1,
|
|
992
|
+
with_text=False,
|
|
993
|
+
alpha=1,
|
|
994
|
+
dataset_lock=False,
|
|
995
|
+
lock_timeout_sec=300,
|
|
996
|
+
export_summary=False
|
|
997
|
+
)
|
|
998
|
+
"""
|
|
999
|
+
if annotation_options is None:
|
|
1000
|
+
annotation_options = list()
|
|
1001
|
+
elif not isinstance(annotation_options, list):
|
|
1002
|
+
annotation_options = [annotation_options]
|
|
1003
|
+
for ann_option in annotation_options:
|
|
1004
|
+
if not isinstance(ann_option, entities.ViewAnnotationOptions):
|
|
1005
|
+
if ann_option not in list(entities.ViewAnnotationOptions):
|
|
1006
|
+
raise PlatformException(
|
|
1007
|
+
error='400',
|
|
1008
|
+
message='Unknown annotation download option: {}, please choose from: {}'.format(
|
|
1009
|
+
ann_option, list(entities.ViewAnnotationOptions)))
|
|
1010
|
+
|
|
1011
|
+
if remote_path is not None:
|
|
1012
|
+
logger.warning(
|
|
1013
|
+
'"remote_path" is ignored. Use "filters=dl.Filters(field="dir, values={!r}"'.format(remote_path))
|
|
1014
|
+
if local_path is None:
|
|
1015
|
+
if dataset.project is None:
|
|
1016
|
+
# by dataset name
|
|
1017
|
+
local_path = os.path.join(
|
|
1018
|
+
services.service_defaults.DATALOOP_PATH,
|
|
1019
|
+
"datasets",
|
|
1020
|
+
"{}_{}".format(dataset.name, dataset.id),
|
|
1021
|
+
)
|
|
1022
|
+
else:
|
|
1023
|
+
# by dataset and project name
|
|
1024
|
+
local_path = os.path.join(
|
|
1025
|
+
services.service_defaults.DATALOOP_PATH,
|
|
1026
|
+
"projects",
|
|
1027
|
+
dataset.project.name,
|
|
1028
|
+
"datasets",
|
|
1029
|
+
dataset.name,
|
|
1030
|
+
)
|
|
1031
|
+
|
|
1032
|
+
if filters is None:
|
|
1033
|
+
filters = entities.Filters()
|
|
1034
|
+
filters._user_query = 'false'
|
|
1035
|
+
if annotation_filters is not None:
|
|
1036
|
+
for annotation_filter_and in annotation_filters.and_filter_list:
|
|
1037
|
+
filters.add_join(field=annotation_filter_and.field,
|
|
1038
|
+
values=annotation_filter_and.values,
|
|
1039
|
+
operator=annotation_filter_and.operator,
|
|
1040
|
+
method=entities.FiltersMethod.AND)
|
|
1041
|
+
for annotation_filter_or in annotation_filters.or_filter_list:
|
|
1042
|
+
filters.add_join(field=annotation_filter_or.field,
|
|
1043
|
+
values=annotation_filter_or.values,
|
|
1044
|
+
operator=annotation_filter_or.operator,
|
|
1045
|
+
method=entities.FiltersMethod.OR)
|
|
1046
|
+
|
|
1047
|
+
downloader = repositories.Downloader(items_repository=dataset.items)
|
|
1048
|
+
downloader.download_annotations(dataset=dataset,
|
|
1049
|
+
filters=filters,
|
|
1050
|
+
annotation_filters=annotation_filters,
|
|
1051
|
+
local_path=local_path,
|
|
1052
|
+
overwrite=overwrite,
|
|
1053
|
+
include_annotations_in_output=include_annotations_in_output,
|
|
1054
|
+
export_png_files=export_png_files,
|
|
1055
|
+
filter_output_annotations=filter_output_annotations,
|
|
1056
|
+
export_version=export_version,
|
|
1057
|
+
dataset_lock=dataset_lock,
|
|
1058
|
+
lock_timeout_sec=lock_timeout_sec,
|
|
1059
|
+
export_summary=export_summary
|
|
1060
|
+
)
|
|
1061
|
+
if annotation_options:
|
|
1062
|
+
pages = dataset.items.list(filters=filters)
|
|
1063
|
+
if not isinstance(annotation_options, list):
|
|
1064
|
+
annotation_options = [annotation_options]
|
|
1065
|
+
# convert all annotations to annotation_options
|
|
1066
|
+
pool = dataset._client_api.thread_pools(pool_name='dataset.download')
|
|
1067
|
+
jobs = [None for _ in range(pages.items_count)]
|
|
1068
|
+
progress = tqdm.tqdm(total=pages.items_count,
|
|
1069
|
+
disable=dataset._client_api.verbose.disable_progress_bar_download_annotations,
|
|
1070
|
+
file=sys.stdout, desc='Download Annotations')
|
|
1071
|
+
i_item = 0
|
|
1072
|
+
for page in pages:
|
|
1073
|
+
for item in page:
|
|
1074
|
+
jobs[i_item] = pool.submit(
|
|
1075
|
+
Datasets._convert_single,
|
|
1076
|
+
**{
|
|
1077
|
+
'downloader': downloader,
|
|
1078
|
+
'item': item,
|
|
1079
|
+
'img_filepath': None,
|
|
1080
|
+
'local_path': local_path,
|
|
1081
|
+
'overwrite': overwrite,
|
|
1082
|
+
'annotation_options': annotation_options,
|
|
1083
|
+
'annotation_filters': annotation_filters,
|
|
1084
|
+
'thickness': thickness,
|
|
1085
|
+
'with_text': with_text,
|
|
1086
|
+
'progress': progress,
|
|
1087
|
+
'alpha': alpha,
|
|
1088
|
+
'export_version': export_version
|
|
1089
|
+
}
|
|
1090
|
+
)
|
|
1091
|
+
i_item += 1
|
|
1092
|
+
# get all results
|
|
1093
|
+
_ = [j.result() for j in jobs]
|
|
1094
|
+
progress.close()
|
|
1095
|
+
return local_path
|
|
1096
|
+
|
|
1097
|
+
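Not part of the diff: a minimal usage sketch of the `download_annotations` signature added above, assuming an authenticated `dl` session; the project name, dataset name, directory, and label values are placeholders.

    import dtlpy as dl

    # Placeholder project/dataset names.
    project = dl.projects.get(project_name='my-project')
    dataset = project.datasets.get(dataset_name='my-dataset')

    # Item filter (only items under /train) and annotation filter (only 'car' labels);
    # the annotation filter is merged into the item query via add_join, as in the method body.
    item_filters = dl.Filters(field='dir', values='/train')
    annotation_filters = dl.Filters(resource=dl.FiltersResource.ANNOTATION)
    annotation_filters.add(field='label', values='car')

    local_path = project.datasets.download_annotations(
        dataset=dataset,
        local_path='/tmp/my-dataset-annotations',
        filters=item_filters,
        annotation_filters=annotation_filters,
        annotation_options=[dl.ViewAnnotationOptions.JSON, dl.ViewAnnotationOptions.MASK],
        overwrite=True,
    )
    print(local_path)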
+    def _upload_single_item_annotation(self, item, file, pbar):
+        try:
+            item.annotations.upload(file)
+        except Exception as err:
+            raise err
+        finally:
+            pbar.update()
+
+    def upload_annotations(self,
+                           dataset,
+                           local_path,
+                           filters: entities.Filters = None,
+                           clean=False,
+                           remote_root_path='/',
+                           export_version=entities.ExportVersion.V1
+                           ):
+        """
+        Upload annotations to dataset.
+
+        Example for remote_root_path: if the item filepath is "/a/b/item" and remote_root_path is "/a", the start folder will be "b" instead of "a".
+
+        **Prerequisites**: You must have a dataset with items that are related to the annotations. Annotation files are matched to items by filename. You must be in the role of an *owner* or *developer*.
+
+        :param dtlpy.entities.dataset.Dataset dataset: dataset to upload to
+        :param str local_path: local folder where the annotation files are
+        :param dtlpy.entities.filters.Filters filters: Filters entity or a dictionary containing filters parameters
+        :param bool clean: True to remove the old annotations
+        :param str remote_root_path: the remote root path to match remote and local items
+        :param str export_version: `V2` - exported items will have the original extension in the filename, `V1` - no original extension in filenames
+
+        **Example**:
+
+        .. code-block:: python
+
+            project.datasets.upload_annotations(dataset='dataset_entity',
+                                                local_path='local_path',
+                                                clean=False,
+                                                export_version=dl.ExportVersion.V1
+                                                )
+        """
+        if filters is None:
+            filters = entities.Filters()
+            filters._user_query = 'false'
+        pages = dataset.items.list(filters=filters)
+        total_items = pages.items_count
+        pbar = tqdm.tqdm(total=total_items, disable=dataset._client_api.verbose.disable_progress_bar_upload_annotations,
+                         file=sys.stdout, desc='Upload Annotations')
+        pool = self._client_api.thread_pools('annotation.upload')
+        annotations_uploaded_count = 0
+        for item in pages.all():
+            if export_version == entities.ExportVersion.V1:
+                _, ext = os.path.splitext(item.filename)
+                filepath = item.filename.replace(ext, '.json')
+            else:
+                filepath = item.filename + '.json'
+            # make the file path ignore the hierarchy of the files that are in remote_root_path
+            filepath = os.path.relpath(filepath, remote_root_path)
+            json_file = os.path.join(local_path, filepath)
+            if not os.path.isfile(json_file):
+                pbar.update()
+                continue
+            annotations_uploaded_count += 1
+            if item.annotated and clean:
+                item.annotations.delete(filters=entities.Filters(resource=entities.FiltersResource.ANNOTATION))
+            pool.submit(self._upload_single_item_annotation, **{'item': item,
+                                                                'file': json_file,
+                                                                'pbar': pbar})
+        pool.shutdown()
+        if annotations_uploaded_count == 0:
+            logger.warning(msg="No annotations were uploaded to the dataset!")
+        else:
+            logger.info(msg='Found and uploaded {} annotations.'.format(annotations_uploaded_count))
+
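Not part of the diff: a short usage sketch for `upload_annotations`, assuming the JSON files were produced by a previous export; names and paths are placeholders. With `ExportVersion.V1` an item "/a/b/img.jpg" is matched to "<local_path>/a/b/img.json", while with `ExportVersion.V2` it is matched to "<local_path>/a/b/img.jpg.json"; `remote_root_path='/a'` strips the leading "/a" from that lookup path.

    import dtlpy as dl

    # Placeholder project/dataset names and local folder.
    project = dl.projects.get(project_name='my-project')
    dataset = project.datasets.get(dataset_name='my-dataset')

    project.datasets.upload_annotations(
        dataset=dataset,
        local_path='/tmp/my-dataset-annotations/json',
        clean=True,                 # delete existing annotations on matched items first
        remote_root_path='/a',      # match '<local_path>/b/...' to remote '/a/b/...'
        export_version=dl.ExportVersion.V1,
    )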
+    def set_readonly(self, state: bool, dataset: entities.Dataset):
+        """
+        Set dataset readonly mode.
+
+        **Prerequisites**: You must be in the role of an *owner* or *developer*.
+
+        :param bool state: state to update readonly mode
+        :param dtlpy.entities.dataset.Dataset dataset: dataset object
+
+        **Example**:
+
+        .. code-block:: python
+
+            project.datasets.set_readonly(dataset='dataset_entity', state=True)
+        """
+        import warnings
+        warnings.warn("`readonly` flag on dataset is deprecated, doing nothing.", DeprecationWarning)
+
+
+    @_api_reference.add(path='/datasets/{id}/split', method='post')
+    def split_ml_subsets(self,
+                         dataset_id: str,
+                         items_query: entities.Filters,
+                         ml_split_list: dict) -> bool:
+        """
+        Split dataset items into ML subsets.
+
+        :param str dataset_id: The ID of the dataset.
+        :param dtlpy.entities.filters.Filters items_query: Filters entity to select the items to split.
+        :param dict ml_split_list: Dictionary with 'train', 'validation', 'test' keys and integer percentages.
+        :return: True if the split operation was successful.
+        :rtype: bool
+        :raises: PlatformException on failure, ValueError if percentages do not sum to 100 or keys/values are invalid.
+        """
+        # Validate percentages
+        if not ml_split_list:
+            ml_split_list = {'train': 80, 'validation': 10, 'test': 10}
+
+        if not items_query:
+            items_query = entities.Filters()
+
+        items_query_dict = items_query.prepare()
+        required_keys = {'train', 'validation', 'test'}
+        if set(ml_split_list.keys()) != required_keys:
+            raise ValueError("MLSplitList must have exactly the keys 'train', 'validation', 'test'.")
+        total = sum(ml_split_list.values())
+        if total != 100:
+            raise ValueError(
+                "Please set the Train, Validation, and Test subsets percentages to add up to 100%. "
+                "For example: 70, 15, 15."
+            )
+        for key, value in ml_split_list.items():
+            if not isinstance(value, int) or value < 0:
+                raise ValueError("Percentages must be integers >= 0.")
+        payload = {
+            'itemsQuery': items_query_dict,
+            'MLSplitList': ml_split_list
+        }
+        path = f'/datasets/{dataset_id}/split'
+        success, response = self._client_api.gen_request(req_type='post',
+                                                          path=path,
+                                                          json_req=payload)
+        if success:
+            # Wait for the split operation to complete
+            command = entities.Command.from_json(_json=response.json(),
+                                                 client_api=self._client_api)
+            command.wait()
+            return True
+        else:
+            raise exceptions.PlatformException(response)
+
+
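Not part of the diff: a usage sketch for the new `split_ml_subsets` endpoint wrapper, with placeholder names; the percentages must be integers summing to 100.

    import dtlpy as dl

    # Placeholder project/dataset names.
    project = dl.projects.get(project_name='my-project')
    dataset = project.datasets.get(dataset_name='my-dataset')

    # Split all items 70/15/15; narrow the selection with e.g. dl.Filters(field='dir', values='/train').
    project.datasets.split_ml_subsets(
        dataset_id=dataset.id,
        items_query=dl.Filters(),
        ml_split_list={'train': 70, 'validation': 15, 'test': 15},
    )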
+    @_api_reference.add(path='/datasets/{id}/items/bulk-update-metadata', method='post')
+    def bulk_update_ml_subset(self, dataset_id: str, items_query: entities.Filters, subset: str = None, deleteTag: bool = False) -> bool:
+        """
+        Bulk update the ML subset assignment for the selected items.
+        If subset is None, remove the subsets. Otherwise, assign the specified subset.
+
+        :param str dataset_id: ID of the dataset
+        :param dtlpy.entities.filters.Filters items_query: Filters entity for selecting items
+        :param str subset: 'train', 'validation', 'test' or None to remove all
+        :param bool deleteTag: if True, remove the subset tags from the selected items instead of assigning a subset
+        :return: True if success
+        :rtype: bool
+        """
+        if items_query is None:
+            items_query = entities.Filters()
+        items_query_dict = items_query.prepare()
+        if not deleteTag and subset not in ['train', 'validation', 'test']:
+            raise ValueError("subset must be one of: 'train', 'validation', 'test'")
+        # Determine tag values based on subset
+        tags = {
+            'train': True if subset == 'train' else None,
+            'validation': True if subset == 'validation' else None,
+            'test': True if subset == 'test' else None
+        }
+
+        payload = {
+            "query": items_query_dict,
+            "updateQuery": {
+                "update": {
+                    "metadata": {
+                        "system": {
+                            "tags": tags
+                        }
+                    }
+                },
+                "systemSpace": True
+            }
+        }
+
+        success, response = self._client_api.gen_request(
+            req_type='post',
+            path=f'/datasets/{dataset_id}/items/bulk-update-metadata',
+            json_req=payload
+        )
+        if success:
+            # Similar to the split operation, a command is returned
+            command = entities.Command.from_json(_json=response.json(), client_api=self._client_api)
+            command.wait()
+            return True
+        else:
+            raise exceptions.PlatformException(response)
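Not part of the diff: a usage sketch for `bulk_update_ml_subset` with placeholder names, showing both assigning a subset and clearing the subset tags.

    import dtlpy as dl

    # Placeholder project/dataset names.
    project = dl.projects.get(project_name='my-project')
    dataset = project.datasets.get(dataset_name='my-dataset')

    # Assign every item under /val to the 'validation' subset.
    project.datasets.bulk_update_ml_subset(
        dataset_id=dataset.id,
        items_query=dl.Filters(field='dir', values='/val'),
        subset='validation',
    )

    # Clear the train/validation/test tags from the same items.
    project.datasets.bulk_update_ml_subset(
        dataset_id=dataset.id,
        items_query=dl.Filters(field='dir', values='/val'),
        subset=None,
        deleteTag=True,
    )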