dtlpy 1.114.17__py3-none-any.whl → 1.115.44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtlpy/__init__.py +1 -1
- dtlpy/__version__.py +1 -1
- dtlpy/entities/__init__.py +1 -1
- dtlpy/entities/analytic.py +42 -6
- dtlpy/entities/codebase.py +1 -5
- dtlpy/entities/compute.py +12 -5
- dtlpy/entities/dataset.py +19 -5
- dtlpy/entities/driver.py +14 -2
- dtlpy/entities/filters.py +156 -3
- dtlpy/entities/item.py +9 -3
- dtlpy/entities/prompt_item.py +7 -1
- dtlpy/entities/service.py +5 -0
- dtlpy/ml/base_model_adapter.py +407 -263
- dtlpy/repositories/commands.py +1 -7
- dtlpy/repositories/computes.py +17 -13
- dtlpy/repositories/datasets.py +287 -74
- dtlpy/repositories/downloader.py +23 -3
- dtlpy/repositories/drivers.py +12 -0
- dtlpy/repositories/executions.py +1 -3
- dtlpy/repositories/features.py +31 -14
- dtlpy/repositories/items.py +5 -2
- dtlpy/repositories/models.py +16 -4
- dtlpy/repositories/uploader.py +22 -12
- dtlpy/services/api_client.py +6 -3
- dtlpy/services/reporter.py +1 -1
- {dtlpy-1.114.17.dist-info → dtlpy-1.115.44.dist-info}/METADATA +15 -12
- {dtlpy-1.114.17.dist-info → dtlpy-1.115.44.dist-info}/RECORD +34 -34
- {dtlpy-1.114.17.data → dtlpy-1.115.44.data}/scripts/dlp +0 -0
- {dtlpy-1.114.17.data → dtlpy-1.115.44.data}/scripts/dlp.bat +0 -0
- {dtlpy-1.114.17.data → dtlpy-1.115.44.data}/scripts/dlp.py +0 -0
- {dtlpy-1.114.17.dist-info → dtlpy-1.115.44.dist-info}/WHEEL +0 -0
- {dtlpy-1.114.17.dist-info → dtlpy-1.115.44.dist-info}/entry_points.txt +0 -0
- {dtlpy-1.114.17.dist-info → dtlpy-1.115.44.dist-info}/licenses/LICENSE +0 -0
- {dtlpy-1.114.17.dist-info → dtlpy-1.115.44.dist-info}/top_level.txt +0 -0
dtlpy/__init__.py
CHANGED
@@ -108,7 +108,7 @@ from .entities import (
     # compute
     ClusterProvider, ComputeType, ComputeStatus, Toleration, DeploymentResource, DeploymentResources,
     NodePool, AuthenticationIntegration, Authentication, ComputeCluster, ComputeContext, Compute, KubernetesCompute,
-    ServiceDriver
+    ServiceDriver, ExportType, OutputExportType
 )
 from .ml import BaseModelAdapter
 from .utilities import Converter, BaseServiceRunner, Progress, Context, AnnotationFormat
dtlpy/__version__.py
CHANGED
@@ -1 +1 @@
-version = '1.114.17'
+version = '1.115.44'
dtlpy/entities/__init__.py
CHANGED
@@ -22,7 +22,7 @@ from .trigger import Trigger, TriggerResource, TriggerAction, TriggerExecutionMo
     TriggerType
 from .project import Project, MemberRole
 from .artifact import ItemArtifact, LocalArtifact, LinkArtifact, ArtifactType, Artifact
-from .dataset import Dataset, ExpirationOptions, IndexDriver, ExportType
+from .dataset import Dataset, ExpirationOptions, IndexDriver, ExportType, OutputExportType
 from .codebase import Codebase
 from .annotation import Annotation, FrameAnnotation, ViewAnnotationOptions, AnnotationStatus, AnnotationType, \
     ExportVersion
dtlpy/entities/analytic.py
CHANGED
@@ -75,7 +75,15 @@ class ServiceSample(BaseSample):
                  service_type: entities.ServiceType = None,
                  interval: int = None,
                  driver_id: str = None,
-                 other_keys: dict = None
+                 other_keys: dict = None,
+                 concurrency_limit: int = None,
+                 concurrency_count: int = None,
+                 cpu_limit: int = None,
+                 ram_limit: int = None,
+                 gpu_limit: int = None,
+                 gpu: int = None,
+                 gpu_memory: int = None,
+                 gpu_memory_limit: int = None
                  ):
         super().__init__(
             start_time=start_time,
@@ -102,6 +110,14 @@ class ServiceSample(BaseSample):
         self.service_type = service_type if service_type is not None else entities.ServiceType.REGULAR
         self.interval = interval
         self.driver_id = driver_id
+        self.concurrency_limit = concurrency_limit
+        self.concurrency_count = concurrency_count
+        self.cpu_limit = cpu_limit
+        self.ram_limit = ram_limit
+        self.gpu_limit = gpu_limit
+        self.gpu = gpu
+        self.gpu_memory = gpu_memory
+        self.gpu_memory_limit = gpu_memory_limit
 
     def to_json(self):
         _json = super().to_json()
@@ -120,7 +136,15 @@ class ServiceSample(BaseSample):
             'queueSize': self.queue_size,
             'numExecutions': self.num_executions,
             'interval': self.interval,
-            'driverId': self.driver_id
+            'driverId': self.driver_id,
+            'concurrencyLimit': self.concurrency_limit,
+            'concurrencyCount': self.concurrency_count,
+            'cpuLimit': self.cpu_limit,
+            'ramLimit': self.ram_limit,
+            'gpuLimit': self.gpu_limit,
+            'gpu': self.gpu,
+            'gpuMemory': self.gpu_memory,
+            'gpuMemoryLimit': self.gpu_memory_limit
         })
         _json.update({
             'entityType': self.entity_type
@@ -153,7 +177,15 @@ class ServiceSample(BaseSample):
             num_executions=_json.get('data', {}).get('numExecutions', None),
             service_type=_json.get('type', entities.ServiceType.REGULAR),
             interval=_json.get('data', {}).get('interval', None),
-            driver_id=_json.get('data', {}).get('driverId', None)
+            driver_id=_json.get('data', {}).get('driverId', None),
+            concurrency_limit=_json.get('data', {}).get('concurrencyLimit', None),
+            concurrency_count=_json.get('data', {}).get('concurrencyCount', None),
+            cpu_limit=_json.get('data', {}).get('cpuLimit', None),
+            ram_limit=_json.get('data', {}).get('ramLimit', None),
+            gpu_limit=_json.get('data', {}).get('gpuLimit', None),
+            gpu=_json.get('data', {}).get('gpu', None),
+            gpu_memory=_json.get('data', {}).get('gpuMemory', None),
+            gpu_memory_limit=_json.get('data', {}).get('gpuMemoryLimit', None)
         )
         return inst
 
@@ -177,7 +209,8 @@ class ExecutionSample(BaseSample):
                  trigger_id=None,
                  function_name=None,
                  duration=None,
-                 other_keys: dict = None
+                 other_keys: dict = None,
+                 function_duration=None
                  ):
         super().__init__(
             start_time=start_time,
@@ -199,6 +232,7 @@ class ExecutionSample(BaseSample):
         self.trigger_id = trigger_id
         self.function_name = function_name
         self.duration = duration
+        self.function_duration = function_duration
 
     def to_json(self):
         _json = super().to_json()
@@ -212,7 +246,8 @@ class ExecutionSample(BaseSample):
         })
         _json['data'].update({
             'functionName': self.function_name,
-            'duration': self.duration
+            'duration': self.duration,
+            'functionDuration': self.function_duration,
         })
         _json['context'] = {k: v for k, v in _json['context'].items() if v is not None}
         _json['data'] = {k: v for k, v in _json['data'].items() if v is not None}
@@ -237,7 +272,8 @@ class ExecutionSample(BaseSample):
             action=_json.get('action', None),
             status=_json.get('data', {}).get('status', None),
             function_name=_json.get('data', {}).get('functionName', None),
-            duration=_json.get('data', {}).get('duration', None)
+            duration=_json.get('data', {}).get('duration', None),
+            function_duration=_json.get('data', {}).get('functionDuration', None)
         )
         return inst
 
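The new ServiceSample and ExecutionSample fields expose per-service resource metrics (concurrency, CPU, RAM, GPU) and a separate function duration. A minimal usage sketch, assuming the remaining keyword arguments keep their None defaults and using illustrative values only:

    from dtlpy.entities.analytic import ServiceSample

    sample = ServiceSample(concurrency_limit=10,
                           concurrency_count=4,
                           cpu_limit=2,
                           ram_limit=4096,
                           gpu_limit=1,
                           gpu_memory_limit=16384)
    payload = sample.to_json()
    # from_json reads these values back from the sample's 'data' section using the
    # camelCase keys added above ('concurrencyLimit', 'cpuLimit', 'gpuMemoryLimit', ...)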
dtlpy/entities/codebase.py
CHANGED
@@ -59,12 +59,8 @@ class GitCodebase(entities.DlEntity):
     @property
     def codebases(self):
         if self._codebases is None:
-            if self._item is not None:
-                dataset = self.item.dataset
-            else:
-                dataset = None
             self._codebases = repositories.Codebases(client_api=self.client_api,
-                                                     dataset=dataset)
+                                                     dataset=None)
         assert isinstance(self._codebases, repositories.Codebases)
         return self._codebases
 
dtlpy/entities/compute.py
CHANGED
@@ -217,7 +217,8 @@ class ComputeCluster:
             node_pools: Optional[List[NodePool]] = None,
             metadata: Optional[Dict] = None,
             authentication: Optional[Authentication] = None,
-            plugins: Optional[dict] = None
+            plugins: Optional[dict] = None,
+            deployment_configuration: Optional[Dict] = None
     ):
         self.name = name
         self.endpoint = endpoint
@@ -228,6 +229,9 @@ class ComputeCluster:
         self.authentication = authentication if authentication is not None else Authentication(
             AuthenticationIntegration("", ""))
         self.plugins = plugins
+        self.deployment_configuration = deployment_configuration if deployment_configuration is not None else {}
+
+
 
     @classmethod
     def from_json(cls, _json):
@@ -239,7 +243,8 @@ class ComputeCluster:
             node_pools=[NodePool.from_json(np) for np in _json.get('nodePools', list())],
             metadata=_json.get('metadata'),
             authentication=Authentication.from_json(_json.get('authentication', dict())),
-            plugins=_json.get('plugins')
+            plugins=_json.get('plugins'),
+            deployment_configuration=_json.get('deploymentConfiguration'),
         )
 
     def to_json(self):
@@ -251,7 +256,8 @@ class ComputeCluster:
             'nodePools': [np.to_json() for np in self.node_pools],
             'metadata': self.metadata,
             'authentication': self.authentication.to_json(),
-            'plugins': self.plugins
+            'plugins': self.plugins,
+            'deploymentConfiguration': self.deployment_configuration
         }
 
     @classmethod
@@ -265,7 +271,8 @@ class ComputeCluster:
             node_pools=node_pools,
             metadata={},
             authentication=Authentication(AuthenticationIntegration(integration.id, integration.type)),
-            plugins=devops_output['config'].get('plugins')
+            plugins=devops_output['config'].get('plugins'),
+            deployment_configuration=devops_output['config'].get('deploymentConfiguration', {})
         )
 
 
@@ -422,7 +429,7 @@ class KubernetesCompute(Compute):
             metadata=_json.get('metadata'),
             client_api=client_api,
             settings=ComputeSettings.from_json(_json.get('settings', dict())) if _json.get('settings') else None,
-            url=_json.get('url')
+            url=_json.get('url')
         )
 
     def to_json(self):
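The cluster payload gains a free-form deploymentConfiguration section that is round-tripped by from_json and to_json and defaults to an empty dict. A sketch of where the new key sits in the cluster JSON; the other keys are the ones already serialized by ComputeCluster.to_json and the values shown are placeholders:

    cluster_json = {
        'name': 'my-cluster',                       # placeholder values
        'endpoint': 'https://k8s.example.internal',
        'nodePools': [],
        'metadata': {},
        'authentication': {},                       # unchanged shape, omitted here
        'plugins': None,
        'deploymentConfiguration': {}               # new in 1.115.x, defaults to {}
    }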
dtlpy/entities/dataset.py
CHANGED
@@ -22,6 +22,10 @@ class ExportType(str, Enum):
     JSON = "json"
     ZIP = "zip"
 
+class OutputExportType(str, Enum):
+    JSON = "json"
+    ZIP = "zip"
+    FOLDERS = "folders"
 
 class ExpirationOptions:
     """
@@ -703,7 +707,8 @@ class Dataset(entities.BaseEntity):
                timeout: int = 0,
                dataset_lock: bool = False,
                lock_timeout_sec: int = None,
-               export_summary: bool = False):
+               export_summary: bool = False,
+               output_export_type: OutputExportType = None):
         """
         Export dataset items and annotations.
 
@@ -721,6 +726,7 @@ class Dataset(entities.BaseEntity):
         :param bool export_summary: Download dataset export summary
         :param int lock_timeout_sec: Timeout for locking the dataset during export in seconds
         :param entities.ExportType export_type: Type of export ('json' or 'zip')
+        :param entities.OutputExportType output_export_type: Output format ('json', 'zip', or 'folders'). If None, defaults to 'json'
         :param int timeout: Maximum time in seconds to wait for the export to complete
         :return: Exported item
         :rtype: dtlpy.entities.item.Item
@@ -732,7 +738,8 @@ class Dataset(entities.BaseEntity):
             export_item = dataset.export(filters=filters,
                                          include_feature_vectors=True,
                                          include_annotations=True,
-                                         export_type=dl.ExportType.JSON)
+                                         export_type=dl.ExportType.JSON,
+                                         output_export_type=dl.OutputExportType.JSON)
         """
 
         return self.datasets.export(dataset=self,
@@ -746,7 +753,8 @@ class Dataset(entities.BaseEntity):
                                     timeout=timeout,
                                     dataset_lock=dataset_lock,
                                     lock_timeout_sec=lock_timeout_sec,
-                                    export_summary=export_summary)
+                                    export_summary=export_summary,
+                                    output_export_type=output_export_type)
 
     def upload_annotations(self,
                            local_path,
@@ -984,6 +992,7 @@ class Dataset(entities.BaseEntity):
                  dataset_lock=False,
                  lock_timeout_sec=None,
                  export_summary=False,
+                 raise_on_error=False
                  ):
         """
         Download dataset by filters.
@@ -1007,6 +1016,7 @@ class Dataset(entities.BaseEntity):
         :param bool without_relative_path: bool - download items without the relative path from platform
         :param float alpha: opacity value [0 1], default 1
         :param str export_version: `V2` - exported items will have original extension in filename, `V1` - no original extension in filenames
+        :param bool raise_on_error: raise an exception if an error occurs
         :return: `List` of local_path per each downloaded item
 
         **Example**:
@@ -1038,7 +1048,8 @@ class Dataset(entities.BaseEntity):
            export_version=export_version,
            dataset_lock=dataset_lock,
            lock_timeout_sec=lock_timeout_sec,
-           export_summary=export_summary
+           export_summary=export_summary,
+           raise_on_error=raise_on_error
        )
 
     def download_folder(
@@ -1059,6 +1070,7 @@ class Dataset(entities.BaseEntity):
            dataset_lock=False,
            lock_timeout_sec=None,
            export_summary=False,
+           raise_on_error=False
     ):
         """
         Download dataset folder.
@@ -1082,6 +1094,7 @@ class Dataset(entities.BaseEntity):
         :param bool without_relative_path: bool - download items without the relative path from platform
         :param float alpha: opacity value [0 1], default 1
         :param str export_version: `V2` - exported items will have original extension in filename, `V1` - no original extension in filenames
+        :param bool raise_on_error: raise an exception if an error occurs
         :return: `List` of local_path per each downloaded item
 
         **Example**:
@@ -1116,7 +1129,8 @@ class Dataset(entities.BaseEntity):
            export_version=export_version,
            dataset_lock=dataset_lock,
            lock_timeout_sec=lock_timeout_sec,
-           export_summary=export_summary
+           export_summary=export_summary,
+           raise_on_error=raise_on_error
        )
 
     def delete_labels(self, label_names):
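Usage sketch for the new export and download options, based on the updated docstrings; the dataset id and local path are placeholders:

    import dtlpy as dl

    dataset = dl.datasets.get(dataset_id='my-dataset-id')

    # output_export_type controls the layout of the export output
    # ('json', 'zip' or 'folders'); when None it defaults to 'json'
    export_item = dataset.export(include_annotations=True,
                                 export_type=dl.ExportType.JSON,
                                 output_export_type=dl.OutputExportType.FOLDERS)

    # raise_on_error makes the download raise an exception if any item fails
    local_paths = dataset.download(local_path='/tmp/my_dataset',
                                   annotation_options=dl.ViewAnnotationOptions.JSON,
                                   raise_on_error=True)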
dtlpy/entities/driver.py
CHANGED
@@ -51,6 +51,12 @@ class Driver(entities.BaseEntity):
     name = attr.ib()
     id = attr.ib()
     path = attr.ib()
+    bucket_name = attr.ib()
+    end_point = attr.ib()
+    elastic_index = attr.ib()
+    elastic_index_path = attr.ib()
+    directory = attr.ib()
+    mount_path = attr.ib()
     # api
     _client_api = attr.ib(type=ApiClient, repr=False)
     _repositories = attr.ib(repr=False)
@@ -89,7 +95,14 @@ class Driver(entities.BaseEntity):
             name=_json.get('name', None),
             id=_json.get('id', None),
             client_api=client_api,
-            path=_json.get('path', None))
+            path=_json.get('path', None),
+            bucket_name=_json.get('bucketName', None),
+            end_point=_json.get('endpoint', None),
+            elastic_index=_json.get('elasticIndex', None),
+            elastic_index_path=_json.get('elasticIndexPath', None),
+            directory=_json.get('directory', None),
+            mount_path=_json.get('mountPath', None)
+            )
 
         inst.is_fetched = is_fetched
         return inst
@@ -108,7 +121,6 @@ class Driver(entities.BaseEntity):
             attr.fields(Driver).created_at,
             attr.fields(Driver).integration_id,
             attr.fields(Driver).integration_type,
-            attr.fields(Driver).path
         ))
         output_dict['allowExternalDelete'] = self.allow_external_delete
         output_dict['allowExternalModification'] = self.allow_external_modification
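The Driver entity now exposes the storage-specific attributes that from_json parses out of the platform response ('bucketName', 'endpoint', 'elasticIndex', 'elasticIndexPath', 'directory', 'mountPath'). A small reading sketch; the project and driver names are hypothetical:

    import dtlpy as dl

    project = dl.projects.get(project_name='my-project')
    driver = project.drivers.get(driver_name='my-storage-driver')
    print(driver.bucket_name, driver.end_point, driver.directory, driver.mount_path)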
dtlpy/entities/filters.py
CHANGED
@@ -1,10 +1,17 @@
+import numpy as np
 import urllib.parse
 import logging
 import json
 import os
 import io
-
+import copy
+from typing import Generator, Tuple, Optional
+from collections import deque
+from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED
+from bson import ObjectId
+
 
+from enum import Enum
 from .. import exceptions, entities
 
 logger = logging.getLogger(name="dtlpy")
@@ -62,6 +69,8 @@ class FiltersOperations(str, Enum):
     EXISTS = "exists"
     MATCH = "match"
     NIN = "nin"
+    GREATER_THAN_OR_EQUAL = "gte"
+    LESS_THAN_OR_EQUAL = "lte"
 
 
 class FiltersMethod(str, Enum):
@@ -215,9 +224,15 @@ class Filters:
 
     def __override(self, field, values, operator=None):
         if field in self._unique_fields:
+            indices_to_remove = []
             for i_single_filter, single_filter in enumerate(self.and_filter_list):
                 if single_filter.field == field:
-                    self.and_filter_list.pop(i_single_filter)
+                    indices_to_remove.append(i_single_filter)
+
+            # Remove indices in descending order to avoid IndexError
+            # When removing items, indices shift down, so we must remove from highest to lowest
+            for index in sorted(indices_to_remove, reverse=True):
+                self.and_filter_list.pop(index)
         self.and_filter_list.append(SingleFilter(field=field, values=values, operator=operator))
 
     def generate_url_query_params(self, url):
@@ -366,7 +381,11 @@ class Filters:
             query_dict["join"] = self.join
         if "join" in query_dict and "on" not in query_dict["join"]:
             if self.resource == FiltersResource.ITEM:
-                query_dict["join"]["on"] = {"resource": FiltersResource.ANNOTATION.value, "local": "itemId", "forigen": "id"}
+                query_dict["join"]["on"] = {
+                    "resource": FiltersResource.ANNOTATION.value,
+                    "local": "itemId",
+                    "forigen": "id",
+                }
             else:
                 query_dict["join"]["on"] = {"resource": FiltersResource.ITEM.value, "local": "id", "forigen": "itemId"}
 
@@ -599,6 +618,140 @@ class Filters:
         names = [i.name for i in all_filter_items]
         return names
 
+    @staticmethod
+    def _get_split_filters(dataset, filters, max_items, max_workers=4, max_depth=None) -> Generator[dict, None, None]:
+        """
+        Generator that yields filter chunks for large datasets using a bounded
+        thread pool. Splits ranges by id until each subset holds <= max_items.
+
+        :param dataset: Dataset object to get filters for
+        :param filters: Base filters to apply
+        :param max_items: Maximum number of items per filter chunk
+        :param max_workers: Maximum number of threads for parallel processing
+        :param max_depth: Maximum depth of the filter tree. Default calculated by the formula: np.ceil(np.log2(count/max_items) + 3).
+        :yield: Filter payloads covering subsets of items
+        """
+        if max_items <= 0:
+            raise ValueError("_get_split_filters : max_items must be greater than 0")
+
+        if filters is None:
+            filters = entities.Filters()
+
+        from_id, count = Filters._get_first_last_item(
+            items_repo=dataset.items, filters=filters, order_by_direction=FiltersOrderByDirection.ASCENDING
+        )
+        to_id, count = Filters._get_first_last_item(
+            items_repo=dataset.items, filters=filters, order_by_direction=FiltersOrderByDirection.DESCENDING
+        )
+
+        if from_id is None or to_id is None or count == 0:
+            return
+
+        max_depth = max_depth if max_depth is not None else np.ceil(np.log2(count / max_items) + 3)
+
+        def make_filter_dict(range_from_id, range_to_id, strict_from: bool = False):
+            fdict = copy.deepcopy(filters.prepare())
+            lower_op = "$gt" if strict_from else "$gte"
+            fdict["filter"].setdefault("$and", []).extend(
+                [{"id": {lower_op: range_from_id}}, {"id": {"$lte": range_to_id}}]
+            )
+            return fdict
+
+        def task(range_from_id, range_to_id, depth, strict_from: bool):
+            fdict = make_filter_dict(range_from_id, range_to_id, strict_from)
+            range_filters = entities.Filters(custom_filter=fdict, page_size=1)
+            actual_from, count = Filters._get_first_last_item(
+                dataset.items, range_filters, FiltersOrderByDirection.ASCENDING
+            )
+            if count == 0:
+                return ("none", None, None)
+            if count <= max_items or depth >= max_depth:
+                return ("yield", fdict, None)
+            actual_to, count = Filters._get_first_last_item(
+                dataset.items, range_filters, FiltersOrderByDirection.DESCENDING
+            )
+            if not actual_from or not actual_to or actual_from == actual_to:
+                return ("yield", fdict, None)
+            mid = Filters._get_middle_id(actual_from, actual_to)
+            if not mid or mid == actual_from or mid == actual_to:
+                return ("yield", fdict, None)
+            # Left child: [actual_from, mid] inclusive; Right child: (mid, actual_to] exclusive lower bound
+            return (
+                "split",
+                None,
+                (
+                    (actual_from, mid, depth + 1, False),  # left child includes lower bound
+                    (mid, actual_to, depth + 1, True),  # right child excludes midpoint
+                ),
+            )
+
+        pending = deque([(from_id, to_id, 0, False)])
+        futures = set()
+
+        with ThreadPoolExecutor(max_workers=max_workers) as pool:
+            while futures or pending:
+                # Submit all pending tasks
+                while pending:
+                    fr, to, d, strict = pending.popleft()
+                    futures.add(pool.submit(task, fr, to, d, strict))
+
+                if not futures:
+                    break
+
+                done, futures = wait(futures, return_when=FIRST_COMPLETED)
+                for fut in done:
+                    try:
+                        kind, fdict, ranges = fut.result()
+                    except Exception as e:
+                        logger.warning(f"split filters task failed: {e}")
+                        continue
+                    if kind == "yield" and fdict is not None:
+                        yield fdict
+                    elif kind == "split" and ranges is not None:
+                        left, right = ranges
+                        pending.append(left)
+                        pending.append(right)
+
+    @staticmethod
+    def _get_first_last_item(
+            items_repo, filters, order_by_direction=FiltersOrderByDirection.ASCENDING
+    ) -> Tuple[Optional[str], int]:
+        filters_dict = copy.deepcopy(filters.prepare())
+        filters_dict["sort"] = {"id": order_by_direction.value}
+        filters_dict["page"] = 0
+        filters_dict["pageSize"] = 1
+        cloned_filters = entities.Filters(custom_filter=filters_dict)
+
+        try:
+            pages = items_repo.list(filters=cloned_filters)
+            return (pages.items[0].id if pages.items else None, pages.items_count)
+        except Exception:
+            return None, 0
+
+    @staticmethod
+    def _get_middle_id(from_id, to_id):
+        """Calculate middle ObjectId between two ObjectIds with sub-second precision.
+
+        Computes the midpoint in the full 12-byte ObjectId numeric space to avoid
+        second-level rounding inherent to datetime-based construction.
+        """
+        try:
+            # Convert ObjectId strings to integers using base 16 (hexadecimal)
+            start_int = int(str(ObjectId(from_id)), base=16)
+            end_int = int(str(ObjectId(to_id)), base=16)
+            if start_int >= end_int:
+                return from_id
+            mid_int = (start_int + end_int) // 2
+            if mid_int <= start_int:
+                mid_int = start_int + 1
+            if mid_int > end_int:
+                mid_int = end_int
+            # Convert back to 12-byte ObjectId format
+            mid_bytes = mid_int.to_bytes(length=12, byteorder="big")
+            return str(ObjectId(mid_bytes))
+        except Exception:
+            return from_id  # Fallback to from_id if calculation fails
+
 
 class SingleFilter:
     def __init__(self, field, values, operator: FiltersOperations = None):
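The two new operators map to the platform's gte/lte range queries; the private _get_split_filters, _get_first_last_item and _get_middle_id helpers added above are internal utilities for chunking very large datasets by id range. A short usage sketch for the operators (field name and values are illustrative, and dataset is assumed to be an existing dl.Dataset):

    import dtlpy as dl

    filters = dl.Filters(resource=dl.FiltersResource.ITEM)
    filters.add(field='metadata.system.height',
                values=512,
                operator=dl.FiltersOperations.GREATER_THAN_OR_EQUAL)
    filters.add(field='metadata.system.height',
                values=2048,
                operator=dl.FiltersOperations.LESS_THAN_OR_EQUAL)
    pages = dataset.items.list(filters=filters)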
dtlpy/entities/item.py
CHANGED
@@ -1,7 +1,7 @@
-import warnings
 from collections import namedtuple
 from enum import Enum
 import traceback
+import mimetypes
 import logging
 import attr
 import copy
@@ -354,7 +354,10 @@ class Item(entities.BaseEntity):
 
     @property
     def mimetype(self):
-        return self.metadata.get('system', dict()).get('mimetype', None)
+        mimetype = self.metadata.get('system', dict()).get('mimetype', None)
+        if mimetype is None:
+            mimetype = mimetypes.guess_type(self.filename)[0]
+        return mimetype
 
     @property
     def size(self):
@@ -457,6 +460,7 @@ class Item(entities.BaseEntity):
                  dataset_lock=False,
                  lock_timeout_sec=None,
                  export_summary=False,
+                 raise_on_error=False
                  ):
         """
         Download dataset by filters.
@@ -478,6 +482,7 @@ class Item(entities.BaseEntity):
         :param bool with_text: optional - add text to annotations, default = False
         :param float alpha: opacity value [0 1], default 1
         :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
+        :param bool raise_on_error: raise an exception if an error occurs
         :return: generator of local_path per each downloaded item
         :rtype: generator or single item
 
@@ -531,7 +536,8 @@ class Item(entities.BaseEntity):
             filters=filters,
             dataset_lock=dataset_lock,
             lock_timeout_sec=lock_timeout_sec,
-            export_summary=export_summary)
+            export_summary=export_summary,
+            raise_on_error=raise_on_error)
 
     def delete(self):
         """
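Item.mimetype now falls back to Python's standard mimetypes module when the platform metadata has no system.mimetype entry. The fallback behaves like this standalone illustration:

    import mimetypes

    print(mimetypes.guess_type('photo.jpg')[0])    # 'image/jpeg'
    print(mimetypes.guess_type('report.pdf')[0])   # 'application/pdf'
    print(mimetypes.guess_type('unknown.xyz')[0])  # None -> the property returns None as well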
dtlpy/entities/prompt_item.py
CHANGED
@@ -132,7 +132,13 @@ class Prompt:
             elif element['mimetype'] == PromptType.AUDIO:
                 raise NotImplementedError('Audio prompt is not supported yet')
             elif element['mimetype'] == PromptType.VIDEO:
-                raise NotImplementedError('Video prompt is not supported yet')
+                data = {
+                    "type": "video_url",
+                    "video_url": {
+                        "url": element['value']
+                    }
+                }
+                messages.append(data)
             else:
                 raise ValueError(f'Invalid mimetype: {element["mimetype"]}')
         return messages, self.key
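Video prompt elements are no longer rejected; they are serialized into the prompt's messages list as a URL reference. A sketch of the resulting entry (the URL is a placeholder):

    message_entry = {
        "type": "video_url",
        "video_url": {
            "url": "https://example.com/streams/my-clip.mp4"
        }
    }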
dtlpy/entities/service.py
CHANGED
@@ -135,6 +135,7 @@ class KubernetesRuntime(ServiceRuntime):
                  pod_type: InstanceCatalog = DEFAULT_POD_TYPE,
                  num_replicas=DEFAULT_NUM_REPLICAS,
                  concurrency=DEFAULT_CONCURRENCY,
+                 dynamic_concurrency=None,
                  runner_image=None,
                  autoscaler=None,
                  **kwargs):
@@ -147,6 +148,7 @@ class KubernetesRuntime(ServiceRuntime):
         self._proxy_image = kwargs.get('proxyImage', None)
         self.single_agent = kwargs.get('singleAgent', None)
         self.preemptible = kwargs.get('preemptible', None)
+        self.dynamic_concurrency = kwargs.get('dynamicConcurrency', dynamic_concurrency)
 
         self.autoscaler = kwargs.get('autoscaler', autoscaler)
         if self.autoscaler is not None and isinstance(self.autoscaler, dict):
@@ -178,6 +180,9 @@ class KubernetesRuntime(ServiceRuntime):
         if self.preemptible is not None:
             _json['preemptible'] = self.preemptible
 
+        if self.dynamic_concurrency is not None:
+            _json['dynamicConcurrency'] = self.dynamic_concurrency
+
         return _json
 
 
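dynamic_concurrency is passed through to the runtime JSON as 'dynamicConcurrency' and omitted when left as None. A minimal sketch; the diff does not document the accepted value type, so the boolean used here is an assumption:

    import dtlpy as dl

    runtime = dl.KubernetesRuntime(concurrency=10,
                                   dynamic_concurrency=True)  # assumed boolean toggle
    print(runtime.to_json().get('dynamicConcurrency'))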