dtlpy 1.114.16__py3-none-any.whl → 1.115.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. dtlpy/__init__.py +1 -1
  2. dtlpy/__version__.py +1 -1
  3. dtlpy/entities/__init__.py +1 -1
  4. dtlpy/entities/analytic.py +42 -6
  5. dtlpy/entities/codebase.py +1 -5
  6. dtlpy/entities/compute.py +12 -5
  7. dtlpy/entities/dataset.py +19 -5
  8. dtlpy/entities/driver.py +14 -2
  9. dtlpy/entities/filters.py +156 -3
  10. dtlpy/entities/item.py +9 -3
  11. dtlpy/entities/prompt_item.py +7 -1
  12. dtlpy/entities/service.py +5 -0
  13. dtlpy/ml/base_model_adapter.py +407 -263
  14. dtlpy/repositories/commands.py +1 -7
  15. dtlpy/repositories/computes.py +17 -13
  16. dtlpy/repositories/datasets.py +287 -74
  17. dtlpy/repositories/downloader.py +23 -3
  18. dtlpy/repositories/drivers.py +12 -8
  19. dtlpy/repositories/executions.py +1 -3
  20. dtlpy/repositories/features.py +31 -14
  21. dtlpy/repositories/items.py +5 -2
  22. dtlpy/repositories/models.py +16 -4
  23. dtlpy/repositories/uploader.py +22 -12
  24. dtlpy/services/api_client.py +6 -3
  25. dtlpy/services/reporter.py +1 -1
  26. {dtlpy-1.114.16.dist-info → dtlpy-1.115.44.dist-info}/METADATA +15 -12
  27. {dtlpy-1.114.16.dist-info → dtlpy-1.115.44.dist-info}/RECORD +34 -34
  28. {dtlpy-1.114.16.data → dtlpy-1.115.44.data}/scripts/dlp +0 -0
  29. {dtlpy-1.114.16.data → dtlpy-1.115.44.data}/scripts/dlp.bat +0 -0
  30. {dtlpy-1.114.16.data → dtlpy-1.115.44.data}/scripts/dlp.py +0 -0
  31. {dtlpy-1.114.16.dist-info → dtlpy-1.115.44.dist-info}/WHEEL +0 -0
  32. {dtlpy-1.114.16.dist-info → dtlpy-1.115.44.dist-info}/entry_points.txt +0 -0
  33. {dtlpy-1.114.16.dist-info → dtlpy-1.115.44.dist-info}/licenses/LICENSE +0 -0
  34. {dtlpy-1.114.16.dist-info → dtlpy-1.115.44.dist-info}/top_level.txt +0 -0
dtlpy/__init__.py CHANGED
@@ -108,7 +108,7 @@ from .entities import (
     # compute
     ClusterProvider, ComputeType, ComputeStatus, Toleration, DeploymentResource, DeploymentResources,
     NodePool, AuthenticationIntegration, Authentication, ComputeCluster, ComputeContext, Compute, KubernetesCompute,
-    ServiceDriver
+    ServiceDriver, ExportType, OutputExportType
 )
 from .ml import BaseModelAdapter
 from .utilities import Converter, BaseServiceRunner, Progress, Context, AnnotationFormat
dtlpy/__version__.py CHANGED
@@ -1 +1 @@
-version = '1.114.16'
+version = '1.115.44'
dtlpy/entities/__init__.py CHANGED
@@ -22,7 +22,7 @@ from .trigger import Trigger, TriggerResource, TriggerAction, TriggerExecutionMode, \
     TriggerType
 from .project import Project, MemberRole
 from .artifact import ItemArtifact, LocalArtifact, LinkArtifact, ArtifactType, Artifact
-from .dataset import Dataset, ExpirationOptions, IndexDriver, ExportType
+from .dataset import Dataset, ExpirationOptions, IndexDriver, ExportType, OutputExportType
 from .codebase import Codebase
 from .annotation import Annotation, FrameAnnotation, ViewAnnotationOptions, AnnotationStatus, AnnotationType, \
     ExportVersion
dtlpy/entities/analytic.py CHANGED
@@ -75,7 +75,15 @@ class ServiceSample(BaseSample):
                  service_type: entities.ServiceType = None,
                  interval: int = None,
                  driver_id: str = None,
-                 other_keys: dict = None
+                 other_keys: dict = None,
+                 concurrency_limit: int = None,
+                 concurrency_count: int = None,
+                 cpu_limit: int = None,
+                 ram_limit: int = None,
+                 gpu_limit: int = None,
+                 gpu: int = None,
+                 gpu_memory: int = None,
+                 gpu_memory_limit: int = None
                  ):
         super().__init__(
             start_time=start_time,
@@ -102,6 +110,14 @@ class ServiceSample(BaseSample):
         self.service_type = service_type if service_type is not None else entities.ServiceType.REGULAR
         self.interval = interval
         self.driver_id = driver_id
+        self.concurrency_limit = concurrency_limit
+        self.concurrency_count = concurrency_count
+        self.cpu_limit = cpu_limit
+        self.ram_limit = ram_limit
+        self.gpu_limit = gpu_limit
+        self.gpu = gpu
+        self.gpu_memory = gpu_memory
+        self.gpu_memory_limit = gpu_memory_limit

     def to_json(self):
         _json = super().to_json()
@@ -120,7 +136,15 @@ class ServiceSample(BaseSample):
             'queueSize': self.queue_size,
             'numExecutions': self.num_executions,
             'interval': self.interval,
-            'driverId': self.driver_id
+            'driverId': self.driver_id,
+            'concurrencyLimit': self.concurrency_limit,
+            'concurrencyCount': self.concurrency_count,
+            'cpuLimit': self.cpu_limit,
+            'ramLimit': self.ram_limit,
+            'gpuLimit': self.gpu_limit,
+            'gpu': self.gpu,
+            'gpuMemory': self.gpu_memory,
+            'gpuMemoryLimit': self.gpu_memory_limit
         })
         _json.update({
             'entityType': self.entity_type
@@ -153,7 +177,15 @@ class ServiceSample(BaseSample):
             num_executions=_json.get('data', {}).get('numExecutions', None),
             service_type=_json.get('type', entities.ServiceType.REGULAR),
             interval=_json.get('data', {}).get('interval', None),
-            driver_id=_json.get('data', {}).get('driverId', None)
+            driver_id=_json.get('data', {}).get('driverId', None),
+            concurrency_limit=_json.get('data', {}).get('concurrencyLimit', None),
+            concurrency_count=_json.get('data', {}).get('concurrencyCount', None),
+            cpu_limit=_json.get('data', {}).get('cpuLimit', None),
+            ram_limit=_json.get('data', {}).get('ramLimit', None),
+            gpu_limit=_json.get('data', {}).get('gpuLimit', None),
+            gpu=_json.get('data', {}).get('gpu', None),
+            gpu_memory=_json.get('data', {}).get('gpuMemory', None),
+            gpu_memory_limit=_json.get('data', {}).get('gpuMemoryLimit', None)
         )
         return inst

@@ -177,7 +209,8 @@ class ExecutionSample(BaseSample):
                  trigger_id=None,
                  function_name=None,
                  duration=None,
-                 other_keys: dict = None
+                 other_keys: dict = None,
+                 function_duration=None
                  ):
         super().__init__(
             start_time=start_time,
@@ -199,6 +232,7 @@ class ExecutionSample(BaseSample):
         self.trigger_id = trigger_id
         self.function_name = function_name
         self.duration = duration
+        self.function_duration = function_duration

     def to_json(self):
         _json = super().to_json()
@@ -212,7 +246,8 @@ class ExecutionSample(BaseSample):
         })
         _json['data'].update({
             'functionName': self.function_name,
-            'duration': self.duration
+            'duration': self.duration,
+            'functionDuration': self.function_duration,
         })
         _json['context'] = {k: v for k, v in _json['context'].items() if v is not None}
         _json['data'] = {k: v for k, v in _json['data'].items() if v is not None}
@@ -237,7 +272,8 @@ class ExecutionSample(BaseSample):
             action=_json.get('action', None),
             status=_json.get('data', {}).get('status', None),
             function_name=_json.get('data', {}).get('functionName', None),
-            duration=_json.get('data', {}).get('duration', None)
+            duration=_json.get('data', {}).get('duration', None),
+            function_duration=_json.get('data', {}).get('functionDuration', None)
         )
         return inst

dtlpy/entities/codebase.py CHANGED
@@ -59,12 +59,8 @@ class GitCodebase(entities.DlEntity):
     @property
     def codebases(self):
         if self._codebases is None:
-            if self._item is not None:
-                dataset = self.item.dataset
-            else:
-                dataset = None
             self._codebases = repositories.Codebases(client_api=self.client_api,
-                                                     dataset=dataset)
+                                                     dataset=None)
         assert isinstance(self._codebases, repositories.Codebases)
         return self._codebases

dtlpy/entities/compute.py CHANGED
@@ -217,7 +217,8 @@ class ComputeCluster:
                  node_pools: Optional[List[NodePool]] = None,
                  metadata: Optional[Dict] = None,
                  authentication: Optional[Authentication] = None,
-                 plugins: Optional[dict] = None
+                 plugins: Optional[dict] = None,
+                 deployment_configuration: Optional[Dict] = None
                  ):
         self.name = name
         self.endpoint = endpoint
@@ -228,6 +229,9 @@ class ComputeCluster:
         self.authentication = authentication if authentication is not None else Authentication(
             AuthenticationIntegration("", ""))
         self.plugins = plugins
+        self.deployment_configuration = deployment_configuration if deployment_configuration is not None else {}
+
+

     @classmethod
     def from_json(cls, _json):
@@ -239,7 +243,8 @@ class ComputeCluster:
             node_pools=[NodePool.from_json(np) for np in _json.get('nodePools', list())],
             metadata=_json.get('metadata'),
             authentication=Authentication.from_json(_json.get('authentication', dict())),
-            plugins=_json.get('plugins')
+            plugins=_json.get('plugins'),
+            deployment_configuration=_json.get('deploymentConfiguration'),
         )

     def to_json(self):
@@ -251,7 +256,8 @@ class ComputeCluster:
             'nodePools': [np.to_json() for np in self.node_pools],
             'metadata': self.metadata,
             'authentication': self.authentication.to_json(),
-            'plugins': self.plugins
+            'plugins': self.plugins,
+            'deploymentConfiguration': self.deployment_configuration
         }

     @classmethod
@@ -265,7 +271,8 @@ class ComputeCluster:
             node_pools=node_pools,
             metadata={},
             authentication=Authentication(AuthenticationIntegration(integration.id, integration.type)),
-            plugins=devops_output['config'].get('plugins')
+            plugins=devops_output['config'].get('plugins'),
+            deployment_configuration=devops_output['config'].get('deploymentConfiguration', {})
         )


@@ -422,7 +429,7 @@ class KubernetesCompute(Compute):
             metadata=_json.get('metadata'),
             client_api=client_api,
             settings=ComputeSettings.from_json(_json.get('settings', dict())) if _json.get('settings') else None,
-            url=_json.get('url'),
+            url=_json.get('url')
         )

     def to_json(self):
dtlpy/entities/dataset.py CHANGED
@@ -22,6 +22,10 @@ class ExportType(str, Enum):
     JSON = "json"
     ZIP = "zip"

+class OutputExportType(str, Enum):
+    JSON = "json"
+    ZIP = "zip"
+    FOLDERS = "folders"

 class ExpirationOptions:
     """
@@ -703,7 +707,8 @@ class Dataset(entities.BaseEntity):
                timeout: int = 0,
                dataset_lock: bool = False,
                lock_timeout_sec: int = None,
-               export_summary: bool = False):
+               export_summary: bool = False,
+               output_export_type: OutputExportType = None):
        """
        Export dataset items and annotations.

@@ -721,6 +726,7 @@ class Dataset(entities.BaseEntity):
        :param bool export_summary: Download dataset export summary
        :param int lock_timeout_sec: Timeout for locking the dataset during export in seconds
        :param entities.ExportType export_type: Type of export ('json' or 'zip')
+       :param entities.OutputExportType output_export_type: Output format ('json', 'zip', or 'folders'). If None, defaults to 'json'
        :param int timeout: Maximum time in seconds to wait for the export to complete
        :return: Exported item
        :rtype: dtlpy.entities.item.Item
@@ -732,7 +738,8 @@ class Dataset(entities.BaseEntity):
            export_item = dataset.export(filters=filters,
                                         include_feature_vectors=True,
                                         include_annotations=True,
-                                        export_type=dl.ExportType.JSON)
+                                        export_type=dl.ExportType.JSON,
+                                        output_export_type=dl.OutputExportType.JSON)
        """

        return self.datasets.export(dataset=self,
@@ -746,7 +753,8 @@ class Dataset(entities.BaseEntity):
                                    timeout=timeout,
                                    dataset_lock=dataset_lock,
                                    lock_timeout_sec=lock_timeout_sec,
-                                   export_summary=export_summary)
+                                   export_summary=export_summary,
+                                   output_export_type=output_export_type)

    def upload_annotations(self,
                           local_path,
@@ -984,6 +992,7 @@ class Dataset(entities.BaseEntity):
                 dataset_lock=False,
                 lock_timeout_sec=None,
                 export_summary=False,
+                raise_on_error=False
                 ):
        """
        Download dataset by filters.
@@ -1007,6 +1016,7 @@ class Dataset(entities.BaseEntity):
        :param bool without_relative_path: bool - download items without the relative path from platform
        :param float alpha: opacity value [0 1], default 1
        :param str export_version: `V2` - exported items will have original extension in filename, `V1` - no original extension in filenames
+       :param bool raise_on_error: raise an exception if an error occurs
        :return: `List` of local_path per each downloaded item

        **Example**:
@@ -1038,7 +1048,8 @@ class Dataset(entities.BaseEntity):
                                            export_version=export_version,
                                            dataset_lock=dataset_lock,
                                            lock_timeout_sec=lock_timeout_sec,
-                                           export_summary=export_summary
+                                           export_summary=export_summary,
+                                           raise_on_error=raise_on_error
                                            )

    def download_folder(
@@ -1059,6 +1070,7 @@ class Dataset(entities.BaseEntity):
            dataset_lock=False,
            lock_timeout_sec=None,
            export_summary=False,
+           raise_on_error=False
    ):
        """
        Download dataset folder.
@@ -1082,6 +1094,7 @@ class Dataset(entities.BaseEntity):
        :param bool without_relative_path: bool - download items without the relative path from platform
        :param float alpha: opacity value [0 1], default 1
        :param str export_version: `V2` - exported items will have original extension in filename, `V1` - no original extension in filenames
+       :param bool raise_on_error: raise an exception if an error occurs
        :return: `List` of local_path per each downloaded item

        **Example**:
@@ -1116,7 +1129,8 @@ class Dataset(entities.BaseEntity):
                                            export_version=export_version,
                                            dataset_lock=dataset_lock,
                                            lock_timeout_sec=lock_timeout_sec,
-                                           export_summary=export_summary
+                                           export_summary=export_summary,
+                                           raise_on_error=raise_on_error
                                            )

    def delete_labels(self, label_names):
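
Note: the new `output_export_type` argument controls the layout of the exported payload ('json', 'zip', or 'folders') independently of `export_type`, and `raise_on_error` makes downloads raise instead of only logging failures. A minimal usage sketch, assuming an existing dataset id (hypothetical value) and following the signatures shown in the diff above:

    import dtlpy as dl

    dataset = dl.datasets.get(dataset_id='my-dataset-id')  # hypothetical id
    filters = dl.Filters(field='dir', values='/train')

    # Export annotations as JSON, but ask for the output laid out as
    # per-item folders (new OutputExportType.FOLDERS value).
    export_item = dataset.export(filters=filters,
                                 include_annotations=True,
                                 export_type=dl.ExportType.JSON,
                                 output_export_type=dl.OutputExportType.FOLDERS)

    # New raise_on_error flag: download failures raise instead of being skipped.
    dataset.download(local_path='/tmp/my-dataset', raise_on_error=True)
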
dtlpy/entities/driver.py CHANGED
@@ -51,6 +51,12 @@ class Driver(entities.BaseEntity):
     name = attr.ib()
     id = attr.ib()
     path = attr.ib()
+    bucket_name = attr.ib()
+    end_point = attr.ib()
+    elastic_index = attr.ib()
+    elastic_index_path = attr.ib()
+    directory = attr.ib()
+    mount_path = attr.ib()
     # api
     _client_api = attr.ib(type=ApiClient, repr=False)
     _repositories = attr.ib(repr=False)
@@ -89,7 +95,14 @@ class Driver(entities.BaseEntity):
                    name=_json.get('name', None),
                    id=_json.get('id', None),
                    client_api=client_api,
-                   path=_json.get('path', None))
+                   path=_json.get('path', None),
+                   bucket_name=_json.get('bucketName', None),
+                   end_point=_json.get('endpoint', None),
+                   elastic_index=_json.get('elasticIndex', None),
+                   elastic_index_path=_json.get('elasticIndexPath', None),
+                   directory=_json.get('directory', None),
+                   mount_path=_json.get('mountPath', None)
+                   )

        inst.is_fetched = is_fetched
        return inst
@@ -108,7 +121,6 @@ class Driver(entities.BaseEntity):
                                          attr.fields(Driver).created_at,
                                          attr.fields(Driver).integration_id,
                                          attr.fields(Driver).integration_type,
-                                         attr.fields(Driver).path
                                          ))
        output_dict['allowExternalDelete'] = self.allow_external_delete
        output_dict['allowExternalModification'] = self.allow_external_modification
dtlpy/entities/filters.py CHANGED
@@ -1,10 +1,17 @@
+import numpy as np
 import urllib.parse
 import logging
 import json
 import os
 import io
-from enum import Enum
+import copy
+from typing import Generator, Tuple, Optional
+from collections import deque
+from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED
+from bson import ObjectId
+

+from enum import Enum
 from .. import exceptions, entities

 logger = logging.getLogger(name="dtlpy")
@@ -62,6 +69,8 @@ class FiltersOperations(str, Enum):
     EXISTS = "exists"
     MATCH = "match"
     NIN = "nin"
+    GREATER_THAN_OR_EQUAL = "gte"
+    LESS_THAN_OR_EQUAL = "lte"


 class FiltersMethod(str, Enum):
@@ -215,9 +224,15 @@ class Filters:

     def __override(self, field, values, operator=None):
         if field in self._unique_fields:
+            indices_to_remove = []
             for i_single_filter, single_filter in enumerate(self.and_filter_list):
                 if single_filter.field == field:
-                    self.and_filter_list.pop(i_single_filter)
+                    indices_to_remove.append(i_single_filter)
+
+            # Remove indices in descending order to avoid IndexError
+            # When removing items, indices shift down, so we must remove from highest to lowest
+            for index in sorted(indices_to_remove, reverse=True):
+                self.and_filter_list.pop(index)
         self.and_filter_list.append(SingleFilter(field=field, values=values, operator=operator))

     def generate_url_query_params(self, url):
@@ -366,7 +381,11 @@ class Filters:
             query_dict["join"] = self.join
         if "join" in query_dict and "on" not in query_dict["join"]:
             if self.resource == FiltersResource.ITEM:
-                query_dict["join"]["on"] = {"resource": FiltersResource.ANNOTATION.value, "local": "itemId", "forigen": "id"}
+                query_dict["join"]["on"] = {
+                    "resource": FiltersResource.ANNOTATION.value,
+                    "local": "itemId",
+                    "forigen": "id",
+                }
             else:
                 query_dict["join"]["on"] = {"resource": FiltersResource.ITEM.value, "local": "id", "forigen": "itemId"}

@@ -599,6 +618,140 @@ class Filters:
         names = [i.name for i in all_filter_items]
         return names

+    @staticmethod
+    def _get_split_filters(dataset, filters, max_items, max_workers=4, max_depth=None) -> Generator[dict, None, None]:
+        """
+        Generator that yields filter chunks for large datasets using a bounded
+        thread pool. Splits ranges by id until each subset holds <= max_items.
+
+        :param dataset: Dataset object to get filters for
+        :param filters: Base filters to apply
+        :param max_items: Maximum number of items per filter chunk
+        :param max_workers: Maximum number of threads for parallel processing
+        :param max_depth: Maximum depth of the filter tree. Default calculated by the formula: np.ceil(np.log2(count/max_items) + 3).
+        :yield: Filter payloads covering subsets of items
+        """
+        if max_items <= 0:
+            raise ValueError("_get_split_filters : max_items must be greater than 0")
+
+        if filters is None:
+            filters = entities.Filters()
+
+        from_id, count = Filters._get_first_last_item(
+            items_repo=dataset.items, filters=filters, order_by_direction=FiltersOrderByDirection.ASCENDING
+        )
+        to_id, count = Filters._get_first_last_item(
+            items_repo=dataset.items, filters=filters, order_by_direction=FiltersOrderByDirection.DESCENDING
+        )
+
+        if from_id is None or to_id is None or count == 0:
+            return
+
+        max_depth = max_depth if max_depth is not None else np.ceil(np.log2(count / max_items) + 3)
+
+        def make_filter_dict(range_from_id, range_to_id, strict_from: bool = False):
+            fdict = copy.deepcopy(filters.prepare())
+            lower_op = "$gt" if strict_from else "$gte"
+            fdict["filter"].setdefault("$and", []).extend(
+                [{"id": {lower_op: range_from_id}}, {"id": {"$lte": range_to_id}}]
+            )
+            return fdict
+
+        def task(range_from_id, range_to_id, depth, strict_from: bool):
+            fdict = make_filter_dict(range_from_id, range_to_id, strict_from)
+            range_filters = entities.Filters(custom_filter=fdict, page_size=1)
+            actual_from, count = Filters._get_first_last_item(
+                dataset.items, range_filters, FiltersOrderByDirection.ASCENDING
+            )
+            if count == 0:
+                return ("none", None, None)
+            if count <= max_items or depth >= max_depth:
+                return ("yield", fdict, None)
+            actual_to, count = Filters._get_first_last_item(
+                dataset.items, range_filters, FiltersOrderByDirection.DESCENDING
+            )
+            if not actual_from or not actual_to or actual_from == actual_to:
+                return ("yield", fdict, None)
+            mid = Filters._get_middle_id(actual_from, actual_to)
+            if not mid or mid == actual_from or mid == actual_to:
+                return ("yield", fdict, None)
+            # Left child: [actual_from, mid] inclusive; Right child: (mid, actual_to] exclusive lower bound
+            return (
+                "split",
+                None,
+                (
+                    (actual_from, mid, depth + 1, False),  # left child includes lower bound
+                    (mid, actual_to, depth + 1, True),  # right child excludes midpoint
+                ),
+            )
+
+        pending = deque([(from_id, to_id, 0, False)])
+        futures = set()
+
+        with ThreadPoolExecutor(max_workers=max_workers) as pool:
+            while futures or pending:
+                # Submit all pending tasks
+                while pending:
+                    fr, to, d, strict = pending.popleft()
+                    futures.add(pool.submit(task, fr, to, d, strict))
+
+                if not futures:
+                    break
+
+                done, futures = wait(futures, return_when=FIRST_COMPLETED)
+                for fut in done:
+                    try:
+                        kind, fdict, ranges = fut.result()
+                    except Exception as e:
+                        logger.warning(f"split filters task failed: {e}")
+                        continue
+                    if kind == "yield" and fdict is not None:
+                        yield fdict
+                    elif kind == "split" and ranges is not None:
+                        left, right = ranges
+                        pending.append(left)
+                        pending.append(right)
+
+    @staticmethod
+    def _get_first_last_item(
+        items_repo, filters, order_by_direction=FiltersOrderByDirection.ASCENDING
+    ) -> Tuple[Optional[str], int]:
+        filters_dict = copy.deepcopy(filters.prepare())
+        filters_dict["sort"] = {"id": order_by_direction.value}
+        filters_dict["page"] = 0
+        filters_dict["pageSize"] = 1
+        cloned_filters = entities.Filters(custom_filter=filters_dict)
+
+        try:
+            pages = items_repo.list(filters=cloned_filters)
+            return (pages.items[0].id if pages.items else None, pages.items_count)
+        except Exception:
+            return None, 0
+
+    @staticmethod
+    def _get_middle_id(from_id, to_id):
+        """Calculate middle ObjectId between two ObjectIds with sub-second precision.
+
+        Computes the midpoint in the full 12-byte ObjectId numeric space to avoid
+        second-level rounding inherent to datetime-based construction.
+        """
+        try:
+            # Convert ObjectId strings to integers using base 16 (hexadecimal)
+            start_int = int(str(ObjectId(from_id)), base=16)
+            end_int = int(str(ObjectId(to_id)), base=16)
+            if start_int >= end_int:
+                return from_id
+            mid_int = (start_int + end_int) // 2
+            if mid_int <= start_int:
+                mid_int = start_int + 1
+            if mid_int > end_int:
+                mid_int = end_int
+            # Convert back to 12-byte ObjectId format
+            mid_bytes = mid_int.to_bytes(length=12, byteorder="big")
+            return str(ObjectId(mid_bytes))
+        except Exception:
+            return from_id  # Fallback to from_id if calculation fails
+

 class SingleFilter:
     def __init__(self, field, values, operator: FiltersOperations = None):
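
Note: the new `GREATER_THAN_OR_EQUAL` / `LESS_THAN_OR_EQUAL` operators plug into the existing `Filters.add` API. A short sketch, assuming a `dataset` already fetched via `dl.datasets.get(...)` and a numeric metadata field to compare against:

    import dtlpy as dl

    filters = dl.Filters(resource=dl.FiltersResource.ITEM)
    # New in this release: gte / lte comparison operators.
    filters.add(field='metadata.system.height',
                values=512,
                operator=dl.FiltersOperations.GREATER_THAN_OR_EQUAL)
    filters.add(field='metadata.system.height',
                values=2048,
                operator=dl.FiltersOperations.LESS_THAN_OR_EQUAL)

    pages = dataset.items.list(filters=filters)
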
dtlpy/entities/item.py CHANGED
@@ -1,7 +1,7 @@
-import warnings
 from collections import namedtuple
 from enum import Enum
 import traceback
+import mimetypes
 import logging
 import attr
 import copy
@@ -354,7 +354,10 @@ class Item(entities.BaseEntity):

     @property
     def mimetype(self):
-        return self.metadata.get('system', dict()).get('mimetype', None)
+        mimetype = self.metadata.get('system', dict()).get('mimetype', None)
+        if mimetype is None:
+            mimetype = mimetypes.guess_type(self.filename)[0]
+        return mimetype

     @property
     def size(self):
@@ -457,6 +460,7 @@ class Item(entities.BaseEntity):
                 dataset_lock=False,
                 lock_timeout_sec=None,
                 export_summary=False,
+                raise_on_error=False
                 ):
        """
        Download dataset by filters.
@@ -478,6 +482,7 @@ class Item(entities.BaseEntity):
        :param bool with_text: optional - add text to annotations, default = False
        :param float alpha: opacity value [0 1], default 1
        :param str export_version: exported items will have original extension in filename, `V1` - no original extension in filenames
+       :param bool raise_on_error: raise an exception if an error occurs
        :return: generator of local_path per each downloaded item
        :rtype: generator or single item

@@ -531,7 +536,8 @@ class Item(entities.BaseEntity):
                                          filters=filters,
                                          dataset_lock=dataset_lock,
                                          lock_timeout_sec=lock_timeout_sec,
-                                         export_summary=export_summary)
+                                         export_summary=export_summary,
+                                         raise_on_error=raise_on_error)

    def delete(self):
        """
dtlpy/entities/prompt_item.py CHANGED
@@ -132,7 +132,13 @@ class Prompt:
             elif element['mimetype'] == PromptType.AUDIO:
                 raise NotImplementedError('Audio prompt is not supported yet')
             elif element['mimetype'] == PromptType.VIDEO:
-                raise NotImplementedError('Video prompt is not supported yet')
+                data = {
+                    "type": "video_url",
+                    "video_url": {
+                        "url": element['value']
+                    }
+                }
+                messages.append(data)
             else:
                 raise ValueError(f'Invalid mimetype: {element["mimetype"]}')
         return messages, self.key
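
Note: video prompt elements are no longer rejected; the branch above emits an OpenAI-style `video_url` message part. A hedged sketch of how a video element might be added (the `add_element` call mirrors the text/image pattern and is an assumption here, not confirmed by this diff):

    import dtlpy as dl

    prompt = dl.Prompt(key='1')
    # Assumed to follow the same pattern as TEXT/IMAGE elements.
    prompt.add_element(mimetype=dl.PromptType.VIDEO,
                       value='https://example.com/clip.mp4')

    # With this change, the prompt's messages include a part shaped like:
    # {"type": "video_url", "video_url": {"url": "https://example.com/clip.mp4"}}
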
dtlpy/entities/service.py CHANGED
@@ -135,6 +135,7 @@ class KubernetesRuntime(ServiceRuntime):
                  pod_type: InstanceCatalog = DEFAULT_POD_TYPE,
                  num_replicas=DEFAULT_NUM_REPLICAS,
                  concurrency=DEFAULT_CONCURRENCY,
+                 dynamic_concurrency=None,
                  runner_image=None,
                  autoscaler=None,
                  **kwargs):
@@ -147,6 +148,7 @@ class KubernetesRuntime(ServiceRuntime):
         self._proxy_image = kwargs.get('proxyImage', None)
         self.single_agent = kwargs.get('singleAgent', None)
         self.preemptible = kwargs.get('preemptible', None)
+        self.dynamic_concurrency = kwargs.get('dynamicConcurrency', dynamic_concurrency)

         self.autoscaler = kwargs.get('autoscaler', autoscaler)
         if self.autoscaler is not None and isinstance(self.autoscaler, dict):
@@ -178,6 +180,9 @@ class KubernetesRuntime(ServiceRuntime):
         if self.preemptible is not None:
             _json['preemptible'] = self.preemptible

+        if self.dynamic_concurrency is not None:
+            _json['dynamicConcurrency'] = self.dynamic_concurrency
+
         return _json

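
Note: `KubernetesRuntime` now accepts a `dynamic_concurrency` flag that is serialized as `dynamicConcurrency` only when set. A minimal sketch of the new field, following the constructor and `to_json` shown in the diff above:

    import dtlpy as dl

    runtime = dl.KubernetesRuntime(
        concurrency=10,
        dynamic_concurrency=True,  # new in this release; omitted from JSON when None
    )
    print(runtime.to_json().get('dynamicConcurrency'))  # True
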