PyPI - dtlpy - Versions diffs - 1.117.6__py3-none-any.whl → 1.118.13__py3-none-any.whl - Mend

dtlpy 1.117.6__py3-none-any.whl → 1.118.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

dtlpy/__version__.py +1 -1
dtlpy/dlp/command_executor.py +55 -5
dtlpy/entities/annotation.py +1 -1
dtlpy/entities/app.py +1 -1
dtlpy/entities/compute.py +1 -0
dtlpy/entities/dataset.py +17 -2
dtlpy/entities/model.py +1 -1
dtlpy/entities/paged_entities.py +7 -3
dtlpy/entities/service.py +4 -4
dtlpy/miscellaneous/__init__.py +1 -0
dtlpy/miscellaneous/path_utils.py +264 -0
dtlpy/ml/base_model_adapter.py +32 -31
dtlpy/repositories/annotations.py +1 -4
dtlpy/repositories/apps.py +12 -13
dtlpy/repositories/feature_sets.py +144 -85
dtlpy/repositories/packages.py +9 -0
dtlpy/repositories/projects.py +1 -3
dtlpy/services/api_client.py +20 -4
dtlpy/services/check_sdk.py +1 -4
dtlpy/services/logins.py +21 -17
dtlpy/utilities/videos/videos.py +4 -0
{dtlpy-1.117.6.dist-info → dtlpy-1.118.13.dist-info}/METADATA +14 -15
{dtlpy-1.117.6.dist-info → dtlpy-1.118.13.dist-info}/RECORD +30 -31
{dtlpy-1.117.6.dist-info → dtlpy-1.118.13.dist-info}/WHEEL +1 -1
{dtlpy-1.117.6.dist-info → dtlpy-1.118.13.dist-info}/top_level.txt +0 -1
tests/features/__init__.py +0 -0
tests/features/environment.py +0 -551
{dtlpy-1.117.6.data → dtlpy-1.118.13.data}/scripts/dlp +0 -0
{dtlpy-1.117.6.data → dtlpy-1.118.13.data}/scripts/dlp.bat +0 -0
{dtlpy-1.117.6.data → dtlpy-1.118.13.data}/scripts/dlp.py +0 -0
{dtlpy-1.117.6.dist-info → dtlpy-1.118.13.dist-info}/entry_points.txt +0 -0
{dtlpy-1.117.6.dist-info → dtlpy-1.118.13.dist-info}/licenses/LICENSE +0 -0

dtlpy/__version__.py CHANGED Viewed

	@@ -1 +1 @@
1	- version = '1.117.6'
1	+ version = '1.118.13'

dtlpy/dlp/command_executor.py CHANGED Viewed

@@ -6,7 +6,7 @@ import os
 import sys
 import jwt
-from .. import exceptions, entities, repositories, utilities, assets
+from .. import exceptions, entities, repositories, utilities, assets, miscellaneous
 logger = logging.getLogger(name='dtlpy')
@@ -76,8 +76,16 @@ class CommandExecutor:
         url = 'dtlpy'
         if args.url is None:
             try:
-                payload = jwt.decode(self.dl.client_api.token, algorithms=['HS256'],
-                                     verify=False, options={'verify_signature': False})
+                # oxsec-disable jwt-signature-disabled - Client-side SDK: signature verification disabled intentionally to check admin role; server validates on API calls
+                payload = jwt.decode(
+                    self.dl.client_api.token,
+                    options={
+                        "verify_signature": False,
+                        "verify_exp": False,
+                        "verify_aud": False,
+                        "verify_iss": False,
+                    }
+                )
                 if 'admin' in payload['https://dataloop.ai/authorization']['roles']:
                     url = "https://storage.googleapis.com/dtlpy/dev/dtlpy-latest-py3-none-any.whl"
             except Exception:
@@ -235,6 +243,13 @@ class CommandExecutor:
             project = self.dl.projects.get(project_name=args.project_name)
             dataset = project.datasets.get(dataset_name=args.dataset_name)
+            # Validate local_path and local_annotations_path to prevent path traversal
+            miscellaneous.PathUtils.validate_paths(
+                [args.local_path, args.local_annotations_path],
+                base_path=os.getcwd(),
+                must_exist=True
+            )
             dataset.items.upload(local_path=args.local_path,
                                  remote_path=args.remote_path,
                                  file_types=args.file_types,
@@ -277,6 +292,13 @@ class CommandExecutor:
                             remote_path.pop(remote_path.index(item))
                     filters.add(field="dir", values=remote_path, operator=entities.FiltersOperations.IN, method='or')
+            # Validate local_path to prevent path traversal
+            miscellaneous.PathUtils.validate_directory_path(
+                args.local_path,
+                base_path=os.getcwd(),
+                must_exist=False
+            )
             if not args.without_binaries:
                 dataset.items.download(filters=filters,
                                        local_path=args.local_path,
@@ -325,6 +347,9 @@ class CommandExecutor:
                     args.split_seconds = int(args.split_seconds)
                 if isinstance(args.split_times, str):
                     args.split_times = [int(sec) for sec in args.split_times.split(",")]
+                # Validate filepath to prevent path traversal
+                miscellaneous.PathUtils.validate_file_path(args.filename)
                 self.dl.utilities.videos.Videos.split_and_upload(
                     project_name=args.project_name,
                     dataset_name=args.dataset_name,
@@ -407,6 +432,8 @@ class CommandExecutor:
     def deploy(self, args):
         project = self.dl.projects.get(project_name=args.project_name)
         json_filepath = args.json_file
+        # Validate file path to prevent path traversal
+        miscellaneous.PathUtils.validate_file_path(json_filepath)
         deployed_services, package = self.dl.packages.deploy_from_file(project=project, json_filepath=json_filepath)
         logger.info("Successfully deployed {} from file: {}\nServices: {}".format(len(deployed_services),
                                                                                   json_filepath,
@@ -464,6 +491,13 @@ class CommandExecutor:
         elif args.packages == "push":
             packages = self.utils.get_packages_repo(args=args)
+            # Validate src_path to prevent path traversal
+            miscellaneous.PathUtils.validate_directory_path(
+                args.src_path,
+                base_path=os.getcwd(),
+                must_exist=True
+            )
             package = packages.push(src_path=args.src_path,
                                     package_name=args.package_name,
                                     checkout=args.checkout)
@@ -568,7 +602,13 @@ class CommandExecutor:
                 answers = inquirer.prompt(questions)
                 #####
                 # create a dir for that panel
-                os.makedirs(answers.get('name'), exist_ok=True)
+                # Validate panel name to prevent path traversal
+                panel_name = answers.get('name')
+                # Validate panel name to prevent path traversal
+                miscellaneous.PathUtils.validate_directory_name(panel_name)
+                # Create directory in current working directory
+                panel_dir = os.path.join(os.getcwd(), panel_name)
+                os.makedirs(panel_dir, exist_ok=True)
                 # dump to dataloop.json
                 app_filename = assets.paths.APP_JSON_FILENAME
                 if not os.path.isfile(app_filename):
@@ -630,12 +670,22 @@ class CommandExecutor:
         directory = args.dir
         if directory == '..':
             directory = os.path.split(os.getcwd())[0]
+        # Validate path to prevent path traversal
+        miscellaneous.PathUtils.validate_directory_path(
+            directory,
+            base_path=os.getcwd(),
+            must_exist=True
+        )
         os.chdir(directory)
         print(os.getcwd())
     @staticmethod
     def mkdir(args):
-        os.mkdir(args.name)
+        # Validate directory name to prevent path traversal
+        miscellaneous.PathUtils.validate_directory_name(args.name)
+        # Create directory in current working directory
+        dir_path = os.path.join(os.getcwd(), args.name)
+        os.mkdir(dir_path)
     # noinspection PyUnusedLocal
     @staticmethod

dtlpy/entities/annotation.py CHANGED Viewed

@@ -1827,7 +1827,7 @@ class FrameAnnotation(entities.BaseEntity):
         return frame
     @classmethod
-    def from_snapshot(cls, annotation, _json, fps):
+    def from_snapshot(cls, annotation, _json, fps=None):
         """
         new frame state to annotation

dtlpy/entities/app.py CHANGED Viewed

@@ -93,7 +93,7 @@ class App(entities.BaseEntity):
         .. code-block:: python
             succeed = app.uninstall()
         """
-        return self.apps.uninstall(self.id)
+        return self.apps.uninstall(app=self)
     def update(self):
         """

dtlpy/entities/compute.py CHANGED Viewed

@@ -13,6 +13,7 @@ class ClusterProvider(str, Enum):
     LOCAL = 'local'
     RANCHER_K3S = 'rancher-k3s'
     RANCHER_RKE = 'rancher-rke'
+    OPENSHIFT = 'openshift'
 class ComputeType(str, Enum):

dtlpy/entities/dataset.py CHANGED Viewed

@@ -86,6 +86,9 @@ class Dataset(entities.BaseEntity):
     # api
     _client_api = attr.ib(type=ApiClient, repr=False)
+    # syncing status
+    is_syncing = attr.ib(default=False, repr=False)
     # entities
     _project = attr.ib(default=None, repr=False)
@@ -183,6 +186,7 @@ class Dataset(entities.BaseEntity):
                    expiration_options=expiration_options,
                    index_driver=_json.get('indexDriver', None),
                    enable_sync_with_cloned=_json.get('enableSyncWithCloned', None),
+                   is_syncing=_json.get('isSyncing', False),
                    src_dataset=_json.get('srcDataset', None))
         inst.is_fetched = is_fetched
         return inst
@@ -215,6 +219,7 @@ class Dataset(entities.BaseEntity):
                                                               attr.fields(Dataset).items_count,
                                                               attr.fields(Dataset).index_driver,
                                                               attr.fields(Dataset).enable_sync_with_cloned,
+                                                              attr.fields(Dataset).is_syncing,
                                                               attr.fields(Dataset).src_dataset,
                                                               ))
         _json.update({'items': self.items_url})
@@ -231,6 +236,7 @@ class Dataset(entities.BaseEntity):
             _json['expirationOptions'] = self.expiration_options.to_json()
         if self.enable_sync_with_cloned is not None:
             _json['enableSyncWithCloned'] = self.enable_sync_with_cloned
+        _json['isSyncing'] = self.is_syncing
         if self.src_dataset is not None:
             _json['srcDataset'] = self.src_dataset
         return _json
@@ -288,12 +294,15 @@ class Dataset(entities.BaseEntity):
     def set_repositories(self):
         reps = namedtuple('repositories',
                           field_names=['items', 'recipes', 'datasets', 'assignments', 'tasks', 'annotations',
-                                       'ontologies', 'features', 'settings', 'schema', 'collections'])
+                                       'ontologies', 'features', 'feature_sets', 'settings', 'schema', 'collections'])
+        _project_id = None
         if self._project is None:
             datasets = repositories.Datasets(client_api=self._client_api, project=self._project)
+            if self.projects is not None and len(self.projects) > 0:
+                _project_id = self.projects[0]
         else:
             datasets = self._project.datasets
+            _project_id = self._project.id
         return reps(
             items=repositories.Items(client_api=self._client_api, dataset=self, datasets=datasets),
             recipes=repositories.Recipes(client_api=self._client_api, dataset=self),
@@ -303,6 +312,7 @@ class Dataset(entities.BaseEntity):
             datasets=datasets,
             ontologies=repositories.Ontologies(client_api=self._client_api, dataset=self),
             features=repositories.Features(client_api=self._client_api, project=self._project, dataset=self),
+            feature_sets=repositories.FeatureSets(client_api=self._client_api, project=self._project, project_id=_project_id, dataset=self),
             settings=repositories.Settings(client_api=self._client_api, dataset=self),
             schema=repositories.Schema(client_api=self._client_api, dataset=self),
             collections=repositories.Collections(client_api=self._client_api, dataset=self)
@@ -353,6 +363,11 @@ class Dataset(entities.BaseEntity):
         assert isinstance(self._repositories.features, repositories.Features)
         return self._repositories.features
+    @property
+    def feature_sets(self):
+        assert isinstance(self._repositories.feature_sets, repositories.FeatureSets)
+        return self._repositories.feature_sets
     @property
     def collections(self):
         assert isinstance(self._repositories.collections, repositories.Collections)

dtlpy/entities/model.py CHANGED Viewed

@@ -423,7 +423,7 @@ class Model(entities.BaseEntity):
         # default
         if 'id_to_label_map' not in self.configuration:
             if not (self.dataset_id == 'null' or self.dataset_id is None):
-                self.labels = [label.tag for label in self.dataset.labels]
+                self.labels = [flat_key for flat_key, _ in self.dataset.labels_flat_dict.items()]
             self.configuration['id_to_label_map'] = {int(idx): lbl for idx, lbl in enumerate(self.labels)}
         # use existing
         else:

dtlpy/entities/paged_entities.py CHANGED Viewed

@@ -9,7 +9,7 @@ from typing import Optional, List, Any
 import attr
 from .filters import FiltersOperations, FiltersOrderByDirection, FiltersResource
-from .. import miscellaneous
+from .. import miscellaneous, exceptions
 from ..services.api_client import ApiClient
 logger = logging.getLogger(name='dtlpy')
@@ -243,8 +243,12 @@ class PagedEntities:
         :param page_offset: page offset (for offset-based)
         :param page_size: page size
         """
-        items = self.return_page(page_offset=page_offset, page_size=page_size)
-        self.items = items
+        try:
+            items = self.return_page(page_offset=page_offset, page_size=page_size)
+            self.items = items
+        except exceptions.BadRequest as e:
+            logger.warning(f"BadRequest error received: {str(e)}")
+            self.items = miscellaneous.List(list())
     def next_page(self) -> None:
         """

dtlpy/entities/service.py CHANGED Viewed

@@ -142,7 +142,7 @@ class KubernetesRuntime(ServiceRuntime):
                  num_replicas=DEFAULT_NUM_REPLICAS,
                  concurrency=DEFAULT_CONCURRENCY,
                  dynamic_concurrency=None,
-                 concurrency_update_method=None,
+                 dynamic_concurrency_config=None,
                  runner_image=None,
                  autoscaler=None,
                  **kwargs):
@@ -156,7 +156,7 @@ class KubernetesRuntime(ServiceRuntime):
         self.single_agent = kwargs.get('singleAgent', None)
         self.preemptible = kwargs.get('preemptible', None)
         self.dynamic_concurrency = kwargs.get('dynamicConcurrency', dynamic_concurrency)
-        self.concurrency_update_method = kwargs.get('concurrencyUpdateMethod', concurrency_update_method)
+        self.dynamic_concurrency_config = kwargs.get('dynamicConcurrencyConfig', dynamic_concurrency_config)
         self.autoscaler = kwargs.get('autoscaler', autoscaler)
         if self.autoscaler is not None and isinstance(self.autoscaler, dict):
@@ -191,8 +191,8 @@ class KubernetesRuntime(ServiceRuntime):
         if self.dynamic_concurrency is not None:
             _json['dynamicConcurrency'] = self.dynamic_concurrency
-        if self.concurrency_update_method is not None:
-            _json['concurrencyUpdateMethod'] = self.concurrency_update_method
+        if self.dynamic_concurrency_config is not None:
+            _json['dynamicConcurrencyConfig'] = self.dynamic_concurrency_config
         return _json

dtlpy/miscellaneous/__init__.py CHANGED Viewed

@@ -18,3 +18,4 @@ from .git_utils import GitUtils
 from .zipping import Zipping
 from .list_print import List
 from .json_utils import JsonUtils
+from .path_utils import PathUtils

dtlpy/miscellaneous/path_utils.py ADDED Viewed

@@ -0,0 +1,264 @@
+import os
+import tempfile
+from pathlib import Path
+from .. import exceptions
+from ..services import service_defaults
+class PathUtils:
+    """
+    Utility class for path validation and sanitization to prevent path traversal attacks.
+    """
+    allowed_roots = [tempfile.gettempdir(), service_defaults.DATALOOP_PATH]
+    @staticmethod
+    def _contains_traversal(path: str) -> bool:
+        """
+        Check if path contains path traversal sequences.
+        :param str path: Path to check
+        :return: True if path contains traversal sequences
+        :rtype: bool
+        """
+        if not path:
+            return False
+        # Normalize the path to handle different separators
+        normalized = os.path.normpath(path)
+        # Check for parent directory references
+        parts = Path(normalized).parts
+        if '..' in parts:
+            return True
+        # Check for encoded traversal sequences (evasion attempts)
+        if '%2e%2e' in path.lower() or '..%2f' in path.lower() or '..%5c' in path.lower():
+            return True
+        return False
+    @staticmethod
+    def _is_within_base(resolved_path: str, base_path: str) -> bool:
+        """
+        Check if resolved_path is within base_path.
+        :param str resolved_path: Absolute resolved path
+        :param str base_path: Base directory path
+        :return: True if resolved_path is within base_path
+        :rtype: bool
+        """
+        try:
+            resolved = os.path.abspath(os.path.normpath(resolved_path))
+            base = os.path.abspath(os.path.normpath(base_path))
+            # Get common path
+            common = os.path.commonpath([resolved, base])
+            return common == base
+        except (ValueError, OSError):
+            # On Windows, if paths are on different drives, commonpath raises ValueError
+            return False
+    @staticmethod
+    def _is_allowed_path(resolved_path: str, base_path: str) -> bool:
+        """
+        Check if resolved_path is within base_path or any allowed_root.
+        :param str resolved_path: Absolute resolved path
+        :param str base_path: Base directory path
+        :return: True if resolved_path is within base_path or any allowed_root
+        :rtype: bool
+        """
+        for allowed_root in [base_path] + PathUtils.allowed_roots:
+            if PathUtils._is_within_base(resolved_path, allowed_root):
+                return True
+        return False
+    @staticmethod
+    def validate_directory_name(name: str) -> str:
+        """
+        Validate a directory name to ensure it doesn't contain path traversal sequences.
+        :param str name: Directory name to validate
+        :return: Validated directory name
+        :rtype: str
+        :raises PlatformException: If name contains invalid characters or traversal sequences
+        """
+        if not name:
+            raise exceptions.PlatformException(
+                error='400',
+                message='Directory name cannot be empty'
+            )
+        # Check for path separators
+        if os.sep in name or (os.altsep and os.altsep in name):
+            raise exceptions.PlatformException(
+                error='400',
+                message='Directory name cannot contain path separators'
+            )
+        # Check for traversal sequences
+        if PathUtils._contains_traversal(name):
+            raise exceptions.PlatformException(
+                error='400',
+                message='Directory name cannot contain path traversal sequences'
+            )
+        return name
+    @staticmethod
+    def _validate_single_path(path, base_path: str, must_exist: bool):
+        """
+        Internal method to validate a single path string.
+        :param path: Path to validate (str or Path object)
+        :param str base_path: Base directory to restrict path to
+        :param bool must_exist: If True, path must exist
+        :raises PlatformException: If path is invalid or contains traversal sequences
+        """
+        # Convert Path objects to strings
+        if isinstance(path, Path):
+            path = str(path)
+        if isinstance(base_path, Path):
+            base_path = str(base_path)
+        # Skip validation if not a string
+        if not isinstance(path, str):
+            return
+        # Skip validation for URLs and external paths
+        if path.startswith(('http://', 'https://', 'external://')):
+            return
+        # Empty string check
+        if not path:
+            raise exceptions.PlatformException(
+                error='400',
+                message='Path cannot be empty'
+            )
+        # Check for traversal sequences in the original path
+        if PathUtils._contains_traversal(path):
+            raise exceptions.PlatformException(
+                error='400',
+                message='Path contains invalid traversal sequences'
+            )
+        # Resolve path (absolute paths allowed if within base_path)
+        if os.path.isabs(path):
+            resolved = os.path.abspath(os.path.normpath(path))
+        else:
+            resolved = os.path.abspath(os.path.normpath(os.path.join(base_path, path)))
+        # Reject if path is outside base_path or allowed_roots
+        if not PathUtils._is_allowed_path(resolved, base_path):
+            raise exceptions.PlatformException(
+                error='400',
+                message='Path resolves outside allowed directory'
+            )
+        # Check if path must exist
+        if must_exist and not os.path.exists(resolved):
+            raise exceptions.PlatformException(
+                error='404',
+                message='Path does not exist: {}'.format(path)
+            )
+    @staticmethod
+    def validate_paths(paths, base_path = None, must_exist: bool = False):
+        """
+        Validate file or directory paths against path traversal attacks.
+        Accepts a list of paths and validates each one.
+        Skips validation if path is None or not a string.
+        Skips validation for URLs (http://, https://) and external paths (external://).
+        :param paths: Path(s) to validate - can be str, Path, list of str/Path, or None
+        :param base_path: Optional base directory to restrict path to (str or Path). If None, uses current working directory
+        :param bool must_exist: If True, path must exist
+        :raises PlatformException: If any path is invalid or contains traversal sequences
+        """
+        # Handle None - skip validation
+        if paths is None:
+            return
+        # Convert base_path Path object to string
+        if isinstance(base_path, Path):
+            base_path = str(base_path)
+        # Resolve base_path
+        if base_path is None:
+            base_path = os.getcwd()
+        # Handle list of paths
+        if isinstance(paths, list):
+            for path in paths:
+                PathUtils._validate_single_path(path, base_path, must_exist)
+        else:
+            # Single path
+            PathUtils._validate_single_path(paths, base_path, must_exist)
+    @staticmethod
+    def validate_file_path(file_path, base_path = None, must_exist: bool = True):
+        """
+        Validate a file path against path traversal attacks.
+        :param file_path: File path to validate (str or Path object)
+        :param base_path: Optional base directory to restrict path to (str or Path). If None, uses current working directory
+        :param bool must_exist: If True, file must exist (default: True)
+        :raises PlatformException: If path is invalid, contains traversal sequences, or is not a file
+        """
+        # Convert Path objects to strings
+        if isinstance(file_path, Path):
+            file_path = str(file_path)
+        if isinstance(base_path, Path):
+            base_path = str(base_path)
+        PathUtils.validate_paths(file_path, base_path=base_path, must_exist=must_exist)
+        if must_exist and isinstance(file_path, str) and not file_path.startswith(('http://', 'https://', 'external://')):
+            # Resolve path to check if it's a file
+            if base_path is None:
+                base_path = os.getcwd()
+            if os.path.isabs(file_path):
+                resolved = os.path.abspath(os.path.normpath(file_path))
+            else:
+                resolved = os.path.abspath(os.path.normpath(os.path.join(base_path, file_path)))
+            if not os.path.isfile(resolved):
+                raise exceptions.PlatformException(
+                    error='400',
+                    message='Path is not a file: {}'.format(file_path)
+                )
+    @staticmethod
+    def validate_directory_path(dir_path, base_path = None, must_exist: bool = True):
+        """
+        Validate a directory path against path traversal attacks.
+        :param dir_path: Directory path to validate (str or Path object)
+        :param base_path: Optional base directory to restrict path to (str or Path). If None, uses current working directory
+        :param bool must_exist: If True, directory must exist (default: True)
+        :raises PlatformException: If path is invalid, contains traversal sequences, or is not a directory
+        """
+        # Convert Path objects to strings
+        if isinstance(dir_path, Path):
+            dir_path = str(dir_path)
+        if isinstance(base_path, Path):
+            base_path = str(base_path)
+        PathUtils.validate_paths(dir_path, base_path=base_path, must_exist=must_exist)
+        if must_exist and isinstance(dir_path, str) and not dir_path.startswith(('http://', 'https://', 'external://')):
+            # Resolve path to check if it's a directory
+            if base_path is None:
+                base_path = os.getcwd()
+            if os.path.isabs(dir_path):
+                resolved = os.path.abspath(os.path.normpath(dir_path))
+            else:
+                resolved = os.path.abspath(os.path.normpath(os.path.join(base_path, dir_path)))
+            if not os.path.isdir(resolved):
+                raise exceptions.PlatformException(
+                    error='400',
+                    message='Path is not a directory: {}'.format(dir_path)
+                )

dtlpy/ml/base_model_adapter.py CHANGED Viewed

@@ -472,31 +472,32 @@ class BaseModelAdapter(utilities.BaseServiceRunner):
             self.logger.debug("Downloading subset {!r} of {}".format(subset, self.model_entity.dataset.name))
             annotation_filters = None
-            if subset in annotations_subsets:
-                annotation_filters = entities.Filters(
-                    use_defaults=False,
-                    resource=entities.FiltersResource.ANNOTATION,
-                    custom_filter=annotations_subsets[subset],
-                )
-            # if user provided annotation_filters, skip the default filters
-            elif self.model_entity.output_type is not None and self.model_entity.output_type != "embedding":
-                annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION, use_defaults=False)
-                if self.model_entity.output_type in [
-                    entities.AnnotationType.SEGMENTATION,
-                    entities.AnnotationType.POLYGON,
-                ]:
-                    model_output_types = [entities.AnnotationType.SEGMENTATION, entities.AnnotationType.POLYGON]
-                else:
-                    model_output_types = [self.model_entity.output_type]
-                annotation_filters.add(
-                    field=entities.FiltersKnownFields.TYPE,
-                    values=model_output_types,
-                    operator=entities.FiltersOperations.IN,
-                )
+            if self.model_entity.output_type != "embedding":
+                if subset in annotations_subsets:
+                    annotation_filters = entities.Filters(
+                        use_defaults=False,
+                        resource=entities.FiltersResource.ANNOTATION,
+                        custom_filter=annotations_subsets[subset],
+                    )
+                # if user provided annotation_filters, skip the default filters
+                elif self.model_entity.output_type is not None:
+                    annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION, use_defaults=False)
+                    if self.model_entity.output_type in [
+                        entities.AnnotationType.SEGMENTATION,
+                        entities.AnnotationType.POLYGON,
+                    ]:
+                        model_output_types = [entities.AnnotationType.SEGMENTATION, entities.AnnotationType.POLYGON]
+                    else:
+                        model_output_types = [self.model_entity.output_type]
+                    annotation_filters.add(
+                        field=entities.FiltersKnownFields.TYPE,
+                        values=model_output_types,
+                        operator=entities.FiltersOperations.IN,
+                    )
-            annotation_filters = self.__include_model_annotations(annotation_filters)
-            annotations_subsets[subset] = annotation_filters.prepare()
+                annotation_filters = self.__include_model_annotations(annotation_filters)
+                annotations_subsets[subset] = annotation_filters.prepare()
             ret_list = self.__download_items(
                 dataset=dataset,
@@ -709,7 +710,7 @@ class BaseModelAdapter(utilities.BaseServiceRunner):
             valid_vectors = []
             items_to_upload = []
             vectors_to_upload = []
             for item, vector in zip(_items, vectors):
                 # Check if vector is valid
                 if vector is None or len(vector) != embeddings_size:
@@ -719,25 +720,25 @@ class BaseModelAdapter(utilities.BaseServiceRunner):
                 # Item and vector are valid
                 valid_items.append(item)
                 valid_vectors.append(vector)
                 # Check if item should be skipped (prompt items)
                 _system_metadata = getattr(item, 'system', dict())
                 is_prompt = _system_metadata.get('shebang', dict()).get('dltype', '') == 'prompt'
                 if skip_default_items and is_prompt:
                     self.logger.debug(f"Skipping feature upload for prompt item {item.id}")
                     continue
                 # Items were not skipped - should be uploaded
                 items_to_upload.append(item)
                 vectors_to_upload.append(vector)
             # Update the original lists with valid items only
             _items[:] = valid_items
             vectors[:] = valid_vectors
             if len(_items) != len(vectors):
                 raise ValueError(f"The number of items ({len(_items)}) is not equal to the number of vectors ({len(vectors)}).")
             self.logger.debug(f"Uploading {len(items_to_upload)} items' feature vectors for model {self.model_entity.name}.")
             try:
                 start_time = time.time()
@@ -830,7 +831,7 @@ class BaseModelAdapter(utilities.BaseServiceRunner):
             logger.info("Received {s} for training".format(s=model.id))
             model = model.wait_for_model_ready()
             if model.status == 'failed':
-                raise ValueError("Model is in failed state, cannot train.")
+                logger.warning("Model failed. New training will attempt to resume from previous checkpoints.")
             ##############
             # Set status #

dtlpy 1.117.6__py3-none-any.whl → 1.118.13__py3-none-any.whl

dtlpy 1.117.6__py3-none-any.whl → 1.118.13__py3-none-any.whl