dtlpy 1.91.37__py3-none-any.whl → 1.92.18__py3-none-any.whl
This diff shows the changes between the two publicly released package versions as they appear in their public registries. It is provided for informational purposes only.
- dtlpy/__init__.py +5 -2
- dtlpy/__version__.py +1 -1
- dtlpy/entities/__init__.py +1 -1
- dtlpy/entities/command.py +3 -2
- dtlpy/entities/dataset.py +52 -2
- dtlpy/entities/feature_set.py +3 -0
- dtlpy/entities/filters.py +2 -2
- dtlpy/entities/item.py +15 -1
- dtlpy/entities/node.py +11 -1
- dtlpy/entities/ontology.py +36 -40
- dtlpy/entities/pipeline.py +20 -1
- dtlpy/entities/pipeline_execution.py +23 -0
- dtlpy/entities/prompt_item.py +240 -37
- dtlpy/entities/service.py +5 -5
- dtlpy/ml/base_model_adapter.py +99 -41
- dtlpy/new_instance.py +80 -9
- dtlpy/repositories/apps.py +56 -10
- dtlpy/repositories/commands.py +10 -2
- dtlpy/repositories/datasets.py +142 -12
- dtlpy/repositories/dpks.py +5 -1
- dtlpy/repositories/feature_sets.py +23 -3
- dtlpy/repositories/models.py +1 -1
- dtlpy/repositories/pipeline_executions.py +53 -0
- dtlpy/repositories/uploader.py +3 -0
- dtlpy/services/api_client.py +59 -3
- {dtlpy-1.91.37.dist-info → dtlpy-1.92.18.dist-info}/METADATA +1 -1
- {dtlpy-1.91.37.dist-info → dtlpy-1.92.18.dist-info}/RECORD +35 -38
- tests/features/environment.py +29 -0
- dtlpy/callbacks/__init__.py +0 -16
- dtlpy/callbacks/piper_progress_reporter.py +0 -29
- dtlpy/callbacks/progress_viewer.py +0 -54
- {dtlpy-1.91.37.data → dtlpy-1.92.18.data}/scripts/dlp +0 -0
- {dtlpy-1.91.37.data → dtlpy-1.92.18.data}/scripts/dlp.bat +0 -0
- {dtlpy-1.91.37.data → dtlpy-1.92.18.data}/scripts/dlp.py +0 -0
- {dtlpy-1.91.37.dist-info → dtlpy-1.92.18.dist-info}/LICENSE +0 -0
- {dtlpy-1.91.37.dist-info → dtlpy-1.92.18.dist-info}/WHEEL +0 -0
- {dtlpy-1.91.37.dist-info → dtlpy-1.92.18.dist-info}/entry_points.txt +0 -0
- {dtlpy-1.91.37.dist-info → dtlpy-1.92.18.dist-info}/top_level.txt +0 -0
dtlpy/entities/prompt_item.py
CHANGED

@@ -1,7 +1,11 @@
 import logging
-import io
 import enum
 import json
+import os.path
+from dtlpy import entities, repositories
+from dtlpy.services.api_client import client as client_api
+import base64
+import requests

 logger = logging.getLogger(name='dtlpy')

@@ -11,27 +15,37 @@ class PromptType(str, enum.Enum):
     IMAGE = 'image/*'
     AUDIO = 'audio/*'
     VIDEO = 'video/*'
+    METADATA = 'metadata'


 class Prompt:
-    def __init__(self, key):
+    def __init__(self, key, role='user'):
         """
         Create a single Prompt. Prompt can contain multiple mimetype elements, e.g. text sentence and an image.
-
         :param key: unique identifier of the prompt in the item
         """
         self.key = key
         self.elements = list()
+        self._items = repositories.Items(client_api=client_api)
+        self.metadata = {'role': role}

-    def
+    def add_element(self, value, mimetype='application/text'):
         """

         :param value: url or string of the input
         :param mimetype: mimetype of the input. options: `text`, `image/*`, `video/*`, `audio/*`
         :return:
         """
-
-
+        allowed_prompt_types = [prompt_type for prompt_type in PromptType]
+        if mimetype not in allowed_prompt_types:
+            raise ValueError(f'Invalid mimetype: {mimetype}. Allowed values: {allowed_prompt_types}')
+        if not isinstance(value, str) and mimetype != PromptType.METADATA:
+            raise ValueError(f'Expected str for Prompt element value, got {type(value)} instead')
+        if mimetype == PromptType.METADATA and isinstance(value, dict):
+            self.metadata.update(value)
+        else:
+            self.elements.append({'mimetype': mimetype,
+                                  'value': value})

     def to_json(self):
         """
@@ -39,26 +53,169 @@ class Prompt:

         :return:
         """
+        elements_json = [
+            {
+                "mimetype": e['mimetype'],
+                "value": e['value'],
+            } for e in self.elements
+        ]
+        elements_json.append({
+            "mimetype": PromptType.METADATA,
+            "value": self.metadata
+        })
         return {
-            self.key:
-                {
-                    "mimetype": e['mimetype'],
-                    "value": e['value']
-                } for e in self.elements
-            ]
+            self.key: elements_json
         }

-
-class PromptItem:
-    def __init__(self, name):
+    def _convert_stream_to_binary(self, image_url: str):
         """
-
+        Convert a stream to binary
+        :param image_url: dataloop image stream url
+        :return: binary object
+        """
+        image_buffer = None
+        if '.' in image_url and 'dataloop.ai' not in image_url:
+            # URL and not DL item stream
+            try:
+                response = requests.get(image_url, stream=True)
+                response.raise_for_status()  # Raise an exception for bad status codes
+
+                # Check for valid image content type
+                if response.headers["Content-Type"].startswith("image/"):
+                    # Read the image data in chunks to avoid loading large images in memory
+                    image_buffer = b"".join(chunk for chunk in response.iter_content(1024))
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to download image from URL: {image_url}, error: {e}")
+
+        elif '.' in image_url and 'stream' in image_url:
+            # DL Stream URL
+            item_id = image_url.split("/stream")[0].split("/items/")[-1]
+            image_buffer = self._items.get(item_id=item_id).download(save_locally=False).getvalue()
+        else:
+            # DL item ID
+            image_buffer = self._items.get(item_id=image_url).download(save_locally=False).getvalue()

-
+        if image_buffer is not None:
+            encoded_image = base64.b64encode(image_buffer).decode()
+        else:
+            logger.error(f'Invalid image url: {image_url}')
+            return None
+
+        return f'data:image/jpeg;base64,{encoded_image}'
+
+    def messages(self):
+        """
+        return a list of messages in the prompt item,
+        messages are returned following the openai SDK format https://platform.openai.com/docs/guides/vision
         """
+        messages = []
+        for element in self.elements:
+            if element['mimetype'] == PromptType.TEXT:
+                data = {
+                    "type": "text",
+                    "text": element['value']
+                }
+                messages.append(data)
+            elif element['mimetype'] == PromptType.IMAGE:
+                image_url = self._convert_stream_to_binary(element['value'])
+                data = {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": image_url
+                    }
+                }
+                messages.append(data)
+            elif element['mimetype'] == PromptType.AUDIO:
+                raise NotImplementedError('Audio prompt is not supported yet')
+            elif element['mimetype'] == PromptType.VIDEO:
+                raise NotImplementedError('Video prompt is not supported yet')
+            else:
+                raise ValueError(f'Invalid mimetype: {element["mimetype"]}')
+        return messages, self.key
+
+
+class PromptItem:
+    def __init__(self, name, item: entities.Item = None):
+        # prompt item name
         self.name = name
-
+        # list of user prompts in the prompt item
         self.prompts = list()
+        # list of assistant (annotations) prompts in the prompt item
+        self.assistant_prompts = dict()
+        # Dataloop Item
+        self._item = None
+
+    @classmethod
+    def from_item(cls, item: entities.Item):
+        """
+        Load a prompt item from the platform
+        :param item : Item object
+        :return: PromptItem object
+        """
+        if 'json' not in item.mimetype or item.system.get('shebang', dict()).get('dltype') != 'prompt':
+            raise ValueError('Expecting a json item with system.shebang.dltype = prompt')
+        # Not using `save_locally=False` to use the from_local_file method
+        item_file_path = item.download()
+        prompt_item = cls.from_local_file(file_path=item_file_path)
+        if os.path.exists(item_file_path):
+            os.remove(item_file_path)
+        prompt_item._item = item
+        return prompt_item
+
+    @classmethod
+    def from_local_file(cls, file_path):
+        """
+        Create a new prompt item from a file
+        :param file_path: path to the file
+        :return: PromptItem object
+        """
+        if os.path.exists(file_path) is False:
+            raise FileNotFoundError(f'File does not exists: {file_path}')
+        if 'json' not in os.path.splitext(file_path)[-1]:
+            raise ValueError(f'Expected path to json item, got {os.path.splitext(file_path)[-1]}')
+        prompt_item = cls(name=file_path)
+        with open(file_path, 'r') as f:
+            data = json.load(f)
+        for prompt_key, prompt_values in data.get('prompts', dict()).items():
+            prompt = Prompt(key=prompt_key)
+            for val in prompt_values:
+                if val['mimetype'] == PromptType.METADATA:
+                    _ = val.pop('mimetype')
+                    prompt.add_element(value=val, mimetype=PromptType.METADATA)
+                else:
+                    prompt.add_element(mimetype=val['mimetype'], value=val['value'])
+            prompt_item.add_prompt(prompt=prompt, update_item=False)
+        return prompt_item
+
+    def get_assistant_messages(self, annotations: entities.AnnotationCollection):
+        """
+        Get all the annotations in the item for the assistant messages
+        """
+        # clearing the assistant prompts from previous annotations that might not belong
+        self.assistant_prompts = dict()
+        for annotation in annotations:
+            prompt_id = annotation.metadata.get('system', dict()).get('promptId', None)
+            if annotation.type == 'ref_image':
+                prompt = Prompt(key=prompt_id)
+                prompt.add_element(value=annotation.coordinates.get('ref'), mimetype=PromptType.IMAGE)
+                self.assistant_prompts[annotation.id] = prompt
+            elif annotation.type == 'text':
+                prompt = Prompt(key=prompt_id)
+                prompt.add_element(value=annotation.coordinates, mimetype=PromptType.TEXT)
+                self.assistant_prompts[annotation.id] = prompt
+
+    def get_assistant_prompts(self, model_name):
+        """
+        Get assistant prompts
+        :return:
+        """
+        if self._item is None:
+            logger.warning('Item is not loaded, skipping annotations context')
+            return
+        filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION)
+        filters.add(field='metadata.user.model.name', values=model_name)
+        annotations = self._item.annotations.list(filters=filters)
+        self.get_assistant_messages(annotations=annotations)

     def to_json(self):
         """
@@ -69,36 +226,82 @@ class PromptItem:
         prompts_json = {
             "shebang": "dataloop",
             "metadata": {
-                "dltype":
+                "dltype": 'prompt'
             },
             "prompts": {}
         }
         for prompt in self.prompts:
             for prompt_key, prompt_values in prompt.to_json().items():
                 prompts_json["prompts"][prompt_key] = prompt_values
-
-
-    @classmethod
-    def from_json(cls, _json):
-        inst = cls(name='dummy')
-        for prompt_key, prompt_values in _json["prompts"].items():
-            prompt = Prompt(key=prompt_key)
-            for val in prompt_values:
-                prompt.add(mimetype=val['mimetype'], value=val['value'])
-            inst.prompts.append(prompt)
-        return inst
+                prompts_json["prompts"][prompt_key].append({'metadata'})

-
-        byte_io = io.BytesIO()
-        byte_io.name = self.name
-        byte_io.write(json.dumps(self.to_json()).encode())
-        byte_io.seek(0)
-        return byte_io
+        return prompts_json

-    def
+    def add_prompt(self, prompt: Prompt, update_item=True):
         """
         add a prompt to the prompt item
         prompt: a dictionary. keys are prompt message id, values are prompt messages
         responses: a list of annotations representing responses to the prompt
         """
         self.prompts.append(prompt)
+        if update_item is True:
+            if self._item is not None:
+                self._item._Item__update_item_binary(_json=self.to_json())
+            else:
+                logger.warning('Item is not loaded, skipping upload')
+
+    def messages(self, model_name=None):
+        """
+        return a list of messages in the prompt item
+        messages are returned following the openai SDK format
+        """
+        if model_name is not None:
+            self.get_assistant_prompts(model_name=model_name)
+        else:
+            logger.warning('Model name is not provided, skipping assistant prompts')
+
+        all_prompts_messages = dict()
+        for prompt in self.prompts:
+            if prompt.key not in all_prompts_messages:
+                all_prompts_messages[prompt.key] = list()
+            prompt_messages, prompt_key = prompt.messages()
+            messages = {
+                'role': prompt.metadata.get('role', 'user'),
+                'content': prompt_messages
+            }
+            all_prompts_messages[prompt.key].append(messages)
+
+        for ann_id, prompt in self.assistant_prompts.items():
+            if prompt.key not in all_prompts_messages:
+                logger.warning(f'Prompt key {prompt.key} is not found in the user prompts, skipping Assistant prompt')
+                continue
+            prompt_messages, prompt_key = prompt.messages()
+            assistant_messages = {
+                'role': 'assistant',
+                'content': prompt_messages
+            }
+            all_prompts_messages[prompt.key].append(assistant_messages)
+        res = list()
+        for prompts in all_prompts_messages.values():
+            for prompt in prompts:
+                res.append(prompt)
+        return res
+
+    def add_responses(self, annotation: entities.BaseAnnotationDefinition, model: entities.Model):
+        """
+        Add an annotation to the prompt item
+        :param annotation: Annotation object
+        :param model: Model object
+        """
+        if self._item is None:
+            raise ValueError('Item is not loaded, cannot add annotation')
+        annotation_collection = entities.AnnotationCollection()
+        annotation_collection.add(annotation_definition=annotation,
+                                  prompt_id=self.prompts[-1].key,
+                                  model_info={
+                                      'name': model.name,
+                                      'model_id': model.id,
+                                      'confidence': 1.0
+                                  })
+        annotations = self._item.annotations.upload(annotations=annotation_collection)
+        self.get_assistant_messages(annotations=annotations)
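
The rewrite above replaces the old `from_json`/BytesIO flow with a role-aware `Prompt`, platform loading via `from_item`/`from_local_file`, and OpenAI-style `messages()`. A minimal usage sketch of the new API, assuming a configured dtlpy environment; the image URL, item id, and model name are placeholders, not values from this diff:

import dtlpy as dl
from dtlpy.entities.prompt_item import Prompt, PromptItem, PromptType

# Build a single user prompt holding a text element and an image element.
prompt = Prompt(key='prompt-1', role='user')
prompt.add_element(value='What is shown in this picture?', mimetype=PromptType.TEXT)
prompt.add_element(value='https://example.com/picture.jpg', mimetype=PromptType.IMAGE)  # placeholder URL

# Collect prompts in a prompt item; update_item=False because no platform item is attached yet.
prompt_item = PromptItem(name='my-prompt-item.json')
prompt_item.add_prompt(prompt=prompt, update_item=False)
print(prompt_item.to_json())  # platform JSON, with a trailing metadata element per prompt

# Loading an existing prompt item from the platform instead (placeholder ids):
# item = dl.items.get(item_id='<item-id>')
# prompt_item = PromptItem.from_item(item=item)
# messages = prompt_item.messages(model_name='<model-name>')  # OpenAI-style role/content dicts
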
dtlpy/entities/service.py
CHANGED

@@ -370,11 +370,6 @@ class Service(entities.BaseEntity):
     def package(self):
         if self._package is None:
             try:
-                self._package = repositories.Packages(client_api=self._client_api).get(package_id=self.package_id,
-                                                                                       fetch=None,
-                                                                                       log_error=False)
-                assert isinstance(self._package, entities.Package)
-            except:
                 dpk_id = None
                 dpk_version = None
                 if self.app and isinstance(self.app, dict):
@@ -389,6 +384,11 @@ class Service(entities.BaseEntity):
                                                                version=dpk_version)

                 assert isinstance(self._package, entities.Dpk)
+            except:
+                self._package = repositories.Packages(client_api=self._client_api).get(package_id=self.package_id,
+                                                                                       fetch=None,
+                                                                                       log_error=False)
+                assert isinstance(self._package, entities.Package)
         return self._package

     @property
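
The `package` property change above reverses the lookup order: the service now resolves its backing DPK (the new app model) first and falls back to the legacy `Packages` repository only on failure. A schematic of the new control flow; `lookup_dpk` and `lookup_legacy_package` are hypothetical helpers standing in for the inlined lookup code, not real dtlpy functions:

def resolve_package(service):
    # Sketch of the reordered Service.package resolution, not the actual implementation.
    try:
        package = lookup_dpk(service)  # hypothetical: derive dpk_id/dpk_version from service.app, fetch the Dpk
    except Exception:
        package = lookup_legacy_package(service)  # hypothetical: fetch the legacy Package by package_id
    return package
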
dtlpy/ml/base_model_adapter.py
CHANGED

@@ -110,7 +110,7 @@ class BaseModelAdapter(utilities.BaseServiceRunner):

         :param local_path: `str` directory path in local FileSystem
         """
-        raise NotImplementedError("Please implement
+        raise NotImplementedError("Please implement `load` method in {}".format(self.__class__.__name__))

     def save(self, local_path, **kwargs):
         """ saves configuration and weights locally
@@ -121,7 +121,7 @@ class BaseModelAdapter(utilities.BaseServiceRunner):

         :param local_path: `str` directory path in local FileSystem
         """
-        raise NotImplementedError("Please implement
+        raise NotImplementedError("Please implement `save` method in {}".format(self.__class__.__name__))

     def train(self, data_path, output_path, **kwargs):
         """
@@ -133,27 +133,27 @@ class BaseModelAdapter(utilities.BaseServiceRunner):
         :param data_path: `str` local File System path to where the data was downloaded and converted at
         :param output_path: `str` local File System path where to dump training mid-results (checkpoints, logs...)
         """
-        raise NotImplementedError("Please implement
+        raise NotImplementedError("Please implement `train` method in {}".format(self.__class__.__name__))

     def predict(self, batch, **kwargs):
-        """ Model inference (predictions) on batch of
+        """ Model inference (predictions) on batch of items

         Virtual method - need to implement

-        :param batch: `
+        :param batch: output of the `prepare_item_func` func
         :return: `list[dl.AnnotationCollection]` each collection is per each image / item in the batch
         """
-        raise NotImplementedError("Please implement
+        raise NotImplementedError("Please implement `predict` method in {}".format(self.__class__.__name__))

-    def
-    """ Extract model
+    def embed(self, batch, **kwargs):
+        """ Extract model embeddings on batch of items

         Virtual method - need to implement

-        :param batch: `
-        :return: `list[list]`
+        :param batch: output of the `prepare_item_func` func
+        :return: `list[list]` a feature vector per each item in the batch
         """
-        raise NotImplementedError("Please implement
+        raise NotImplementedError("Please implement `embed` method in {}".format(self.__class__.__name__))

     def evaluate(self, model: entities.Model, dataset: entities.Dataset, filters: entities.Filters) -> entities.Model:
         """
@@ -187,7 +187,7 @@ class BaseModelAdapter(utilities.BaseServiceRunner):
         :param data_path: `str` local File System directory path where we already downloaded the data from dataloop platform
         :return:
         """
-        raise NotImplementedError("Please implement
+        raise NotImplementedError("Please implement `convert_from_dtlpy` method in {}".format(self.__class__.__name__))

     #################
     # DTLPY METHODS #
@@ -265,14 +265,26 @@ class BaseModelAdapter(utilities.BaseServiceRunner):
         self.logger.debug("Downloading subset {!r} of {}".format(subset,
                                                                  self.model_entity.dataset.name))

-        if self.
-
-
+        if self.model_entity.output_type is not None:
+            if self.model_entity.output_type in [entities.AnnotationType.SEGMENTATION,
+                                                 entities.AnnotationType.POLYGON]:
+                model_output_types = [entities.AnnotationType.SEGMENTATION, entities.AnnotationType.POLYGON]
+            else:
+                model_output_types = [self.model_entity.output_type]
             annotation_filters = entities.Filters(
+                field=entities.FiltersKnownFields.TYPE,
+                values=model_output_types,
+                resource=entities.FiltersResource.ANNOTATION,
+                operator=entities.FiltersOperations.IN
+            )
+        else:
+            annotation_filters = entities.Filters(resource=entities.FiltersResource.ANNOTATION)
+
+        if not self.configuration.get("include_model_annotations", False):
+            annotation_filters.add(
                 field="metadata.system.model.name",
                 values=False,
-                operator=entities.FiltersOperations.EXISTS
-                resource=entities.FiltersResource.ANNOTATION
+                operator=entities.FiltersOperations.EXISTS
             )

         ret_list = dataset.items.download(filters=filters,
@@ -396,10 +408,10 @@ class BaseModelAdapter(utilities.BaseServiceRunner):
         pool.shutdown()
         return items, annotations

-    @entities.Package.decorators.function(display_name='
+    @entities.Package.decorators.function(display_name='Embed Items',
                                           inputs={'items': 'Item[]'},
                                           outputs={'items': 'Item[]', 'features': '[]'})
-    def
+    def embed_items(self, items: list, upload_features=True, batch_size=None, **kwargs):
         """
         Extract feature from an input list of items (or single) and return the items and the feature vector.

@@ -414,17 +426,18 @@ class BaseModelAdapter(utilities.BaseServiceRunner):
         input_type = self.model_entity.input_type
         self.logger.debug(
             "Predicting {} items, using batch size {}. input type: {}".format(len(items), batch_size, input_type))
-        pool = ThreadPoolExecutor(max_workers=16)

-
-
-
-
-
-
+        # Search for existing feature set for this model id
+        filters = entities.Filters(field='modelId',
+                                   values=self.model_entity.id,
+                                   resource=entities.FiltersResource.FEATURE_SET)
+        pages = self.model_entity.project.feature_sets.list(filters)
+        if pages.items_count == 0:
+            feature_set_name = self.configuration.get('featureSetName', self.model_entity.name)
             logger.info('Feature Set not found. creating... ')
             feature_set = self.model_entity.project.feature_sets.create(name=feature_set_name,
                                                                         entity_type=entities.FeatureEntityType.ITEM,
+                                                                        model_id=self.model_entity.id,
                                                                         project_id=self.model_entity.project_id,
                                                                         set_type=self.model_entity.name,
                                                                         size=self.configuration.get('embeddings_size',
@@ -433,10 +446,16 @@ class BaseModelAdapter(utilities.BaseServiceRunner):
             self.model_entity.configuration['featureSetName'] = feature_set_name
             self.model_entity.update()
             logger.info(f'Feature Set created! name: {feature_set.name}, id: {feature_set.id}')
+        elif pages.items_count > 1:
+            raise ValueError(
+                f'More than one feature set for model. model_id: {self.model_entity.id}, feature_sets_ids: {[f.id for f in pages.all()]}')
+        else:
+            feature_set = pages.items[0]
+            logger.info(f'Feature Set found! name: {feature_set.name}, id: {feature_set.id}')

-
-
-
+        # upload the feature vectors
+        pool = ThreadPoolExecutor(max_workers=16)
+        vectors = list()
         for i_batch in tqdm.tqdm(range(0, len(items), batch_size),
                                  desc='predicting',
                                  unit='bt',
@@ -444,24 +463,60 @@ class BaseModelAdapter(utilities.BaseServiceRunner):
                                  file=sys.stdout):
             batch_items = items[i_batch: i_batch + batch_size]
             batch = list(pool.map(self.prepare_item_func, batch_items))
-            batch_vectors = self.
-
+            batch_vectors = self.embed(batch, **kwargs)
+            vectors.extend(batch_vectors)
             if upload_features is True:
                 self.logger.debug(
                     "Uploading items' feature vectors for model {!r}.".format(self.model_entity.name))
                 try:
-
-
-
-
-
+                    _ = list(pool.map(partial(self._upload_model_features,
+                                              feature_set.id,
+                                              self.model_entity.project_id),
+                                      batch_items,
+                                      batch_vectors))
                 except Exception as err:
                     self.logger.exception("Failed to upload feature vectors to items.")

-            vectors.extend(batch_features)
         pool.shutdown()
         return items, vectors

+    @entities.Package.decorators.function(display_name='Embed Dataset with DQL',
+                                          inputs={'dataset': 'Dataset',
+                                                  'filters': 'Json'})
+    def embed_dataset(self,
+                      dataset: entities.Dataset,
+                      filters: entities.Filters = None,
+                      upload_features=True,
+                      batch_size=None,
+                      **kwargs):
+        """
+        Extract feature from all items given
+
+        :param dataset: Dataset entity to predict
+        :param filters: Filters entity for a filtering before predicting
+        :param upload_features: `bool` uploads the features back to the given items
+        :param batch_size: `int` size of batch to run a single inference
+
+        :return: `bool` indicating if the prediction process completed successfully
+        """
+        if batch_size is None:
+            batch_size = self.configuration.get('batch_size', 4)
+
+        self.logger.debug("Creating embedings for dataset (name:{}, id:{}, using batch size {}".format(dataset.name,
+                                                                                                       dataset.id,
+                                                                                                       batch_size))
+        if not filters:
+            filters = entities.Filters()
+        if filters is not None and isinstance(filters, dict):
+            filters = entities.Filters(custom_filter=filters)
+        pages = dataset.items.list(filters=filters, page_size=batch_size)
+        items = [item for page in pages for item in page]
+        self.embed_items(items=items,
+                         upload_features=upload_features,
+                         batch_size=batch_size,
+                         **kwargs)
+        return True
+
     @entities.Package.decorators.function(display_name='Predict Dataset with DQL',
                                           inputs={'dataset': 'Dataset',
                                                   'filters': 'Json'})
@@ -481,9 +536,12 @@ class BaseModelAdapter(utilities.BaseServiceRunner):
         :param cleanup: `bool` if set removes existing predictions with the same package-model name (default: False)
         :param batch_size: `int` size of batch to run a single inference

-        :return: `
-        and has prediction fields (model_info)
+        :return: `bool` indicating if the prediction process completed successfully
         """
+
+        if batch_size is None:
+            batch_size = self.configuration.get('batch_size', 4)
+
         self.logger.debug("Predicting dataset (name:{}, id:{}, using batch size {}".format(dataset.name,
                                                                                            dataset.id,
                                                                                            batch_size))
@@ -492,9 +550,9 @@ class BaseModelAdapter(utilities.BaseServiceRunner):
         if filters is not None and isinstance(filters, dict):
             filters = entities.Filters(custom_filter=filters)
         pages = dataset.items.list(filters=filters, page_size=batch_size)
-        items = [item for
+        items = [item for page in pages for item in page]
         self.predict_items(items=items,
-
+                           upload_annotations=with_upload,
                            cleanup=cleanup,
                            batch_size=batch_size,
                            **kwargs)