clarifai 10.8.4__py3-none-any.whl → 10.8.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/__init__.py +1 -1
- clarifai/client/dataset.py +9 -3
- clarifai/constants/dataset.py +1 -1
- clarifai/datasets/upload/base.py +6 -3
- clarifai/datasets/upload/features.py +10 -0
- clarifai/datasets/upload/image.py +22 -13
- clarifai/datasets/upload/multimodal.py +70 -0
- clarifai/datasets/upload/text.py +8 -5
- clarifai/runners/models/model_upload.py +67 -31
- clarifai/runners/utils/loader.py +0 -1
- clarifai/utils/misc.py +6 -0
- {clarifai-10.8.4.dist-info → clarifai-10.8.6.dist-info}/METADATA +2 -1
- {clarifai-10.8.4.dist-info → clarifai-10.8.6.dist-info}/RECORD +17 -60
- clarifai/models/model_serving/README.md +0 -158
- clarifai/models/model_serving/__init__.py +0 -14
- clarifai/models/model_serving/cli/__init__.py +0 -12
- clarifai/models/model_serving/cli/_utils.py +0 -53
- clarifai/models/model_serving/cli/base.py +0 -14
- clarifai/models/model_serving/cli/build.py +0 -79
- clarifai/models/model_serving/cli/clarifai_clis.py +0 -33
- clarifai/models/model_serving/cli/create.py +0 -171
- clarifai/models/model_serving/cli/example_cli.py +0 -34
- clarifai/models/model_serving/cli/login.py +0 -26
- clarifai/models/model_serving/cli/upload.py +0 -183
- clarifai/models/model_serving/constants.py +0 -21
- clarifai/models/model_serving/docs/cli.md +0 -161
- clarifai/models/model_serving/docs/concepts.md +0 -229
- clarifai/models/model_serving/docs/dependencies.md +0 -11
- clarifai/models/model_serving/docs/inference_parameters.md +0 -139
- clarifai/models/model_serving/docs/model_types.md +0 -19
- clarifai/models/model_serving/model_config/__init__.py +0 -16
- clarifai/models/model_serving/model_config/base.py +0 -369
- clarifai/models/model_serving/model_config/config.py +0 -312
- clarifai/models/model_serving/model_config/inference_parameter.py +0 -129
- clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +0 -25
- clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +0 -19
- clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +0 -20
- clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +0 -19
- clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +0 -19
- clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +0 -22
- clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +0 -32
- clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +0 -19
- clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +0 -19
- clarifai/models/model_serving/model_config/output.py +0 -133
- clarifai/models/model_serving/model_config/triton/__init__.py +0 -14
- clarifai/models/model_serving/model_config/triton/serializer.py +0 -136
- clarifai/models/model_serving/model_config/triton/triton_config.py +0 -182
- clarifai/models/model_serving/model_config/triton/wrappers.py +0 -281
- clarifai/models/model_serving/repo_build/__init__.py +0 -14
- clarifai/models/model_serving/repo_build/build.py +0 -198
- clarifai/models/model_serving/repo_build/static_files/_requirements.txt +0 -2
- clarifai/models/model_serving/repo_build/static_files/base_test.py +0 -169
- clarifai/models/model_serving/repo_build/static_files/inference.py +0 -26
- clarifai/models/model_serving/repo_build/static_files/sample_clarifai_config.yaml +0 -25
- clarifai/models/model_serving/repo_build/static_files/test.py +0 -40
- clarifai/models/model_serving/repo_build/static_files/triton/model.py +0 -75
- clarifai/models/model_serving/utils.py +0 -31
- {clarifai-10.8.4.dist-info → clarifai-10.8.6.dist-info}/LICENSE +0 -0
- {clarifai-10.8.4.dist-info → clarifai-10.8.6.dist-info}/WHEEL +0 -0
- {clarifai-10.8.4.dist-info → clarifai-10.8.6.dist-info}/entry_points.txt +0 -0
- {clarifai-10.8.4.dist-info → clarifai-10.8.6.dist-info}/top_level.txt +0 -0
    
        clarifai/__init__.py
    CHANGED
    
@@ -1 +1 @@
-__version__ = "10.8.4"
+__version__ = "10.8.6"
    
        clarifai/client/dataset.py
    CHANGED
    
@@ -25,6 +25,7 @@ from clarifai.datasets.export.inputs_annotations import (DatasetExportReader,
 from clarifai.datasets.upload.base import ClarifaiDataLoader
 from clarifai.datasets.upload.image import (VisualClassificationDataset, VisualDetectionDataset,
                                             VisualSegmentationDataset)
+from clarifai.datasets.upload.multimodal import MultiModalDataset
 from clarifai.datasets.upload.text import TextClassificationDataset
 from clarifai.datasets.upload.utils import DisplayUploadStatus
 from clarifai.errors import UserError
@@ -352,14 +353,15 @@ class Dataset(Lister, BaseClient):
       if input_details:
         failed_input_details = [
             index, failed_id, input_details.status.details,
-            dataset_obj.data_generator[index]
+            getattr(dataset_obj.data_generator[index], 'image_path', None) or
+            getattr(dataset_obj.data_generator[index], 'text', None),
            dataset_obj.data_generator[index].labels, dataset_obj.data_generator[index].metadata
        ]
        failed_inputs_logs.append(failed_input_details)

    failed_table = tabulate(
        failed_inputs_logs,
-        headers=["Index", "Input ID", "Status", "
+        headers=["Index", "Input ID", "Status", "Input", "Labels", "Metadata"],
        tablefmt="grid")
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    self.logger.warning(
@@ -422,7 +424,8 @@ class Dataset(Lister, BaseClient):
    if self.task not in DATASET_UPLOAD_TASKS:
      raise UserError("Task should be one of \
                      'text_classification', 'visual_classification', \
-                      'visual_detection', 'visual_segmentation', 'visual_captioning'"
+                      'visual_detection', 'visual_segmentation', 'visual_captioning', 'multimodal_dataset'"
+                     )

    if self.task == "text_classification":
      dataset_obj = TextClassificationDataset(dataloader, self.id)
@@ -433,6 +436,9 @@ class Dataset(Lister, BaseClient):
    elif self.task == "visual_segmentation":
      dataset_obj = VisualSegmentationDataset(dataloader, self.id)

+    elif self.task == "multimodal_dataset":
+      dataset_obj = MultiModalDataset(dataloader, self.id)
+
    else:  # visual_classification & visual_captioning
      dataset_obj = VisualClassificationDataset(dataloader, self.id)

    
        clarifai/constants/dataset.py
    CHANGED
    
    
    
        clarifai/datasets/upload/base.py
    CHANGED
    
@@ -4,21 +4,24 @@ from typing import Iterator, List, Tuple, TypeVar, Union
 from clarifai_grpc.grpc.api import resources_pb2

 from clarifai.constants.dataset import DATASET_UPLOAD_TASKS
-from clarifai.datasets.upload.features import (
+from clarifai.datasets.upload.features import (MultiModalFeatures, TextFeatures,
+                                               VisualClassificationFeatures,
                                                VisualDetectionFeatures, VisualSegmentationFeatures)

 OutputFeaturesType = TypeVar(
     'OutputFeaturesType',
     bound=Union[TextFeatures, VisualClassificationFeatures, VisualDetectionFeatures,
-                VisualSegmentationFeatures])
+                VisualSegmentationFeatures, MultiModalFeatures])


 class ClarifaiDataset:
   """Clarifai datasets base class."""

-  def __init__(self, data_generator: 'ClarifaiDataLoader', dataset_id: str
+  def __init__(self, data_generator: 'ClarifaiDataLoader', dataset_id: str,
+               max_workers: int = 4) -> None:
     self.data_generator = data_generator
     self.dataset_id = dataset_id
+    self.max_workers = max_workers
     self.all_input_ids = {}
     self._all_input_protos = {}
     self._all_annotation_protos = defaultdict(list)
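
Every dataset class below now accepts a max_workers argument (default 4) that sizes the ThreadPoolExecutor used in _extract_protos. A minimal sketch of passing a larger pool when constructing one of these classes directly; EmptyLoader is a hypothetical stand-in and assumes the usual ClarifaiDataLoader contract of task, __len__ and __getitem__ (in normal use Dataset.upload_dataset builds these objects for you with the default):

from clarifai.datasets.upload.base import ClarifaiDataLoader
from clarifai.datasets.upload.image import VisualClassificationDataset


class EmptyLoader(ClarifaiDataLoader):
  """Hypothetical stand-in loader with no items, only to show the new signature."""

  @property
  def task(self):
    return "visual_classification"

  def __len__(self):
    return 0

  def __getitem__(self, index):
    raise IndexError(index)


# max_workers is forwarded to ClarifaiDataset and used for the ThreadPoolExecutor.
dataset_obj = VisualClassificationDataset(EmptyLoader(), "my-dataset-id", max_workers=8)
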
        clarifai/datasets/upload/features.py
    CHANGED

@@ -49,3 +49,13 @@ class VisualSegmentationFeatures:
   metadata: Optional[dict] = None
   image_bytes: Optional[bytes] = None
   label_ids: Optional[List[str]] = None
+
+
+@dataclass
+class MultiModalFeatures:
+  """Multi-modal datasets preprocessing output features."""
+  text: str
+  image_bytes: str
+  labels: List[Union[str, int]] = None  # List[str or int] to cater for multi-class tasks
+  id: Optional[int] = None  # image_id
+  metadata: Optional[dict] = None
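
For reference, a minimal sketch of how a data loader item might populate the new dataclass (the file name and labels are made up), assuming a loader yields one MultiModalFeatures per example with either image_bytes or text set:

from clarifai.datasets.upload.features import MultiModalFeatures

# An image example: image_bytes is set, text can be left as None.
with open("cat.jpg", "rb") as f:  # hypothetical local file
  image_item = MultiModalFeatures(text=None, image_bytes=f.read(), labels=["cat"], id=1)

# A text example: image_bytes is None, so the uploader falls back to a text input.
text_item = MultiModalFeatures(text="a photo of a cat", image_bytes=None, labels=["cat"], id=2)
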
        clarifai/datasets/upload/image.py
    CHANGED

@@ -1,5 +1,4 @@
 import os
-import uuid
 from concurrent.futures import ThreadPoolExecutor
 from typing import List, Tuple, Type

@@ -8,12 +7,16 @@ from google.protobuf.struct_pb2 import Struct

 from clarifai.client.input import Inputs
 from clarifai.datasets.upload.base import ClarifaiDataLoader, ClarifaiDataset
+from clarifai.utils.misc import get_uuid


 class VisualClassificationDataset(ClarifaiDataset):

-  def __init__(self,
-
+  def __init__(self,
+               data_generator: Type[ClarifaiDataLoader],
+               dataset_id: str,
+               max_workers: int = 4) -> None:
+    super().__init__(data_generator, dataset_id, max_workers)

   def _extract_protos(self, batch_input_ids: List[str]
                      ) -> Tuple[List[resources_pb2.Input], List[resources_pb2.Annotation]]:
@@ -33,7 +36,7 @@ class VisualClassificationDataset(ClarifaiDataset):
       labels = data_item.labels if isinstance(data_item.labels,
                                               list) else [data_item.labels]  # clarifai concept
       label_ids = data_item.label_ids
-      input_id = f"{self.dataset_id}-{
+      input_id = f"{self.dataset_id}-{get_uuid(8)}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
       geo_info = data_item.geo_info
       if data_item.metadata is not None:
         metadata.update(data_item.metadata)
@@ -64,7 +67,7 @@ class VisualClassificationDataset(ClarifaiDataset):
               geo_info=geo_info,
               metadata=metadata))

-    with ThreadPoolExecutor(max_workers=
+    with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
       futures = [executor.submit(process_data_item, id) for id in batch_input_ids]
       for job in futures:
         job.result()
@@ -75,8 +78,11 @@ class VisualClassificationDataset(ClarifaiDataset):
 class VisualDetectionDataset(ClarifaiDataset):
   """Visual detection dataset proto class."""

-  def __init__(self,
-
+  def __init__(self,
+               data_generator: Type[ClarifaiDataLoader],
+               dataset_id: str,
+               max_workers: int = 4) -> None:
+    super().__init__(data_generator, dataset_id, max_workers)

   def _extract_protos(self, batch_input_ids: List[int]
                      ) -> Tuple[List[resources_pb2.Input], List[resources_pb2.Annotation]]:
@@ -101,7 +107,7 @@ class VisualDetectionDataset(ClarifaiDataset):
       else:
         label_ids = None
       bboxes = data_item.bboxes  # [[xmin,ymin,xmax,ymax],...,[xmin,ymin,xmax,ymax]]
-      input_id = f"{self.dataset_id}-{
+      input_id = f"{self.dataset_id}-{get_uuid(8)}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
       if data_item.metadata is not None:
         metadata.update(data_item.metadata)
       else:
@@ -135,7 +141,7 @@ class VisualDetectionDataset(ClarifaiDataset):
               bbox=bboxes[i],
               label_id=label_ids[i] if label_ids else None))

-    with ThreadPoolExecutor(max_workers=
+    with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
       futures = [executor.submit(process_data_item, id) for id in batch_input_ids]
       for job in futures:
         job.result()
@@ -146,8 +152,11 @@ class VisualDetectionDataset(ClarifaiDataset):
 class VisualSegmentationDataset(ClarifaiDataset):
   """Visual segmentation dataset proto class."""

-  def __init__(self,
-
+  def __init__(self,
+               data_generator: Type[ClarifaiDataLoader],
+               dataset_id: str,
+               max_workers: int = 4) -> None:
+    super().__init__(data_generator, dataset_id, max_workers)

   def _extract_protos(self, batch_input_ids: List[str]
                      ) -> Tuple[List[resources_pb2.Input], List[resources_pb2.Annotation]]:
@@ -172,7 +181,7 @@ class VisualSegmentationDataset(ClarifaiDataset):
       else:
         label_ids = None
       _polygons = data_item.polygons  # list of polygons: [[[x,y],...,[x,y]],...]
-      input_id = f"{self.dataset_id}-{
+      input_id = f"{self.dataset_id}-{get_uuid(8)}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
       if data_item.metadata is not None:
         metadata.update(data_item.metadata)
       else:
@@ -210,7 +219,7 @@ class VisualSegmentationDataset(ClarifaiDataset):
         except IndexError:
           continue

-    with ThreadPoolExecutor(max_workers=
+    with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
       futures = [executor.submit(process_data_item, id) for id in batch_input_ids]
       for job in futures:
         job.result()

        clarifai/datasets/upload/multimodal.py
    ADDED

@@ -0,0 +1,70 @@
+from concurrent.futures import ThreadPoolExecutor
+from typing import List, Tuple, Type
+
+from clarifai_grpc.grpc.api import resources_pb2
+from google.protobuf.struct_pb2 import Struct
+
+from clarifai.client.input import Inputs
+from clarifai.datasets.upload.base import ClarifaiDataLoader, ClarifaiDataset
+from clarifai.utils.misc import get_uuid
+
+
+class MultiModalDataset(ClarifaiDataset):
+
+  def __init__(self,
+               data_generator: Type[ClarifaiDataLoader],
+               dataset_id: str,
+               max_workers: int = 4) -> None:
+    super().__init__(data_generator, dataset_id, max_workers)
+
+  def _extract_protos(
+      self,
+      batch_input_ids: List[str],
+  ) -> Tuple[List[resources_pb2.Input]]:
+    """ Creats Multimodal (image and text) input protos for batch of input ids.
+        Args:
+            batch_input_ids: List of input IDs to retrieve the protos for.
+        Returns:
+            input_protos: List of input protos.
+
+        """
+    input_protos, annotation_protos = [], []
+
+    def process_data_item(id):
+      data_item = self.data_generator[id]
+      metadata = Struct()
+      image_bytes = data_item.image_bytes
+      text = data_item.text
+      labels = data_item.labels if isinstance(data_item.labels, list) else [data_item.labels]
+      id = get_uuid(8)
+      input_id = f"{self.dataset_id}-{id}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
+      if data_item.metadata is not None:
+        metadata.update(data_item.metadata)
+      else:
+        metadata = None
+
+      self.all_input_ids[id] = input_id
+      if data_item.image_bytes is not None:
+        input_protos.append(
+            Inputs.get_input_from_bytes(
+                input_id=input_id,
+                image_bytes=image_bytes,
+                dataset_id=self.dataset_id,
+                labels=labels,
+                metadata=metadata))
+      else:
+        input_protos.append(
+            Inputs.get_text_input(
+                input_id=input_id,
+                raw_text=text,
+                dataset_id=self.dataset_id,
+                labels=labels,
+                metadata=metadata))
+
+    with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+      futures = [executor.submit(process_data_item, id) for id in batch_input_ids]
+
+      for job in futures:
+        job.result()
+
+    return input_protos, annotation_protos
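
Putting the pieces together, a minimal sketch of uploading a multimodal dataset through the new path. The CSV file, column names and IDs are made up, and the sketch assumes the usual ClarifaiDataLoader contract (task, __len__, __getitem__), the existing Dataset.upload_dataset(dataloader=...) client API, and a CLARIFAI_PAT set in the environment:

import csv

from clarifai.client.dataset import Dataset
from clarifai.datasets.upload.base import ClarifaiDataLoader
from clarifai.datasets.upload.features import MultiModalFeatures


class CaptionLoader(ClarifaiDataLoader):
  """Hypothetical loader: each CSV row holds a caption and a label."""

  def __init__(self, csv_path):
    with open(csv_path) as f:
      self.rows = list(csv.DictReader(f))

  @property
  def task(self):
    return "multimodal_dataset"  # routes upload_dataset to MultiModalDataset

  def __len__(self):
    return len(self.rows)

  def __getitem__(self, index):
    row = self.rows[index]
    # image_bytes=None makes MultiModalDataset fall back to a text input.
    return MultiModalFeatures(
        text=row["caption"], image_bytes=None, labels=[row["label"]], id=index)


# Hypothetical IDs; authentication is read from the CLARIFAI_PAT environment variable.
dataset = Dataset(user_id="me", app_id="my-app", dataset_id="captions")
dataset.upload_dataset(dataloader=CaptionLoader("captions.csv"))
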
    
        clarifai/datasets/upload/text.py
    CHANGED
    
@@ -1,4 +1,3 @@
-import uuid
 from concurrent.futures import ThreadPoolExecutor
 from typing import List, Tuple, Type

@@ -6,6 +5,7 @@ from clarifai_grpc.grpc.api import resources_pb2
 from google.protobuf.struct_pb2 import Struct

 from clarifai.client.input import Inputs
+from clarifai.utils.misc import get_uuid

 from .base import ClarifaiDataLoader, ClarifaiDataset

@@ -13,8 +13,11 @@ from .base import ClarifaiDataLoader, ClarifaiDataset
 class TextClassificationDataset(ClarifaiDataset):
   """Upload text classification datasets to clarifai datasets"""

-  def __init__(self,
-
+  def __init__(self,
+               data_generator: Type[ClarifaiDataLoader],
+               dataset_id: str,
+               max_workers: int = 4) -> None:
+    super().__init__(data_generator, dataset_id, max_workers)

   def _extract_protos(self, batch_input_ids: List[int]
                      ) -> Tuple[List[resources_pb2.Input], List[resources_pb2.Annotation]]:
@@ -34,7 +37,7 @@ class TextClassificationDataset(ClarifaiDataset):
       labels = data_item.labels if isinstance(data_item.labels,
                                               list) else [data_item.labels]  # clarifai concept
       label_ids = data_item.label_ids
-      input_id = f"{self.dataset_id}-{
+      input_id = f"{self.dataset_id}-{get_uuid(8)}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
       if data_item.metadata is not None:
         metadata.update(data_item.metadata)

@@ -48,7 +51,7 @@ class TextClassificationDataset(ClarifaiDataset):
             label_ids=label_ids,
             metadata=metadata))

-    with ThreadPoolExecutor(max_workers=
+    with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
       futures = [executor.submit(process_data_item, id) for id in batch_input_ids]
       for job in futures:
         job.result()

        clarifai/runners/models/model_upload.py
    CHANGED

@@ -28,12 +28,11 @@ class ModelUploader:
   ]

   def __init__(self, folder: str):
+    self._client = None
     self.folder = self._validate_folder(folder)
     self.config = self._load_config(os.path.join(self.folder, 'config.yaml'))
-    self.initialize_client()
     self.model_proto = self._get_model_proto()
     self.model_id = self.model_proto.id
-    self.user_app_id = self.client.user_app_id
     self.inference_compute_info = self._get_inference_compute_info()
     self.is_v3 = True  # Do model build for v3

@@ -42,6 +41,8 @@ class ModelUploader:
     if not folder.startswith("/"):
       folder = os.path.join(os.getcwd(), folder)
     print(f"Validating folder: {folder}")
+    if not os.path.exists(folder):
+      raise FileNotFoundError(f"Folder {folder} not found, please provide a valid folder path")
     files = os.listdir(folder)
     assert "requirements.txt" in files, "requirements.txt not found in the folder"
     assert "config.yaml" in files, "config.yaml not found in the folder"
@@ -56,18 +57,21 @@ class ModelUploader:
       config = yaml.safe_load(file)
     return config

-
-
-
-
-
-
-
+  @property
+  def client(self):
+    if self._client is None:
+      assert "model" in self.config, "model info not found in the config file"
+      model = self.config.get('model')
+      assert "user_id" in model, "user_id not found in the config file"
+      assert "app_id" in model, "app_id not found in the config file"
+      user_id = model.get('user_id')
+      app_id = model.get('app_id')

-
+      base = os.environ.get('CLARIFAI_API_BASE', 'https://api-dev.clarifai.com')

-
-
+      self._client = BaseClient(user_id=user_id, app_id=app_id, base=base)
+      print(f"Client initialized for user {user_id} and app {app_id}")
+    return self._client

   def _get_model_proto(self):
     assert "model" in self.config, "model info not found in the config file"
@@ -127,7 +131,14 @@ class ModelUploader:

     # Get the Python version from the config file
     build_info = self.config.get('build_info', {})
-
+    if 'python_version' in build_info:
+      python_version = build_info['python_version']
+      print(f"Using Python version {python_version} from the config file to build the Dockerfile")
+    else:
+      print(
+          f"Python version not found in the config file, using default Python version: {self.DEFAULT_PYTHON_VERSION}"
+      )
+      python_version = self.DEFAULT_PYTHON_VERSION

     # Replace placeholders with actual values
     dockerfile_content = dockerfile_template.safe_substitute(
@@ -153,7 +164,12 @@ class ModelUploader:
       assert "repo_id" in self.config.get("checkpoints"), "No repo_id specified in the config file"
       repo_id = self.config.get("checkpoints").get("repo_id")

-
+      # prefer env var for HF_TOKEN but if not provided then use the one from config.yaml if any.
+      if 'HF_TOKEN' in os.environ:
+        hf_token = os.environ['HF_TOKEN']
+      else:
+        hf_token = self.config.get("checkpoints").get("hf_token", None)
+        assert hf_token != 'hf_token', "The default 'hf_token' is not valid. Please provide a valid token or leave that field out of config.yaml if not needed."
       loader = HuggingFaceLoarder(repo_id=repo_id, token=hf_token)

       checkpoint_path = os.path.join(self.folder, '1', 'checkpoints')
@@ -230,6 +246,7 @@ class ModelUploader:
       print(
           f"Status: {response.status.description}, "
           f"Progress: {percent_completed}% - {details} ",
+          f"request_id: {response.status.req_id}",
           end='\r',
           flush=True)
     print()
@@ -247,24 +264,35 @@ class ModelUploader:
       file_size = os.path.getsize(file_path)
       chunk_size = int(127 * 1024 * 1024)  # 127MB chunk size
       num_chunks = (file_size // chunk_size) + 1
-
+      print("Uploading file...")
+      print("File size: ", file_size)
+      print("Chunk size: ", chunk_size)
+      print("Number of chunks: ", num_chunks)
       read_so_far = 0
       for part_id in range(num_chunks):
-
-
-
-
-
-
-
-
-
+        try:
+          chunk_size = min(chunk_size, file_size - read_so_far)
+          chunk = f.read(chunk_size)
+          if not chunk:
+            break
+          read_so_far += len(chunk)
+          yield service_pb2.PostModelVersionsUploadRequest(
+              content_part=resources_pb2.UploadContentPart(
+                  data=chunk,
+                  part_number=part_id + 1,
+                  range_start=read_so_far,
+              ))
+        except Exception as e:
+          print(f"\nError uploading file: {e}")
+          break
+
+    if read_so_far == file_size:
+      print("\nUpload complete!, waiting for model build...")

   def init_upload_model_version(self, model_version, file_path):
     file_size = os.path.getsize(file_path)
-    print(
-
-    )
+    print(f"Uploading model version '{model_version.id}' of model {self.model_proto.id}")
+    print(f"Using file '{os.path.basename(file_path)}' of size: {file_size} bytes")
     return service_pb2.PostModelVersionsUploadRequest(
         upload_config=service_pb2.PostModelVersionsUploadConfig(
             user_app_id=self.client.user_app_id,
@@ -290,7 +318,7 @@ class ModelUploader:
       elif status_code == status_code_pb2.MODEL_TRAINED:
         print("\nModel build complete!")
         print(
-            f"Check out the model at https://clarifai.com/{self.user_app_id.user_id}/apps/{self.user_app_id.app_id}/models/{self.model_id}/versions/{model_version_id}"
+            f"Check out the model at https://clarifai.com/{self.client.user_app_id.user_id}/apps/{self.client.user_app_id.app_id}/models/{self.model_id}/versions/{model_version_id}"
         )
         break
       else:
@@ -298,9 +326,10 @@ class ModelUploader:
         break


-def main(folder):
+def main(folder, download_checkpoints):
   uploader = ModelUploader(folder)
-
+  if download_checkpoints:
+    uploader.download_checkpoints()
   uploader.create_dockerfile()
   input("Press Enter to continue...")
   uploader.upload_model_version()
@@ -310,6 +339,13 @@ if __name__ == "__main__":
   parser = argparse.ArgumentParser()
   parser.add_argument(
       '--model_path', type=str, help='Path of the model folder to upload', required=True)
+  # flag to default to not download checkpoints
+  parser.add_argument(
+      '--download_checkpoints',
+      action='store_true',
+      help=
+      'Flag to download checkpoints before uploading and including them in the tar file that is uploaded. Defaults to False, which will attempt to download them at docker build time.',
+  )
   args = parser.parse_args()

-  main(args.model_path)
+  main(args.model_path, args.download_checkpoints)
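
The uploader is normally run as a script with --model_path and, optionally, the new --download_checkpoints flag. A sketch of the equivalent programmatic use, mirroring main() above (the folder path is made up; without download_checkpoints, checkpoints are fetched at docker build time instead of being bundled in the upload):

from clarifai.runners.models.model_upload import ModelUploader

uploader = ModelUploader("models/my_model")  # hypothetical folder containing config.yaml, requirements.txt, etc.
# uploader.download_checkpoints()            # only when the --download_checkpoints behaviour is wanted
uploader.create_dockerfile()
uploader.upload_model_version()
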
    
        clarifai/runners/utils/loader.py
    CHANGED
    
@@ -57,7 +57,6 @@ class HuggingFaceLoarder:
   def validate_download(self, checkpoint_path: str):
     # check if model exists on HF
     from huggingface_hub import list_repo_files
-
     return (len(os.listdir(checkpoint_path)) >= len(list_repo_files(self.repo_id))) and len(
         list_repo_files(self.repo_id)) > 0

    
        clarifai/utils/misc.py
    CHANGED
    
@@ -1,4 +1,5 @@
 import os
+import uuid
 from typing import Any, Dict, List

 from clarifai.errors import UserError
@@ -69,3 +70,8 @@ def concept_relations_accumulation(relations_dict: Dict[str, Any], subject_conce
     relations_dict[object_concept] = []
     relations_dict[subject_concept] = []
   return relations_dict
+
+
+def get_uuid(val: int) -> str:
+  """Generates a UUID."""
+  return uuid.uuid4().hex[:val]
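
The dataset upload classes above use this helper to build input IDs when a data item has no id of its own. A short usage sketch (the dataset ID is made up):

from clarifai.utils.misc import get_uuid

# get_uuid(8) returns the first 8 hex characters of a random uuid4,
# e.g. something like "3f9c1a2b" (the value differs on every call).
suffix = get_uuid(8)
input_id = f"my-dataset-id-{suffix}"  # mirrors the f"{self.dataset_id}-{get_uuid(8)}" pattern above
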
        {clarifai-10.8.4.dist-info → clarifai-10.8.6.dist-info}/METADATA
    CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: clarifai
-Version: 10.8.4
+Version: 10.8.6
 Summary: Clarifai Python SDK
 Home-page: https://github.com/Clarifai/clarifai-python
 Author: Clarifai
@@ -32,6 +32,7 @@ Requires-Dist: Pillow >=9.5.0
 Requires-Dist: inquirerpy ==0.3.4
 Requires-Dist: tabulate >=0.9.0
 Requires-Dist: protobuf ==5.27.3
+Requires-Dist: fsspec ==2024.6.1
 Provides-Extra: all
 Requires-Dist: pycocotools ==2.0.6 ; extra == 'all'