clarifai 10.3.0__py3-none-any.whl → 10.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/client/input.py +32 -9
- clarifai/client/model.py +355 -36
- clarifai/client/search.py +90 -15
- clarifai/constants/model.py +1 -0
- clarifai/constants/search.py +2 -1
- clarifai/datasets/upload/features.py +4 -0
- clarifai/datasets/upload/image.py +25 -2
- clarifai/datasets/upload/loaders/coco_captions.py +7 -2
- clarifai/datasets/upload/loaders/coco_detection.py +7 -2
- clarifai/datasets/upload/text.py +2 -0
- clarifai/models/model_serving/README.md +3 -0
- clarifai/models/model_serving/cli/upload.py +65 -68
- clarifai/models/model_serving/docs/cli.md +17 -6
- clarifai/rag/rag.py +1 -1
- clarifai/rag/utils.py +2 -2
- clarifai/versions.py +1 -1
- {clarifai-10.3.0.dist-info → clarifai-10.3.2.dist-info}/METADATA +20 -3
- {clarifai-10.3.0.dist-info → clarifai-10.3.2.dist-info}/RECORD +22 -22
- {clarifai-10.3.0.dist-info → clarifai-10.3.2.dist-info}/LICENSE +0 -0
- {clarifai-10.3.0.dist-info → clarifai-10.3.2.dist-info}/WHEEL +0 -0
- {clarifai-10.3.0.dist-info → clarifai-10.3.2.dist-info}/entry_points.txt +0 -0
- {clarifai-10.3.0.dist-info → clarifai-10.3.2.dist-info}/top_level.txt +0 -0
    
        clarifai/client/search.py
    CHANGED
    
@@ -10,7 +10,8 @@ from schema import SchemaError
 from clarifai.client.base import BaseClient
 from clarifai.client.input import Inputs
 from clarifai.client.lister import Lister
-from clarifai.constants.search import DEFAULT_SEARCH_METRIC, DEFAULT_TOP_K
+from clarifai.constants.search import (DEFAULT_SEARCH_ALGORITHM, DEFAULT_SEARCH_METRIC,
+                                       DEFAULT_TOP_K)
 from clarifai.errors import UserError
 from clarifai.schema.search import get_schema
 
@@ -20,8 +21,10 @@ class Search(Lister, BaseClient):
   def __init__(self,
                user_id: str,
                app_id: str,
-               top_k: int = DEFAULT_TOP_K,
+               top_k: int = None,
                metric: str = DEFAULT_SEARCH_METRIC,
+               algorithm: str = DEFAULT_SEARCH_ALGORITHM,
+               pagination: bool = False,
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
                token: str = None,
@@ -33,6 +36,8 @@ class Search(Lister, BaseClient):
         app_id (str): App ID.
         top_k (int, optional): Top K results to retrieve. Defaults to 10.
         metric (str, optional): Similarity metric (either 'cosine' or 'euclidean'). Defaults to 'cosine'.
+        algorithm (str, optional): Search algorithm (either 'nearest_neighbor' or 'brute_force'). Defaults to 'nearest_neighbor'.
+        pagination (bool, optional): Enable pagination. Defaults to False.
         base_url (str, optional): Base API url. Defaults to "https://api.clarifai.com".
         pat (str, optional): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
         token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
@@ -40,16 +45,27 @@ class Search(Lister, BaseClient):
 
     Raises:
         UserError: If the metric is not 'cosine' or 'euclidean'.
+        UserError: If the algorithm is not 'nearest_neighbor' or 'brute_force'.
     """
     if metric not in ["cosine", "euclidean"]:
       raise UserError("Metric should be either cosine or euclidean")
+    if algorithm not in ["nearest_neighbor", "brute_force"]:
+      raise UserError("Algorithm should be either nearest_neighbor or brute_force")
+    if metric == "cosine" and algorithm == "nearest_neighbor":
+      raise UserError("Cosine distance metric is not supported with nearest neighbor algorithm")
+    if top_k and pagination:
+      raise UserError(
+          "top_k and pagination cannot be used together. Please set pagination to False.")
+    if not top_k and not pagination:
+      top_k = DEFAULT_TOP_K
 
     self.user_id = user_id
     self.app_id = app_id
     self.metric_distance = dict(cosine="COSINE_DISTANCE", euclidean="EUCLIDEAN_DISTANCE")[metric]
+    self.algorithm = algorithm
     self.data_proto = resources_pb2.Data()
     self.top_k = top_k
-
+    self.pagination = pagination
     self.inputs = Inputs(
         user_id=self.user_id, app_id=self.app_id, pat=pat, token=token, base_url=base_url)
     self.rank_filter_schema = get_schema()
@@ -159,9 +175,8 @@ class Search(Lister, BaseClient):
         geo_point=resources_pb2.GeoPoint(longitude=longitude, latitude=latitude),
         geo_limit=resources_pb2.GeoLimit(type="withinKilometers", value=geo_limit))
 
-  def list_all_pages_generator(
-      self, endpoint: Callable[..., Any], proto_message: Any,
-      request_data: Dict[str, Any]) -> Generator[Dict[str, Any], None, None]:
+  def _list_topk_generator(self, endpoint: Callable[..., Any], proto_message: Any,
+                           request_data: Dict[str, Any]) -> Generator[Dict[str, Any], None, None]:
     """Lists all pages of a resource.
 
         Args:
@@ -199,12 +214,61 @@ class Search(Lister, BaseClient):
       total_hits += per_page
       yield response
 
-  def query(self, ranks=[{}], filters=[{}]):
+  def _list_all_pages_generator(self,
+                                endpoint: Callable,
+                                proto_message: Any,
+                                request_data: Dict[str, Any],
+                                page_no: int = None,
+                                per_page: int = None) -> Generator[Dict[str, Any], None, None]:
+    """Lists pages of a resource.
+
+    Args:
+        endpoint (Callable): The endpoint to call.
+        proto_message (Any): The proto message to use.
+        request_data (dict): The request data to use.
+        page_no (int): The page number to list.
+        per_page (int): The number of items per page.
+
+    Yields:
+        response_dict: The next item in the listing.
+    """
+    page = 1 if not page_no else page_no
+    if page_no and not per_page:
+      per_page = self.default_page_size
+    while True:
+      request_data['pagination'] = service_pb2.Pagination(page=page, per_page=per_page)
+      response = self._grpc_request(endpoint, proto_message(**request_data))
+      dict_response = MessageToDict(response, preserving_proto_field_name=True)
+      if response.status.code != status_code_pb2.SUCCESS:
+        if "page * perPage cannot exceed" in str(response.status.details):
+          msg = (f"Your pagination is set to {page_no*per_page}. "
+                 f"The current pagination settings exceed the limit. Please reach out to "
+                 f"support@clarifai.com to request an increase for your use case.\n"
+                 f"req_id: {response.status.req_id}")
+          raise UserError(msg)
+        else:
+          raise Exception(f"Listing failed with response {response!r}")
+      if 'hits' not in list(dict_response.keys()):
+        break
+      yield response
+      if page_no is not None or per_page is not None:
+        break
+      page += 1
+
+  def query(
+      self,
+      ranks=[{}],
+      filters=[{}],
+      page_no: int = None,
+      per_page: int = None,
+  ):
     """Perform a query with rank and filters.
 
     Args:
         ranks (List[Dict], optional): List of rank parameters. Defaults to [{}].
         filters (List[Dict], optional): List of filter parameters. Defaults to [{}].
+        page_no (int): The page number to list.
+        per_page (int): The number of items per page.
 
     Returns:
         Generator[Dict[str, Any], None, None]: A generator of query results.
@@ -217,13 +281,16 @@ class Search(Lister, BaseClient):
 
         Vector search over inputs
         >>> from clarifai.client.search import Search
-        >>> search = Search(user_id='user_id', app_id='app_id', top_k=1, metric='cosine')
-        >>> res = search.query(ranks=[{'image_url': 'https://samples.clarifai.com/dog.tiff'}])
+        >>> search = Search(user_id='user_id', app_id='app_id', metric='cosine', pagination=True)
+        >>> res = search.query(ranks=[{'image_url': 'https://samples.clarifai.com/dog.tiff'}], page_no=2, per_page=5)
 
     Note:
         For schema of rank and filter, please refer to [schema](https://github.com/Clarifai/clarifai-python/tree/master/clarifai/schema/search.py).
        For more detailed search examples, please refer to [examples](https://github.com/Clarifai/examples/tree/main/search).
     """
+    if not self.pagination and (per_page or page_no):
+      raise UserError("Pagination settings are only available when pagination is enabled."
+                      "Please set Search(pagination=True) while initializing Search().")
     try:
       self.rank_filter_schema.validate(ranks)
       self.rank_filter_schema.validate(filters)
@@ -249,11 +316,15 @@ class Search(Lister, BaseClient):
           searches=[
               resources_pb2.Search(
                   query=resources_pb2.Query(ranks=all_ranks, filters=all_filters),
+                  algorithm=self.algorithm,
                   metric=self.metric_distance)
           ])
-      return self.list_all_pages_generator(self.STUB.PostInputsSearches,
-                                           service_pb2.PostInputsSearchesRequest,
-                                           request_data)
+      if self.pagination:
+        return self._list_all_pages_generator(self.STUB.PostInputsSearches,
+                                              service_pb2.PostInputsSearchesRequest, request_data,
+                                              page_no, per_page)
+      return self._list_topk_generator(self.STUB.PostInputsSearches,
+                                       service_pb2.PostInputsSearchesRequest, request_data)
 
     # Calls PostAnnotationsSearches for annotation ranks, filters
     filters_annot_proto = []
@@ -269,8 +340,12 @@ class Search(Lister, BaseClient):
         searches=[
             resources_pb2.Search(
                 query=resources_pb2.Query(ranks=all_ranks, filters=all_filters),
+                algorithm=self.algorithm,
                 metric=self.metric_distance)
         ])
-    return self.list_all_pages_generator(self.STUB.PostAnnotationsSearches,
-                                         service_pb2.PostAnnotationsSearchesRequest,
-                                         request_data)
+    if self.pagination:
+      return self._list_all_pages_generator(self.STUB.PostAnnotationsSearches,
+                                            service_pb2.PostAnnotationsSearchesRequest,
+                                            request_data, page_no, per_page)
+    return self._list_topk_generator(self.STUB.PostAnnotationsSearches,
+                                     service_pb2.PostAnnotationsSearchesRequest, request_data)
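
Taken together, the search.py changes split retrieval into a fixed top-k mode and an explicit pagination mode, and expose the search algorithm. A minimal usage sketch based only on the signatures above (the user/app IDs are placeholders, and a CLARIFAI_PAT environment variable is assumed for authentication):

```python
from clarifai.client.search import Search

# Fixed top-k mode (the default): 'euclidean' is compatible with the default
# 'nearest_neighbor' algorithm; results arrive through a single generator.
search = Search(user_id="user_id", app_id="app_id", top_k=5, metric="euclidean")
for response in search.query(ranks=[{"image_url": "https://samples.clarifai.com/dog.tiff"}]):
    print(response.hits)

# Pagination mode: leave top_k unset; the new validation requires
# 'brute_force' when the metric is 'cosine'.
paginated = Search(
    user_id="user_id", app_id="app_id", metric="cosine",
    algorithm="brute_force", pagination=True)
for response in paginated.query(
        ranks=[{"image_url": "https://samples.clarifai.com/dog.tiff"}], page_no=2, per_page=5):
    print(response.hits)
```
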
    
        clarifai/constants/model.py
    CHANGED
    
    
    
        clarifai/constants/search.py
    CHANGED
    
    
    clarifai/datasets/upload/features.py
CHANGED

@@ -10,6 +10,7 @@ class TextFeatures:
   labels: List[Union[str, int]]  # List[str or int] to cater for multi-class tasks
   id: Optional[int] = None  # text_id
   metadata: Optional[dict] = None
+  label_ids: Optional[List[str]] = None
 
 
 @dataclass
@@ -21,6 +22,7 @@ class VisualClassificationFeatures:
   id: Optional[int] = None  # image_id
   metadata: Optional[dict] = None
   image_bytes: Optional[bytes] = None
+  label_ids: Optional[List[str]] = None
 
 
 @dataclass
@@ -33,6 +35,7 @@ class VisualDetectionFeatures:
   id: Optional[int] = None  # image_id
   metadata: Optional[dict] = None
   image_bytes: Optional[bytes] = None
+  label_ids: Optional[List[str]] = None
 
 
 @dataclass
@@ -45,3 +48,4 @@ class VisualSegmentationFeatures:
   id: Optional[int] = None  # image_id
   metadata: Optional[dict] = None
   image_bytes: Optional[bytes] = None
+  label_ids: Optional[List[str]] = None
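
The new optional label_ids field lets a data loader pin each label to an explicit concept ID instead of relying on the label name alone. A hedged sketch of a loader item (the text, label, and ID values are made-up placeholders; text and labels are the existing fields used by text.py):

```python
from clarifai.datasets.upload.features import TextFeatures

# label_ids is optional; when provided it should parallel `labels`
# (the detection/segmentation datasets below assert equal lengths).
item = TextFeatures(
    text="Great battery life, would buy again.",
    labels=["positive"],          # concept name(s)
    label_ids=["id-positive"],    # hypothetical concept ID(s)
    id=42,
)
```
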

    clarifai/datasets/upload/image.py
CHANGED

@@ -32,6 +32,7 @@ class VisualClassificationDataset(ClarifaiDataset):
       image_path = data_item.image_path
       labels = data_item.labels if isinstance(data_item.labels,
                                               list) else [data_item.labels]  # clarifai concept
+      label_ids = data_item.label_ids
       input_id = f"{self.dataset_id}-{uuid.uuid4().hex[:8]}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
       geo_info = data_item.geo_info
       if data_item.metadata is not None:
@@ -49,6 +50,7 @@ class VisualClassificationDataset(ClarifaiDataset):
               image_bytes=data_item.image_bytes,
               dataset_id=self.dataset_id,
               labels=labels,
+              label_ids=label_ids,
               geo_info=geo_info,
               metadata=metadata))
       else:
@@ -58,6 +60,7 @@ class VisualClassificationDataset(ClarifaiDataset):
               image_file=image_path,
               dataset_id=self.dataset_id,
               labels=labels,
+              label_ids=label_ids,
               geo_info=geo_info,
               metadata=metadata))
 
@@ -91,6 +94,12 @@ class VisualDetectionDataset(ClarifaiDataset):
       metadata = Struct()
       image = data_item.image_path
       labels = data_item.labels  # list:[l1,...,ln]
+      if data_item.label_ids is not None:
+        assert len(labels) == len(
+            data_item.label_ids), "Length of labels and label_ids must be equal"
+        label_ids = data_item.label_ids
+      else:
+        label_ids = None
       bboxes = data_item.bboxes  # [[xmin,ymin,xmax,ymax],...,[xmin,ymin,xmax,ymax]]
       input_id = f"{self.dataset_id}-{uuid.uuid4().hex[:8]}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
       if data_item.metadata is not None:
@@ -120,7 +129,11 @@ class VisualDetectionDataset(ClarifaiDataset):
       # one id could have more than one bbox and label
       for i in range(len(bboxes)):
         annotation_protos.append(
-            Inputs.get_bbox_proto(input_id=input_id, label=labels[i], bbox=bboxes[i]))
+            Inputs.get_bbox_proto(
+                input_id=input_id,
+                label=labels[i],
+                bbox=bboxes[i],
+                label_id=label_ids[i] if label_ids else None))
 
     with ThreadPoolExecutor(max_workers=4) as executor:
       futures = [executor.submit(process_data_item, id) for id in batch_input_ids]
@@ -152,6 +165,12 @@ class VisualSegmentationDataset(ClarifaiDataset):
       metadata = Struct()
       image = data_item.image_path
       labels = data_item.labels
+      if data_item.label_ids is not None:
+        assert len(labels) == len(
+            data_item.label_ids), "Length of labels and label_ids must be equal"
+        label_ids = data_item.label_ids
+      else:
+        label_ids = None
       _polygons = data_item.polygons  # list of polygons: [[[x,y],...,[x,y]],...]
       input_id = f"{self.dataset_id}-{uuid.uuid4().hex[:8]}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
       if data_item.metadata is not None:
@@ -183,7 +202,11 @@ class VisualSegmentationDataset(ClarifaiDataset):
       for i, _polygon in enumerate(_polygons):
         try:
           annotation_protos.append(
-              Inputs.get_mask_proto(input_id=input_id, label=labels[i], polygons=_polygon))
+              Inputs.get_mask_proto(
+                  input_id=input_id,
+                  label=labels[i],
+                  polygons=_polygon,
+                  label_id=label_ids[i] if label_ids else None))
         except IndexError:
           continue
 
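
For detection (and segmentation), label_ids must line up one-to-one with labels so that each bounding box or polygon can carry its own concept ID through get_bbox_proto/get_mask_proto. A sketch of a detection item under those assumptions (paths, labels, and IDs are placeholders; bboxes follow the [xmin, ymin, xmax, ymax] convention noted above):

```python
from clarifai.datasets.upload.features import VisualDetectionFeatures

# Two boxes, two labels, two label_ids -- the new assertion requires
# len(labels) == len(label_ids) whenever label_ids is provided.
item = VisualDetectionFeatures(
    image_path="/data/images/street_007.jpg",            # hypothetical local path
    labels=["car", "person"],
    bboxes=[[0.10, 0.20, 0.45, 0.60], [0.50, 0.15, 0.70, 0.90]],
    label_ids=["id-car", "id-person"],                    # hypothetical concept IDs
    id=7,
)
```
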

    clarifai/datasets/upload/loaders/coco_captions.py
CHANGED

@@ -2,12 +2,17 @@
 
 import os
 
-from pycocotools.coco import COCO
-
 from clarifai.datasets.upload.base import ClarifaiDataLoader
 
 from ..features import VisualClassificationFeatures
 
+#pycocotools is a dependency for this loader
+try:
+  from pycocotools.coco import COCO
+except ImportError:
+  raise ImportError("Could not import pycocotools package. "
+                    "Please do `pip install 'clarifai[all]'` to import pycocotools.")
+
 
 class COCOCaptionsDataLoader(ClarifaiDataLoader):
   """COCO Image Captioning Dataset."""

    clarifai/datasets/upload/loaders/coco_detection.py
CHANGED

@@ -2,12 +2,17 @@
 
 import os
 
-from pycocotools.coco import COCO
-
 from ..base import ClarifaiDataLoader
 
 from ..features import VisualDetectionFeatures
 
+#pycocotools is a dependency for this loader
+try:
+  from pycocotools.coco import COCO
+except ImportError:
+  raise ImportError("Could not import pycocotools package. "
+                    "Please do `pip install 'clarifai[all]'` to import pycocotools.")
+
 
 class COCODetectionDataLoader(ClarifaiDataLoader):
 
    
        clarifai/datasets/upload/text.py
    CHANGED
    
@@ -32,6 +32,7 @@ class TextClassificationDataset(ClarifaiDataset):
       text = data_item.text
       labels = data_item.labels if isinstance(data_item.labels,
                                               list) else [data_item.labels]  # clarifai concept
+      label_ids = data_item.label_ids
       input_id = f"{self.dataset_id}-{id}" if data_item.id is None else f"{self.dataset_id}-{str(data_item.id)}"
       if data_item.metadata is not None:
         metadata.update(data_item.metadata)
@@ -43,6 +44,7 @@ class TextClassificationDataset(ClarifaiDataset):
             raw_text=text,
             dataset_id=self.dataset_id,
             labels=labels,
+            label_ids=label_ids,
             metadata=metadata))
 
     with ThreadPoolExecutor(max_workers=4) as executor:

    clarifai/models/model_serving/README.md
CHANGED

@@ -141,6 +141,9 @@ Get your PAT from https://clarifai.com/settings/security and pass it here: <inse
 Upload
 
 ```bash
+# upload built file directly
+$ clarifai upload model <your-working-dir> --user-app <your_user_id>/<your_app_id> --id <your_model_id>
+# or using direct download url of cloud storage
 $ clarifai upload model --url <url> --user-app <your_user_id>/<your_app_id> --id <your_model_id>
 ```
 

    clarifai/models/model_serving/cli/upload.py
CHANGED

@@ -14,14 +14,11 @@
 import argparse
 import os
 import subprocess
-from typing import Union
 
-from clarifai.
-from clarifai.models.api import Models
-from clarifai.models.model_serving.model_config import (MODEL_TYPES, get_model_config,
-                                                         load_user_config)
+from clarifai.models.model_serving.model_config import get_model_config, load_user_config
 from clarifai.models.model_serving.model_config.inference_parameter import InferParamManager
 
+from ..constants import BUILT_MODEL_EXT
 from ..utils import login
 from .base import BaseClarifaiCli
 
@@ -51,7 +48,9 @@ class UploadModelSubCli(BaseClarifaiCli):
         "Path to working dir to get clarifai_config.yaml or path to yaml. Default is current directory",
         default=".")
     upload_parser.add_argument(
-        "--url", type=str, required=True, help="Direct download url of zip file")
+        "--url", type=str, required=False, help="Direct download url of zip file", default=None)
+    upload_parser.add_argument(
+        "--file", type=str, required=False, help="Local built file", default=None)
     upload_parser.add_argument("--id", type=str, required=False, help="Model ID")
     upload_parser.add_argument(
         "--user-app",
@@ -62,7 +61,10 @@ class UploadModelSubCli(BaseClarifaiCli):
         "--no-test",
         action="store_true",
         help="Trigger this flag to skip testing before uploading")
-
+    upload_parser.add_argument(
+        "--no-resume",
+        action="store_true",
+        help="Trigger this flag to not resume uploading local file")
     upload_parser.add_argument(
         "--update-version",
         action="store_true",
@@ -73,10 +75,13 @@ class UploadModelSubCli(BaseClarifaiCli):
 
   def __init__(self, args: argparse.Namespace) -> None:
     self.no_test = args.no_test
+    self.no_resume = args.no_resume
 
     working_dir_or_config = args.path
     # if input a config file, then not running test
     if working_dir_or_config.endswith(".yaml"):
+      # to folder
+      working_dir_or_config = os.path.split(working_dir_or_config)[0]
       config_yaml_path = working_dir_or_config
       self.test_path = None
       self.no_test = True
@@ -89,12 +94,27 @@ class UploadModelSubCli(BaseClarifaiCli):
         f"`{config_yaml_path}` does not exist")
     self.config = load_user_config(cfg_path=config_yaml_path)
 
+    self.file = args.file
+    self.url = args.url
+    if self.file:
+      assert self.url, ValueError("Provide either file or url, but got both.")
+      assert os.path.exists(self.file), FileNotFoundError
+    elif self.url:
+      assert self.url.startswith("http") or self.url.startswith(
+          "s3"), f"Invalid url supported http or s3 url. Got {self.url}"
+      self.file = None
+    else:
+      for _fname in os.listdir(working_dir_or_config):
+        if _fname.endswith(BUILT_MODEL_EXT):
+          self.file = os.path.join(working_dir_or_config, _fname)
+          break
+      assert self.file, ValueError(
+          f"Not using url/file but also not found built file with extension {BUILT_MODEL_EXT}")
+
     self.user_id, self.app_id = "", ""
     user_app = args.user_app
     self.url: str = args.url
     self.update_version = args.update_version
-    assert self.url.startswith("http") or self.url.startswith(
-        "s3"), f"Invalid url supported http or s3 url. Got {self.url}"
 
     clarifai_cfg = self.config.clarifai_model
     self.url: str = args.url
@@ -111,17 +131,10 @@ class UploadModelSubCli(BaseClarifaiCli):
       ) == 2, f"id must be combination of user_id and app_id separated by `/`, e.g. <user_id>/<app_id>. Got {args.id}"
       self.user_id, self.app_id = user_app
 
-    if self.user_id:
-      os.environ["CLARIFAI_USER_ID"] = self.user_id
-    if self.app_id:
-      os.environ["CLARIFAI_APP_ID"] = self.app_id
-
-    _user_id = os.environ.get("CLARIFAI_USER_ID", None)
-    _app_id = os.environ.get("CLARIFAI_APP_ID", None)
-    assert _user_id or _app_id, f"Missing user-id or app-id, got user-id {_user_id} and app-id {_app_id}"
     login()
 
   def run(self):
+    from clarifai.client import App, Model
 
     # Run test before uploading
     if not self.no_test:
@@ -129,54 +142,38 @@ class UploadModelSubCli(BaseClarifaiCli):
       result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}", shell=True)
       assert result.returncode == 0, "Test has failed. Please make sure no error exists in your code."
 
-  # post model
-  resp = model_api.upload_model(
-      model_id=model_id,
-      model_zip_url=model_url,
-      model_type=model_type,
-      input=clarifai_key_map.input_fields_map,
-      outputs=clarifai_key_map.output_fields_map,
-      description=desc,
-      param_specs=inference_parameters)
-  # response
-  if resp["status"]["code"] != "SUCCESS":
-    raise Exception("Post models failed, details: {}, {}".format(resp["status"]["description"],
-                                                                 resp["status"]["details"]))
-  else:
-    print("Success!")
-    print(f'Model version: {resp["model"]["model_version"]["id"]}')
+    clarifai_key_map = get_model_config(model_type=self.type).clarifai_model.field_maps
+    # inference parameters
+    inference_parameters = None
+    if isinstance(self.infer_param, str) and os.path.isfile(self.infer_param):
+      inference_parameters = InferParamManager(json_path=self.infer_param).get_list_params()
+    inputs = clarifai_key_map.input_fields_map
+    outputs = clarifai_key_map.output_fields_map
+
+    # if updating new version of existing model
+    def update_version():
+      model = Model(model_id=self.id, app_id=self.app_id)
+      if self.url:
+        model.create_version_by_url(
+            url=self.url,
+            input_field_maps=inputs,
+            output_field_maps=outputs,
+            inference_parameter_configs=inference_parameters,
+            description=self.desc)
+      elif self.file:
+        model.create_version_by_file(
+            file_path=self.file,
+            input_field_maps=inputs,
+            output_field_maps=outputs,
+            inference_parameter_configs=inference_parameters,
+            no_resume=self.no_resume,
+            description=self.desc)
+      else:
+        raise ValueError
+
+    if self.update_version:
+      update_version()
+    else:
+      # creating new model
+      _ = App(app_id=self.app_id).create_model(self.id, model_type_id=self.type)
+      update_version()

    clarifai/models/model_serving/docs/cli.md
CHANGED

@@ -127,7 +127,15 @@ optional arguments:
 
 5. Upload
 
-This step will 
+This step will execute test.py in the specified working directory by default before proceeding with the build. You can upload your built file directly from the working directory to the platform, or upload it to cloud storage and provide the direct URL during the upload process.
+
+Use the following command to upload your built file directly to the platform. It will upload the `*.clarifai` file. *Note*: only file sizes from 5MiB to 5GiB are supported.
+
+```bash
+$ clarifai upload model <your_working_dir>
+```
+
+or upload with a direct download url
 
 ```bash
 $ clarifai upload model <your_working_dir> --url <your url>
@@ -141,10 +149,13 @@ positional arguments:
   path                  Path to working dir to get clarifai_config.yaml or path to yaml. Default is current directory
 
 optional arguments:
-  --url URL            Direct download url of zip file
-  --id ID              Model ID
-  --user-app USER_APP  User ID and App ID separated by '/', e.g., <user_id>/<app_id>
-  --no-test            Trigger this flag to skip testing before uploading
-  --update-version     Update exist model with new version
+  -h, --help           show this help message and exit
+  --url URL            Direct download url of zip file
+  --file FILE          Local built file
+  --id ID              Model ID
+  --user-app USER_APP  User ID and App ID separated by '/', e.g., <user_id>/<app_id>
+  --no-test            Trigger this flag to skip testing before uploading
+  --no-resume          Trigger this flag to not resume uploading local file
+  --update-version     Update exist model with new version
 
 ```
    
        clarifai/rag/rag.py
    CHANGED
    
@@ -110,7 +110,7 @@ class RAG:
     prompter_model_params = {"params": params}
 
     ## Create rag-prompter model and version
-    model_id = f"prompter-{workflow_id}" if workflow_id is not None else f"rag-prompter-{now_ts}"
+    model_id = f"prompter-{workflow_id}-{now_ts}" if workflow_id is not None else f"rag-prompter-{now_ts}"
     prompter_model = app.create_model(model_id=model_id, model_type_id="rag-prompter")
     prompter_model = prompter_model.create_version(output_info=prompter_model_params)
 
    
        clarifai/rag/utils.py
    CHANGED
    
@@ -67,7 +67,7 @@ def load_documents(file_path: str = None, folder_path: str = None, url: str = No
   #document loaders for folderpath
   if folder_path:
     documents = SimpleDirectoryReader(
-        input_dir=Path(folder_path), required_exts=[".pdf", ".docx"]).load_data()
+        input_dir=Path(folder_path), required_exts=[".pdf", ".docx", ".txt"]).load_data()
 
   #document loaders for url
   if url:
@@ -111,7 +111,7 @@ def split_document(text: str, chunk_size: int, chunk_overlap: int, **kwargs) ->
     from llama_index.core.node_parser.text import SentenceSplitter
   except ImportError:
     raise ImportError("Could not import llama index package. "
-                      "Please install it with `pip install llama-index-core==0.10.
+                      "Please install it with `pip install llama-index-core==0.10.24`.")
   #document
   text_parser = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs)
   text_chunks = text_parser.split_text(text)
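
For reference, the split_document helper patched above can be exercised on its own once llama-index-core is installed at the pinned version; a small sketch (the text and chunk sizes are arbitrary):

```python
from clarifai.rag.utils import split_document

text = "Clarifai's RAG utilities split long documents into overlapping chunks. " * 50

# chunk_size/chunk_overlap are passed straight through to SentenceSplitter.
chunks = split_document(text, chunk_size=128, chunk_overlap=16)
print(len(chunks), chunks[0][:80])
```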