clarifai 9.7.0__py3-none-any.whl → 9.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/auth/__init__.py +6 -0
- clarifai/auth/helper.py +35 -36
- clarifai/auth/register.py +23 -0
- clarifai/{client → auth}/stub.py +10 -10
- clarifai/client/__init__.py +1 -4
- clarifai/client/app.py +483 -0
- clarifai/client/auth/__init__.py +4 -0
- clarifai/client/{abc.py → auth/abc.py} +2 -2
- clarifai/client/auth/helper.py +377 -0
- clarifai/client/auth/register.py +23 -0
- {clarifai_utils/client → clarifai/client/auth}/stub.py +10 -10
- clarifai/client/base.py +112 -0
- clarifai/client/dataset.py +290 -0
- clarifai/client/input.py +730 -0
- clarifai/client/lister.py +41 -0
- clarifai/client/model.py +218 -0
- clarifai/client/module.py +82 -0
- clarifai/client/user.py +125 -0
- clarifai/client/workflow.py +194 -0
- clarifai/datasets/upload/base.py +66 -0
- clarifai/datasets/upload/examples/README.md +31 -0
- clarifai/datasets/upload/examples/image_classification/cifar10/dataset.py +42 -0
- clarifai/datasets/upload/examples/image_classification/food-101/dataset.py +39 -0
- clarifai/datasets/upload/examples/text_classification/imdb_dataset/dataset.py +37 -0
- clarifai/{data_upload/datasets → datasets/upload}/features.py +4 -12
- clarifai/datasets/upload/image.py +156 -0
- clarifai/datasets/upload/loaders/README.md +49 -0
- clarifai/{data_upload/datasets/zoo → datasets/upload/loaders}/coco_captions.py +24 -21
- {clarifai_utils/data_upload/datasets/zoo → clarifai/datasets/upload/loaders}/coco_detection.py +46 -42
- clarifai/datasets/upload/loaders/coco_segmentation.py +166 -0
- clarifai/{data_upload/datasets/zoo → datasets/upload/loaders}/imagenet_classification.py +22 -12
- clarifai/{data_upload/datasets/zoo → datasets/upload/loaders}/xview_detection.py +44 -53
- clarifai/datasets/upload/text.py +50 -0
- clarifai/datasets/upload/utils.py +62 -0
- clarifai/errors.py +90 -0
- clarifai/urls/helper.py +16 -17
- clarifai/utils/logging.py +40 -0
- clarifai/utils/misc.py +33 -0
- clarifai/versions.py +6 -0
- {clarifai-9.7.0.dist-info → clarifai-9.7.2.dist-info}/LICENSE +1 -1
- clarifai-9.7.2.dist-info/METADATA +179 -0
- clarifai-9.7.2.dist-info/RECORD +350 -0
- clarifai_utils/auth/__init__.py +6 -0
- clarifai_utils/auth/helper.py +35 -36
- clarifai_utils/auth/register.py +23 -0
- clarifai_utils/auth/stub.py +127 -0
- clarifai_utils/client/__init__.py +1 -4
- clarifai_utils/client/app.py +483 -0
- clarifai_utils/client/auth/__init__.py +4 -0
- clarifai_utils/client/{abc.py → auth/abc.py} +2 -2
- clarifai_utils/client/auth/helper.py +377 -0
- clarifai_utils/client/auth/register.py +23 -0
- clarifai_utils/client/auth/stub.py +127 -0
- clarifai_utils/client/base.py +112 -0
- clarifai_utils/client/dataset.py +290 -0
- clarifai_utils/client/input.py +730 -0
- clarifai_utils/client/lister.py +41 -0
- clarifai_utils/client/model.py +218 -0
- clarifai_utils/client/module.py +82 -0
- clarifai_utils/client/user.py +125 -0
- clarifai_utils/client/workflow.py +194 -0
- clarifai_utils/datasets/upload/base.py +66 -0
- clarifai_utils/datasets/upload/examples/README.md +31 -0
- clarifai_utils/datasets/upload/examples/image_classification/cifar10/dataset.py +42 -0
- clarifai_utils/datasets/upload/examples/image_classification/food-101/dataset.py +39 -0
- clarifai_utils/datasets/upload/examples/text_classification/imdb_dataset/dataset.py +37 -0
- clarifai_utils/{data_upload/datasets → datasets/upload}/features.py +4 -12
- clarifai_utils/datasets/upload/image.py +156 -0
- clarifai_utils/datasets/upload/loaders/README.md +49 -0
- clarifai_utils/{data_upload/datasets/zoo → datasets/upload/loaders}/coco_captions.py +24 -21
- {clarifai/data_upload/datasets/zoo → clarifai_utils/datasets/upload/loaders}/coco_detection.py +46 -42
- clarifai_utils/datasets/upload/loaders/coco_segmentation.py +166 -0
- clarifai_utils/{data_upload/datasets/zoo → datasets/upload/loaders}/imagenet_classification.py +22 -12
- clarifai_utils/{data_upload/datasets/zoo → datasets/upload/loaders}/xview_detection.py +44 -53
- clarifai_utils/datasets/upload/text.py +50 -0
- clarifai_utils/datasets/upload/utils.py +62 -0
- clarifai_utils/errors.py +90 -0
- clarifai_utils/urls/helper.py +16 -17
- clarifai_utils/utils/logging.py +40 -0
- clarifai_utils/utils/misc.py +33 -0
- clarifai_utils/versions.py +6 -0
- clarifai/data_upload/README.md +0 -63
- clarifai/data_upload/convert_csv.py +0 -182
- clarifai/data_upload/datasets/base.py +0 -87
- clarifai/data_upload/datasets/image.py +0 -253
- clarifai/data_upload/datasets/text.py +0 -60
- clarifai/data_upload/datasets/zoo/README.md +0 -55
- clarifai/data_upload/datasets/zoo/coco_segmentation.py +0 -160
- clarifai/data_upload/examples/README.md +0 -5
- clarifai/data_upload/examples/image_classification/cifar10/dataset.py +0 -40
- clarifai/data_upload/examples/image_classification/food-101/dataset.py +0 -39
- clarifai/data_upload/examples/image_classification/food-101/images/beignets/1036242.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/beignets/1114182.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/beignets/2012944.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/beignets/2464389.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/beignets/478632.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/hamburger/1061270.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/hamburger/1202261.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/hamburger/1381751.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/hamburger/3289634.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/hamburger/862025.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/prime_rib/102197.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/prime_rib/2749372.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/prime_rib/2938268.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/prime_rib/3590861.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/prime_rib/746716.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/ramen/2955110.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/ramen/3208966.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/ramen/3270629.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/ramen/3424562.jpg +0 -0
- clarifai/data_upload/examples/image_classification/food-101/images/ramen/544680.jpg +0 -0
- clarifai/data_upload/examples/image_detection/voc/annotations/2007_000464.xml +0 -39
- clarifai/data_upload/examples/image_detection/voc/annotations/2008_000853.xml +0 -28
- clarifai/data_upload/examples/image_detection/voc/annotations/2008_003182.xml +0 -54
- clarifai/data_upload/examples/image_detection/voc/annotations/2008_008526.xml +0 -67
- clarifai/data_upload/examples/image_detection/voc/annotations/2009_004315.xml +0 -28
- clarifai/data_upload/examples/image_detection/voc/annotations/2009_004382.xml +0 -28
- clarifai/data_upload/examples/image_detection/voc/annotations/2011_000430.xml +0 -28
- clarifai/data_upload/examples/image_detection/voc/annotations/2011_001610.xml +0 -46
- clarifai/data_upload/examples/image_detection/voc/annotations/2011_006412.xml +0 -99
- clarifai/data_upload/examples/image_detection/voc/annotations/2012_000690.xml +0 -43
- clarifai/data_upload/examples/image_detection/voc/dataset.py +0 -76
- clarifai/data_upload/examples/image_detection/voc/images/2007_000464.jpg +0 -0
- clarifai/data_upload/examples/image_detection/voc/images/2008_000853.jpg +0 -0
- clarifai/data_upload/examples/image_detection/voc/images/2008_003182.jpg +0 -0
- clarifai/data_upload/examples/image_detection/voc/images/2008_008526.jpg +0 -0
- clarifai/data_upload/examples/image_detection/voc/images/2009_004315.jpg +0 -0
- clarifai/data_upload/examples/image_detection/voc/images/2009_004382.jpg +0 -0
- clarifai/data_upload/examples/image_detection/voc/images/2011_000430.jpg +0 -0
- clarifai/data_upload/examples/image_detection/voc/images/2011_001610.jpg +0 -0
- clarifai/data_upload/examples/image_detection/voc/images/2011_006412.jpg +0 -0
- clarifai/data_upload/examples/image_detection/voc/images/2012_000690.jpg +0 -0
- clarifai/data_upload/examples/image_segmentation/coco/annotations/instances_val2017_subset.json +0 -5342
- clarifai/data_upload/examples/image_segmentation/coco/dataset.py +0 -107
- clarifai/data_upload/examples/image_segmentation/coco/images/000000074646.jpg +0 -0
- clarifai/data_upload/examples/image_segmentation/coco/images/000000086956.jpg +0 -0
- clarifai/data_upload/examples/image_segmentation/coco/images/000000166563.jpg +0 -0
- clarifai/data_upload/examples/image_segmentation/coco/images/000000176857.jpg +0 -0
- clarifai/data_upload/examples/image_segmentation/coco/images/000000182202.jpg +0 -0
- clarifai/data_upload/examples/image_segmentation/coco/images/000000193245.jpg +0 -0
- clarifai/data_upload/examples/image_segmentation/coco/images/000000384850.jpg +0 -0
- clarifai/data_upload/examples/image_segmentation/coco/images/000000409630.jpg +0 -0
- clarifai/data_upload/examples/image_segmentation/coco/images/000000424349.jpg +0 -0
- clarifai/data_upload/examples/image_segmentation/coco/images/000000573008.jpg +0 -0
- clarifai/data_upload/examples/text_classification/imdb_dataset/dataset.py +0 -40
- clarifai/data_upload/examples.py +0 -17
- clarifai/data_upload/upload.py +0 -356
- clarifai/dataset_export/dataset_export_inputs.py +0 -205
- clarifai/listing/concepts.py +0 -37
- clarifai/listing/datasets.py +0 -37
- clarifai/listing/inputs.py +0 -111
- clarifai/listing/installed_module_versions.py +0 -40
- clarifai/listing/lister.py +0 -200
- clarifai/listing/models.py +0 -46
- clarifai/listing/module_versions.py +0 -42
- clarifai/listing/modules.py +0 -36
- clarifai/runners/base.py +0 -140
- clarifai/runners/example.py +0 -36
- clarifai-9.7.0.dist-info/METADATA +0 -99
- clarifai-9.7.0.dist-info/RECORD +0 -456
- clarifai_utils/data_upload/README.md +0 -63
- clarifai_utils/data_upload/convert_csv.py +0 -182
- clarifai_utils/data_upload/datasets/base.py +0 -87
- clarifai_utils/data_upload/datasets/image.py +0 -253
- clarifai_utils/data_upload/datasets/text.py +0 -60
- clarifai_utils/data_upload/datasets/zoo/README.md +0 -55
- clarifai_utils/data_upload/datasets/zoo/coco_segmentation.py +0 -160
- clarifai_utils/data_upload/examples/README.md +0 -5
- clarifai_utils/data_upload/examples/image_classification/cifar10/dataset.py +0 -40
- clarifai_utils/data_upload/examples/image_classification/food-101/dataset.py +0 -39
- clarifai_utils/data_upload/examples/image_classification/food-101/images/beignets/1036242.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/beignets/1114182.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/beignets/2012944.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/beignets/2464389.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/beignets/478632.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/hamburger/1061270.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/hamburger/1202261.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/hamburger/1381751.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/hamburger/3289634.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/hamburger/862025.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/prime_rib/102197.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/prime_rib/2749372.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/prime_rib/2938268.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/prime_rib/3590861.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/prime_rib/746716.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/ramen/2955110.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/ramen/3208966.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/ramen/3270629.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/ramen/3424562.jpg +0 -0
- clarifai_utils/data_upload/examples/image_classification/food-101/images/ramen/544680.jpg +0 -0
- clarifai_utils/data_upload/examples/image_detection/__init__.py +0 -0
- clarifai_utils/data_upload/examples/image_detection/voc/__init__.py +0 -0
- clarifai_utils/data_upload/examples/image_detection/voc/annotations/2007_000464.xml +0 -39
- clarifai_utils/data_upload/examples/image_detection/voc/annotations/2008_000853.xml +0 -28
- clarifai_utils/data_upload/examples/image_detection/voc/annotations/2008_003182.xml +0 -54
- clarifai_utils/data_upload/examples/image_detection/voc/annotations/2008_008526.xml +0 -67
- clarifai_utils/data_upload/examples/image_detection/voc/annotations/2009_004315.xml +0 -28
- clarifai_utils/data_upload/examples/image_detection/voc/annotations/2009_004382.xml +0 -28
- clarifai_utils/data_upload/examples/image_detection/voc/annotations/2011_000430.xml +0 -28
- clarifai_utils/data_upload/examples/image_detection/voc/annotations/2011_001610.xml +0 -46
- clarifai_utils/data_upload/examples/image_detection/voc/annotations/2011_006412.xml +0 -99
- clarifai_utils/data_upload/examples/image_detection/voc/annotations/2012_000690.xml +0 -43
- clarifai_utils/data_upload/examples/image_detection/voc/dataset.py +0 -76
- clarifai_utils/data_upload/examples/image_detection/voc/images/2007_000464.jpg +0 -0
- clarifai_utils/data_upload/examples/image_detection/voc/images/2008_000853.jpg +0 -0
- clarifai_utils/data_upload/examples/image_detection/voc/images/2008_003182.jpg +0 -0
- clarifai_utils/data_upload/examples/image_detection/voc/images/2008_008526.jpg +0 -0
- clarifai_utils/data_upload/examples/image_detection/voc/images/2009_004315.jpg +0 -0
- clarifai_utils/data_upload/examples/image_detection/voc/images/2009_004382.jpg +0 -0
- clarifai_utils/data_upload/examples/image_detection/voc/images/2011_000430.jpg +0 -0
- clarifai_utils/data_upload/examples/image_detection/voc/images/2011_001610.jpg +0 -0
- clarifai_utils/data_upload/examples/image_detection/voc/images/2011_006412.jpg +0 -0
- clarifai_utils/data_upload/examples/image_detection/voc/images/2012_000690.jpg +0 -0
- clarifai_utils/data_upload/examples/image_segmentation/__init__.py +0 -0
- clarifai_utils/data_upload/examples/image_segmentation/coco/__init__.py +0 -0
- clarifai_utils/data_upload/examples/image_segmentation/coco/annotations/instances_val2017_subset.json +0 -5342
- clarifai_utils/data_upload/examples/image_segmentation/coco/dataset.py +0 -107
- clarifai_utils/data_upload/examples/image_segmentation/coco/images/000000074646.jpg +0 -0
- clarifai_utils/data_upload/examples/image_segmentation/coco/images/000000086956.jpg +0 -0
- clarifai_utils/data_upload/examples/image_segmentation/coco/images/000000166563.jpg +0 -0
- clarifai_utils/data_upload/examples/image_segmentation/coco/images/000000176857.jpg +0 -0
- clarifai_utils/data_upload/examples/image_segmentation/coco/images/000000182202.jpg +0 -0
- clarifai_utils/data_upload/examples/image_segmentation/coco/images/000000193245.jpg +0 -0
- clarifai_utils/data_upload/examples/image_segmentation/coco/images/000000384850.jpg +0 -0
- clarifai_utils/data_upload/examples/image_segmentation/coco/images/000000409630.jpg +0 -0
- clarifai_utils/data_upload/examples/image_segmentation/coco/images/000000424349.jpg +0 -0
- clarifai_utils/data_upload/examples/image_segmentation/coco/images/000000573008.jpg +0 -0
- clarifai_utils/data_upload/examples/text_classification/__init__.py +0 -0
- clarifai_utils/data_upload/examples/text_classification/imdb_dataset/__init__.py +0 -0
- clarifai_utils/data_upload/examples/text_classification/imdb_dataset/dataset.py +0 -40
- clarifai_utils/data_upload/examples.py +0 -17
- clarifai_utils/data_upload/upload.py +0 -356
- clarifai_utils/dataset_export/dataset_export_inputs.py +0 -205
- clarifai_utils/listing/__init__.py +0 -0
- clarifai_utils/listing/concepts.py +0 -37
- clarifai_utils/listing/datasets.py +0 -37
- clarifai_utils/listing/inputs.py +0 -111
- clarifai_utils/listing/installed_module_versions.py +0 -40
- clarifai_utils/listing/lister.py +0 -200
- clarifai_utils/listing/models.py +0 -46
- clarifai_utils/listing/module_versions.py +0 -42
- clarifai_utils/listing/modules.py +0 -36
- clarifai_utils/runners/__init__.py +0 -0
- clarifai_utils/runners/base.py +0 -140
- clarifai_utils/runners/example.py +0 -36
- /clarifai/{data_upload/__init__.py → cli.py} +0 -0
- /clarifai/{data_upload/datasets → datasets}/__init__.py +0 -0
- /clarifai/{data_upload/datasets/zoo → datasets/upload}/__init__.py +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/__init__.py +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/cifar10/__init__.py +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/cifar10/cifar_small_test.csv +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/cifar10/cifar_small_train.csv +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_700.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_701.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_702.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_703.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_704.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_705.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_706.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_707.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_708.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_709.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/__init__.py +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/beignets/1420783.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/beignets/3287885.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/beignets/3617075.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/beignets/38052.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/beignets/39147.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/hamburger/139558.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/hamburger/1636096.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/hamburger/2480925.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/hamburger/3385808.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/hamburger/3647386.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/prime_rib/1826869.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/prime_rib/2243245.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/prime_rib/259212.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/prime_rib/2842688.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/prime_rib/3035414.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/ramen/1545393.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/ramen/2427642.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/ramen/3520891.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/ramen/377566.jpg +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/image_classification/food-101/images/ramen/503504.jpg +0 -0
- /clarifai/{data_upload/examples/image_detection → datasets/upload/examples/text_classification}/__init__.py +0 -0
- /clarifai/{data_upload/examples/image_detection/voc → datasets/upload/examples/text_classification/imdb_dataset}/__init__.py +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/text_classification/imdb_dataset/test.csv +0 -0
- /clarifai/{data_upload → datasets/upload}/examples/text_classification/imdb_dataset/train.csv +0 -0
- /clarifai/{data_upload/examples/image_segmentation → datasets/upload/loaders}/__init__.py +0 -0
- /clarifai/{data_upload/examples/image_segmentation/coco → utils}/__init__.py +0 -0
- {clarifai-9.7.0.dist-info → clarifai-9.7.2.dist-info}/WHEEL +0 -0
- {clarifai-9.7.0.dist-info → clarifai-9.7.2.dist-info}/entry_points.txt +0 -0
- {clarifai-9.7.0.dist-info → clarifai-9.7.2.dist-info}/top_level.txt +0 -0
- /clarifai/data_upload/examples/text_classification/__init__.py → /clarifai_utils/cli.py +0 -0
- {clarifai/data_upload/examples/text_classification/imdb_dataset → clarifai_utils/datasets}/__init__.py +0 -0
- {clarifai/listing → clarifai_utils/datasets/upload}/__init__.py +0 -0
- {clarifai/runners → clarifai_utils/datasets/upload/examples/image_classification}/__init__.py +0 -0
- /clarifai_utils/{data_upload → datasets/upload/examples/image_classification/cifar10}/__init__.py +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/cifar10/cifar_small_test.csv +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/cifar10/cifar_small_train.csv +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_700.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_701.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_702.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_703.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_704.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_705.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_706.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_707.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_708.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/cifar10/images/test_batch_709.jpg +0 -0
- /clarifai_utils/{data_upload/datasets → datasets/upload/examples/image_classification/food-101}/__init__.py +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/beignets/1420783.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/beignets/3287885.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/beignets/3617075.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/beignets/38052.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/beignets/39147.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/hamburger/139558.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/hamburger/1636096.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/hamburger/2480925.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/hamburger/3385808.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/hamburger/3647386.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/prime_rib/1826869.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/prime_rib/2243245.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/prime_rib/259212.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/prime_rib/2842688.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/prime_rib/3035414.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/ramen/1545393.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/ramen/2427642.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/ramen/3520891.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/ramen/377566.jpg +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/image_classification/food-101/images/ramen/503504.jpg +0 -0
- /clarifai_utils/{data_upload/datasets/zoo → datasets/upload/examples/text_classification}/__init__.py +0 -0
- /clarifai_utils/{data_upload/examples/image_classification → datasets/upload/examples/text_classification/imdb_dataset}/__init__.py +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/text_classification/imdb_dataset/test.csv +0 -0
- /clarifai_utils/{data_upload → datasets/upload}/examples/text_classification/imdb_dataset/train.csv +0 -0
- /clarifai_utils/{data_upload/examples/image_classification/cifar10 → datasets/upload/loaders}/__init__.py +0 -0
- /clarifai_utils/{data_upload/examples/image_classification/food-101 → utils}/__init__.py +0 -0
clarifai_utils/errors.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import json
|
|
3
|
+
import time
|
|
4
|
+
|
|
5
|
+
import requests # noqa
|
|
6
|
+
from google.protobuf.json_format import MessageToDict
|
|
7
|
+
|
|
8
|
+
from clarifai.versions import CLIENT_VERSION, OS_VER, PYTHON_VERSION
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TokenError(Exception):
|
|
12
|
+
pass
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ApiError(Exception):
|
|
16
|
+
""" API Server error """
|
|
17
|
+
|
|
18
|
+
def __init__(self, resource: str, params: dict, method: str,
|
|
19
|
+
response: requests.Response = None) -> None:
|
|
20
|
+
self.resource = resource
|
|
21
|
+
self.params = params
|
|
22
|
+
self.method = method
|
|
23
|
+
self.response = response
|
|
24
|
+
|
|
25
|
+
self.error_code = 'N/A'
|
|
26
|
+
self.error_desc = 'N/A'
|
|
27
|
+
self.error_details = 'N/A'
|
|
28
|
+
response_json = 'N/A'
|
|
29
|
+
|
|
30
|
+
if response is not None:
|
|
31
|
+
response_json_dict = MessageToDict(response)
|
|
32
|
+
|
|
33
|
+
self.error_code = response_json_dict.get('status', {}).get('code', None)
|
|
34
|
+
self.error_desc = response_json_dict.get('status', {}).get('description', None)
|
|
35
|
+
self.error_details = response_json_dict.get('status', {}).get('details', None)
|
|
36
|
+
response_json = json.dumps(response_json_dict['status'], indent=2)
|
|
37
|
+
|
|
38
|
+
current_ts_str = str(time.time())
|
|
39
|
+
|
|
40
|
+
msg = """%(method)s %(resource)s FAILED(%(time_ts)s). error_code: %(error_code)s, error_description: %(error_desc)s, error_details: %(error_details)s
|
|
41
|
+
>> Python client %(client_version)s with Python %(python_version)s on %(os_version)s
|
|
42
|
+
>> %(method)s %(resource)s
|
|
43
|
+
>> REQUEST(%(time_ts)s) %(request)s
|
|
44
|
+
>> RESPONSE(%(time_ts)s) %(response)s""" % {
|
|
45
|
+
'baseurl': '%s/v2/' % _base_url(self.resource),
|
|
46
|
+
'method': method,
|
|
47
|
+
'resource': resource,
|
|
48
|
+
'error_code': self.error_code,
|
|
49
|
+
'error_desc': self.error_desc,
|
|
50
|
+
'error_details': self.error_details,
|
|
51
|
+
'request': json.dumps(params, indent=2),
|
|
52
|
+
'response': response_json,
|
|
53
|
+
'time_ts': current_ts_str,
|
|
54
|
+
'client_version': CLIENT_VERSION,
|
|
55
|
+
'python_version': PYTHON_VERSION,
|
|
56
|
+
'os_version': OS_VER
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
super(ApiError, self).__init__(msg)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class ApiClientError(Exception):
|
|
63
|
+
""" API Client Error """
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class UserError(Exception):
|
|
67
|
+
""" User Error """
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class AuthError(Exception):
|
|
71
|
+
"""Raised when a client has missing or invalid authentication."""
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _base_url(url: str) -> str:
|
|
75
|
+
"""
|
|
76
|
+
Extracts the base URL from the url, which is everything before the 4th slash character.
|
|
77
|
+
https://www.clarifai.com/v2/models/1/output -> https://www.clarifai.com/v2/
|
|
78
|
+
"""
|
|
79
|
+
try:
|
|
80
|
+
return url[:_find_nth(url, '/', 4) + 1]
|
|
81
|
+
except:
|
|
82
|
+
return ''
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _find_nth(haystack: str, needle: str, n: int) -> int:
|
|
86
|
+
start = haystack.find(needle)
|
|
87
|
+
while start >= 0 and n > 1:
|
|
88
|
+
start = haystack.find(needle, start + len(needle))
|
|
89
|
+
n -= 1
|
|
90
|
+
return start
|
clarifai_utils/urls/helper.py
CHANGED
|
@@ -6,9 +6,9 @@ class ClarifaiUrlHelper(object):
|
|
|
6
6
|
|
|
7
7
|
def __init__(self, auth, module_manager_imv_id="module_manager_install"):
|
|
8
8
|
"""
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
Args:
|
|
10
|
+
auth: a ClarifaiAuthHelper object.
|
|
11
|
+
"""
|
|
12
12
|
self._auth = auth
|
|
13
13
|
self._module_manager_imv_id = module_manager_imv_id
|
|
14
14
|
|
|
@@ -39,12 +39,12 @@ class ClarifaiUrlHelper(object):
|
|
|
39
39
|
def clarifai_url(self, user_id, app_id, resource_type, resource_id, version_id: str = None):
|
|
40
40
|
"""This is the path to the resource in community.
|
|
41
41
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
42
|
+
Args:
|
|
43
|
+
user_id: the author of the resource.
|
|
44
|
+
app_id: the author's app the resource was created in.
|
|
45
|
+
resource_type: the type of resource. One of "modules", "models", "concepts", "inputs", "workflows", "tasks", "installed_module_versions"
|
|
46
|
+
resource_id: the resource ID
|
|
47
|
+
version_id: the version of the resource.
|
|
48
48
|
"""
|
|
49
49
|
if resource_type not in [
|
|
50
50
|
"modules", "models", "concepts", "inputs", "workflows", "tasks",
|
|
@@ -85,15 +85,14 @@ class ClarifaiUrlHelper(object):
|
|
|
85
85
|
|
|
86
86
|
@classmethod
|
|
87
87
|
def split_module_ui_url(cls, install):
|
|
88
|
-
"""Takes in a path like https://clarifai.com/zeiler/app/modules/module1/versions/2
|
|
89
|
-
to split it apart into it's IDs.
|
|
88
|
+
"""Takes in a path like https://clarifai.com/zeiler/app/modules/module1/versions/2 to split it apart into it's IDs.
|
|
90
89
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
90
|
+
Returns:
|
|
91
|
+
user_id: the author of the module.
|
|
92
|
+
app_id: the author's app the module was created in.
|
|
93
|
+
module_id: the module ID
|
|
94
|
+
module_version_id: the version of the module.
|
|
95
|
+
"""
|
|
97
96
|
user_id, app_id, resource_type, resource_id, resource_version_id = cls.split_clarifai_url(
|
|
98
97
|
install)
|
|
99
98
|
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from rich.logging import RichHandler
|
|
5
|
+
from rich.table import Table
|
|
6
|
+
from rich.traceback import install
|
|
7
|
+
|
|
8
|
+
install()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def table_from_dict(data, column_names, title="") -> Table:
  """Use this function for printing tables from a list of dicts."""
  tbl = Table(title=title, show_header=True, header_style="bold blue")
  for name in column_names:
    tbl.add_column(name)
  for record in data:
    # Missing keys render as empty cells rather than raising.
    tbl.add_row(*[record.get(name, "") for name in column_names])
  return tbl
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _get_library_name() -> str:
  """Return the top-level package name this module belongs to."""
  return __name__.partition(".")[0]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _configure_logger(logger_level: str = "ERROR") -> None:
  """Configure the root logger to emit through a Rich handler at the given level."""
  rich_handler = RichHandler(rich_tracebacks=True)
  logging.basicConfig(
      level=logger_level, datefmt='%Y-%m-%d %H:%M:%S', handlers=[rich_handler])
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_logger(logger_level: str = "ERROR", name: Optional[str] = None) -> logging.Logger:
  """Return a logger with the specified name (defaults to this library's package)."""
  resolved_name = _get_library_name() if name is None else name
  _configure_logger(logger_level)
  return logging.getLogger(resolved_name)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Chunker:
  """Split an input sequence into small chunks."""

  def __init__(self, seq: List, size: int) -> None:
    self.seq = seq
    self.size = size

  def chunk(self) -> List[List]:
    """Chunk input sequence."""
    step = self.size
    return [self.seq[offset:offset + step] for offset in range(0, len(self.seq), step)]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BackoffIterator:
  """Iterator that returns a sequence of backoff values."""

  def __init__(self):
    self.count = 0

  def __iter__(self):
    return self

  def __next__(self):
    # After the 7th yield the delay is capped: keep returning ~10s forever
    # without advancing the counter.
    if self.count >= 7:
      return 0.01 * (2**10)  # 10 seconds
    self.count += 1
    if self.count == 1:
      return 0.1
    return 0.01 * (2**(self.count + 4))
|
clarifai/data_upload/README.md
DELETED
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
## Data Upload into your app dataset in the Clarifai platform
|
|
2
|
-
|
|
3
|
-
The functionality here allows a user to upload datasets of the specified types and all sizes from a local directory to the Clarifai platform datasets
|
|
4
|
-
|
|
5
|
-
Supported dataset types currently are:
|
|
6
|
-
* Image classification
|
|
7
|
-
* Object detection
|
|
8
|
-
* Text classification
|
|
9
|
-
* Image segmentation
|
|
10
|
-
|
|
11
|
-
The `datasets` package holds methods to preprocess input data and generate input protos that are then sent as request objects in `upload.py` to the Clarifai api to upload into a particular dataset.
|
|
12
|
-
|
|
13
|
-
## Usage
|
|
14
|
-
|
|
15
|
-
* Create a dataset under any of your apps in the Clarifai platform.
|
|
16
|
-
|
|
17
|
-
#### Upload dataset from dataset package
|
|
18
|
-
* To upload the dataset from a (python)package, create a folder with the structure and files as below.
|
|
19
|
-
|
|
20
|
-
- Package Structure:
|
|
21
|
-
---------------------------
|
|
22
|
-
<folder_name>/
|
|
23
|
-
├──__init__.py
|
|
24
|
-
├── <Your local dir dataset>/
|
|
25
|
-
└──<Your local dir dataset>/dataset.py
|
|
26
|
-
`dataset.py` must implement a class named following the convention, `<dataset_name>Dataset`. This class must accept `split` as the only argument in the `__init__` method and must have a `dataloader()` generator method that formats your local dir dataset and yields either of `VisualClassificationFeatures()`, `VisualDetectionFeatures()`, `VisualSegmentationFeatures()` or `TextFeatures()` as defined in [clarifai/data_upload/datasets/features.py](datasets/features.py). Other methods can be added in the class as seen fit but `dataloader()` is the main method and must be named dataloader.
|
|
27
|
-
|
|
28
|
-
- In a python script (or in the commandline), import the `UploadConfig` class from upload module and then specify the dataset module path in the `from_module` parameter of the `UploadConfig` .i.e.
|
|
29
|
-
|
|
30
|
-
```python
|
|
31
|
-
from clarifai.data_upload.upload import UploadConfig
|
|
32
|
-
|
|
33
|
-
upload_obj = UploadConfig(
|
|
34
|
-
user_id="",
|
|
35
|
-
app_id="",
|
|
36
|
-
pat="", # Clarifai user PAT (not Clarifai app PAT)
|
|
37
|
-
dataset_id="",
|
|
38
|
-
task="<task-name>", # see supported tasks below
|
|
39
|
-
from_module="./path/to/dataset_package/<package-folder-name>",
|
|
40
|
-
split="val" # train, val or test depending on the dataset
|
|
41
|
-
)
|
|
42
|
-
# execute data upload to Clarifai app dataset
|
|
43
|
-
upload_obj.upload_to_clarifai()
|
|
44
|
-
```
|
|
45
|
-
See `examples/` and `examples.py` for reference.
|
|
46
|
-
|
|
47
|
-
For data upload from dataset zoo, see [clarifai/data_upload/datasets/zoo](datasets/zoo)
|
|
48
|
-
* Supported tasks:
|
|
49
|
-
* `text_clf` for text classification.
|
|
50
|
-
* `visual_clf` for image classification.
|
|
51
|
-
* `visual_detection` for object detection.
|
|
52
|
-
* `visual_segmentation` for image segmentation.
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
**NOTE**: For text classification datasets, change the base workflow in your clarifai app settings to a Text workflow for a successful upload.
|
|
56
|
-
|
|
57
|
-
## Notes
|
|
58
|
-
|
|
59
|
-
* For datasets not available in the datasets zoo, the user has to handle the preprocessing for their local datasets to convert them into compatible upload formats.
|
|
60
|
-
|
|
61
|
-
* An individual image can have multiple bounding boxes for the same or different classes and so `VisualDetectionFeatures()` classes and bounding boxes lists must match in length with each element of bounding boxes being a list of bbox coordinates ([`x_min, y_min, x_max, y_max`]) corresponding to a single class name in class_names.
|
|
62
|
-
|
|
63
|
-
* For Segmentation tasks, a single image can have multiple masks corresponding to different or the same classes, hence `VisualSegmentationFeatures()` classes and polygons must be lists of the same length as well. Polygons in turn contain lists with each list in turn having an `[x, y]` list points.
|
|
@@ -1,182 +0,0 @@
|
|
|
1
|
-
import csv
|
|
2
|
-
import os
|
|
3
|
-
import sys
|
|
4
|
-
from itertools import chain
|
|
5
|
-
|
|
6
|
-
import pandas as pd
|
|
7
|
-
from sklearn.model_selection import train_test_split
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class DataPreprocessor:
  """Interactively rewrites a labeled CSV into Clarifai text-classification
  upload format ('input.data.*' columns)."""

  def __init__(self, filename, multi_val, separator):
    """
    Args:
      filename: path to the source CSV.
      multi_val: 'y' when label cells may contain several separator-joined values.
      separator: delimiter used inside multi-value cells (None otherwise).
    """
    self.filename = filename
    self.multi_val = multi_val
    self.separator = separator
    self.df = pd.read_csv(filename)

  def process_data(self):
    """Prompt for the text/label columns, then convert self.df in place."""
    text_col = self.get_column("Enter the number of the column that contains the text: ")
    label_col = self.get_column(
        "Enter the number of the column that contains the labels: ", exclude=text_col)

    self.split_values(label_col)
    # Normalize every string cell to a 1-element list so the chain logic below is uniform.
    self.df[label_col] = self.df[label_col].apply(lambda x: [x] if isinstance(x, str) else x)

    # Use chain.from_iterable to expand the multi-values if applicable
    unique_labels = list(set(chain.from_iterable(self.df[label_col].values)))

    # Bug fix: corrected the 'unqiue' typo in this user-facing message.
    print(
        "\nThe following unique labels have been found in the '{}' column and will be used in the dataset:".
        format(label_col))
    for i, label in enumerate(unique_labels, start=1):
      print('{}) {}'.format(i, label))

    self.convert_for_classification(text_col, label_col, unique_labels)

  def get_column(self, prompt, exclude=None):
    """Let the user pick a column by number; auto-select when only one remains."""
    available_columns = self.df.columns.drop(exclude) if exclude else self.df.columns
    if len(available_columns) == 1:
      print(f'\nThe column named \'{available_columns[0]}\' will be used as the labels column.')
      return available_columns[0]
    else:
      for i, col in enumerate(available_columns):
        print(f'{i+1}) {col}')
      col_index = int(input(prompt)) - 1
      return available_columns[col_index]

  def split_values(self, label_col):
    """Split multi-value label cells on the configured separator (NaN floats pass through)."""
    if self.multi_val.lower() == 'y':
      self.df[label_col] = self.df[label_col].apply(
          lambda x: str(x).split(self.separator) if not isinstance(x, float) else x)

  def convert_for_classification(self, text_col, label_col, unique_labels):
    """Rewrite self.df into Clarifai binary or multi-class upload columns."""
    # Binary classification
    if len(unique_labels) == 2:
      print("Converting the CSV to be used with binary classification")
      self.df['input.data.text.raw'] = self.df[text_col]
      self.df['input.data.concepts[0].id'] = label_col
      self.df['input.data.concepts[0].value'] = self.df[label_col].apply(
          lambda x: 1 if unique_labels[0] in x else 0)
      self.df = self.df[[
          'input.data.text.raw', 'input.data.concepts[0].id', 'input.data.concepts[0].value'
      ]]

    # Multi-class classification
    else:
      print("Converting the CSV to be used with multi-class classification")
      self.df['input.data.text.raw'] = self.df[text_col].apply(
          lambda x: x[0] if isinstance(x, list) else x)
      for i in range(len(unique_labels)):
        self.df[f'input.data.concepts[{i}].id'] = self.df[label_col].apply(
            lambda x: unique_labels[i] if unique_labels[i] in x else '')
        self.df[f'input.data.concepts[{i}].value'] = self.df[label_col].apply(
            lambda x: 1 if unique_labels[i] in x else '')

      self.df = self.df[['input.data.text.raw'] +
                        [f'input.data.concepts[{i}].id' for i in range(len(unique_labels))] +
                        [f'input.data.concepts[{i}].value' for i in range(len(unique_labels))]]

      # Reorder the columns so each concept's id/value pair sits together.
      cols = self.df.columns.tolist()
      new_cols = cols[:1]  # The first column 'input.data.text.raw'
      pairs = [[cols[i], cols[i + len(unique_labels)]] for i in range(1, len(unique_labels) + 1)]
      for pair in pairs:
        new_cols.extend(pair)
      self.df = self.df[new_cols]

    # Strip the literal [ ] brackets from column names for the uploader.
    # Bug fix: raw string avoids invalid-escape-sequence warnings; the pattern
    # value is unchanged.
    self.df.columns = self.df.columns.str.replace(r"^[\[]|[\]]$", "", regex=True)
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
class DatasetSplitter:
  """Optionally splits a DataFrame into train/validation/test CSV files.

  Split layout and percentages are gathered interactively; output filenames
  are derived from the `filename_base` passed to split_and_save.
  """

  def __init__(self, df, split_dataset, shuffle_dataset, seed=555):
    """
    Args:
      df: the pandas DataFrame to split and save.
      split_dataset: 'y' to split into multiple files; anything else saves one file.
      shuffle_dataset: 'y' to shuffle rows before splitting.
      seed: random seed; an int, a numeric string straight from input(), or ''.
    """
    self.df = df
    self.split_dataset = split_dataset
    self.shuffle_dataset = shuffle_dataset
    # Bug fix: the caller passes the seed as a string from input(); sklearn's
    # random_state rejects strings, so coerce to int ('' keeps the default 555).
    self.seed = 555 if seed == '' else int(seed)

  def split_and_save(self, filename_base):
    """Split per the interactive choices and write the CSV file(s)."""
    if self.split_dataset.lower() == 'y':
      split_type = self.get_split_type()

      if split_type == 1:
        train_pct = self.get_percentage(
            'What percentage of the dataset should be used for training? Enter a number between 1 and 99: ',
            99)
        test_pct = 100 - train_pct
        print(f'Data will be split {train_pct}% train, {test_pct}% test')
      elif split_type == 2:
        train_pct = self.get_percentage(
            'What percentage of the dataset should be used for training? Enter a number between 1 and 98: ',
            98)
        max_val_pct = 99 - train_pct  # leave at least 1% for the test split
        val_pct = self.get_percentage(
            f'What percentage of the dataset should be used for validation? Enter a number between 1 and {max_val_pct}: ',
            max_val_pct)
        test_pct = 100 - train_pct - val_pct
        print(f'Data will be split {train_pct}% train, {val_pct}% validation, {test_pct}% test')

      train_df, test_df = train_test_split(
          self.df,
          test_size=test_pct / 100,
          random_state=self.seed,
          shuffle=self.shuffle_dataset.lower() == 'y')
      train_df.to_csv(filename_base + '-train.csv', index=False, quoting=csv.QUOTE_MINIMAL)
      test_df.to_csv(filename_base + '-test.csv', index=False, quoting=csv.QUOTE_MINIMAL)

      if split_type == 2:
        # Carve the validation set out of the train split.
        # NOTE(review): test_size here is val_pct of the *train* split, so the
        # final validation share is slightly below val_pct of the whole dataset.
        train_df, val_df = train_test_split(
            train_df,
            test_size=val_pct / 100,
            random_state=self.seed,
            shuffle=self.shuffle_dataset.lower() == 'y')
        val_df.to_csv(filename_base + '-validation.csv', index=False, quoting=csv.QUOTE_MINIMAL)
    else:
      # No split requested: save the whole DataFrame as a single CSV.
      self.df.to_csv(filename_base + '.csv', index=False, quoting=csv.QUOTE_MINIMAL)

  def get_split_type(self):
    """Ask which split layout to use; re-prompt until the answer is 1 or 2."""
    split_type = int(
        input(
            'How would you like to split the dataset?\n1) Train and test datasets\n2) Train, validate, and test datasets\n'
        ))
    while split_type not in [1, 2]:
      split_type = int(
          input(
              'Invalid option. Enter 1 for "Train and test" or 2 for "Train, validate, and test": '
          ))
    return split_type

  def get_percentage(self, prompt, max_pct):
    """Ask for a percentage between 1 and max_pct; re-prompt until valid."""
    pct = int(input(prompt))
    while not 1 <= pct <= max_pct:
      pct = int(input(f'Invalid input. Please enter a number between 1 and {max_pct}: '))
    return pct
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
def main():
  """Interactive entry point: convert a CSV into Clarifai upload format and
  optionally split it into train/validation/test files.

  Usage: python <script> <path/to/file.csv>
  """
  # The CSV path is the sole positional argument.
  filename = sys.argv[1]
  multi_val = input('Do any columns have multiple values? (y/[n]) ')
  # Only ask for a separator when multi-value columns exist.
  separator = input('Enter the separator: ') if multi_val.lower() == 'y' else None

  preprocessor = DataPreprocessor(filename, multi_val, separator)
  preprocessor.process_data()

  split_dataset = input('Would you like to split this dataset? (y/[n]) ')
  shuffle_dataset = 'n'
  seed = '555'

  if split_dataset.lower() == 'y':
    shuffle_dataset = input('Would you like to shuffle the dataset before splitting? (y/[n]) ')
    if shuffle_dataset.lower() == 'y':
      seed = input('Enter a seed integer or hit enter to use the default [555]: ')

  # NOTE(review): seed is still a str from input() here; verify that
  # DatasetSplitter converts it before it reaches sklearn's random_state.
  splitter = DatasetSplitter(preprocessor.df, split_dataset, shuffle_dataset, seed)
  # Output files are named after the input, with a '-clarifai' suffix.
  splitter.split_and_save(os.path.splitext(filename)[0] + '-clarifai')
  print("Done!")


if __name__ == "__main__":
  main()
|
|
@@ -1,87 +0,0 @@
|
|
|
1
|
-
from collections import defaultdict
|
|
2
|
-
from typing import Iterator, List, Tuple
|
|
3
|
-
|
|
4
|
-
from clarifai_grpc.grpc.api import resources_pb2
|
|
5
|
-
from google.protobuf.struct_pb2 import Struct
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class ClarifaiDataset:
  """
  Clarifai datasets base class.

  Subclasses implement proto extraction for a specific task type by overriding
  create_input_protos and _extract_protos.
  """

  def __init__(self, datagen_object: Iterator, dataset_id: str, split: str) -> None:
    self.datagen_object = datagen_object
    self.dataset_id = dataset_id
    self.split = split
    self.input_ids = []
    # input_id -> input proto
    self._all_input_protos = {}
    # input_id -> list of annotation protos for that input
    self._all_annotation_protos = defaultdict(list)

  def __len__(self) -> int:
    """
    Get size of all input protos
    """
    return len(self._all_input_protos)

  def _to_list(self, input_protos: Iterator) -> List:
    """
    Parse protos iterator to list.
    """
    return list(input_protos)

  def create_input_protos(self, image_path: str, label: str, input_id: str, dataset_id: str,
                          metadata: "Struct") -> "resources_pb2.Input":
    """
    Create input protos for each image, label input pair.
    Args:
      `image_path`: path to image.
      `label`: image label
      `input_id`: unique input id
      `dataset_id`: Clarifai dataset id
      `metadata`: input metadata
    Returns:
      An input proto representing a single row input
    Raises:
      NotImplementedError: must be overridden by a task-specific subclass.
    """
    raise NotImplementedError()

  def _extract_protos(self) -> None:
    """
    Create input image protos for each data generator item.
    Raises:
      NotImplementedError: must be overridden by a task-specific subclass.
    """
    raise NotImplementedError()

  def get_protos(self, input_ids: List[str]
                ) -> "Tuple[List[resources_pb2.Input], List[resources_pb2.Annotation]]":
    """
    Get input and annotation protos based on input_ids.
    Args:
      `input_ids`: List of input IDs to retrieve the protos for.
    Returns:
      Input and Annotation protos for the specified input IDs.
    """
    input_protos = [self._all_input_protos.get(input_id) for input_id in input_ids]
    annotation_protos = []
    if len(self._all_annotation_protos) > 0:
      # Bug fix: this previously read self._annotation_protos, an attribute
      # that is never created (__init__ defines _all_annotation_protos), so
      # any dataset with annotations raised AttributeError here. Also default
      # missing IDs to [] so the flatten below doesn't iterate None.
      annotation_protos = [
          self._all_annotation_protos.get(input_id, []) for input_id in input_ids
      ]
      annotation_protos = [
          ann_proto for ann_protos in annotation_protos for ann_proto in ann_protos
      ]

    return input_protos, annotation_protos
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
class Chunker:
  """
  Split an input sequence into small chunks.
  """

  def __init__(self, seq: List, size: int) -> None:
    self.seq = seq
    self.size = size

  def chunk(self) -> List[List]:
    """
    Chunk input sequence.
    """
    pieces = []
    for begin in range(0, len(self.seq), self.size):
      pieces.append(self.seq[begin:begin + self.size])
    return pieces