PyPI - clarifai - Versions diffs - 10.8.2__py3-none-any.whl → 10.8.4__py3-none-any.whl - Mend

clarifai 10.8.2__py3-none-any.whl → 10.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

clarifai/__init__.py +1 -1
clarifai/client/app.py +3 -4
clarifai/client/model.py +1 -2
clarifai/models/model_serving/repo_build/static_files/base_test.py +4 -4
clarifai/runners/__init__.py +14 -0
clarifai/runners/dockerfile_template/Dockerfile.cpu.template +31 -0
clarifai/runners/dockerfile_template/Dockerfile.cuda.template +129 -0
clarifai/runners/models/__init__.py +0 -0
clarifai/runners/models/base_typed_model.py +235 -0
clarifai/runners/models/model_class.py +41 -0
clarifai/runners/models/model_runner.py +175 -0
clarifai/runners/models/model_servicer.py +79 -0
clarifai/runners/models/model_upload.py +315 -0
clarifai/runners/server.py +130 -0
clarifai/runners/utils/__init__.py +0 -0
clarifai/runners/utils/data_handler.py +231 -0
clarifai/runners/utils/data_utils.py +15 -0
clarifai/runners/utils/loader.py +71 -0
clarifai/runners/utils/logging.py +6 -0
clarifai/runners/utils/url_fetcher.py +42 -0
clarifai/utils/logging.py +212 -6
{clarifai-10.8.2.dist-info → clarifai-10.8.4.dist-info}/METADATA +3 -2
{clarifai-10.8.2.dist-info → clarifai-10.8.4.dist-info}/RECORD +27 -11
{clarifai-10.8.2.dist-info → clarifai-10.8.4.dist-info}/LICENSE +0 -0
{clarifai-10.8.2.dist-info → clarifai-10.8.4.dist-info}/WHEEL +0 -0
{clarifai-10.8.2.dist-info → clarifai-10.8.4.dist-info}/entry_points.txt +0 -0
{clarifai-10.8.2.dist-info → clarifai-10.8.4.dist-info}/top_level.txt +0 -0

clarifai/runners/utils/data_handler.py ADDED Viewed

@@ -0,0 +1,231 @@
+from typing import Dict, List, Tuple, Union
+import numpy as np
+from clarifai_grpc.grpc.api import resources_pb2
+from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
+from PIL import Image
+from clarifai.client.auth.helper import ClarifaiAuthHelper
+from .data_utils import bytes_to_image, image_to_bytes
+class BaseDataHandler:
+  """Wrapper exposing a Clarifai `Input`/`Output` proto as plain Python values.
+  Each supported data type has a read property and a matching `set_*` method
+  that writes into the wrapped proto in place.
+  """
+  def __init__(self,
+               proto: Union[resources_pb2.Input, resources_pb2.Output],
+               auth: ClarifaiAuthHelper = None):
+    """
+    Args:
+      proto: Proto message that the accessors read from and write to.
+      auth: Optional auth helper; it is only stored by this class.
+    """
+    self._proto = proto
+    self._auth = auth
+  def to_python(self):
+    """Return a dict holding the `text`, `image` and `audio` values of the proto."""
+    return dict(text=self.text, image=self.image, audio=self.audio)
+  # ---------------- Start get/setters ---------------- #
+  # Proto
+  @property
+  def proto(self):
+    """The wrapped proto message."""
+    return self._proto
+  # Status
+  @property
+  def status(self) -> status_pb2.Status:
+    """The `status` sub-message of the wrapped proto."""
+    return self._proto.status
+  def set_status(self, code: int, description: str = ""):
+    """Set the status code (a `status_code_pb2` value) and its description."""
+    self._proto.status.code = code
+    self._proto.status.description = description
+  # Text
+  @property
+  def text(self) -> Union[None, str]:
+    """Raw text of the proto, or None when no text is set.
+    Raises:
+      NotImplementedError: the text message is populated but has no `raw` value.
+    """
+    data = self._proto.data.text
+    if not data.ByteSize():
+      return None
+    if data.raw:
+      return data.raw
+    raise NotImplementedError
+  def set_text(self, text: str):
+    """Store `text` as the raw text of the proto."""
+    self._proto.data.text.raw = text
+  # Image
+  @property
+  def image(self, format: str = "np") -> Union[None, Image.Image, np.ndarray]:
+    """Decoded image of the proto, or None when no image bytes are set.
+    Because this is a property, `format` cannot be supplied by callers and is
+    always "np", so the image is returned as a uint8 numpy array.
+    Raises:
+      NotImplementedError: only a url (no bytes) is set.
+    """
+    data: resources_pb2.Image = self._proto.data.image
+    if not data.ByteSize():
+      return None
+    if data.base64:
+      raw = data.base64
+    elif data.url:
+      raise NotImplementedError
+    else:
+      # Only metadata is populated: there are no bytes to decode.
+      return None
+    image = bytes_to_image(raw)
+    if format == "np":
+      image = np.asarray(image).astype("uint8")
+    return image
+  def set_image(self, image: Union[Image.Image, np.ndarray]):
+    """Encode `image` with `image_to_bytes` and store the bytes in the proto."""
+    if isinstance(image, np.ndarray):
+      image = Image.fromarray(image)
+    self._proto.data.image.base64 = image_to_bytes(image)
+  # Audio
+  @property
+  def audio(self) -> Union[None, bytes]:
+    """Audio bytes of the proto, or None when no audio bytes are set."""
+    data = self._proto.data.audio
+    if data.ByteSize() and data.base64:
+      return data.base64
+    return None
+  def set_audio(self, audio: bytes):
+    """Store `audio` as the audio bytes of the proto."""
+    self._proto.data.audio.base64 = audio
+  # Bboxes
+  @property
+  def bboxes(self, real_coord: bool = False, image_width: int = None,
+             image_height: int = None) -> Tuple[List, List, List]:
+    """Regions of the proto as `(xyxy, scores, concepts)`.
+    Because this is a property, the extra parameters cannot be supplied by
+    callers, so the boxes are always returned as stored (not scaled).
+    Returns:
+      Three parallel lists: `[x1, y1, x2, y2]` boxes, region values, and the id
+      of each region's first concept (None when a region has no concepts).
+    """
+    if real_coord:
+      # Both dimensions are needed to scale the coordinates.
+      assert (image_height and
+              image_width), "image_height and image_width are required when returning real coordinates"
+    xyxy = []
+    scores = []
+    concepts = []
+    for each in self._proto.data.regions:
+      # The coordinates live on the bounding box nested inside region_info.
+      box = each.region_info.bounding_box
+      x1, y1, x2, y2 = box.left_col, box.top_row, box.right_col, box.bottom_row
+      if real_coord:
+        x1 = x1 * image_width
+        y1 = y1 * image_height
+        x2 = x2 * image_width
+        y2 = y2 * image_height
+      xyxy.append([x1, y1, x2, y2])
+      scores.append(each.value)
+      concepts.append(each.data.concepts[0].id if each.data.concepts else None)
+    return xyxy, scores, concepts
+  def set_bboxes(self,
+                 boxes: list,
+                 scores: list,
+                 concepts: list,
+                 real_coord: bool = False,
+                 image_width: int = None,
+                 image_height: int = None):
+    """Replace the regions of the proto with the given boxes.
+    Args:
+      boxes: Boxes as `[x1, y1, x2, y2]`; pixel values when `real_coord` is set.
+        NOTE(review): when `real_coord` is False they are taken to be already
+        normalized and in the same order - confirm against callers.
+      scores: One score per box.
+      concepts: One concept id per box.
+      real_coord: Whether `boxes` are pixel coordinates that must be normalized.
+      image_width: Image width, required when `real_coord` is set.
+      image_height: Image height, required when `real_coord` is set.
+    Raises:
+      ValueError: a coordinate lies outside [0, 1] and `real_coord` is not set.
+    """
+    if real_coord:
+      assert (image_height and
+              image_width), "image_height and image_width are required when `real_coord` is set"
+      bboxes = [[x[1] / image_height, x[0] / image_width, x[3] / image_height, x[2] / image_width]
+                for x in boxes]  # normalize the bboxes to [0,1] and [y1 x1 y2 x2]
+      bboxes = np.clip(bboxes, 0, 1.0)
+    else:
+      # Only reorder to [y1 x1 y2 x2]; the values are validated below.
+      bboxes = [[x[1], x[0], x[3], x[2]] for x in boxes]
+    regions = []
+    for ith, bbox in enumerate(bboxes):
+      score = scores[ith]
+      concept = concepts[ith]
+      if any(coord < 0.0 or coord > 1.0 for coord in bbox):
+        raise ValueError(
+            "Box coordinates is not normalized between [0, 1]. Please set real_coord to True and provide image_height and image_width to normalize"
+        )
+      region_info = resources_pb2.RegionInfo(bounding_box=resources_pb2.BoundingBox(
+          top_row=float(bbox[0]),  # y_min
+          left_col=float(bbox[1]),  # x_min
+          bottom_row=float(bbox[2]),  # y_max
+          right_col=float(bbox[3]),  # x_max
+      ))
+      # `concepts` is a repeated field, so it has to be given a list.
+      data = resources_pb2.Data(concepts=[resources_pb2.Concept(id=concept, value=float(score))])
+      # The score is also stored on the region, where the `bboxes` getter reads it.
+      regions.append(
+          resources_pb2.Region(region_info=region_info, data=data, value=float(score)))
+    # Repeated fields cannot be assigned to: clear and refill instead.
+    self._proto.data.ClearField("regions")
+    self._proto.data.regions.extend(regions)
+  # Concepts
+  @property
+  def concepts(self) -> Dict[str, float]:
+    """Concepts of the proto as a `{concept id: value}` dict."""
+    return {each.id: each.value for each in self.proto.data.concepts}
+  def set_concepts(self, concept_score_pairs: Dict[str, float]):
+    """Replace the concepts of the proto; an empty mapping leaves them untouched."""
+    concepts = [
+        resources_pb2.Concept(id=concept, name=concept, value=score)
+        for concept, score in concept_score_pairs.items()
+    ]
+    if concepts:
+      self._proto.data.ClearField("concepts")
+      self._proto.data.concepts.extend(concepts)
+  # Embeddings
+  @property
+  def embeddings(self) -> List[List[float]]:
+    """Vectors of all embeddings stored in the proto."""
+    return [each.vector for each in self.proto.data.embeddings]
+  def set_embeddings(self, list_vectors: List[List[float]]):
+    """Append one embedding per vector; an empty list is a no-op.
+    Existing embeddings are cleared first only when the first vector is non-empty.
+    """
+    if list_vectors and list_vectors[0]:
+      self._proto.data.ClearField("embeddings")
+    for vec in list_vectors:
+      self._proto.data.embeddings.append(
+          resources_pb2.Embedding(vector=vec, num_dimensions=len(vec)))
+  # ---------------- End get/setters ---------------- #
+  # Constructors
+  @classmethod
+  def from_proto(cls, proto):
+    """Build a handler that wraps an existing proto."""
+    return cls(proto=proto)
+  @classmethod
+  def from_data(
+      cls,
+      status_code: int = status_code_pb2.SUCCESS,
+      status_description: str = "",
+      text: str = None,
+      image: Union[Image.Image, np.ndarray] = None,
+      audio: bytes = None,
+      boxes: dict = None,
+      concepts: Dict[str, float] = None,
+      embeddings: List[List[float]] = None,
+  ) -> 'BaseDataHandler':
+    """Build a handler around a new `Output` proto filled from Python values.
+    Args:
+      status_code: Status code to set on the proto.
+      status_description: Status description to set on the proto.
+      text: Optional raw text.
+      image: Optional PIL image or numpy array.
+      audio: Optional audio bytes.
+      boxes: Optional keyword arguments forwarded to `set_bboxes`.
+      concepts: Optional `{concept id: value}` mapping.
+      embeddings: Optional list of embedding vectors.
+    Returns:
+      An instance of `cls`; note that it always wraps an `Output` proto.
+    """
+    clss = cls(proto=resources_pb2.Output())
+    if isinstance(image, (Image.Image, np.ndarray)):
+      clss.set_image(image)
+    if text:
+      clss.set_text(text)
+    if audio:
+      clss.set_audio(audio)
+    if boxes:
+      clss.set_bboxes(**boxes)
+    if concepts:
+      clss.set_concepts(concepts)
+    if embeddings:
+      clss.set_embeddings(embeddings)
+    clss.set_status(code=status_code, description=status_description)
+    return clss
+class InputDataHandler(BaseDataHandler):
+  """Data handler wrapping a `resources_pb2.Input` proto."""
+  def __init__(self,
+               proto: resources_pb2.Input = None,
+               auth: ClarifaiAuthHelper = None):
+    """
+    Args:
+      proto: Input proto to wrap; a new empty one is created when omitted.
+      auth: Optional auth helper, forwarded to the base class.
+    """
+    # A proto built in the signature would be created once and shared (and
+    # mutated) by every handler constructed without an explicit proto.
+    if proto is None:
+      proto = resources_pb2.Input()
+    super().__init__(proto=proto, auth=auth)
+class OutputDataHandler(BaseDataHandler):
+  """Data handler wrapping a `resources_pb2.Output` proto."""
+  def __init__(self,
+               proto: resources_pb2.Output = None,
+               auth: ClarifaiAuthHelper = None):
+    """
+    Args:
+      proto: Output proto to wrap; a new empty one is created when omitted.
+      auth: Optional auth helper, forwarded to the base class.
+    """
+    # A proto built in the signature would be created once and shared (and
+    # mutated) by every handler constructed without an explicit proto.
+    if proto is None:
+      proto = resources_pb2.Output()
+    super().__init__(proto=proto, auth=auth)

clarifai/runners/utils/data_utils.py ADDED Viewed

@@ -0,0 +1,15 @@
+from io import BytesIO
+from PIL import Image
+def image_to_bytes(img: Image.Image, format="JPEG") -> bytes:
+  """Serialize a PIL image into bytes using the given format (JPEG by default)."""
+  with BytesIO() as stream:
+    img.save(stream, format=format)
+    return stream.getvalue()
+def bytes_to_image(bytes_img) -> Image.Image:
+  """Open the encoded image held in `bytes_img` as a PIL image."""
+  return Image.open(BytesIO(bytes_img))

clarifai/runners/utils/loader.py ADDED Viewed

@@ -0,0 +1,71 @@
+import importlib.util
+import json
+import os
+import subprocess
+class HuggingFaceLoarder:
+  """Helper that downloads Hugging Face checkpoints and reads their config."""
+  def __init__(self, repo_id=None, token=None):
+    """
+    Args:
+      repo_id: Hugging Face repository id used by the download/validate methods.
+      token: Optional Hugging Face token; when given, it is exported as
+        `HF_TOKEN` and a `huggingface-cli login` is attempted (best effort).
+    """
+    self.repo_id = repo_id
+    self.token = token
+    if token:
+      try:
+        if importlib.util.find_spec("huggingface_hub") is None:
+          raise ImportError(
+              "The 'huggingface_hub' package is not installed. Please install it using 'pip install huggingface_hub'."
+          )
+        os.environ['HF_TOKEN'] = token
+        # Argument list instead of a shell string: the token is never parsed by a shell.
+        subprocess.run(['huggingface-cli', 'login', f'--token={token}'])
+      except Exception as e:
+        # Login stays best effort, but the failure is reported instead of dropped.
+        print("Error setting up Hugging Face token ", e)
+  def download_checkpoints(self, checkpoint_path: str):
+    """Download the repository snapshot into `checkpoint_path`.
+    Returns:
+      True when the checkpoints are present (already there or just downloaded),
+      False when the model is missing, the download fails or validation fails.
+    Raises:
+      ImportError: `huggingface_hub` is not installed.
+    """
+    # throw error if huggingface_hub wasn't installed
+    try:
+      from huggingface_hub import snapshot_download
+    except ImportError:
+      raise ImportError(
+          "The 'huggingface_hub' package is not installed. Please install it using 'pip install huggingface_hub'."
+      )
+    if os.path.exists(checkpoint_path) and self.validate_download(checkpoint_path):
+      print("Checkpoints already exist")
+      return True
+    os.makedirs(checkpoint_path, exist_ok=True)
+    try:
+      if not self.validate_hf_model():
+        print("Model not found on Hugging Face")
+        return False
+      snapshot_download(repo_id=self.repo_id, local_dir=checkpoint_path)
+    except Exception as e:
+      print("Error downloading model checkpoints ", e)
+      return False
+    # Validate only after a download that did not raise.
+    if not self.validate_download(checkpoint_path):
+      print("Error downloading model checkpoints")
+      return False
+    return True
+  def validate_hf_model(self,):
+    """Return whether the repo exists on Hugging Face and contains `config.json`."""
+    # check if model exists on HF
+    from huggingface_hub import file_exists, repo_exists
+    return repo_exists(self.repo_id) and file_exists(self.repo_id, 'config.json')
+  def validate_download(self, checkpoint_path: str):
+    """Return whether `checkpoint_path` has at least as many entries as the repo has files.
+    NOTE(review): only top-level directory entries are counted - confirm this is
+    adequate for repositories that contain sub-folders.
+    """
+    from huggingface_hub import list_repo_files
+    # Query the Hub once and reuse the listing for both comparisons.
+    repo_files = list_repo_files(self.repo_id)
+    return len(os.listdir(checkpoint_path)) >= len(repo_files) and len(repo_files) > 0
+  def fetch_labels(self, checkpoint_path: str):
+    """Return the `id2label` mapping from `config.json` inside `checkpoint_path`."""
+    # Fetch labels for classification, detection and segmentation models
+    config_path = os.path.join(checkpoint_path, 'config.json')
+    with open(config_path, 'r') as f:
+      config = json.load(f)
+    labels = config['id2label']
+    return labels

clarifai/runners/utils/logging.py ADDED Viewed

@@ -0,0 +1,6 @@
+import os
+from clarifai.utils.logging import get_logger
+# Log level for the runners package, read from LOG_LEVEL and defaulting to "INFO".
+logger_level = os.getenv("LOG_LEVEL", "INFO")
+# Logger shared by the runner utilities, created with this module's name.
+logger = get_logger(logger_level, __name__)

clarifai/runners/utils/url_fetcher.py ADDED Viewed

@@ -0,0 +1,42 @@
+import concurrent.futures
+import fsspec
+from .logging import logger
+def download_input(input):
+  """
+  Fetch the content behind every url of `input` whose payload is still empty.
+  Image, video and audio urls are read as bytes into `base64`; the text url is
+  read in text mode into `raw`. Fields that already carry a payload are skipped.
+  """
+  # (field, payload attribute, open mode), in the order they are downloaded.
+  sources = (
+      (input.data.image, 'base64', 'rb'),
+      (input.data.video, 'base64', 'rb'),
+      (input.data.audio, 'base64', 'rb'),
+      (input.data.text, 'raw', 'r'),
+  )
+  for field, payload_attr, mode in sources:
+    if not field.url or getattr(field, payload_attr):
+      continue
+    with fsspec.open(field.url, mode) as f:
+      setattr(field, payload_attr, f.read())
+def ensure_urls_downloaded(request, max_threads=128):
+  """
+  Run `download_input` for every input of `request` on a thread pool.
+  A failing download is logged with its traceback and does not stop the others.
+  """
+  with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as pool:
+    pending = [pool.submit(download_input, each) for each in request.inputs]
+    for finished in concurrent.futures.as_completed(pending):
+      try:
+        finished.result()
+      except Exception as e:
+        logger.exception(f"Error downloading input: {e}")

clarifai/utils/logging.py CHANGED Viewed

@@ -1,4 +1,12 @@
+import datetime
+import json
 import logging
+import os
+import socket
+import sys
+import threading
+import time
+import traceback
 from collections import defaultdict
 from typing import Any, Dict, List, Optional, Union
@@ -11,6 +19,41 @@ from rich.tree import Tree
 install()
+# For the json logger.
+# NOTE(review): JSON_LOGGER_NAME is not referenced in the visible part of this file.
+JSON_LOGGER_NAME = "clarifai-json"
+# Key under which JsonFormatter.format() stores the rendered log message.
+JSON_LOG_KEY = 'msg'
+# Longest stringified object that _object_to_string_with_truncation() keeps uncut.
+JSON_DEFAULT_CHAR_LENGTH = 400
+# LogRecord attributes that JsonFormatter.format() drops from the emitted fields.
+FIELD_BLACKLIST = [
+    'msg', 'message', 'account', 'levelno', 'created', 'threadName', 'name', 'processName',
+    'module', 'funcName', 'msecs', 'relativeCreated', 'pathname', 'args', 'thread', 'process'
+]
+# Create thread local storage that the format() call below uses.
+# This is only used by the json_logger in the appropriate CLARIFAI_DEPLOY levels.
+thread_log_info = threading.local()
+def get_logger_context():
+  """Return the live attribute dict of the thread-local logging context."""
+  return vars(thread_log_info)
+def set_logger_context(**kwargs):
+  """Add or overwrite entries of the thread-local logging context."""
+  vars(thread_log_info).update(kwargs)
+def clear_logger_context():
+  """Remove every entry from the thread-local logging context."""
+  vars(thread_log_info).clear()
+def restore_logger_context(context):
+  """Replace the thread-local logging context with the entries of `context`."""
+  current = vars(thread_log_info)
+  current.clear()
+  current.update(context)
+def get_req_id_from_context():
+  """Return the `req_id` of the logging context, or '' when none is set."""
+  return get_logger_context().get('req_id', '')
 def display_workflow_tree(nodes_data: List[Dict]) -> None:
   """Displays a tree of the workflow nodes."""
@@ -84,12 +127,24 @@ def _configure_logger(name: str, logger_level: Union[int, str] = logging.NOTSET)
   for handler in logger.handlers[:]:
     logger.removeHandler(handler)
-  # Add the new rich handler and formatter
-  handler = RichHandler(
-      rich_tracebacks=True, log_time_format="%Y-%m-%d %H:%M:%S", console=Console(width=255))
-  formatter = logging.Formatter('%(name)s:  %(message)s')
-  handler.setFormatter(formatter)
-  logger.addHandler(handler)
+  # If ENABLE_JSON_LOGGER is 'true' then definitely use json logger.
+  # If ENABLE_JSON_LOGGER is 'false' then definitely don't use json logger.
+  # If ENABLE_JSON_LOGGER is not set, then use json logger if in k8s.
+  enabled_json = os.getenv('ENABLE_JSON_LOGGER', None)
+  in_k8s = 'KUBERNETES_SERVICE_HOST' in os.environ
+  if enabled_json == 'true' or (in_k8s and enabled_json != 'false'):
+    # Add the json handler and formatter
+    handler = logging.StreamHandler()
+    formatter = JsonFormatter()
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+  else:
+    # Add the new rich handler and formatter
+    handler = RichHandler(
+        rich_tracebacks=True, log_time_format="%Y-%m-%d %H:%M:%S", console=Console(width=255))
+    formatter = logging.Formatter('%(name)s:  %(message)s')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
 def get_logger(logger_level: Union[int, str] = logging.NOTSET,
@@ -151,3 +206,154 @@ def display_concept_relations_tree(relations_dict: Dict[str, Any]) -> None:
     for child in children:
       tree.add(child)
     rprint(tree)
+def _default_json_default(obj):
+  """
+  Fallback serializer for values that json cannot encode on its own.
+  Date, time and datetime objects are rendered as ISO8601 strings;
+  anything else is turned into a (possibly truncated) string.
+  """
+  time_types = (datetime.datetime, datetime.date, datetime.time)
+  if not isinstance(obj, time_types):
+    return _object_to_string_with_truncation(obj)
+  return obj.isoformat()
+def _object_to_string_with_truncation(obj) -> str:
+  """
+  Convert an object to a string, truncating it when it is too long.
+  Prefer not to log objects that end up here: extract the important parts
+  from them and log those as regular Python types (str, int, ...), which
+  are never passed to this function.
+  A truncated string gets a suffix that helps to find and fix such cases:
+  - its hard-coded part can be used to search the logs for all occurrences
+  - the object's class name helps with a detailed investigation
+  """
+  text = str(obj)
+  full_length = len(text)
+  if full_length <= JSON_DEFAULT_CHAR_LENGTH:
+    return text
+  head = text[:JSON_DEFAULT_CHAR_LENGTH]
+  return f"{head}...[{type(obj).__name__} was truncated, len={full_length} chars]"
+class JsonFormatter(logging.Formatter):
+  """Formatter that renders every log record as one JSON object string."""
+  def __init__(self,
+               fmt=None,
+               datefmt=None,
+               style='%',
+               json_cls=None,
+               json_default=_default_json_default):
+    """
+    :param fmt: Config as a JSON string, allowed fields;
+           extra: provide extra fields always present in logs
+           source_host: override source host name
+    :param datefmt: Date format to use (required by logging.Formatter
+        interface but not used)
+    :param style: Accepted for logging.Formatter compatibility but not used
+    :param json_cls: JSON encoder to forward to json.dumps
+    :param json_default: Default JSON representation for unknown types,
+                         by default coerce everything to a string
+    """
+    # Initialise the base class so that inherited attributes exist; `_fmt` is
+    # then replaced by the parsed JSON config.
+    super().__init__(datefmt=datefmt)
+    self._fmt = json.loads(fmt) if fmt is not None else {}
+    self.json_default = json_default
+    self.json_cls = json_cls
+    self.defaults = self._fmt.get('extra', {})
+    if 'source_host' in self._fmt:
+      self.source_host = self._fmt['source_host']
+    else:
+      try:
+        self.source_host = socket.gethostname()
+      except Exception:
+        self.source_host = ""
+  def _build_fields(self, defaults, fields):
+    """Return provided fields including any in defaults
+    """
+    return dict(list(defaults.get('@fields', {}).items()) + list(fields.items()))
+  # Override the format function to fit Clarifai
+  def format(self, record):
+    """Render `record`, plus the thread-local logging context, as a JSON string.
+    :param record: The logging.LogRecord to format
+    :return: The JSON document; when it cannot be serialized, a JSON document
+        describing the formatting failure is returned instead
+    """
+    fields = record.__dict__.copy()
+    # logger.info({...}) directly.
+    if isinstance(record.msg, dict):
+      fields.update(record.msg)
+      fields.pop('msg', None)
+      msg = ""
+    else:  # logger.info("message", {...})
+      if isinstance(record.args, dict):
+        fields.update(record.args)
+      msg = record.getMessage()
+    for k in FIELD_BLACKLIST:
+      fields.pop(k, None)
+    # Rename 'levelname' to 'level' and make the value lowercase to match Go logs
+    level = fields.pop('levelname', None)
+    if level:
+      fields['level'] = level.lower()
+    # Get the thread local data; each value is emitted only when it is set.
+    for key in ('req_id', 'orig_req_id', 'requester', 'user_id'):
+      value = getattr(thread_log_info, key, None)
+      if value:
+        fields[key] = value
+    if hasattr(thread_log_info, 'start_time'):
+      #pylint: disable=no-member
+      fields['duration_ms'] = (time.time() - thread_log_info.start_time) * 1000
+    if 'exc_info' in fields:
+      exc_info = fields.pop('exc_info')
+      if exc_info:
+        fields['exception'] = traceback.format_exception(*exc_info)
+    if 'exc_text' in fields and not fields['exc_text']:
+      fields.pop('exc_text')
+    logr = self.defaults.copy()
+    logr.update({
+        JSON_LOG_KEY: msg,
+        # Timezone-aware "now" in UTC, rendered in the same format as before.
+        '@timestamp':
+            datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.%fZ')
+    })
+    logr.update(fields)
+    try:
+      return json.dumps(logr, default=self.json_default, cls=self.json_cls)
+    except Exception:
+      exc_type, exc_value, exc_tb = sys.exc_info()
+      return json.dumps(
+          {
+              "msg": f"Fail to format log {exc_type.__name__}({exc_value}), {logr}",
+              "formatting_traceback": "\n".join(traceback.format_tb(exc_tb)),
+          },
+          default=self.json_default,
+          cls=self.json_cls,
+      )

{clarifai-10.8.2.dist-info → clarifai-10.8.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: clarifai
-Version: 10.8.2
+Version: 10.8.4
 Summary: Clarifai Python SDK
 Home-page: https://github.com/Clarifai/clarifai-python
 Author: Clarifai
@@ -20,7 +20,8 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: clarifai-grpc >=10.8.6
+Requires-Dist: clarifai-grpc >=10.8.7
+Requires-Dist: clarifai-protocol >=0.0.4
 Requires-Dist: numpy >=1.22.0
 Requires-Dist: tqdm >=4.65.0
 Requires-Dist: tritonclient >=2.34.0

clarifai 10.8.2__py3-none-any.whl → 10.8.4__py3-none-any.whl

clarifai 10.8.2__py3-none-any.whl → 10.8.4__py3-none-any.whl