mediapipe-nightly 0.10.10.post20240216__cp310-cp310-macosx_11_0_x86_64.whl → 0.10.10.post20240220__cp310-cp310-macosx_11_0_x86_64.whl
- mediapipe/__init__.py +1 -1
- mediapipe/python/_framework_bindings.cpython-310-darwin.so +0 -0
- mediapipe/tasks/python/__init__.py +1 -0
- mediapipe/tasks/python/genai/__init__.py +14 -0
- mediapipe/tasks/python/genai/converter/__init__.py +24 -0
- mediapipe/tasks/python/genai/converter/converter_base.py +172 -0
- mediapipe/tasks/python/genai/converter/converter_factory.py +79 -0
- mediapipe/tasks/python/genai/converter/llm_converter.py +213 -0
- mediapipe/tasks/python/genai/converter/pytorch_converter.py +315 -0
- mediapipe/tasks/python/genai/converter/pytorch_converter_test.py +86 -0
- mediapipe/tasks/python/genai/converter/quantization_util.py +516 -0
- mediapipe/tasks/python/genai/converter/quantization_util_test.py +259 -0
- mediapipe/tasks/python/genai/converter/safetensors_converter.py +521 -0
- mediapipe/tasks/python/genai/converter/safetensors_converter_test.py +83 -0
- mediapipe/tasks/python/genai/converter/weight_bins_writer.py +111 -0
- mediapipe/tasks/python/genai/converter/weight_bins_writer_test.py +62 -0
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/METADATA +1 -1
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/RECORD +21 -8
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/LICENSE +0 -0
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/WHEEL +0 -0
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/top_level.txt +0 -0
mediapipe/__init__.py CHANGED
mediapipe/python/_framework_bindings.cpython-310-darwin.so CHANGED
Binary file
mediapipe/tasks/python/genai/__init__.py ADDED
@@ -0,0 +1,14 @@
# Copyright 2024 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe GenAI Python API."""
mediapipe/tasks/python/genai/converter/__init__.py ADDED
@@ -0,0 +1,24 @@
# Copyright 2022 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""MediaPipe Tasks GenAI Converter API."""

import mediapipe.tasks.python.genai.converter.llm_converter

ConversionConfig = llm_converter.ConversionConfig
convert_checkpoint = llm_converter.convert_checkpoint

# Remove unnecessary modules to avoid duplication in API docs.
del mediapipe
del llm_converter
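
The two re-exports above define the public surface of the converter package. A minimal usage sketch, assuming the mediapipe-nightly wheel above is installed:

    # Public import path exposed by the package __init__ shown above.
    from mediapipe.tasks.python.genai import converter

    # Only ConversionConfig and convert_checkpoint are kept public; the
    # llm_converter module itself is deleted from the namespace so it does
    # not show up twice in the generated API docs.
    print(converter.ConversionConfig)
    print(converter.convert_checkpoint)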
mediapipe/tasks/python/genai/converter/converter_base.py ADDED
@@ -0,0 +1,172 @@
# Copyright 2024 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Defines a couple of base classes for the conversion/quantization process."""

import os
from typing import Dict, List, Optional, Tuple
import numpy as np


class QuantizationAction:
  """Container of the tensor values and their corresponding quantization settings.

  The container is responsible for hosting all of the information that is
  required to execute the weight-only quantization.

  Attributes:
    tensor_name: A string that represents the input tensor name.
    tensor_value: A numpy array that contains the unquantized tensor values.
    target_name: A string that represents the updated tensor name.
    quantize_axis: A list of integers representing the dimensions to be
      quantized along. For example, if an input tensor has shape [128, 256] and
      quantize_axis == [0], the quantization happens along the 0-th dimension,
      resulting in [256] scaling factors.
    quantize_bits: An integer that specifies the target quantization bits. It
      currently only supports either 8 or 4 bits.
    pack_dim: An integer specifying which dimension to pack the quantized bits.
      This is only applicable when quantize_bits == 4.
  """

  def __init__(
      self,
      tensor_name: str,
      tensor_value: Optional[np.ndarray] = None,
      target_name: Optional[str] = None,
      quantize_axis: Optional[List[int]] = None,
      quantize_bits: Optional[int] = None,
      pack_dim: Optional[int] = 0,
  ):
    """Initializes the model attributes."""
    self.tensor_name = tensor_name
    self.tensor_value = tensor_value
    self.target_name = target_name
    self.quantize_axis = quantize_axis
    self.quantize_bits = quantize_bits
    self.pack_dim = pack_dim

  def __str__(self) -> str:
    output_string = "QuantizationAction(\n"
    output_string += f"  tensor_name: {self.tensor_name}\n"
    output_string += f"  target_name: {self.target_name}\n"
    output_string += f"  quantize_axis: {self.quantize_axis}\n"
    output_string += f"  quantize_bits: {self.quantize_bits}\n"
    output_string += f"  pack_dim: {self.pack_dim}\n"
    if self.tensor_value is not None:
      output_string += f"  tensor_value: {self.tensor_value.shape}\n"
    output_string += ")\n"
    return output_string


class CkptLoaderBase:
  """Base class for loading the checkpoint.

  This class is responsible for loading the checkpoint files into the layer
  weight tensors (as numpy arrays) plus the quantization setting information
  (8/4 bits). The returned data should be a list of QuantizationAction that
  describes how to quantize each layer's weights.
  """

  def __init__(
      self,
      ckpt_path: str,
      is_symmetric: bool,
      attention_quant_bits: int,
      feedforward_quant_bits: int,
      embedding_quant_bits: int,
  ):
    """Initializes the loader.

    Args:
      ckpt_path: The filepath to the checkpoint.
      is_symmetric: Whether to apply symmetric or asymmetric quantization.
      attention_quant_bits: An integer that specifies the target quantization
        bits (supports 8 or 4) for the attention layers.
      feedforward_quant_bits: An integer that specifies the target quantization
        bits (supports 8 or 4) for the feedforward layers in each Transformer
        block.
      embedding_quant_bits: An integer that specifies the target quantization
        bits (supports 8 or 4) for the embedding (and the final projection)
        layers.
    """
    self._ckpt_path = ckpt_path
    self._is_symmetric = is_symmetric
    self._attention_quant_bits = attention_quant_bits
    self._feedforward_quant_bits = feedforward_quant_bits
    self._embedding_quant_bits = embedding_quant_bits

  def load_to_actions(self) -> List[Optional[QuantizationAction]]:
    """Loads the checkpoint and returns the quantization actions."""
    raise NotImplementedError("The load_to_actions method is not implemented.")


class LayerActionMapperBase:
  """Base class for mapping the layer weights to quantization actions.

  This class is responsible for mapping from each layer to its corresponding
  quantization information (e.g. target quantization bits / updated tensor
  name...).
  """

  def __init__(
      self,
      is_symmetric: bool,
      attention_quant_bits: int,
      feedforward_quant_bits: int,
      embedding_quant_bits: int,
      backend: str,
  ):
    self._is_symmetric = is_symmetric
    self._attention_quant_bits = attention_quant_bits
    self._feedforward_quant_bits = feedforward_quant_bits
    self._embedding_quant_bits = embedding_quant_bits
    self._backend = backend

  def map_to_actions(
      self, layer_name: str
  ) -> Optional[List[QuantizationAction]]:
    """Maps the layer weights to quantization actions.

    Args:
      layer_name: A string representing the name of the layer weight. Note that
        the layer information is expected to be contained in the name, which is
        enough to determine the target quantization information. Any child
        class is expected to implement this function.
    """
    raise NotImplementedError("The map_to_actions method is not implemented.")


class ModelWriterBase:
  """Base class for writing the quantized model.

  This class is responsible for taking a dictionary of the quantized
  tensors/names and writing them into the format that can be loaded by the
  on-device inference engine.
  """

  def __init__(self, output_dir: str, backend: str):
    """Initializes the class.

    Args:
      output_dir: A string that represents the output directory to write the
        resulting file(s).
      backend: A string that represents the target backend to run the output
        file(s).
    """
    self._output_dir = output_dir
    if not os.path.exists(self._output_dir):
      os.mkdir(self._output_dir)
    self._backend = backend

  def write_variables(self, variables: Dict[str, Tuple[np.ndarray, bool]]):
    raise NotImplementedError("The write_variables method is not implemented.")
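
To show how these base classes are meant to be extended, here is a minimal sketch of a hypothetical loader built on CkptLoaderBase. The tensor, its names, and the checkpoint path are made up for illustration and are not part of MediaPipe:

    import numpy as np

    from mediapipe.tasks.python.genai.converter import converter_base


    class ToyCkptLoader(converter_base.CkptLoaderBase):
      """Hypothetical loader that emits one 8-bit action for a fake weight."""

      def load_to_actions(self):
        fake_weight = np.ones((128, 256), dtype=np.float32)  # placeholder tensor
        return [
            converter_base.QuantizationAction(
                tensor_name="toy.attention.w",   # made-up source tensor name
                tensor_value=fake_weight,
                target_name="attention.w",       # made-up target tensor name
                quantize_axis=[0],               # per-column scales: shape [256]
                quantize_bits=self._attention_quant_bits,
            )
        ]


    loader = ToyCkptLoader(
        ckpt_path="/tmp/toy_ckpt",  # hypothetical path; the base class just stores it
        is_symmetric=True,
        attention_quant_bits=8,
        feedforward_quant_bits=8,
        embedding_quant_bits=8,
    )
    print(loader.load_to_actions()[0])  # uses QuantizationAction.__str__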
mediapipe/tasks/python/genai/converter/converter_factory.py ADDED
@@ -0,0 +1,79 @@
# Copyright 2024 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Utility library that helps create the converter instances."""
from mediapipe.tasks.python.genai.converter import converter_base
from mediapipe.tasks.python.genai.converter import pytorch_converter
from mediapipe.tasks.python.genai.converter import safetensors_converter
from mediapipe.tasks.python.genai.converter import weight_bins_writer


def create_ckpt_loader(
    ckpt_format: str, *args, **kwargs
) -> converter_base.CkptLoaderBase:
  """Creates the checkpoint loader.

  Args:
    ckpt_format: A string that indicates the input checkpoint format.
    *args: Additional arguments to be passed into the loader.
    **kwargs: Additional arguments to be passed into the loader.

  Returns:
    A created CkptLoader instance.
  """
  del args
  if ckpt_format == "pytorch":
    return pytorch_converter.PytorchCkptLoader(
        ckpt_path=kwargs["ckpt_path"],
        is_symmetric=kwargs["is_symmetric"],
        attention_quant_bits=kwargs["attention_quant_bits"],
        feedforward_quant_bits=kwargs["feedforward_quant_bits"],
        embedding_quant_bits=kwargs["embedding_quant_bits"],
        special_model=kwargs["special_model"],
        backend=kwargs["backend"],
    )
  elif ckpt_format == "safetensors":
    return safetensors_converter.SafetensorsCkptLoader(
        ckpt_path=kwargs["ckpt_path"],
        is_symmetric=kwargs["is_symmetric"],
        attention_quant_bits=kwargs["attention_quant_bits"],
        feedforward_quant_bits=kwargs["feedforward_quant_bits"],
        embedding_quant_bits=kwargs["embedding_quant_bits"],
        special_model=kwargs["special_model"],
        backend=kwargs["backend"],
    )
  else:
    raise ValueError(f"Unknown checkpoint format: {ckpt_format}")


def create_writer(
    writer_type: str, *args, **kwargs
) -> converter_base.ModelWriterBase:
  """Creates the model writer.

  Args:
    writer_type: A string that indicates which model writer to create.
    *args: Additional arguments to be passed into the writer.
    **kwargs: Additional arguments to be passed into the writer.

  Returns:
    A created ModelWriter instance.
  """
  del args
  if writer_type == "weight_bins":
    return weight_bins_writer.WeightBinsWriter(
        output_dir=kwargs["output_dir"], backend=kwargs["backend"]
    )
  else:
    raise ValueError(f"Unknown writer type: {writer_type}")
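
Because both factory functions read their settings out of **kwargs, every key shown below is required when requesting a loader. A hedged sketch with placeholder paths, not a definitive invocation:

    from mediapipe.tasks.python.genai.converter import converter_factory

    # Every keyword below is looked up via kwargs[...] inside create_ckpt_loader;
    # the checkpoint and output paths are placeholders.
    loader = converter_factory.create_ckpt_loader(
        "safetensors",
        ckpt_path="/tmp/gemma-2b-it",
        is_symmetric=True,
        attention_quant_bits=8,
        feedforward_quant_bits=8,
        embedding_quant_bits=8,
        special_model="GEMMA_2B",
        backend="gpu",
    )
    writer = converter_factory.create_writer(
        "weight_bins",
        output_dir="/tmp/gemma-2b-out",
        backend="gpu",
    )

A missing key simply raises a KeyError rather than a descriptive error, so the higher-level convert_checkpoint entry point below is usually the easier way in.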
mediapipe/tasks/python/genai/converter/llm_converter.py ADDED
@@ -0,0 +1,213 @@
"""Functions to perform the checkpoint conversion."""

import os
from typing import List, Optional

from absl import logging

from mediapipe.tasks.python.genai.converter import converter_base
from mediapipe.tasks.python.genai.converter import converter_factory
from mediapipe.tasks.python.genai.converter import model_ckpt_util
from mediapipe.tasks.python.genai.converter import quantization_util


class ConversionConfig(object):
  """Config for checkpoint conversion.

  Attributes:
    input_ckpt: Directory or path for the input checkpoint.
    ckpt_format: Checkpoint format, e.g. 'safetensors', 'pytorch'.
    model_type: Name of the model, e.g. GEMMA_2B.
    backend: Target backend to run the model. Can be either "cpu" or "gpu".
    output_dir: Where the output file(s) are to be stored.
    is_symmetric: Whether to quantize symmetrically.
    attention_quant_bits: Target quantization bits for the attention layers.
    feedforward_quant_bits: Target quantization bits for the feedforward layers.
    embedding_quant_bits: Target quantization bits for the embedding layers.
    combine_file_only: Whether to combine the weight files only (assuming the
      weight files already exist).
    vocab_model_file: The file path to either 1) the SentencePiece vocab model
      or 2) the Hugging Face BPE tokenizer files; 1) is applicable for the
      Gemma model and 2) is applicable for other models. When 2) is used, the
      provided path is expected to point to a directory that contains both
      tokenizer.json and tokenizer_config.json files.
    output_tflite_file: (optional) the output tflite filename. If not provided,
      the output will be `model.tflite` stored in the output_dir.
  """

  def __init__(
      self,
      input_ckpt: str,
      ckpt_format: str,
      model_type: str,
      backend: str,
      output_dir: str,
      is_symmetric: bool = True,
      attention_quant_bits: int = 8,
      feedforward_quant_bits: int = 8,
      embedding_quant_bits: int = 8,
      combine_file_only: bool = False,
      vocab_model_file: str = '',
      output_tflite_file: Optional[str] = None,
  ):
    self.input_ckpt = input_ckpt
    self.ckpt_format = ckpt_format
    self.model_type = model_type
    self.backend = backend
    if os.path.isfile(output_dir):
      raise ValueError('Output directory must not point to an existing file.')
    if not os.path.isdir(output_dir):
      logging.info('Creating output directory: %s', output_dir)
      os.makedirs(output_dir, exist_ok=True)
    self.output_dir = output_dir
    self.is_symmetric = is_symmetric
    self.attention_quant_bits = attention_quant_bits
    self.feedforward_quant_bits = feedforward_quant_bits
    self.embedding_quant_bits = embedding_quant_bits
    self.combine_file_only = combine_file_only
    self.vocab_model_file = vocab_model_file
    if output_tflite_file:
      parent_dir = os.path.dirname(output_tflite_file)
      if not os.path.isdir(parent_dir):
        logging.info('Creating tflite parent directory: %s', parent_dir)
        os.makedirs(parent_dir, exist_ok=True)
      self.output_tflite_file = output_tflite_file
    else:
      self.output_tflite_file = os.path.join(output_dir, 'model.tflite')


def quantize_by_actions(
    actions: List[converter_base.QuantizationAction],
    backend: str,
    is_symmetric: bool,
):
  """Quantizes the weights by actions.

  Args:
    actions: A list of QuantizationAction that contains the information and
      tensor values to be quantized.
    backend: Target backend to run the model. Can be either "cpu" or "gpu".
    is_symmetric: Whether to quantize symmetrically.

  Returns:
    A dictionary that maps from the updated tensor names to the quantized
    tensor values plus a boolean that indicates whether the tensor values need
    to be packed (only applicable for the 4-bit quantized weights).
  """
  output_tensors = {}
  for action in actions:
    if action.quantize_axis:
      pack = action.quantize_bits == 4
      if is_symmetric:
        target_var, scale = quantization_util.quantize_tensor(
            var=action.tensor_value,
            axis=action.quantize_axis,
            sym=is_symmetric,
            number_bits=action.quantize_bits,
        )
        output_tensors[action.target_name] = (target_var, pack)
        output_tensors[action.target_name + '_quantized_scale'] = (scale, False)
      else:
        target_var, scale, zp = quantization_util.quantize_tensor(
            var=action.tensor_value,
            axis=action.quantize_axis,
            sym=is_symmetric,
            number_bits=action.quantize_bits,
        )
        if backend == 'cpu' and (action.quantize_bits == 4):
          target_var, scale, zp = quantization_util.update_to_uint4(
              target_var, scale, zp
          )
        output_tensors[action.target_name] = (target_var, pack)
        output_tensors[action.target_name + '_quantized_scale'] = (scale, False)
        output_tensors[action.target_name + '_quantized_zp'] = (zp, False)
    else:
      output_tensors[action.target_name] = (action.tensor_value, False)
  return output_tensors


def combined_weight_bins_to_tflite(
    model_type: str,
    backend: str,
    weight_path: str,
    output_tflite_file: str,
    vocab_model_file: str,
):
  """Combines the weight files into a tflite file."""
  # TODO: Figure out whether to clean up the weight files after this.
  if backend == 'cpu':
    model_ckpt_util.GenerateCpuTfLite(
        model_type,
        weight_path,
        vocab_model_file,
        True,
        output_tflite_file,
    )
  elif backend == 'gpu':
    model_ckpt_util.GenerateGpuTfLite(
        model_type,
        weight_path,
        vocab_model_file,
        True,
        output_tflite_file,
    )
  else:
    raise ValueError('Unsupported backend: %s' % backend)


def convert_bpe_vocab(vocab_model_file: str, output_dir: str) -> str:
  if not os.path.isdir(vocab_model_file):
    raise ValueError(
        'The input BPE vocab model file path is expected to be a directory that'
        ' contains both tokenizer.json and tokenizer_config.json files.'
    )
  output_vocab_file = os.path.join(output_dir, 'spm.model')
  model_ckpt_util.ConvertHfTokenizer(vocab_model_file, output_vocab_file)
  return output_vocab_file


def convert_checkpoint(config: ConversionConfig) -> None:
  """Converts the checkpoint to a tflite file."""
  logging.info('input folder: %s', config.input_ckpt)

  if config.model_type == 'GEMMA_2B':
    vocab_model_path = config.vocab_model_file
  else:
    vocab_model_path = convert_bpe_vocab(
        config.vocab_model_file, config.output_dir
    )

  if not config.combine_file_only:
    # Load the layer weights and prepare the quantization configurations.
    loader = converter_factory.create_ckpt_loader(
        config.ckpt_format,
        ckpt_path=config.input_ckpt,
        is_symmetric=config.is_symmetric,
        backend=config.backend,
        attention_quant_bits=config.attention_quant_bits,
        feedforward_quant_bits=config.feedforward_quant_bits,
        embedding_quant_bits=config.embedding_quant_bits,
        special_model=config.model_type,
    )
    actions = loader.load_to_actions()

    # Quantize the weights.
    quantized_tensors = quantize_by_actions(
        actions, config.backend, config.is_symmetric
    )

    # Write the quantized tensors into file(s).
    writer = converter_factory.create_writer(
        writer_type='weight_bins',
        output_dir=config.output_dir,
        backend=config.backend,
    )
    writer.write_variables(quantized_tensors)

  combined_weight_bins_to_tflite(
      config.model_type,
      config.backend,
      weight_path=config.output_dir,
      output_tflite_file=config.output_tflite_file,
      vocab_model_file=vocab_model_path,
  )
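
Putting the pieces together, a hedged end-to-end sketch of the flow exposed by convert_checkpoint; all paths below are placeholders and the GEMMA_2B/safetensors combination is only one plausible configuration:

    from mediapipe.tasks.python.genai import converter

    # Placeholder paths for illustration only.
    config = converter.ConversionConfig(
        input_ckpt="/tmp/gemma-2b-it/",               # directory with safetensors shards
        ckpt_format="safetensors",
        model_type="GEMMA_2B",
        backend="gpu",
        output_dir="/tmp/gemma-2b-it/intermediate/",  # quantized weight bins land here
        vocab_model_file="/tmp/gemma-2b-it/tokenizer.model",  # SentencePiece model (Gemma case)
        output_tflite_file="/tmp/gemma_gpu.tflite",
    )

    # Loads the checkpoint, quantizes the weights per layer, writes the weight
    # bins, and finally bundles everything with the vocab into the tflite file.
    converter.convert_checkpoint(config)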