PyPI - mediapipe-nightly - Versions diffs - 0.0.0.post20231103__cp311-cp311-macosx_11_0_universal2.whl - Mend

mediapipe-nightly 0.0.0.post20231103__cp311-cp311-macosx_11_0_universal2.whl

Files changed (545) hide show

mediapipe/tasks/python/audio/core/audio_record.py ADDED Viewed

@@ -0,0 +1,125 @@
+# Copyright 2023 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A module to record audio in a streaming basis."""
+import threading
+import numpy as np
+# `sounddevice` is optional at import time. If it cannot be loaded, remember
+# the failure so it can be re-raised when an AudioRecord is actually created.
+try:
+  import sounddevice as sd
+except (OSError, ImportError) as import_failure:
+  # OSError: PortAudio failed to load; ImportError: `sounddevice` is missing.
+  sd = None
+  sd_error = import_failure
+class AudioRecord(object):
+  """Records audio from a `sounddevice` input stream into a ring buffer.
+  The buffer always holds the most recent `buffer_size` samples per channel;
+  older samples are discarded as new chunks arrive from the stream callback.
+  """
+  def __init__(
+      self, channels: int, sampling_rate: int, buffer_size: int
+  ) -> None:
+    """Creates an AudioRecord instance.
+    Args:
+      channels: Number of input channels.
+      sampling_rate: Sampling rate in Hertz.
+      buffer_size: Size of the ring buffer in number of samples.
+    Raises:
+      ValueError: if any of the arguments is non-positive.
+      ImportError: if failed to import `sounddevice`.
+      OSError: if failed to load `PortAudio`.
+    """
+    if sd is None:
+      raise sd_error
+    if channels <= 0:
+      raise ValueError('channels must be positive.')
+    if sampling_rate <= 0:
+      raise ValueError('sampling_rate must be positive.')
+    if buffer_size <= 0:
+      raise ValueError('buffer_size must be positive.')
+    self._buffer_size = buffer_size
+    self._channels = channels
+    self._sampling_rate = sampling_rate
+    # Create a ring buffer to store the input audio.
+    self._buffer = np.zeros([buffer_size, channels], dtype=float)
+    # Guards `_buffer`: it is written by the stream callback (invoked by
+    # sounddevice, presumably on its own thread) and read through `read()`.
+    self._lock = threading.Lock()
+    # Create an input stream to continuously capture the audio data.
+    self._stream = sd.InputStream(
+        channels=channels,
+        samplerate=sampling_rate,
+        callback=self._audio_callback,
+    )
+  def _audio_callback(self, data, *_) -> None:
+    """Receives a chunk of recorded audio data from sounddevice.
+    Args:
+      data: Array of newly captured samples, one row per sample.
+      *_: Remaining callback arguments passed by sounddevice; unused.
+    """
+    shift = len(data)
+    if shift == 0:
+      # Nothing to append; an empty slice assignment below would fail.
+      return
+    # `with` releases the lock even if one of the array operations raises.
+    with self._lock:
+      if shift >= self._buffer_size:
+        # The chunk alone fills the buffer: keep only its newest samples.
+        # Writing in place keeps the buffer's dtype and shape stable.
+        self._buffer[:] = data[-self._buffer_size:]
+      else:
+        # Drop the oldest `shift` samples and append the chunk at the end.
+        self._buffer = np.roll(self._buffer, -shift, axis=0)
+        self._buffer[-shift:, :] = data
+  @property
+  def channels(self) -> int:
+    """Number of input channels."""
+    return self._channels
+  @property
+  def sampling_rate(self) -> int:
+    """Sampling rate in Hertz."""
+    return self._sampling_rate
+  @property
+  def buffer_size(self) -> int:
+    """Size of the ring buffer in number of samples."""
+    return self._buffer_size
+  def start_recording(self) -> None:
+    """Starts the audio recording."""
+    # Clear the internal ring buffer.
+    with self._lock:
+      self._buffer.fill(0)
+    # Start recording using sounddevice's InputStream.
+    self._stream.start()
+  def stop(self) -> None:
+    """Stops the audio recording."""
+    self._stream.stop()
+  def read(self, size: int) -> np.ndarray:
+    """Reads the latest audio data captured in the buffer.
+    Args:
+      size: Number of samples to read from the buffer.
+    Returns:
+      A NumPy array containing a copy of the newest `size` samples.
+    Raises:
+      ValueError: Raised if `size` is larger than the buffer size or is not
+        positive.
+    """
+    if size > self._buffer_size:
+      raise ValueError('Cannot read more samples than the size of the buffer.')
+    elif size <= 0:
+      raise ValueError('Size must be positive.')
+    start_index = self._buffer_size - size
+    # Copy under the lock so a concurrent callback cannot be observed halfway
+    # through shifting the buffer and writing the new chunk.
+    with self._lock:
+      return np.copy(self._buffer[start_index:])

mediapipe/tasks/python/audio/core/audio_task_running_mode.py ADDED Viewed

@@ -0,0 +1,29 @@
+# Copyright 2022 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The running mode of MediaPipe Audio Tasks."""
+import enum
+@enum.unique
+class AudioTaskRunningMode(enum.Enum):
+  """Running modes supported by MediaPipe audio tasks.
+  Attributes:
+    AUDIO_CLIPS: Run the audio task on independent audio clips.
+    AUDIO_STREAM: Run the audio task on an audio stream, such as from
+      microphone.
+  """
+  AUDIO_CLIPS = 'AUDIO_CLIPS'
+  AUDIO_STREAM = 'AUDIO_STREAM'

mediapipe/tasks/python/audio/core/base_audio_task_api.py ADDED Viewed

@@ -0,0 +1,181 @@
+# Copyright 2022 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MediaPipe audio task base api."""
+from typing import Callable, Mapping, Optional
+from mediapipe.framework import calculator_pb2
+from mediapipe.python import packet_creator
+from mediapipe.python._framework_bindings import packet as packet_module
+from mediapipe.python._framework_bindings import task_runner as task_runner_module
+from mediapipe.python._framework_bindings import timestamp as timestamp_module
+from mediapipe.tasks.python.audio.core import audio_record
+from mediapipe.tasks.python.audio.core import audio_task_running_mode as running_mode_module
+from mediapipe.tasks.python.core.optional_dependencies import doc_controls
+# Short private aliases for the framework binding types used in this module.
+_TaskRunner = task_runner_module.TaskRunner
+_Packet = packet_module.Packet
+_RunningMode = running_mode_module.AudioTaskRunningMode
+_Timestamp = timestamp_module.Timestamp
+@doc_controls.do_not_generate_docs
+class BaseAudioTaskApi(object):
+  """Common base for the user-facing mediapipe audio task api classes.
+  Owns the task runner created from a graph config and checks that each
+  processing method is only called in the running mode it belongs to.
+  """
+  def __init__(
+      self,
+      graph_config: calculator_pb2.CalculatorGraphConfig,
+      running_mode: _RunningMode,
+      packet_callback: Optional[Callable[[Mapping[str, packet_module.Packet]],
+                                         None]] = None
+  ) -> None:
+    """Initializes the `BaseAudioTaskApi` object.
+    Args:
+      graph_config: The mediapipe audio task graph config proto.
+      running_mode: The running mode of the mediapipe audio task.
+      packet_callback: The optional packet callback for getting results
+        asynchronously in the audio stream mode.
+    Raises:
+      ValueError: The packet callback is missing in the audio stream mode, or
+        is provided in any other running mode.
+    """
+    in_stream_mode = running_mode == _RunningMode.AUDIO_STREAM
+    # Stream mode delivers results through the callback, so it is mandatory.
+    if in_stream_mode and packet_callback is None:
+      raise ValueError(
+          'The audio task is in audio stream mode, a user-defined result '
+          'callback must be provided.')
+    # Any other mode returns results synchronously; a callback is rejected.
+    if not in_stream_mode and packet_callback:
+      raise ValueError(
+          'The audio task is in audio clips mode, a user-defined result '
+          'callback should not be provided.')
+    self._runner = _TaskRunner.create(graph_config, packet_callback)
+    self._running_mode = running_mode
+    self._default_sample_rate = None
+  def _require_running_mode(self, expected: _RunningMode, label: str) -> None:
+    """Raises ValueError unless the task was created in the `expected` mode.
+    Args:
+      expected: The running mode the calling method requires.
+      label: The mode name to embed in the error message, e.g. 'clips'.
+    """
+    if self._running_mode != expected:
+      raise ValueError(
+          f'Task is not initialized with the audio {label} mode. '
+          f'Current running mode:{self._running_mode.name}')
+  def _process_audio_clip(
+      self, inputs: Mapping[str, _Packet]) -> Mapping[str, _Packet]:
+    """Processes independent audio clips synchronously.
+    The call blocks the current thread until a failure status or a successful
+    result is returned.
+    Args:
+      inputs: A dict contains (input stream name, data packet) pairs.
+    Returns:
+      A dict contains (output stream name, data packet) pairs.
+    Raises:
+      ValueError: If the task's running mode is not set to audio clips mode.
+    """
+    self._require_running_mode(_RunningMode.AUDIO_CLIPS, 'clips')
+    return self._runner.process(inputs)
+  def _set_sample_rate(
+      self, sample_rate_stream_name: str, sample_rate: float
+  ) -> None:
+    """Sends the audio sample rate to the runner in the audio stream mode.
+    Args:
+      sample_rate_stream_name: The audio sample rate stream name.
+      sample_rate: The audio sample rate.
+    Raises:
+      ValueError: If the task's running mode is not set to the audio stream
+      mode.
+    """
+    self._require_running_mode(_RunningMode.AUDIO_STREAM, 'stream')
+    # The sample rate is sent as a double packet at the PRESTREAM timestamp.
+    rate_packet = packet_creator.create_double(sample_rate).at(
+        _Timestamp.PRESTREAM)
+    self._runner.send({sample_rate_stream_name: rate_packet})
+  def _send_audio_stream_data(self, inputs: Mapping[str, _Packet]) -> None:
+    """Sends audio stream data to the runner without waiting for results.
+    The results will be available in the user-defined results callback.
+    Args:
+      inputs: A dict contains (input stream name, data packet) pairs.
+    Raises:
+      ValueError: If the task's running mode is not set to the audio stream
+      mode.
+    """
+    self._require_running_mode(_RunningMode.AUDIO_STREAM, 'stream')
+    self._runner.send(inputs)
+  def create_audio_record(
+      self, num_channels: int, sample_rate: int, required_input_buffer_size: int
+  ) -> audio_record.AudioRecord:
+    """Creates an AudioRecord instance to record audio stream.
+    The returned AudioRecord instance is initialized and client needs to call
+    the appropriate method to start recording.
+    Note that MediaPipe Audio tasks will up/down sample automatically to fit the
+    sample rate required by the model. The default sample rate of the MediaPipe
+    pretrained audio model, Yamnet is 16kHz.
+    Args:
+      num_channels: The number of audio channels.
+      sample_rate: The audio sample rate.
+      required_input_buffer_size: The required input buffer size in number of
+        float elements.
+    Returns:
+      An AudioRecord instance.
+    Raises:
+      ValueError: If there's a problem creating the AudioRecord instance.
+    """
+    recorder = audio_record.AudioRecord(
+        num_channels, sample_rate, required_input_buffer_size
+    )
+    return recorder
+  def close(self) -> None:
+    """Shuts down the mediapipe audio task instance.
+    Raises:
+      RuntimeError: If the mediapipe audio task failed to close.
+    """
+    self._runner.close()
+  def __enter__(self):
+    """Returns `self` upon entering the runtime context."""
+    return self
+  def __exit__(self, unused_exc_type, unused_exc_value, unused_traceback):
+    """Closes the mediapipe audio task instance when the context exits.
+    Raises:
+      RuntimeError: If the mediapipe audio task failed to close.
+    """
+    self.close()

mediapipe/tasks/python/components/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+# Copyright 2022 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

mediapipe/tasks/python/components/containers/__init__.py ADDED Viewed

@@ -0,0 +1,53 @@
+# Copyright 2022 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MediaPipe Tasks Components Containers API."""
+import mediapipe.tasks.python.components.containers.audio_data
+import mediapipe.tasks.python.components.containers.bounding_box
+import mediapipe.tasks.python.components.containers.category
+import mediapipe.tasks.python.components.containers.classification_result
+import mediapipe.tasks.python.components.containers.detections
+import mediapipe.tasks.python.components.containers.embedding_result
+import mediapipe.tasks.python.components.containers.landmark
+import mediapipe.tasks.python.components.containers.landmark_detection_result
+import mediapipe.tasks.python.components.containers.rect
+# Re-export the container classes so they are reachable directly from this
+# package. The bare submodule names used below (`audio_data`, `rect`, ...) are
+# presumably available because importing a submodule binds it as an attribute
+# of its parent package, i.e. in this module's namespace -- TODO confirm.
+AudioDataFormat = audio_data.AudioDataFormat
+AudioData = audio_data.AudioData
+BoundingBox = bounding_box.BoundingBox
+Category = category.Category
+Classifications = classification_result.Classifications
+ClassificationResult = classification_result.ClassificationResult
+Detection = detections.Detection
+DetectionResult = detections.DetectionResult
+Embedding = embedding_result.Embedding
+EmbeddingResult = embedding_result.EmbeddingResult
+Landmark = landmark.Landmark
+NormalizedLandmark = landmark.NormalizedLandmark
+LandmarksDetectionResult = landmark_detection_result.LandmarksDetectionResult
+Rect = rect.Rect
+NormalizedRect = rect.NormalizedRect
+# Remove unnecessary modules to avoid duplication in API docs.
+# Only the submodule names and the top-level `mediapipe` name bound by the
+# import statements are deleted; the class aliases above stay defined.
+del audio_data
+del bounding_box
+del category
+del classification_result
+del detections
+del embedding_result
+del landmark
+del landmark_detection_result
+del rect
+del mediapipe

mediapipe/tasks/python/components/containers/audio_data.py ADDED Viewed

@@ -0,0 +1,137 @@
+# Copyright 2022 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MediaPipe audio data."""
+import dataclasses
+from typing import Optional
+import numpy as np
+@dataclasses.dataclass
+class AudioDataFormat:
+  """Format metadata describing a piece of audio.
+  Attributes:
+    num_channels: how many channels the audio data has; defaults to 1.
+    sample_rate: the audio sample rate, or None when it is not specified.
+  """
+  num_channels: int = dataclasses.field(default=1)
+  sample_rate: Optional[float] = dataclasses.field(default=None)
+class AudioData(object):
+  """MediaPipe Tasks' audio container.
+  Holds a fixed-length buffer with the most recently loaded samples together
+  with the format metadata describing them.
+  """
+  def __init__(
+      self,
+      buffer_length: int,
+      audio_format: Optional["AudioDataFormat"] = None
+  ) -> None:
+    """Initializes the `AudioData` object.
+    Args:
+      buffer_length: the length of the audio buffer.
+      audio_format: the audio format metadata. When omitted, a new default
+        `AudioDataFormat` is created for this instance.
+    """
+    # Build the default per instance: `AudioDataFormat` is mutable, so a
+    # default created once in the signature would be shared by every object.
+    if audio_format is None:
+      audio_format = AudioDataFormat()
+    self._audio_format = audio_format
+    self._buffer = np.zeros([buffer_length, self._audio_format.num_channels],
+                            dtype=np.float32)
+  def clear(self):
+    """Clears the internal buffer and fill it with zeros."""
+    self._buffer.fill(0)
+  def load_from_array(self,
+                      src: np.ndarray,
+                      offset: int = 0,
+                      size: int = -1) -> None:
+    """Loads the audio data from a NumPy array.
+    Args:
+      src: A NumPy source array contains the input audio.
+      offset: An optional offset for loading a slice of the `src` array to the
+        buffer.
+      size: An optional size parameter denoting the number of samples to load
+        from the `src` array. A negative value loads everything from `offset`
+        to the end of `src`.
+    Raises:
+      ValueError: If the input array has an incorrect shape, if `offset` is
+        negative, or if `offset` + `size` exceeds the length of the `src`
+        array.
+    """
+    if len(src.shape) == 1:
+      if self._audio_format.num_channels != 1:
+        raise ValueError(f"Input audio is mono, but the audio data is expected "
+                         f"to have {self._audio_format.num_channels} channels.")
+    elif src.shape[1] != self._audio_format.num_channels:
+      raise ValueError(f"Input audio contains an invalid number of channels. "
+                       f"Expect {self._audio_format.num_channels}.")
+    if offset < 0:
+      # A negative offset would silently wrap around to the end of `src`.
+      raise ValueError(f"offset must be non-negative, got {offset}.")
+    if size < 0:
+      size = max(len(src) - offset, 0)
+    if offset + size > len(src):
+      raise ValueError(
+          f"Index out of range. offset {offset} + size {size} should be <= "
+          f"src's length: {len(src)}")
+    buffer_length = len(self._buffer)
+    # Branch on the number of samples actually requested, not on the length
+    # of `src`: a short slice of a long array must be appended, not treated
+    # as a full replacement.
+    if size >= buffer_length:
+      # The requested slice fills the whole buffer: keep only its newest
+      # `buffer_length` samples. The buffer takes the slice's shape and dtype.
+      new_offset = offset + size - buffer_length
+      self._buffer = src[new_offset:new_offset + buffer_length].copy()
+    elif size > 0:
+      # Shift the internal buffer backward and add the incoming data to the end
+      # of the buffer.
+      self._buffer = np.roll(self._buffer, -size, axis=0)
+      tail = self._buffer[-size:]
+      # Reshape so a 1-D mono chunk fits a 2-D buffer (and vice versa).
+      tail[...] = src[offset:offset + size].reshape(tail.shape)
+  @classmethod
+  def create_from_array(cls,
+                        src: np.ndarray,
+                        sample_rate: Optional[float] = None) -> "AudioData":
+    """Creates an `AudioData` object from a NumPy array.
+    Args:
+      src: A NumPy source array contains the input audio.
+      sample_rate: the optional audio sample rate.
+    Returns:
+      An `AudioData` object that contains a copy of the NumPy source array as
+      the data.
+    """
+    obj = cls(
+        buffer_length=src.shape[0],
+        audio_format=AudioDataFormat(
+            num_channels=1 if len(src.shape) == 1 else src.shape[1],
+            sample_rate=sample_rate))
+    obj.load_from_array(src)
+    return obj
+  @property
+  def audio_format(self) -> "AudioDataFormat":
+    """Gets the audio format of the audio."""
+    return self._audio_format
+  @property
+  def buffer_length(self) -> int:
+    """Gets the sample count of the audio."""
+    return self._buffer.shape[0]
+  @property
+  def buffer(self) -> np.ndarray:
+    """Gets the internal buffer."""
+    return self._buffer

mediapipe/tasks/python/components/containers/bounding_box.py ADDED Viewed

@@ -0,0 +1,73 @@
+# Copyright 2022 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Bounding box data class."""
+import dataclasses
+from typing import Any
+from mediapipe.framework.formats import location_data_pb2
+from mediapipe.tasks.python.core.optional_dependencies import doc_controls
+# Private alias for the bounding box message nested in the LocationData proto.
+_BoundingBoxProto = location_data_pb2.LocationData.BoundingBox
+@dataclasses.dataclass
+class BoundingBox:
+  """An axis-aligned bounding box with integer coordinates.
+  Attributes:
+    origin_x: The X coordinate of the top-left corner, in pixels.
+    origin_y: The Y coordinate of the top-left corner, in pixels.
+    width: The width of the bounding box, in pixels.
+    height: The height of the bounding box, in pixels.
+  """
+  origin_x: int
+  origin_y: int
+  width: int
+  height: int
+  @doc_controls.do_not_generate_docs
+  def to_pb2(self) -> _BoundingBoxProto:
+    """Converts this box into a BoundingBox protobuf object."""
+    # The proto names the top-left corner `xmin`/`ymin`.
+    proto_fields = dict(
+        xmin=self.origin_x,
+        ymin=self.origin_y,
+        width=self.width,
+        height=self.height,
+    )
+    return _BoundingBoxProto(**proto_fields)
+  @classmethod
+  @doc_controls.do_not_generate_docs
+  def create_from_pb2(cls, pb2_obj: _BoundingBoxProto) -> 'BoundingBox':
+    """Builds a `BoundingBox` object out of the given protobuf object."""
+    # Positional order follows the field order declared above.
+    return BoundingBox(
+        pb2_obj.xmin, pb2_obj.ymin, pb2_obj.width, pb2_obj.height)
+  def __eq__(self, other: Any) -> bool:
+    """Checks if this object is equal to the given object.
+    Two boxes are equal when their protobuf representations are equal.
+    Args:
+      other: The object to be compared with.
+    Returns:
+      True if the objects are equal.
+    """
+    if isinstance(other, BoundingBox):
+      return self.to_pb2() == other.to_pb2()
+    return False

mediapipe/tasks/python/components/containers/category.py ADDED Viewed

@@ -0,0 +1,78 @@
+# Copyright 2022 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Category data class."""
+import dataclasses
+from typing import Any, Optional
+from mediapipe.framework.formats import classification_pb2
+from mediapipe.tasks.python.core.optional_dependencies import doc_controls
+# Private alias for the Classification proto message.
+_ClassificationProto = classification_pb2.Classification
+@dataclasses.dataclass
+class Category:
+  """A classification category.
+  A small utility class bundling a label, its display name, a float score and
+  the index of the label in the corresponding label file. Typically it's used
+  as the result of classification tasks.
+  Attributes:
+    index: The index of the label in the corresponding label file.
+    score: The probability score of this label category.
+    display_name: The display name of the label, which may be translated for
+      different locales. For example, a label, "apple", may be translated into
+      Spanish for display purpose, so that the `display_name` is "manzana".
+    category_name: The label of this category object.
+  """
+  index: Optional[int] = None
+  score: Optional[float] = None
+  display_name: Optional[str] = None
+  category_name: Optional[str] = None
+  @doc_controls.do_not_generate_docs
+  def to_pb2(self) -> _ClassificationProto:
+    """Converts this category into a Category protobuf object."""
+    # `category_name` is stored in the proto's `label` field.
+    proto_fields = dict(
+        index=self.index,
+        score=self.score,
+        label=self.category_name,
+        display_name=self.display_name,
+    )
+    return _ClassificationProto(**proto_fields)
+  @classmethod
+  @doc_controls.do_not_generate_docs
+  def create_from_pb2(cls, pb2_obj: _ClassificationProto) -> 'Category':
+    """Builds a `Category` object out of the given protobuf object."""
+    # Positional order follows the field order declared above; the proto's
+    # `label` field becomes `category_name`.
+    return Category(
+        pb2_obj.index, pb2_obj.score, pb2_obj.display_name, pb2_obj.label)
+  def __eq__(self, other: Any) -> bool:
+    """Checks if this object is equal to the given object.
+    Two categories are equal when their protobuf representations are equal.
+    Args:
+      other: The object to be compared with.
+    Returns:
+      True if the objects are equal.
+    """
+    if isinstance(other, Category):
+      return self.to_pb2() == other.to_pb2()
+    return False