PyPI - azure-ai-transcription - Versions diffs - 1.0.0b1__py3-none-any.whl - Mend

azure-ai-transcription 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

azure/ai/transcription/__init__.py +32 -0
azure/ai/transcription/_client.py +103 -0
azure/ai/transcription/_configuration.py +73 -0
azure/ai/transcription/_operations/__init__.py +23 -0
azure/ai/transcription/_operations/_operations.py +151 -0
azure/ai/transcription/_operations/_patch.py +118 -0
azure/ai/transcription/_patch.py +21 -0
azure/ai/transcription/_utils/__init__.py +6 -0
azure/ai/transcription/_utils/model_base.py +1237 -0
azure/ai/transcription/_utils/serialization.py +2030 -0
azure/ai/transcription/_utils/utils.py +67 -0
azure/ai/transcription/_version.py +9 -0
azure/ai/transcription/aio/__init__.py +29 -0
azure/ai/transcription/aio/_client.py +107 -0
azure/ai/transcription/aio/_configuration.py +75 -0
azure/ai/transcription/aio/_operations/__init__.py +23 -0
azure/ai/transcription/aio/_operations/_operations.py +131 -0
azure/ai/transcription/aio/_operations/_patch.py +116 -0
azure/ai/transcription/aio/_patch.py +21 -0
azure/ai/transcription/models/__init__.py +48 -0
azure/ai/transcription/models/_enums.py +23 -0
azure/ai/transcription/models/_models.py +450 -0
azure/ai/transcription/models/_patch.py +21 -0
azure/ai/transcription/py.typed +1 -0
azure_ai_transcription-1.0.0b1.dist-info/METADATA +471 -0
azure_ai_transcription-1.0.0b1.dist-info/RECORD +29 -0
azure_ai_transcription-1.0.0b1.dist-info/WHEEL +5 -0
azure_ai_transcription-1.0.0b1.dist-info/licenses/LICENSE +21 -0
azure_ai_transcription-1.0.0b1.dist-info/top_level.txt +1 -0

azure/ai/transcription/models/_models.py ADDED Viewed

@@ -0,0 +1,450 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+# pylint: disable=useless-super-delegation
+from typing import Any, Mapping, Optional, TYPE_CHECKING, Union, overload
+from .._utils.model_base import Model as _Model, rest_field
+from .._utils.utils import FileType
+if TYPE_CHECKING:
+    from .. import models as _models
+class ChannelCombinedPhrases(_Model):
+    """The full transcript per channel.
+    :ivar channel: The 0-based channel index. Only present if channel separation is enabled.
+    :vartype channel: int
+    :ivar text: The complete transcribed text for the channel. Required.
+    :vartype text: str
+    """
+    # Both fields are declared through rest_field() without a name= override.
+    # `channel` is Optional; `text` is the only required field.
+    channel: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The 0-based channel index. Only present if channel separation is enabled."""
+    text: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The complete transcribed text for the channel. Required."""
+    # Typing-only overload 1: keyword-only construction; `text` has no default.
+    @overload
+    def __init__(
+        self,
+        *,
+        text: str,
+        channel: Optional[int] = None,
+    ) -> None: ...
+    # Typing-only overload 2: construction from a single raw-JSON mapping.
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+    # Runtime implementation: forwards every argument unchanged to the base-class
+    # initializer (hence the file-level `useless-super-delegation` pylint disable).
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+class EnhancedModeProperties(_Model):
+    """Enhanced mode properties for transcription.
+    :ivar enabled: Enable enhanced mode for transcription. This is automatically set to true when
+     task, targetLanguage, or prompt are specified.
+    :vartype enabled: bool
+    :ivar task: Task type for enhanced mode.
+    :vartype task: str
+    :ivar target_language: Target language for enhanced mode.
+    :vartype target_language: str
+    :ivar prompt: A list of user prompts.
+    :vartype prompt: list[str]
+    """
+    # `enabled` is declared with visibility=["read"] only and is not a parameter
+    # of the keyword overload below; the other three fields carry the full
+    # visibility list and are all optional.
+    enabled: Optional[bool] = rest_field(visibility=["read"])
+    """Enable enhanced mode for transcription. This is automatically set to true when task,
+     targetLanguage, or prompt are specified."""
+    task: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """Task type for enhanced mode."""
+    # name="targetLanguage" differs from the Python attribute name
+    # (presumably the JSON property name used on the wire -- confirm in model_base).
+    target_language: Optional[str] = rest_field(
+        name="targetLanguage", visibility=["read", "create", "update", "delete", "query"]
+    )
+    """Target language for enhanced mode."""
+    prompt: Optional[list[str]] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """A list of user prompts."""
+    # Typing-only overload 1: keyword-only construction; every parameter defaults to None.
+    @overload
+    def __init__(
+        self,
+        *,
+        task: Optional[str] = None,
+        target_language: Optional[str] = None,
+        prompt: Optional[list[str]] = None,
+    ) -> None: ...
+    # Typing-only overload 2: construction from a single raw-JSON mapping.
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+    # Runtime implementation: forwards every argument unchanged to the base-class initializer.
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+class PhraseListProperties(_Model):
+    """Phrase list properties for transcription.
+    :ivar phrases: List of phrases for recognition.
+    :vartype phrases: list[str]
+    :ivar biasing_weight: Biasing weight for phrase list (1.0 to 20.0).
+    :vartype biasing_weight: float
+    """
+    phrases: Optional[list[str]] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """List of phrases for recognition."""
+    # The documented 1.0 to 20.0 range is not checked anywhere in this class.
+    # NOTE(review): presumably enforced by the service -- confirm.
+    biasing_weight: Optional[float] = rest_field(
+        name="biasingWeight", visibility=["read", "create", "update", "delete", "query"]
+    )
+    """Biasing weight for phrase list (1.0 to 20.0)."""
+    # Typing-only overload 1: keyword-only construction; both parameters default to None.
+    @overload
+    def __init__(
+        self,
+        *,
+        phrases: Optional[list[str]] = None,
+        biasing_weight: Optional[float] = None,
+    ) -> None: ...
+    # Typing-only overload 2: construction from a single raw-JSON mapping.
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+    # Runtime implementation: forwards every argument unchanged to the base-class initializer.
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+class TranscribedPhrase(_Model):
+    """A transcribed phrase.
+    :ivar channel: The 0-based channel index. Only present if channel separation is enabled.
+    :vartype channel: int
+    :ivar speaker: A unique integer number that is assigned to each speaker detected in the audio
+     without particular order. Only present if speaker diarization is enabled.
+    :vartype speaker: int
+    :ivar offset_milliseconds: The start offset of the phrase in milliseconds. Required.
+    :vartype offset_milliseconds: int
+    :ivar duration_milliseconds: The duration of the phrase in milliseconds. Required.
+    :vartype duration_milliseconds: int
+    :ivar text: The transcribed text of the phrase. Required.
+    :vartype text: str
+    :ivar words: The words that make up the phrase. Only present if word-level timestamps are
+     enabled.
+    :vartype words: list[~azure.ai.transcription.models.TranscribedWord]
+    :ivar locale: The locale of the phrase.
+    :vartype locale: str
+    :ivar confidence: The confidence value for the phrase. Required.
+    :vartype confidence: float
+    """
+    # Required fields (non-Optional annotations): offset_milliseconds,
+    # duration_milliseconds, text, confidence. The remaining fields are Optional.
+    channel: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The 0-based channel index. Only present if channel separation is enabled."""
+    speaker: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """A unique integer number that is assigned to each speaker detected in the audio without
+     particular order. Only present if speaker diarization is enabled."""
+    offset_milliseconds: int = rest_field(
+        name="offsetMilliseconds", visibility=["read", "create", "update", "delete", "query"]
+    )
+    """The start offset of the phrase in milliseconds. Required."""
+    duration_milliseconds: int = rest_field(
+        name="durationMilliseconds", visibility=["read", "create", "update", "delete", "query"]
+    )
+    """The duration of the phrase in milliseconds. Required."""
+    text: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The transcribed text of the phrase. Required."""
+    # "_models.TranscribedWord" is a string forward reference; `_models` is
+    # imported only under TYPE_CHECKING at the top of this module.
+    words: Optional[list["_models.TranscribedWord"]] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
+    """The words that make up the phrase. Only present if word-level timestamps are enabled."""
+    locale: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The locale of the phrase."""
+    confidence: float = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The confidence value for the phrase. Required."""
+    # Typing-only overload 1: keyword-only construction. The four required
+    # fields come first without defaults; optional fields default to None.
+    @overload
+    def __init__(
+        self,
+        *,
+        offset_milliseconds: int,
+        duration_milliseconds: int,
+        text: str,
+        confidence: float,
+        channel: Optional[int] = None,
+        speaker: Optional[int] = None,
+        words: Optional[list["_models.TranscribedWord"]] = None,
+        locale: Optional[str] = None,
+    ) -> None: ...
+    # Typing-only overload 2: construction from a single raw-JSON mapping.
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+    # Runtime implementation: forwards every argument unchanged to the base-class initializer.
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+class TranscribedWord(_Model):
+    """Time-stamped word in the display form.
+    :ivar text: The recognized word, including punctuation. Required.
+    :vartype text: str
+    :ivar offset_milliseconds: The start offset of the word in milliseconds. Required.
+    :vartype offset_milliseconds: int
+    :ivar duration_milliseconds: The duration of the word in milliseconds. Required.
+    :vartype duration_milliseconds: int
+    """
+    # All three fields are required (none is annotated Optional).
+    text: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The recognized word, including punctuation. Required."""
+    offset_milliseconds: int = rest_field(
+        name="offsetMilliseconds", visibility=["read", "create", "update", "delete", "query"]
+    )
+    """The start offset of the word in milliseconds. Required."""
+    duration_milliseconds: int = rest_field(
+        name="durationMilliseconds", visibility=["read", "create", "update", "delete", "query"]
+    )
+    """The duration of the word in milliseconds. Required."""
+    # Typing-only overload 1: keyword-only construction; no parameter has a default.
+    @overload
+    def __init__(
+        self,
+        *,
+        text: str,
+        offset_milliseconds: int,
+        duration_milliseconds: int,
+    ) -> None: ...
+    # Typing-only overload 2: construction from a single raw-JSON mapping.
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+    # Runtime implementation: forwards every argument unchanged to the base-class initializer.
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+class TranscriptionContent(_Model):
+    """Request model for transcription operation.
+    :ivar definition: Metadata for a transcription request. This field contains a JSON-serialized
+     object of type ``TranscriptionOptions``. Required.
+    :vartype definition: ~azure.ai.transcription.models.TranscriptionOptions
+    :ivar audio: The content of the audio file to be transcribed. The audio file must be shorter
+     than 2 hours in audio duration and smaller than 250 MB in size. Optional if audioUrl is
+     provided in the definition.
+    :vartype audio: ~azure.ai.transcription._utils.utils.FileType
+    """
+    # `definition` is required and typed as the TranscriptionOptions model
+    # (string forward reference; `_models` is imported only under TYPE_CHECKING).
+    definition: "_models.TranscriptionOptions" = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """Metadata for a transcription request. This field contains a JSON-serialized object of type
+     ``TranscriptionOptions``. Required."""
+    # `audio` is the only field in this module flagged is_multipart_file_input=True.
+    # The 2-hour / 250 MB limits in its description are not checked in this class.
+    audio: Optional[FileType] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"], is_multipart_file_input=True
+    )
+    """The content of the audio file to be transcribed. The audio file must be shorter than 2 hours in
+     audio duration and smaller than 250 MB in size. Optional if audioUrl is provided in the
+     definition."""
+    # Typing-only overload 1: keyword-only construction; `definition` has no default.
+    @overload
+    def __init__(
+        self,
+        *,
+        definition: "_models.TranscriptionOptions",
+        audio: Optional[FileType] = None,
+    ) -> None: ...
+    # Typing-only overload 2: construction from a single raw-JSON mapping.
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+    # Runtime implementation: forwards every argument unchanged to the base-class initializer.
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+class TranscriptionDiarizationOptions(_Model):
+    """The Speaker Diarization settings. Diarization settings must be specified to enable speaker
+    diarization.
+    :ivar enabled: Enable speaker diarization. This is automatically set to true when maxSpeakers
+     is specified.
+    :vartype enabled: bool
+    :ivar max_speakers: Gets or sets a hint for the maximum number of speakers for diarization.
+     Must be greater than 1 and less than 36.
+    :vartype max_speakers: int
+    """
+    # `enabled` is declared with visibility=["read"] only and is not a parameter
+    # of the keyword overload below.
+    enabled: Optional[bool] = rest_field(visibility=["read"])
+    """Enable speaker diarization. This is automatically set to true when maxSpeakers is specified."""
+    # The documented bound (greater than 1, less than 36) is not checked in this class.
+    # NOTE(review): presumably enforced by the service -- confirm.
+    max_speakers: Optional[int] = rest_field(
+        name="maxSpeakers", visibility=["read", "create", "update", "delete", "query"]
+    )
+    """Gets or sets a hint for the maximum number of speakers for diarization. Must be greater than 1
+     and less than 36."""
+    # Typing-only overload 1: keyword-only construction with the single optional parameter.
+    @overload
+    def __init__(
+        self,
+        *,
+        max_speakers: Optional[int] = None,
+    ) -> None: ...
+    # Typing-only overload 2: construction from a single raw-JSON mapping.
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+    # Runtime implementation: forwards every argument unchanged to the base-class initializer.
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+class TranscriptionOptions(_Model):
+    """Metadata for a transcription request.
+    :ivar audio_url: The URL of the audio to be transcribed. The audio must be shorter than 2 hours
+     in audio duration and smaller than 250 MB in size. If both Audio and AudioUrl are provided,
+     Audio is used.
+    :vartype audio_url: str
+    :ivar locales: A list of possible locales for the transcription. If not specified, the locale
+     of the speech in the audio is detected automatically from all supported locales.
+    :vartype locales: list[str]
+    :ivar models: Maps some or all candidate locales to a model URI to be used for transcription.
+     If no mapping is given, the default model for the locale is used.
+    :vartype models: dict[str, str]
+    :ivar profanity_filter_mode: Mode of profanity filtering. Known values are: "None", "Removed",
+     "Tags", and "Masked".
+    :vartype profanity_filter_mode: str or ~azure.ai.transcription.models.ProfanityFilterMode
+    :ivar diarization_options: Mode of diarization.
+    :vartype diarization_options: ~azure.ai.transcription.models.TranscriptionDiarizationOptions
+    :ivar active_channels: The 0-based indices of the channels to be transcribed separately. If not
+     specified, multiple channels are merged and transcribed jointly. Only up to two channels are
+     supported.
+    :vartype active_channels: list[int]
+    :ivar enhanced_mode: Enhanced mode properties.
+    :vartype enhanced_mode: ~azure.ai.transcription.models.EnhancedModeProperties
+    :ivar phrase_list: Phrase list properties.
+    :vartype phrase_list: ~azure.ai.transcription.models.PhraseListProperties
+    """
+    # Every field is Optional. Two attributes use a name= that is not simply the
+    # camelCase form of the Python name: diarization_options -> "diarization" and
+    # active_channels -> "channels".
+    audio_url: Optional[str] = rest_field(name="audioUrl", visibility=["read", "create", "update", "delete", "query"])
+    """The URL of the audio to be transcribed. The audio must be shorter than 2 hours in audio
+     duration and smaller than 250 MB in size. If both Audio and AudioUrl are provided, Audio is
+     used."""
+    locales: Optional[list[str]] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """A list of possible locales for the transcription. If not specified, the locale of the speech in
+     the audio is detected automatically from all supported locales."""
+    models: Optional[dict[str, str]] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """Maps some or all candidate locales to a model URI to be used for transcription. If no mapping
+     is given, the default model for the locale is used."""
+    # Annotated as a plain str or the ProfanityFilterMode enum.
+    profanity_filter_mode: Optional[Union[str, "_models.ProfanityFilterMode"]] = rest_field(
+        name="profanityFilterMode", visibility=["read", "create", "update", "delete", "query"]
+    )
+    """Mode of profanity filtering. Known values are: \"None\", \"Removed\", \"Tags\", and \"Masked\"."""
+    # Despite the "Mode of diarization" description, the annotated type is the
+    # TranscriptionDiarizationOptions settings model, not a mode value.
+    diarization_options: Optional["_models.TranscriptionDiarizationOptions"] = rest_field(
+        name="diarization", visibility=["read", "create", "update", "delete", "query"]
+    )
+    """Mode of diarization."""
+    # The "up to two channels" limit in the description is not checked in this class.
+    active_channels: Optional[list[int]] = rest_field(
+        name="channels", visibility=["read", "create", "update", "delete", "query"]
+    )
+    """The 0-based indices of the channels to be transcribed separately. If not specified, multiple
+     channels are merged and transcribed jointly. Only up to two channels are supported."""
+    enhanced_mode: Optional["_models.EnhancedModeProperties"] = rest_field(
+        name="enhancedMode", visibility=["read", "create", "update", "delete", "query"]
+    )
+    """Enhanced mode properties."""
+    phrase_list: Optional["_models.PhraseListProperties"] = rest_field(
+        name="phraseList", visibility=["read", "create", "update", "delete", "query"]
+    )
+    """Phrase list properties."""
+    # Typing-only overload 1: keyword-only construction; every parameter defaults to None.
+    @overload
+    def __init__(
+        self,
+        *,
+        audio_url: Optional[str] = None,
+        locales: Optional[list[str]] = None,
+        models: Optional[dict[str, str]] = None,
+        profanity_filter_mode: Optional[Union[str, "_models.ProfanityFilterMode"]] = None,
+        diarization_options: Optional["_models.TranscriptionDiarizationOptions"] = None,
+        active_channels: Optional[list[int]] = None,
+        enhanced_mode: Optional["_models.EnhancedModeProperties"] = None,
+        phrase_list: Optional["_models.PhraseListProperties"] = None,
+    ) -> None: ...
+    # Typing-only overload 2: construction from a single raw-JSON mapping.
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+    # Runtime implementation: forwards every argument unchanged to the base-class initializer.
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+class TranscriptionResult(_Model):
+    """The result of the transcribe operation.
+    :ivar duration_milliseconds: The duration of the audio in milliseconds. Required.
+    :vartype duration_milliseconds: int
+    :ivar combined_phrases: The full transcript for each channel. Required.
+    :vartype combined_phrases: list[~azure.ai.transcription.models.ChannelCombinedPhrases]
+    :ivar phrases: The transcription results segmented into phrases. Required.
+    :vartype phrases: list[~azure.ai.transcription.models.TranscribedPhrase]
+    """
+    # All three fields are required (none is annotated Optional). The two list
+    # fields hold other models from this module, referenced as string forward
+    # references through the TYPE_CHECKING-only `_models` import.
+    duration_milliseconds: int = rest_field(
+        name="durationMilliseconds", visibility=["read", "create", "update", "delete", "query"]
+    )
+    """The duration of the audio in milliseconds. Required."""
+    combined_phrases: list["_models.ChannelCombinedPhrases"] = rest_field(
+        name="combinedPhrases", visibility=["read", "create", "update", "delete", "query"]
+    )
+    """The full transcript for each channel. Required."""
+    phrases: list["_models.TranscribedPhrase"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The transcription results segmented into phrases. Required."""
+    # Typing-only overload 1: keyword-only construction; no parameter has a default.
+    @overload
+    def __init__(
+        self,
+        *,
+        duration_milliseconds: int,
+        combined_phrases: list["_models.ChannelCombinedPhrases"],
+        phrases: list["_models.TranscribedPhrase"],
+    ) -> None: ...
+    # Typing-only overload 2: construction from a single raw-JSON mapping.
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+    # Runtime implementation: forwards every argument unchanged to the base-class initializer.
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)

azure/ai/transcription/models/_patch.py ADDED Viewed

@@ -0,0 +1,21 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------
+"""Customize generated code here.
+Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
+"""
+__all__: list[str] = []  # Add all objects you want publicly available to users at this package level
+# Intentionally has no body beyond its docstring: in this file it is a no-op
+# that returns None. Customizations, if any, would be added here.
+def patch_sdk():
+    """Do not remove from this file.
+    `patch_sdk` is a last resort escape hatch that allows you to do customizations
+    you can't accomplish using the techniques described in
+    https://aka.ms/azsdk/python/dpcodegen/python/customize
+    """

azure/ai/transcription/py.typed ADDED Viewed

@@ -0,0 +1 @@
+ # Marker file for PEP 561.