azure-ai-transcription 1.0.0b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure_ai_transcription-1.0.0b1/CHANGELOG.md +7 -0
- azure_ai_transcription-1.0.0b1/LICENSE +21 -0
- azure_ai_transcription-1.0.0b1/MANIFEST.in +7 -0
- azure_ai_transcription-1.0.0b1/PKG-INFO +471 -0
- azure_ai_transcription-1.0.0b1/README.md +438 -0
- azure_ai_transcription-1.0.0b1/azure/__init__.py +1 -0
- azure_ai_transcription-1.0.0b1/azure/ai/__init__.py +1 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/__init__.py +32 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/_client.py +103 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/_configuration.py +73 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/_operations/__init__.py +23 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/_operations/_operations.py +151 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/_operations/_patch.py +118 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/_patch.py +21 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/_utils/__init__.py +6 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/_utils/model_base.py +1237 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/_utils/serialization.py +2030 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/_utils/utils.py +67 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/_version.py +9 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/aio/__init__.py +29 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/aio/_client.py +107 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/aio/_configuration.py +75 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/aio/_operations/__init__.py +23 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/aio/_operations/_operations.py +131 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/aio/_operations/_patch.py +116 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/aio/_patch.py +21 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/models/__init__.py +48 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/models/_enums.py +23 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/models/_models.py +450 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/models/_patch.py +21 -0
- azure_ai_transcription-1.0.0b1/azure/ai/transcription/py.typed +1 -0
- azure_ai_transcription-1.0.0b1/azure_ai_transcription.egg-info/PKG-INFO +471 -0
- azure_ai_transcription-1.0.0b1/azure_ai_transcription.egg-info/SOURCES.txt +66 -0
- azure_ai_transcription-1.0.0b1/azure_ai_transcription.egg-info/dependency_links.txt +1 -0
- azure_ai_transcription-1.0.0b1/azure_ai_transcription.egg-info/requires.txt +3 -0
- azure_ai_transcription-1.0.0b1/azure_ai_transcription.egg-info/top_level.txt +1 -0
- azure_ai_transcription-1.0.0b1/pyproject.toml +61 -0
- azure_ai_transcription-1.0.0b1/samples/README.md +92 -0
- azure_ai_transcription-1.0.0b1/samples/async_samples/sample_transcribe_audio_file_async.py +71 -0
- azure_ai_transcription-1.0.0b1/samples/async_samples/sample_transcribe_from_url_async.py +59 -0
- azure_ai_transcription-1.0.0b1/samples/async_samples/sample_transcribe_multiple_languages_async.py +69 -0
- azure_ai_transcription-1.0.0b1/samples/async_samples/sample_transcribe_with_diarization_async.py +77 -0
- azure_ai_transcription-1.0.0b1/samples/async_samples/sample_transcribe_with_enhanced_mode_async.py +84 -0
- azure_ai_transcription-1.0.0b1/samples/async_samples/sample_transcribe_with_phrase_list_async.py +87 -0
- azure_ai_transcription-1.0.0b1/samples/async_samples/sample_transcribe_with_profanity_filter_async.py +75 -0
- azure_ai_transcription-1.0.0b1/samples/sample_transcribe_audio_file.py +71 -0
- azure_ai_transcription-1.0.0b1/samples/sample_transcribe_from_url.py +58 -0
- azure_ai_transcription-1.0.0b1/samples/sample_transcribe_multiple_languages.py +69 -0
- azure_ai_transcription-1.0.0b1/samples/sample_transcribe_with_diarization.py +77 -0
- azure_ai_transcription-1.0.0b1/samples/sample_transcribe_with_enhanced_mode.py +84 -0
- azure_ai_transcription-1.0.0b1/samples/sample_transcribe_with_phrase_list.py +79 -0
- azure_ai_transcription-1.0.0b1/samples/sample_transcribe_with_profanity_filter.py +85 -0
- azure_ai_transcription-1.0.0b1/setup.cfg +4 -0
- azure_ai_transcription-1.0.0b1/tests/conftest.py +131 -0
- azure_ai_transcription-1.0.0b1/tests/preparer.py +63 -0
- azure_ai_transcription-1.0.0b1/tests/test_transcription_basic.py +71 -0
- azure_ai_transcription-1.0.0b1/tests/test_transcription_basic_async.py +79 -0
- azure_ai_transcription-1.0.0b1/tests/test_transcription_client_management.py +38 -0
- azure_ai_transcription-1.0.0b1/tests/test_transcription_client_management_async.py +25 -0
- azure_ai_transcription-1.0.0b1/tests/test_transcription_diarization.py +34 -0
- azure_ai_transcription-1.0.0b1/tests/test_transcription_diarization_async.py +36 -0
- azure_ai_transcription-1.0.0b1/tests/test_transcription_enhanced_mode.py +38 -0
- azure_ai_transcription-1.0.0b1/tests/test_transcription_file.py +42 -0
- azure_ai_transcription-1.0.0b1/tests/test_transcription_file_async.py +43 -0
- azure_ai_transcription-1.0.0b1/tests/test_transcription_options.py +115 -0
- azure_ai_transcription-1.0.0b1/tests/test_transcription_options_async.py +83 -0
- azure_ai_transcription-1.0.0b1/tests/test_transcription_url.py +26 -0
- azure_ai_transcription-1.0.0b1/tests/test_transcription_url_async.py +28 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Copyright (c) Microsoft Corporation.
|
|
2
|
+
|
|
3
|
+
MIT License
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,471 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: azure-ai-transcription
|
|
3
|
+
Version: 1.0.0b1
|
|
4
|
+
Summary: Microsoft Corporation Azure AI Transcription Client Library for Python
|
|
5
|
+
Author-email: Microsoft Corporation <azpysdkhelp@microsoft.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: repository, https://github.com/Azure/azure-sdk-for-python
|
|
8
|
+
Keywords: azure,azure sdk
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Programming Language :: Python
|
|
11
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: isodate>=0.6.1
|
|
22
|
+
Requires-Dist: azure-core>=1.35.0
|
|
23
|
+
Requires-Dist: typing-extensions>=4.6.0
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
|
|
26
|
+
# Azure AI Speech Transcription client library for Python
|
|
27
|
+
|
|
28
|
+
Azure AI Speech Transcription is a service that provides advanced speech-to-text capabilities, allowing you to transcribe audio content into text with high accuracy. This client library enables developers to integrate speech transcription features into their Python applications.
|
|
29
|
+
|
|
30
|
+
Use the client library to:
|
|
31
|
+
- Transcribe audio files and audio URLs to text
|
|
32
|
+
- Support multiple languages with automatic language detection
|
|
33
|
+
- Customize transcription with domain-specific models
|
|
34
|
+
- Enable speaker diarization to identify different speakers
|
|
35
|
+
- Configure profanity filtering and channel separation
|
|
36
|
+
|
|
37
|
+
[Source code][source_code] | [Package (PyPI)][pypi_package] | [API reference documentation][api_reference] | [Product documentation][product_docs]
|
|
38
|
+
|
|
39
|
+
## Getting started
|
|
40
|
+
|
|
41
|
+
### Prerequisites
|
|
42
|
+
|
|
43
|
+
- Python 3.9 or later is required to use this package.
|
|
44
|
+
- You must have an [Azure subscription][azure_sub] to use this package.
|
|
45
|
+
- An [Azure AI Speech resource][speech_resource] in your Azure account.
|
|
46
|
+
|
|
47
|
+
### Install the package
|
|
48
|
+
|
|
49
|
+
Install the Azure AI Speech Transcription client library for Python with [pip][pip]:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install azure-ai-transcription
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Create an Azure AI Speech resource
|
|
56
|
+
|
|
57
|
+
You can create an Azure AI Speech resource using the [Azure Portal][azure_portal] or [Azure CLI][azure_cli].
|
|
58
|
+
|
|
59
|
+
Here's an example using the Azure CLI:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
az cognitiveservices account create \
|
|
63
|
+
--name <your-resource-name> \
|
|
64
|
+
--resource-group <your-resource-group> \
|
|
65
|
+
--kind SpeechServices \
|
|
66
|
+
--sku F0 \
|
|
67
|
+
--location <region>
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Authenticate the client
|
|
71
|
+
|
|
72
|
+
In order to interact with the Azure AI Speech Transcription service, you'll need to create an instance of the [TranscriptionClient][transcription_client] class. The client supports two authentication methods:
|
|
73
|
+
|
|
74
|
+
1. **Microsoft Entra ID (formerly Azure Active Directory, Azure AD) Authentication** - Using `DefaultAzureCredential` or other token credentials from `azure-identity`
|
|
75
|
+
2. **API Key Authentication** - Using `AzureKeyCredential` with your Speech resource's API key
|
|
76
|
+
|
|
77
|
+
#### Get credentials
|
|
78
|
+
|
|
79
|
+
You can get the endpoint and API key from the Azure Portal or by running the following Azure CLI command:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
az cognitiveservices account keys list \
|
|
83
|
+
--name <your-resource-name> \
|
|
84
|
+
--resource-group <your-resource-group>
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
The endpoint can be found in the "Keys and Endpoint" section of your Speech resource in the Azure Portal.
|
|
88
|
+
|
|
89
|
+
#### Create the client with API Key
|
|
90
|
+
|
|
91
|
+
Using an API key is the simplest authentication method:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
import os
|
|
95
|
+
from azure.core.credentials import AzureKeyCredential
|
|
96
|
+
from azure.ai.transcription import TranscriptionClient
|
|
97
|
+
|
|
98
|
+
endpoint = os.environ.get("AZURE_SPEECH_ENDPOINT")
|
|
99
|
+
api_key = os.environ.get("AZURE_SPEECH_API_KEY")
|
|
100
|
+
|
|
101
|
+
credential = AzureKeyCredential(api_key)
|
|
102
|
+
client = TranscriptionClient(endpoint=endpoint, credential=credential)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
#### Create the client with Azure AD (Recommended for Production)
|
|
106
|
+
|
|
107
|
+
Azure AD authentication provides better security and is recommended for production scenarios. First, install the `azure-identity` package:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
pip install azure-identity
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Then create the client using `DefaultAzureCredential`:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
import os
|
|
117
|
+
from azure.identity import DefaultAzureCredential
|
|
118
|
+
from azure.ai.transcription import TranscriptionClient
|
|
119
|
+
|
|
120
|
+
endpoint = os.environ.get("AZURE_SPEECH_ENDPOINT")
|
|
121
|
+
|
|
122
|
+
# DefaultAzureCredential will try multiple authentication methods
|
|
123
|
+
# including environment variables, managed identity, Azure CLI, etc.
|
|
124
|
+
credential = DefaultAzureCredential()
|
|
125
|
+
client = TranscriptionClient(endpoint=endpoint, credential=credential)
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
**Note:** When using Azure AD authentication, ensure your Azure identity has the appropriate role assigned (e.g., `Cognitive Services User` or `Cognitive Services Speech User`) on the Speech resource.
|
|
129
|
+
|
|
130
|
+
## Key concepts
|
|
131
|
+
|
|
132
|
+
### TranscriptionClient
|
|
133
|
+
|
|
134
|
+
The `TranscriptionClient` is the primary interface for developers using the Azure AI Speech Transcription client library. It provides the `transcribe` method to convert audio into text.
|
|
135
|
+
|
|
136
|
+
### Transcription Options
|
|
137
|
+
|
|
138
|
+
The service supports various transcription options including:
|
|
139
|
+
- **Language Detection**: Automatically detect the spoken language from the supported locales, or specify a list of candidate locales
|
|
140
|
+
- **Custom Models**: Map locales to custom model URIs for domain-specific vocabulary
|
|
141
|
+
- **Diarization**: Identify and separate different speakers in the audio
|
|
142
|
+
- **Channel Separation**: Process up to two audio channels separately
|
|
143
|
+
- **Profanity Filtering**: Control how profanity appears in transcripts (None, Removed, Tags, Masked)
|
|
144
|
+
- **Enhanced Mode**: Additional processing capabilities
|
|
145
|
+
- **Phrase Lists**: Improve accuracy for specific terms and phrases
|
|
146
|
+
|
|
147
|
+
### Transcription Results
|
|
148
|
+
|
|
149
|
+
Results include:
|
|
150
|
+
- Full transcript text per channel
|
|
151
|
+
- Segmented phrases with timestamps
|
|
152
|
+
- Word-level details including confidence scores
|
|
153
|
+
- Duration information
|
|
154
|
+
|
|
155
|
+
## Examples
|
|
156
|
+
|
|
157
|
+
The following sections provide several code snippets covering common scenarios:
|
|
158
|
+
|
|
159
|
+
- [Transcribe an audio file](#transcribe-an-audio-file)
|
|
160
|
+
- [Transcribe from a URL](#transcribe-from-a-url)
|
|
161
|
+
- [Transcribe with enhanced mode](#transcribe-with-enhanced-mode)
|
|
162
|
+
- [Using async client](#using-async-client)
|
|
163
|
+
|
|
164
|
+
For more extensive examples including speaker diarization, multi-language detection, profanity filtering, and custom phrase lists, see the [samples][samples_directory] directory.
|
|
165
|
+
|
|
166
|
+
### Transcribe an audio file
|
|
167
|
+
|
|
168
|
+
<!-- SNIPPET:sample_transcribe_audio_file.transcribe_audio_file-->
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
import os
from azure.core.credentials import AzureKeyCredential
|
|
172
|
+
from azure.ai.transcription import TranscriptionClient
|
|
173
|
+
from azure.ai.transcription.models import TranscriptionContent, TranscriptionOptions
|
|
174
|
+
|
|
175
|
+
# Get configuration from environment variables
|
|
176
|
+
endpoint = os.environ["AZURE_SPEECH_ENDPOINT"]
|
|
177
|
+
api_key = os.environ["AZURE_SPEECH_API_KEY"]
|
|
178
|
+
|
|
179
|
+
# Create the transcription client
|
|
180
|
+
client = TranscriptionClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))
|
|
181
|
+
|
|
182
|
+
# Path to your audio file
|
|
183
|
+
import pathlib
|
|
184
|
+
|
|
185
|
+
audio_file_path = pathlib.Path(__file__).parent / "assets" / "audio.wav"
|
|
186
|
+
|
|
187
|
+
# Open and read the audio file
|
|
188
|
+
with open(audio_file_path, "rb") as audio_file:
|
|
189
|
+
# Create transcription options
|
|
190
|
+
options = TranscriptionOptions(locales=["en-US"]) # Specify the language
|
|
191
|
+
|
|
192
|
+
# Create the request content
|
|
193
|
+
request_content = TranscriptionContent(definition=options, audio=audio_file)
|
|
194
|
+
|
|
195
|
+
# Transcribe the audio
|
|
196
|
+
result = client.transcribe(request_content)
|
|
197
|
+
|
|
198
|
+
# Print the transcription result
|
|
199
|
+
print(f"Transcription: {result.combined_phrases[0].text}")
|
|
200
|
+
|
|
201
|
+
# Print detailed phrase information
|
|
202
|
+
if result.phrases:
|
|
203
|
+
print("\nDetailed phrases:")
|
|
204
|
+
for phrase in result.phrases:
|
|
205
|
+
print(
|
|
206
|
+
f" [{phrase.offset_milliseconds}ms - "
|
|
207
|
+
f"{phrase.offset_milliseconds + phrase.duration_milliseconds}ms]: "
|
|
208
|
+
f"{phrase.text}"
|
|
209
|
+
)
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
<!-- END SNIPPET -->
|
|
213
|
+
|
|
214
|
+
### Transcribe from a URL
|
|
215
|
+
|
|
216
|
+
<!-- SNIPPET:sample_transcribe_from_url.transcribe_from_url-->
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
import os
from azure.core.credentials import AzureKeyCredential
|
|
220
|
+
from azure.ai.transcription import TranscriptionClient
|
|
221
|
+
from azure.ai.transcription.models import TranscriptionOptions
|
|
222
|
+
|
|
223
|
+
# Get configuration from environment variables
|
|
224
|
+
endpoint = os.environ["AZURE_SPEECH_ENDPOINT"]
|
|
225
|
+
api_key = os.environ["AZURE_SPEECH_API_KEY"]
|
|
226
|
+
|
|
227
|
+
# Create the transcription client
|
|
228
|
+
client = TranscriptionClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))
|
|
229
|
+
|
|
230
|
+
# URL to your audio file (must be publicly accessible)
|
|
231
|
+
audio_url = "https://example.com/path/to/audio.wav"
|
|
232
|
+
# Configure transcription options
|
|
233
|
+
options = TranscriptionOptions(locales=["en-US"])
|
|
234
|
+
|
|
235
|
+
# Transcribe the audio from URL
|
|
236
|
+
# The service will access and transcribe the audio directly from the URL
|
|
237
|
+
result = client.transcribe_from_url(audio_url, options=options)
|
|
238
|
+
|
|
239
|
+
# Print the transcription result
|
|
240
|
+
print(f"Transcription: {result.combined_phrases[0].text}")
|
|
241
|
+
|
|
242
|
+
# Print duration information
|
|
243
|
+
if result.duration_milliseconds:
|
|
244
|
+
print(f"Audio duration: {result.duration_milliseconds / 1000:.2f} seconds")
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
<!-- END SNIPPET -->
|
|
248
|
+
|
|
249
|
+
### Transcribe with enhanced mode
|
|
250
|
+
|
|
251
|
+
Enhanced mode provides advanced capabilities such as translation or summarization during transcription:
|
|
252
|
+
|
|
253
|
+
<!-- SNIPPET:sample_transcribe_with_enhanced_mode.transcribe_with_enhanced_mode-->
|
|
254
|
+
|
|
255
|
+
```python
|
|
256
|
+
import os
from azure.core.credentials import AzureKeyCredential
|
|
257
|
+
from azure.ai.transcription import TranscriptionClient
|
|
258
|
+
from azure.ai.transcription.models import (
|
|
259
|
+
TranscriptionContent,
|
|
260
|
+
TranscriptionOptions,
|
|
261
|
+
EnhancedModeProperties,
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
# Get configuration from environment variables
|
|
265
|
+
endpoint = os.environ["AZURE_SPEECH_ENDPOINT"]
|
|
266
|
+
api_key = os.environ["AZURE_SPEECH_API_KEY"]
|
|
267
|
+
|
|
268
|
+
# Create the transcription client
|
|
269
|
+
client = TranscriptionClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))
|
|
270
|
+
|
|
271
|
+
# Path to your audio file
|
|
272
|
+
import pathlib
|
|
273
|
+
|
|
274
|
+
audio_file_path = pathlib.Path(__file__).parent / "assets" / "audio.wav"
|
|
275
|
+
|
|
276
|
+
# Open and read the audio file
|
|
277
|
+
with open(audio_file_path, "rb") as audio_file:
|
|
278
|
+
# Create enhanced mode properties
|
|
279
|
+
# Enable enhanced mode for advanced processing capabilities
|
|
280
|
+
enhanced_mode = EnhancedModeProperties(
|
|
281
|
+
task="translation", # Specify the task type (e.g., "translation", "summarization")
|
|
282
|
+
target_language="es-ES", # Target language for translation
|
|
283
|
+
prompt=[
|
|
284
|
+
"Translate the following audio to Spanish",
|
|
285
|
+
"Focus on technical terminology",
|
|
286
|
+
], # Optional prompts to guide the enhanced mode
|
|
287
|
+
)
|
|
288
|
+
|
|
289
|
+
# Create transcription options with enhanced mode
|
|
290
|
+
options = TranscriptionOptions(locales=["en-US"], enhanced_mode=enhanced_mode)
|
|
291
|
+
|
|
292
|
+
# Create the request content
|
|
293
|
+
request_content = TranscriptionContent(definition=options, audio=audio_file)
|
|
294
|
+
|
|
295
|
+
# Transcribe the audio with enhanced mode
|
|
296
|
+
result = client.transcribe(request_content)
|
|
297
|
+
|
|
298
|
+
# Print the transcription result
|
|
299
|
+
print("Transcription with enhanced mode:")
|
|
300
|
+
print(f"{result.combined_phrases[0].text}")
|
|
301
|
+
|
|
302
|
+
# Print individual phrases if available
|
|
303
|
+
if result.phrases:
|
|
304
|
+
print("\nDetailed phrases:")
|
|
305
|
+
for phrase in result.phrases:
|
|
306
|
+
print(f" [{phrase.offset_milliseconds}ms]: {phrase.text}")
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
<!-- END SNIPPET -->
|
|
310
|
+
|
|
311
|
+
### Using async client
|
|
312
|
+
|
|
313
|
+
The library also provides an async client for asynchronous operations:
|
|
314
|
+
|
|
315
|
+
<!-- SNIPPET:sample_transcribe_audio_file_async.transcribe_audio_file_async-->
|
|
316
|
+
|
|
317
|
+
```python
|
|
318
|
+
import os
from azure.core.credentials import AzureKeyCredential
|
|
319
|
+
from azure.ai.transcription.aio import TranscriptionClient
|
|
320
|
+
from azure.ai.transcription.models import TranscriptionContent, TranscriptionOptions
|
|
321
|
+
|
|
322
|
+
# Get configuration from environment variables
|
|
323
|
+
endpoint = os.environ["AZURE_SPEECH_ENDPOINT"]
|
|
324
|
+
api_key = os.environ["AZURE_SPEECH_API_KEY"]
|
|
325
|
+
|
|
326
|
+
# Create the transcription client
|
|
327
|
+
async with TranscriptionClient(endpoint=endpoint, credential=AzureKeyCredential(api_key)) as client:
|
|
328
|
+
# Path to your audio file
|
|
329
|
+
import pathlib
|
|
330
|
+
|
|
331
|
+
audio_file_path = pathlib.Path(__file__).parent.parent / "assets" / "audio.wav"
|
|
332
|
+
|
|
333
|
+
# Open and read the audio file
|
|
334
|
+
with open(audio_file_path, "rb") as audio_file:
|
|
335
|
+
# Create transcription options
|
|
336
|
+
options = TranscriptionOptions(locales=["en-US"]) # Specify the language
|
|
337
|
+
|
|
338
|
+
# Create the request content
|
|
339
|
+
request_content = TranscriptionContent(definition=options, audio=audio_file)
|
|
340
|
+
|
|
341
|
+
# Transcribe the audio
|
|
342
|
+
result = await client.transcribe(request_content)
|
|
343
|
+
|
|
344
|
+
# Print the transcription result
|
|
345
|
+
print(f"Transcription: {result.combined_phrases[0].text}")
|
|
346
|
+
|
|
347
|
+
# Print detailed phrase information
|
|
348
|
+
if result.phrases:
|
|
349
|
+
print("\nDetailed phrases:")
|
|
350
|
+
for phrase in result.phrases:
|
|
351
|
+
print(
|
|
352
|
+
f" [{phrase.offset_milliseconds}ms - "
|
|
353
|
+
f"{phrase.offset_milliseconds + phrase.duration_milliseconds}ms]: "
|
|
354
|
+
f"{phrase.text}"
|
|
355
|
+
)
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
<!-- END SNIPPET -->
|
|
359
|
+
|
|
360
|
+
## Troubleshooting
|
|
361
|
+
|
|
362
|
+
### General
|
|
363
|
+
|
|
364
|
+
Azure AI Speech Transcription client library will raise exceptions defined in [Azure Core][azure_core_exceptions] if you call `.raise_for_status()` on your responses.
|
|
365
|
+
|
|
366
|
+
### Logging
|
|
367
|
+
|
|
368
|
+
This library uses the standard [logging][python_logging] library for logging. Basic information about HTTP sessions (URLs, headers, etc.) is logged at `INFO` level.
|
|
369
|
+
|
|
370
|
+
Detailed `DEBUG` level logging, including request/response bodies and **unredacted** headers, can be enabled on the client or per-operation with the `logging_enable` keyword argument.
|
|
371
|
+
|
|
372
|
+
```python
|
|
373
|
+
import sys
|
|
374
|
+
import logging
|
|
375
|
+
from azure.core.credentials import AzureKeyCredential
|
|
376
|
+
from azure.ai.transcription import TranscriptionClient
|
|
377
|
+
|
|
378
|
+
# Create a logger for the 'azure' SDK
|
|
379
|
+
logger = logging.getLogger('azure')
|
|
380
|
+
logger.setLevel(logging.DEBUG)
|
|
381
|
+
|
|
382
|
+
# Configure a console output
|
|
383
|
+
handler = logging.StreamHandler(stream=sys.stdout)
|
|
384
|
+
logger.addHandler(handler)
|
|
385
|
+
|
|
386
|
+
# Enable network trace logging
|
|
387
|
+
endpoint = "https://<your-region>.api.cognitive.microsoft.com"
|
|
388
|
+
credential = AzureKeyCredential("<your-api-key>")
|
|
389
|
+
client = TranscriptionClient(endpoint=endpoint, credential=credential, logging_enable=True)
|
|
390
|
+
```
|
|
391
|
+
|
|
392
|
+
### Errors and exceptions
|
|
393
|
+
|
|
394
|
+
When you interact with the Azure AI Speech Transcription client library using the Python SDK, errors returned by the service correspond to the same HTTP status codes returned for [REST API][rest_api] requests.
|
|
395
|
+
|
|
396
|
+
For example, if you try to use an invalid API key, a `401` error is returned, indicating "Unauthorized".
|
|
397
|
+
|
|
398
|
+
```python
|
|
399
|
+
from azure.core.credentials import AzureKeyCredential
|
|
400
|
+
from azure.ai.transcription import TranscriptionClient
|
|
401
|
+
from azure.core.exceptions import HttpResponseError
|
|
402
|
+
|
|
403
|
+
endpoint = "https://<your-region>.api.cognitive.microsoft.com"
|
|
404
|
+
credential = AzureKeyCredential("invalid_key")
|
|
405
|
+
|
|
406
|
+
client = TranscriptionClient(endpoint=endpoint, credential=credential)
|
|
407
|
+
|
|
408
|
+
try:
|
|
409
|
+
# Attempt an operation
|
|
410
|
+
pass
|
|
411
|
+
except HttpResponseError as e:
|
|
412
|
+
print(f"Error: {e}")
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
## Next steps
|
|
416
|
+
|
|
417
|
+
### More sample code
|
|
418
|
+
|
|
419
|
+
For more extensive examples of using the Azure AI Speech Transcription client library, see the [samples][samples_directory] directory. These samples demonstrate:
|
|
420
|
+
- Basic transcription of audio files and URLs (sync and async)
|
|
421
|
+
- Speaker diarization to identify different speakers
|
|
422
|
+
- Multi-language detection and transcription
|
|
423
|
+
- Profanity filtering options
|
|
424
|
+
- Custom phrase lists for domain-specific terminology
|
|
425
|
+
|
|
426
|
+
Additional resources:
|
|
427
|
+
- Check the [Azure AI Speech documentation][speech_docs] for comprehensive tutorials and guides
|
|
428
|
+
- Explore the [Azure SDK for Python samples][azure_sdk_samples] repository
|
|
429
|
+
|
|
430
|
+
### Additional documentation
|
|
431
|
+
|
|
432
|
+
For more extensive documentation on Azure AI Speech, see the [Speech service documentation][speech_docs] on learn.microsoft.com.
|
|
433
|
+
|
|
434
|
+
## Contributing
|
|
435
|
+
|
|
436
|
+
This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit [https://cla.microsoft.com][cla].
|
|
437
|
+
|
|
438
|
+
When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA.
|
|
439
|
+
|
|
440
|
+
This project has adopted the [Microsoft Open Source Code of Conduct][code_of_conduct]. For more information, see the [Code of Conduct FAQ][code_of_conduct_faq] or contact [opencode@microsoft.com][opencode_email] with any additional questions or comments.
|
|
441
|
+
|
|
442
|
+
<!-- LINKS -->
|
|
443
|
+
[source_code]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/cognitiveservices/azure-ai-transcription
|
|
444
|
+
[pypi_package]: https://pypi.org/project/azure-ai-transcription/
|
|
445
|
+
[api_reference]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/cognitiveservices/azure-ai-transcription/azure/ai/transcription
|
|
446
|
+
[product_docs]: https://learn.microsoft.com/azure/ai-services/speech-service/
|
|
447
|
+
[azure_sub]: https://azure.microsoft.com/free/
|
|
448
|
+
[speech_resource]: https://learn.microsoft.com/azure/ai-services/speech-service/overview
|
|
449
|
+
[pip]: https://pypi.org/project/pip/
|
|
450
|
+
[azure_portal]: https://portal.azure.com
|
|
451
|
+
[azure_cli]: https://learn.microsoft.com/cli/azure
|
|
452
|
+
[transcription_client]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/cognitiveservices/azure-ai-transcription/azure/ai/transcription/_client.py
|
|
453
|
+
[azure_core_exceptions]: https://aka.ms/azsdk/python/core/docs#module-azure.core.exceptions
|
|
454
|
+
[python_logging]: https://docs.python.org/3/library/logging.html
|
|
455
|
+
[rest_api]: https://learn.microsoft.com/azure/ai-services/speech-service/rest-speech-to-text
|
|
456
|
+
[samples_directory]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/cognitiveservices/azure-ai-transcription/samples
|
|
457
|
+
[azure_sdk_samples]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/cognitiveservices/azure-ai-transcription/samples
|
|
458
|
+
[speech_docs]: https://learn.microsoft.com/azure/ai-services/speech-service/
|
|
459
|
+
[cla]: https://cla.microsoft.com
|
|
460
|
+
[code_of_conduct]: https://opensource.microsoft.com/codeofconduct/
|
|
461
|
+
[code_of_conduct_faq]: https://opensource.microsoft.com/codeofconduct/faq/
|
|
462
|
+
[opencode_email]: mailto:opencode@microsoft.com
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
# Release History
|
|
466
|
+
|
|
467
|
+
## 1.0.0b1 (2025-12-03)
|
|
468
|
+
|
|
469
|
+
### Other Changes
|
|
470
|
+
|
|
471
|
+
- Initial version
|