azure-ai-transcription 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,450 @@
1
+ # coding=utf-8
2
+ # --------------------------------------------------------------------------
3
+ # Copyright (c) Microsoft Corporation. All rights reserved.
4
+ # Licensed under the MIT License. See License.txt in the project root for license information.
5
+ # Code generated by Microsoft (R) Python Code Generator.
6
+ # Changes may cause incorrect behavior and will be lost if the code is regenerated.
7
+ # --------------------------------------------------------------------------
8
+ # pylint: disable=useless-super-delegation
9
+
10
+ from typing import Any, Mapping, Optional, TYPE_CHECKING, Union, overload
11
+
12
+ from .._utils.model_base import Model as _Model, rest_field
13
+ from .._utils.utils import FileType
14
+
15
+ if TYPE_CHECKING:
16
+ from .. import models as _models
17
+
18
+
19
class ChannelCombinedPhrases(_Model):
    """Complete transcript text for a single channel.

    :ivar channel: Zero-based index of the channel. Populated only when channel separation is
     enabled.
    :vartype channel: int
    :ivar text: Entire transcribed text of the channel. Required.
    :vartype text: str
    """

    channel: Optional[int] = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Zero-based index of the channel. Populated only when channel separation is enabled."""
    text: str = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Entire transcribed text of the channel. Required."""

    @overload
    def __init__(self, *, text: str, channel: Optional[int] = None) -> None: ...

    @overload
    def __init__(self, mapping: Mapping[str, Any]) -> None:
        """
        :param mapping: Raw JSON used to initialize the model.
        :type mapping: Mapping[str, Any]
        """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Both overloads are served by the base model constructor; arguments pass through untouched.
        super().__init__(*args, **kwargs)
50
+
51
+
52
class EnhancedModeProperties(_Model):
    """Settings that control enhanced mode for a transcription.

    :ivar enabled: Whether enhanced mode is enabled. Set to true automatically when task,
     targetLanguage, or prompt are specified.
    :vartype enabled: bool
    :ivar task: The enhanced mode task type.
    :vartype task: str
    :ivar target_language: The enhanced mode target language.
    :vartype target_language: str
    :ivar prompt: User prompts, given as a list.
    :vartype prompt: list[str]
    """

    # Declared with "read" visibility only, and absent from the keyword overload below.
    enabled: Optional[bool] = rest_field(visibility=["read"])
    """Whether enhanced mode is enabled. Set to true automatically when task, targetLanguage, or
    prompt are specified."""
    task: Optional[str] = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """The enhanced mode task type."""
    target_language: Optional[str] = rest_field(
        name="targetLanguage",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """The enhanced mode target language."""
    prompt: Optional[list[str]] = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """User prompts, given as a list."""

    @overload
    def __init__(
        self,
        *,
        task: Optional[str] = None,
        target_language: Optional[str] = None,
        prompt: Optional[list[str]] = None,
    ) -> None: ...

    @overload
    def __init__(self, mapping: Mapping[str, Any]) -> None:
        """
        :param mapping: Raw JSON used to initialize the model.
        :type mapping: Mapping[str, Any]
        """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Both overloads are served by the base model constructor; arguments pass through untouched.
        super().__init__(*args, **kwargs)
96
+
97
+
98
class PhraseListProperties(_Model):
    """Settings for the phrase list supplied to a transcription.

    :ivar phrases: Phrases to be used for recognition.
    :vartype phrases: list[str]
    :ivar biasing_weight: Weight used to bias towards the phrase list (1.0 to 20.0).
    :vartype biasing_weight: float
    """

    phrases: Optional[list[str]] = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Phrases to be used for recognition."""
    biasing_weight: Optional[float] = rest_field(
        name="biasingWeight",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Weight used to bias towards the phrase list (1.0 to 20.0)."""

    @overload
    def __init__(
        self,
        *,
        phrases: Optional[list[str]] = None,
        biasing_weight: Optional[float] = None,
    ) -> None: ...

    @overload
    def __init__(self, mapping: Mapping[str, Any]) -> None:
        """
        :param mapping: Raw JSON used to initialize the model.
        :type mapping: Mapping[str, Any]
        """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Both overloads are served by the base model constructor; arguments pass through untouched.
        super().__init__(*args, **kwargs)
131
+
132
+
133
class TranscribedPhrase(_Model):
    """One phrase of a transcription.

    :ivar channel: Zero-based index of the channel. Populated only when channel separation is
     enabled.
    :vartype channel: int
    :ivar speaker: Unique integer assigned, in no particular order, to each speaker detected in
     the audio. Populated only when speaker diarization is enabled.
    :vartype speaker: int
    :ivar offset_milliseconds: Start offset of the phrase, in milliseconds. Required.
    :vartype offset_milliseconds: int
    :ivar duration_milliseconds: Duration of the phrase, in milliseconds. Required.
    :vartype duration_milliseconds: int
    :ivar text: Transcribed text of the phrase. Required.
    :vartype text: str
    :ivar words: Words making up the phrase. Populated only when word-level timestamps are
     enabled.
    :vartype words: list[~azure.ai.transcription.models.TranscribedWord]
    :ivar locale: Locale of the phrase.
    :vartype locale: str
    :ivar confidence: Confidence value of the phrase. Required.
    :vartype confidence: float
    """

    channel: Optional[int] = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Zero-based index of the channel. Populated only when channel separation is enabled."""
    speaker: Optional[int] = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Unique integer assigned, in no particular order, to each speaker detected in the audio.
    Populated only when speaker diarization is enabled."""
    offset_milliseconds: int = rest_field(
        name="offsetMilliseconds",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Start offset of the phrase, in milliseconds. Required."""
    duration_milliseconds: int = rest_field(
        name="durationMilliseconds",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Duration of the phrase, in milliseconds. Required."""
    text: str = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Transcribed text of the phrase. Required."""
    words: Optional[list["_models.TranscribedWord"]] = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Words making up the phrase. Populated only when word-level timestamps are enabled."""
    locale: Optional[str] = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Locale of the phrase."""
    confidence: float = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Confidence value of the phrase. Required."""

    @overload
    def __init__(
        self,
        *,
        offset_milliseconds: int,
        duration_milliseconds: int,
        text: str,
        confidence: float,
        channel: Optional[int] = None,
        speaker: Optional[int] = None,
        words: Optional[list["_models.TranscribedWord"]] = None,
        locale: Optional[str] = None,
    ) -> None: ...

    @overload
    def __init__(self, mapping: Mapping[str, Any]) -> None:
        """
        :param mapping: Raw JSON used to initialize the model.
        :type mapping: Mapping[str, Any]
        """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Both overloads are served by the base model constructor; arguments pass through untouched.
        super().__init__(*args, **kwargs)
203
+
204
+
205
class TranscribedWord(_Model):
    """A word in display form, together with its timing.

    :ivar text: Recognized word, punctuation included. Required.
    :vartype text: str
    :ivar offset_milliseconds: Start offset of the word, in milliseconds. Required.
    :vartype offset_milliseconds: int
    :ivar duration_milliseconds: Duration of the word, in milliseconds. Required.
    :vartype duration_milliseconds: int
    """

    text: str = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Recognized word, punctuation included. Required."""
    offset_milliseconds: int = rest_field(
        name="offsetMilliseconds",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Start offset of the word, in milliseconds. Required."""
    duration_milliseconds: int = rest_field(
        name="durationMilliseconds",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Duration of the word, in milliseconds. Required."""

    @overload
    def __init__(self, *, text: str, offset_milliseconds: int, duration_milliseconds: int) -> None: ...

    @overload
    def __init__(self, mapping: Mapping[str, Any]) -> None:
        """
        :param mapping: Raw JSON used to initialize the model.
        :type mapping: Mapping[str, Any]
        """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Both overloads are served by the base model constructor; arguments pass through untouched.
        super().__init__(*args, **kwargs)
245
+
246
+
247
class TranscriptionContent(_Model):
    """Request payload of the transcription operation.

    :ivar definition: Metadata describing the transcription request. The field carries a
     JSON-serialized object of type ``TranscriptionOptions``. Required.
    :vartype definition: ~azure.ai.transcription.models.TranscriptionOptions
    :ivar audio: Content of the audio file to transcribe. The file must be shorter than 2 hours
     in audio duration and smaller than 250 MB in size. Optional if audioUrl is provided in the
     definition.
    :vartype audio: ~azure.ai.transcription._utils.utils.FileType
    """

    definition: "_models.TranscriptionOptions" = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Metadata describing the transcription request. The field carries a JSON-serialized object of
    type ``TranscriptionOptions``. Required."""
    # Flagged as a multipart file input, unlike the other field of this model.
    audio: Optional[FileType] = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
        is_multipart_file_input=True,
    )
    """Content of the audio file to transcribe. The file must be shorter than 2 hours in audio
    duration and smaller than 250 MB in size. Optional if audioUrl is provided in the
    definition."""

    @overload
    def __init__(
        self,
        *,
        definition: "_models.TranscriptionOptions",
        audio: Optional[FileType] = None,
    ) -> None: ...

    @overload
    def __init__(self, mapping: Mapping[str, Any]) -> None:
        """
        :param mapping: Raw JSON used to initialize the model.
        :type mapping: Mapping[str, Any]
        """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Both overloads are served by the base model constructor; arguments pass through untouched.
        super().__init__(*args, **kwargs)
286
+
287
+
288
class TranscriptionDiarizationOptions(_Model):
    """Speaker diarization settings. These settings must be specified for speaker diarization to
    be enabled.

    :ivar enabled: Whether speaker diarization is enabled. Set to true automatically when
     maxSpeakers is specified.
    :vartype enabled: bool
    :ivar max_speakers: Hint for the maximum number of speakers for diarization. Must be greater
     than 1 and less than 36.
    :vartype max_speakers: int
    """

    # Declared with "read" visibility only, and absent from the keyword overload below.
    enabled: Optional[bool] = rest_field(visibility=["read"])
    """Whether speaker diarization is enabled. Set to true automatically when maxSpeakers is
    specified."""
    max_speakers: Optional[int] = rest_field(
        name="maxSpeakers",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Hint for the maximum number of speakers for diarization. Must be greater than 1 and less
    than 36."""

    @overload
    def __init__(self, *, max_speakers: Optional[int] = None) -> None: ...

    @overload
    def __init__(self, mapping: Mapping[str, Any]) -> None:
        """
        :param mapping: Raw JSON used to initialize the model.
        :type mapping: Mapping[str, Any]
        """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Both overloads are served by the base model constructor; arguments pass through untouched.
        super().__init__(*args, **kwargs)
324
+
325
+
326
class TranscriptionOptions(_Model):
    """Metadata describing a transcription request.

    :ivar audio_url: URL of the audio to transcribe. The audio must be shorter than 2 hours in
     audio duration and smaller than 250 MB in size. When both Audio and AudioUrl are provided,
     Audio is used.
    :vartype audio_url: str
    :ivar locales: Possible locales for the transcription. When not specified, the locale of the
     speech in the audio is detected automatically from all supported locales.
    :vartype locales: list[str]
    :ivar models: Mapping of some or all candidate locales to the model URI to use for
     transcription. When no mapping is given, the default model for the locale is used.
    :vartype models: dict[str, str]
    :ivar profanity_filter_mode: Profanity filtering mode. Known values are: "None", "Removed",
     "Tags", and "Masked".
    :vartype profanity_filter_mode: str or ~azure.ai.transcription.models.ProfanityFilterMode
    :ivar diarization_options: Diarization mode.
    :vartype diarization_options: ~azure.ai.transcription.models.TranscriptionDiarizationOptions
    :ivar active_channels: Zero-based indices of the channels to transcribe separately. When not
     specified, multiple channels are merged and transcribed jointly. Only up to two channels are
     supported.
    :vartype active_channels: list[int]
    :ivar enhanced_mode: Properties of enhanced mode.
    :vartype enhanced_mode: ~azure.ai.transcription.models.EnhancedModeProperties
    :ivar phrase_list: Properties of the phrase list.
    :vartype phrase_list: ~azure.ai.transcription.models.PhraseListProperties
    """

    audio_url: Optional[str] = rest_field(
        name="audioUrl",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """URL of the audio to transcribe. The audio must be shorter than 2 hours in audio duration and
    smaller than 250 MB in size. When both Audio and AudioUrl are provided, Audio is used."""
    locales: Optional[list[str]] = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Possible locales for the transcription. When not specified, the locale of the speech in the
    audio is detected automatically from all supported locales."""
    models: Optional[dict[str, str]] = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Mapping of some or all candidate locales to the model URI to use for transcription. When no
    mapping is given, the default model for the locale is used."""
    profanity_filter_mode: Optional[Union[str, "_models.ProfanityFilterMode"]] = rest_field(
        name="profanityFilterMode",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Profanity filtering mode. Known values are: "None", "Removed", "Tags", and "Masked"."""
    # The REST name ("diarization") differs from the Python attribute name.
    diarization_options: Optional["_models.TranscriptionDiarizationOptions"] = rest_field(
        name="diarization",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Diarization mode."""
    # The REST name ("channels") differs from the Python attribute name.
    active_channels: Optional[list[int]] = rest_field(
        name="channels",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Zero-based indices of the channels to transcribe separately. When not specified, multiple
    channels are merged and transcribed jointly. Only up to two channels are supported."""
    enhanced_mode: Optional["_models.EnhancedModeProperties"] = rest_field(
        name="enhancedMode",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Properties of enhanced mode."""
    phrase_list: Optional["_models.PhraseListProperties"] = rest_field(
        name="phraseList",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Properties of the phrase list."""

    @overload
    def __init__(
        self,
        *,
        audio_url: Optional[str] = None,
        locales: Optional[list[str]] = None,
        models: Optional[dict[str, str]] = None,
        profanity_filter_mode: Optional[Union[str, "_models.ProfanityFilterMode"]] = None,
        diarization_options: Optional["_models.TranscriptionDiarizationOptions"] = None,
        active_channels: Optional[list[int]] = None,
        enhanced_mode: Optional["_models.EnhancedModeProperties"] = None,
        phrase_list: Optional["_models.PhraseListProperties"] = None,
    ) -> None: ...

    @overload
    def __init__(self, mapping: Mapping[str, Any]) -> None:
        """
        :param mapping: Raw JSON used to initialize the model.
        :type mapping: Mapping[str, Any]
        """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Both overloads are served by the base model constructor; arguments pass through untouched.
        super().__init__(*args, **kwargs)
409
+
410
+
411
class TranscriptionResult(_Model):
    """Result returned by the transcribe operation.

    :ivar duration_milliseconds: Duration of the audio, in milliseconds. Required.
    :vartype duration_milliseconds: int
    :ivar combined_phrases: Full transcript of each channel. Required.
    :vartype combined_phrases: list[~azure.ai.transcription.models.ChannelCombinedPhrases]
    :ivar phrases: Transcription results, segmented into phrases. Required.
    :vartype phrases: list[~azure.ai.transcription.models.TranscribedPhrase]
    """

    duration_milliseconds: int = rest_field(
        name="durationMilliseconds",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Duration of the audio, in milliseconds. Required."""
    combined_phrases: list["_models.ChannelCombinedPhrases"] = rest_field(
        name="combinedPhrases",
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Full transcript of each channel. Required."""
    phrases: list["_models.TranscribedPhrase"] = rest_field(
        visibility=["read", "create", "update", "delete", "query"],
    )
    """Transcription results, segmented into phrases. Required."""

    @overload
    def __init__(
        self,
        *,
        duration_milliseconds: int,
        combined_phrases: list["_models.ChannelCombinedPhrases"],
        phrases: list["_models.TranscribedPhrase"],
    ) -> None: ...

    @overload
    def __init__(self, mapping: Mapping[str, Any]) -> None:
        """
        :param mapping: Raw JSON used to initialize the model.
        :type mapping: Mapping[str, Any]
        """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Both overloads are served by the base model constructor; arguments pass through untouched.
        super().__init__(*args, **kwargs)
@@ -0,0 +1,21 @@
1
+ # coding=utf-8
2
+ # --------------------------------------------------------------------------
3
+ # Copyright (c) Microsoft Corporation. All rights reserved.
4
+ # Licensed under the MIT License. See License.txt in the project root for license information.
5
+ # --------------------------------------------------------------------------
6
+ """Customize generated code here.
7
+
8
+ Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
9
+ """
10
+
11
+
12
+ __all__: list[str] = [] # Add all objects you want publicly available to users at this package level
13
+
14
+
15
def patch_sdk():
    """Do not remove from this file.

    This function is the last-resort escape hatch for customizations that
    cannot be achieved with the techniques described in
    https://aka.ms/azsdk/python/dpcodegen/python/customize

    It is intentionally empty here and returns ``None``.
    """
@@ -0,0 +1 @@
1
+ # Marker file for PEP 561.