dashscope 1.8.0__py3-none-any.whl → 1.25.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. dashscope/__init__.py +61 -14
  2. dashscope/aigc/__init__.py +10 -3
  3. dashscope/aigc/chat_completion.py +282 -0
  4. dashscope/aigc/code_generation.py +145 -0
  5. dashscope/aigc/conversation.py +71 -12
  6. dashscope/aigc/generation.py +288 -16
  7. dashscope/aigc/image_synthesis.py +473 -31
  8. dashscope/aigc/multimodal_conversation.py +299 -14
  9. dashscope/aigc/video_synthesis.py +610 -0
  10. dashscope/api_entities/aiohttp_request.py +8 -5
  11. dashscope/api_entities/api_request_data.py +4 -2
  12. dashscope/api_entities/api_request_factory.py +68 -20
  13. dashscope/api_entities/base_request.py +20 -3
  14. dashscope/api_entities/chat_completion_types.py +344 -0
  15. dashscope/api_entities/dashscope_response.py +243 -15
  16. dashscope/api_entities/encryption.py +179 -0
  17. dashscope/api_entities/http_request.py +216 -62
  18. dashscope/api_entities/websocket_request.py +43 -34
  19. dashscope/app/__init__.py +5 -0
  20. dashscope/app/application.py +203 -0
  21. dashscope/app/application_response.py +246 -0
  22. dashscope/assistants/__init__.py +16 -0
  23. dashscope/assistants/assistant_types.py +175 -0
  24. dashscope/assistants/assistants.py +311 -0
  25. dashscope/assistants/files.py +197 -0
  26. dashscope/audio/__init__.py +4 -2
  27. dashscope/audio/asr/__init__.py +17 -1
  28. dashscope/audio/asr/asr_phrase_manager.py +203 -0
  29. dashscope/audio/asr/recognition.py +167 -27
  30. dashscope/audio/asr/transcription.py +107 -14
  31. dashscope/audio/asr/translation_recognizer.py +1006 -0
  32. dashscope/audio/asr/vocabulary.py +177 -0
  33. dashscope/audio/qwen_asr/__init__.py +7 -0
  34. dashscope/audio/qwen_asr/qwen_transcription.py +189 -0
  35. dashscope/audio/qwen_omni/__init__.py +11 -0
  36. dashscope/audio/qwen_omni/omni_realtime.py +524 -0
  37. dashscope/audio/qwen_tts/__init__.py +5 -0
  38. dashscope/audio/qwen_tts/speech_synthesizer.py +77 -0
  39. dashscope/audio/qwen_tts_realtime/__init__.py +10 -0
  40. dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py +355 -0
  41. dashscope/audio/tts/__init__.py +2 -0
  42. dashscope/audio/tts/speech_synthesizer.py +5 -0
  43. dashscope/audio/tts_v2/__init__.py +12 -0
  44. dashscope/audio/tts_v2/enrollment.py +179 -0
  45. dashscope/audio/tts_v2/speech_synthesizer.py +886 -0
  46. dashscope/cli.py +157 -37
  47. dashscope/client/base_api.py +652 -87
  48. dashscope/common/api_key.py +2 -0
  49. dashscope/common/base_type.py +135 -0
  50. dashscope/common/constants.py +13 -16
  51. dashscope/common/env.py +2 -0
  52. dashscope/common/error.py +58 -22
  53. dashscope/common/logging.py +2 -0
  54. dashscope/common/message_manager.py +2 -0
  55. dashscope/common/utils.py +276 -46
  56. dashscope/customize/__init__.py +0 -0
  57. dashscope/customize/customize_types.py +192 -0
  58. dashscope/customize/deployments.py +146 -0
  59. dashscope/customize/finetunes.py +234 -0
  60. dashscope/embeddings/__init__.py +5 -1
  61. dashscope/embeddings/batch_text_embedding.py +208 -0
  62. dashscope/embeddings/batch_text_embedding_response.py +65 -0
  63. dashscope/embeddings/multimodal_embedding.py +118 -10
  64. dashscope/embeddings/text_embedding.py +13 -1
  65. dashscope/{file.py → files.py} +19 -4
  66. dashscope/io/input_output.py +2 -0
  67. dashscope/model.py +11 -2
  68. dashscope/models.py +43 -0
  69. dashscope/multimodal/__init__.py +20 -0
  70. dashscope/multimodal/dialog_state.py +56 -0
  71. dashscope/multimodal/multimodal_constants.py +28 -0
  72. dashscope/multimodal/multimodal_dialog.py +648 -0
  73. dashscope/multimodal/multimodal_request_params.py +313 -0
  74. dashscope/multimodal/tingwu/__init__.py +10 -0
  75. dashscope/multimodal/tingwu/tingwu.py +80 -0
  76. dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
  77. dashscope/nlp/__init__.py +0 -0
  78. dashscope/nlp/understanding.py +64 -0
  79. dashscope/protocol/websocket.py +3 -0
  80. dashscope/rerank/__init__.py +0 -0
  81. dashscope/rerank/text_rerank.py +69 -0
  82. dashscope/resources/qwen.tiktoken +151643 -0
  83. dashscope/threads/__init__.py +26 -0
  84. dashscope/threads/messages/__init__.py +0 -0
  85. dashscope/threads/messages/files.py +113 -0
  86. dashscope/threads/messages/messages.py +220 -0
  87. dashscope/threads/runs/__init__.py +0 -0
  88. dashscope/threads/runs/runs.py +501 -0
  89. dashscope/threads/runs/steps.py +112 -0
  90. dashscope/threads/thread_types.py +665 -0
  91. dashscope/threads/threads.py +212 -0
  92. dashscope/tokenizers/__init__.py +7 -0
  93. dashscope/tokenizers/qwen_tokenizer.py +111 -0
  94. dashscope/tokenizers/tokenization.py +125 -0
  95. dashscope/tokenizers/tokenizer.py +45 -0
  96. dashscope/tokenizers/tokenizer_base.py +32 -0
  97. dashscope/utils/__init__.py +0 -0
  98. dashscope/utils/message_utils.py +838 -0
  99. dashscope/utils/oss_utils.py +243 -0
  100. dashscope/utils/param_utils.py +29 -0
  101. dashscope/version.py +3 -1
  102. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/METADATA +53 -50
  103. dashscope-1.25.6.dist-info/RECORD +112 -0
  104. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/WHEEL +1 -1
  105. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/entry_points.txt +0 -1
  106. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info/licenses}/LICENSE +2 -4
  107. dashscope/deployment.py +0 -129
  108. dashscope/finetune.py +0 -149
  109. dashscope-1.8.0.dist-info/RECORD +0 -49
  110. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,203 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ from http import HTTPStatus
4
+ from typing import Any, Dict
5
+
6
+ from dashscope.api_entities.dashscope_response import DashScopeAPIResponse
7
+ from dashscope.client.base_api import BaseAsyncApi
8
+ from dashscope.common.error import InvalidParameter
9
+ from dashscope.common.logging import logger
10
+ from dashscope.customize.finetunes import FineTunes
11
+
12
+
13
class AsrPhraseManager(BaseAsyncApi):
    """Hot word (phrase) management for speech recognition.

    Phrase management piggybacks on the FineTunes API: each operation
    temporarily redirects the class-level ``FineTunes.SUB_PATH`` to the
    endpoint that backs phrase management and restores it afterwards.

    NOTE(review): ``FineTunes.SUB_PATH`` is class-level shared state, so
    concurrent direct users of ``FineTunes`` may observe the temporary
    path while a phrase call is in flight — confirm single-threaded use.
    """
    @classmethod
    def _with_sub_path(cls, sub_path, func, **kwargs) -> DashScopeAPIResponse:
        """Invoke ``func(**kwargs)`` with ``FineTunes.SUB_PATH`` set to
        ``sub_path``, always restoring the original value (even on error).
        """
        original_ft_sub_path = FineTunes.SUB_PATH
        FineTunes.SUB_PATH = sub_path
        try:
            return func(**kwargs)
        finally:
            # Restore even if the underlying call raises, so later
            # FineTunes users are not silently redirected.
            FineTunes.SUB_PATH = original_ft_sub_path

    @classmethod
    def create_phrases(cls,
                       model: str,
                       phrases: Dict[str, Any],
                       training_type: str = 'compile_asr_phrase',
                       workspace: str = None,
                       **kwargs) -> DashScopeAPIResponse:
        """Create hot words.

        Args:
            model (str): The requested model.
            phrases (Dict[str, Any]): A dictionary that contains phrases,
                such as {'下一首':90,'上一首':90}.
            training_type (str, `optional`): The training type,
                'compile_asr_phrase' is default.
            workspace (str): The dashscope workspace id.

        Raises:
            InvalidParameter: Parameter input is None or empty!

        Returns:
            DashScopeAPIResponse: The results of creating hot words.
        """
        if not phrases:
            raise InvalidParameter('phrases is empty!')
        if not training_type:
            raise InvalidParameter('training_type is empty!')

        response = cls._with_sub_path('fine-tunes',
                                      FineTunes.call,
                                      model=model,
                                      training_file_ids=[],
                                      validation_file_ids=[],
                                      mode=training_type,
                                      hyper_parameters={'phrase_list': phrases},
                                      workspace=workspace,
                                      **kwargs)
        if response.status_code != HTTPStatus.OK:
            logger.error('Create phrase failed, ' + str(response))
        return response

    @classmethod
    def update_phrases(cls,
                       model: str,
                       phrase_id: str,
                       phrases: Dict[str, Any],
                       training_type: str = 'compile_asr_phrase',
                       workspace: str = None,
                       **kwargs) -> DashScopeAPIResponse:
        """Update the hot words marked phrase_id.

        Args:
            model (str): The requested model.
            phrase_id (str): The ID of phrases,
                which was created by create_phrases().
            phrases (Dict[str, Any]): A dictionary that contains phrases,
                such as {'暂停':90}.
            training_type (str, `optional`):
                The training type, 'compile_asr_phrase' is default.
            workspace (str): The dashscope workspace id.

        Raises:
            InvalidParameter: Parameter input is None or empty!

        Returns:
            DashScopeAPIResponse: The results of updating hot words.
        """
        if not phrase_id:
            raise InvalidParameter('phrase_id is empty!')
        if not phrases:
            raise InvalidParameter('phrases is empty!')
        if not training_type:
            raise InvalidParameter('training_type is empty!')

        response = cls._with_sub_path('fine-tunes',
                                      FineTunes.call,
                                      model=model,
                                      training_file_ids=[],
                                      validation_file_ids=[],
                                      mode=training_type,
                                      hyper_parameters={'phrase_list': phrases},
                                      finetuned_output=phrase_id,
                                      workspace=workspace,
                                      **kwargs)
        if response.status_code != HTTPStatus.OK:
            logger.error('Update phrase failed, ' + str(response))
        return response

    @classmethod
    def query_phrases(cls,
                      phrase_id: str,
                      workspace: str = None,
                      **kwargs) -> DashScopeAPIResponse:
        """Query the hot words by phrase_id.

        Args:
            phrase_id (str): The ID of phrases,
                which was created by create_phrases().
            workspace (str): The dashscope workspace id.

        Raises:
            InvalidParameter: phrase_id input is None or empty!

        Returns:
            DashScopeAPIResponse: The results of querying hot words.
        """
        if not phrase_id:
            raise InvalidParameter('phrase_id is empty!')

        response = cls._with_sub_path('fine-tunes/outputs',
                                      FineTunes.get,
                                      job_id=phrase_id,
                                      workspace=workspace,
                                      **kwargs)
        if response.status_code != HTTPStatus.OK:
            logger.error('Query phrase failed, ' + str(response))
        return response

    @classmethod
    def list_phrases(cls,
                     page: int = 1,
                     page_size: int = 10,
                     workspace: str = None,
                     **kwargs) -> DashScopeAPIResponse:
        """List all information of phrases.

        Args:
            page (int): Page number, greater than 0, default value 1.
            page_size (int): The paging size, greater than 0
                and less than or equal to 100, default value 10.
            workspace (str): The dashscope workspace id.

        Returns:
            DashScopeAPIResponse: The results of listing hot words.
        """
        response = cls._with_sub_path('fine-tunes/outputs',
                                      FineTunes.list,
                                      page=page,
                                      page_size=page_size,
                                      workspace=workspace,
                                      **kwargs)
        if response.status_code != HTTPStatus.OK:
            logger.error('List phrase failed, ' + str(response))
        return response

    @classmethod
    def delete_phrases(cls,
                       phrase_id: str,
                       workspace: str = None,
                       **kwargs) -> DashScopeAPIResponse:
        """Delete the hot words by phrase_id.

        Args:
            phrase_id (str): The ID of phrases,
                which was created by create_phrases().

        Raises:
            InvalidParameter: phrase_id input is None or empty!

        Returns:
            DashScopeAPIResponse: The results of deleting hot words.
        """
        if not phrase_id:
            raise InvalidParameter('phrase_id is empty!')

        response = cls._with_sub_path('fine-tunes/outputs',
                                      FineTunes.delete,
                                      job_id=phrase_id,
                                      workspace=workspace,
                                      **kwargs)
        if response.status_code != HTTPStatus.OK:
            logger.error('Delete phrase failed, ' + str(response))
        return response
@@ -1,7 +1,12 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
1
3
  import json
2
4
  import os
3
5
  import threading
6
+ import time
7
+ import uuid
4
8
  from http import HTTPStatus
9
+ from queue import Queue
5
10
  from threading import Timer
6
11
  from typing import Any, Dict, List, Union
7
12
 
@@ -110,10 +115,20 @@ class Recognition(BaseApi):
110
115
  speech recognition results.
111
116
  format (str): The input audio format for speech recognition.
112
117
  sample_rate (int): The input audio sample rate for speech recognition.
118
+ workspace (str): The dashscope workspace id.
113
119
 
114
120
  **kwargs:
121
+ phrase_id (list, `optional`): The ID of phrase.
115
122
  disfluency_removal_enabled(bool, `optional`): Filter mood words,
116
123
  turned off by default.
124
+ diarization_enabled (bool, `optional`): Speech auto diarization,
125
+ turned off by default.
126
+ speaker_count (int, `optional`): The number of speakers.
127
+ timestamp_alignment_enabled (bool, `optional`): Timestamp-alignment
128
+ calibration, turned off by default.
129
+ special_word_filter(str, `optional`): Sensitive word filter.
130
+ audio_event_detection_enabled(bool, `optional`):
131
+ Audio event detection, turned off by default.
117
132
 
118
133
  Raises:
119
134
  InputRequired: Input is required.
@@ -121,8 +136,13 @@ class Recognition(BaseApi):
121
136
 
122
137
  SILENCE_TIMEOUT_S = 23
123
138
 
124
- def __init__(self, model: str, callback: RecognitionCallback, format: str,
125
- sample_rate: int, **kwargs):
139
+ def __init__(self,
140
+ model: str,
141
+ callback: RecognitionCallback,
142
+ format: str,
143
+ sample_rate: int,
144
+ workspace: str = None,
145
+ **kwargs):
126
146
  if model is None:
127
147
  raise ModelRequired('Model is required!')
128
148
  if format is None:
@@ -137,15 +157,22 @@ class Recognition(BaseApi):
137
157
  self._recognition_once = False
138
158
  self._callback = callback
139
159
  self._running = False
140
- self._stream_data = []
160
+ self._stream_data = Queue()
141
161
  self._worker = None
142
162
  self._silence_timer = None
143
163
  self._kwargs = kwargs
164
+ self._workspace = workspace
165
+ self._start_stream_timestamp = -1
166
+ self._first_package_timestamp = -1
167
+ self._stop_stream_timestamp = -1
168
+ self._on_complete_timestamp = -1
169
+ self.request_id_confirmed = False
170
+ self.last_request_id = uuid.uuid4().hex
144
171
 
145
172
  def __del__(self):
146
173
  if self._running:
147
174
  self._running = False
148
- self._stream_data.clear()
175
+ self._stream_data = Queue()
149
176
  if self._worker is not None and self._worker.is_alive():
150
177
  self._worker.join()
151
178
  if self._silence_timer is not None and self._silence_timer.is_alive( # noqa E501
@@ -162,25 +189,46 @@ class Recognition(BaseApi):
162
189
  responses = self.__launch_request()
163
190
  for part in responses:
164
191
  if part.status_code == HTTPStatus.OK:
165
- if len(part.output) == 0:
192
+ if len(part.output) == 0 or ('finished' in part.output and part.output['finished'] == True):
193
+ self._on_complete_timestamp = time.time() * 1000
194
+ logger.debug('last package delay {}'.format(
195
+ self.get_last_package_delay()))
166
196
  self._callback.on_complete()
167
197
  else:
168
198
  usage: Dict[str, Any] = None
169
- useags: List[Any] = None
170
- if 'sentence' in part.output and part.usage is not None:
171
- usage = {
172
- 'end_time': part.output['sentence']['end_time'],
173
- 'usage': part.usage
174
- }
175
- useags = [usage]
199
+ usages: List[Any] = None
200
+ if 'sentence' in part.output:
201
+ if 'text' in part.output['sentence'] and part.output['sentence']['text'] != '':
202
+ if (self._first_package_timestamp < 0):
203
+ self._first_package_timestamp = time.time() * 1000
204
+ logger.debug('first package delay {}'.format(
205
+ self.get_first_package_delay()))
206
+ sentence = part.output['sentence']
207
+ if 'heartbeat' in sentence and sentence['heartbeat'] == True:
208
+ logger.debug('recv heartbeat')
209
+ continue
210
+ logger.debug(
211
+ 'Recv Result [rid:{}]:{}, isEnd: {}'.format(
212
+ part.request_id, sentence,
213
+ RecognitionResult.is_sentence_end(sentence)))
214
+ if part.usage is not None:
215
+ usage = {
216
+ 'end_time':
217
+ part.output['sentence']['end_time'],
218
+ 'usage': part.usage
219
+ }
220
+ usages = [usage]
221
+ if self.request_id_confirmed is False and part.request_id is not None:
222
+ self.last_request_id = part.request_id
223
+ self.request_id_confirmed = True
176
224
 
177
225
  self._callback.on_event(
178
226
  RecognitionResult(
179
227
  RecognitionResponse.from_api_response(part),
180
- usages=useags))
228
+ usages=usages))
181
229
  else:
182
230
  self._running = False
183
- self._stream_data.clear()
231
+ self._stream_data = Queue()
184
232
  self._callback.on_error(
185
233
  RecognitionResult(
186
234
  RecognitionResponse.from_api_response(part)))
@@ -190,6 +238,15 @@ class Recognition(BaseApi):
190
238
  def __launch_request(self):
191
239
  """Initiate real-time speech recognition requests.
192
240
  """
241
+ resources_list: list = []
242
+ if self._phrase is not None and len(self._phrase) > 0:
243
+ item = {'resource_id': self._phrase, 'resource_type': 'asr_phrase'}
244
+ resources_list.append(item)
245
+
246
+ if len(resources_list) > 0:
247
+ self._kwargs['resources'] = resources_list
248
+
249
+ self._tidy_kwargs()
193
250
  task_name, _ = _get_task_group_and_task(__name__)
194
251
  responses = super().call(model=self.model,
195
252
  task_group='audio',
@@ -202,13 +259,30 @@ class Recognition(BaseApi):
202
259
  sample_rate=self.sample_rate,
203
260
  format=self.format,
204
261
  stream=True,
262
+ workspace=self._workspace,
263
+ pre_task_id=self.last_request_id,
205
264
  **self._kwargs)
206
265
  return responses
207
266
 
208
- def start(self):
267
+ def start(self, phrase_id: str = None, **kwargs):
209
268
  """Real-time speech recognition in asynchronous mode.
210
269
  Please call 'stop()' after you have completed recognition.
211
270
 
271
+ Args:
272
+ phrase_id (str, `optional`): The ID of phrase.
273
+
274
+ **kwargs:
275
+ disfluency_removal_enabled(bool, `optional`):
276
+ Filter mood words, turned off by default.
277
+ diarization_enabled (bool, `optional`):
278
+ Speech auto diarization, turned off by default.
279
+ speaker_count (int, `optional`): The number of speakers.
280
+ timestamp_alignment_enabled (bool, `optional`):
281
+ Timestamp-alignment calibration, turned off by default.
282
+ special_word_filter(str, `optional`): Sensitive word filter.
283
+ audio_event_detection_enabled(bool, `optional`):
284
+ Audio event detection, turned off by default.
285
+
212
286
  Raises:
213
287
  InvalidParameter: This interface cannot be called again
214
288
  if it has already been started.
@@ -219,6 +293,12 @@ class Recognition(BaseApi):
219
293
  if self._running:
220
294
  raise InvalidParameter('Speech recognition has started.')
221
295
 
296
+ self._start_stream_timestamp = -1
297
+ self._first_package_timestamp = -1
298
+ self._stop_stream_timestamp = -1
299
+ self._on_complete_timestamp = -1
300
+ self._phrase = phrase_id
301
+ self._kwargs.update(**kwargs)
222
302
  self._recognition_once = False
223
303
  self._worker = threading.Thread(target=self.__receive_worker)
224
304
  self._worker.start()
@@ -234,11 +314,27 @@ class Recognition(BaseApi):
234
314
  self._running = False
235
315
  raise InvalidTask('Invalid task, task create failed.')
236
316
 
237
- def call(self, file: str) -> RecognitionResult:
317
+ def call(self,
318
+ file: str,
319
+ phrase_id: str = None,
320
+ **kwargs) -> RecognitionResult:
238
321
  """Real-time speech recognition in synchronous mode.
239
322
 
240
323
  Args:
241
324
  file (str): The path to the local audio file.
325
+ phrase_id (str, `optional`): The ID of phrase.
326
+
327
+ **kwargs:
328
+ disfluency_removal_enabled(bool, `optional`):
329
+ Filter mood words, turned off by default.
330
+ diarization_enabled (bool, `optional`):
331
+ Speech auto diarization, turned off by default.
332
+ speaker_count (int, `optional`): The number of speakers.
333
+ timestamp_alignment_enabled (bool, `optional`):
334
+ Timestamp-alignment calibration, turned off by default.
335
+ special_word_filter(str, `optional`): Sensitive word filter.
336
+ audio_event_detection_enabled(bool, `optional`):
337
+ Audio event detection, turned off by default.
242
338
 
243
339
  Raises:
244
340
  InvalidParameter: This interface cannot be called again
@@ -248,6 +344,7 @@ class Recognition(BaseApi):
248
344
  Returns:
249
345
  RecognitionResult: The result of speech recognition.
250
346
  """
347
+ self._start_stream_timestamp = time.time() * 1000
251
348
  if self._running:
252
349
  raise InvalidParameter('Speech recognition has been called.')
253
350
 
@@ -258,12 +355,14 @@ class Recognition(BaseApi):
258
355
  raise FileNotFoundError('No such file or directory: ' + file)
259
356
 
260
357
  self._recognition_once = True
358
+ self._stream_data = Queue()
359
+ self._phrase = phrase_id
360
+ self._kwargs.update(**kwargs)
261
361
  error_flag: bool = False
262
362
  sentences: List[Any] = []
263
363
  usages: List[Any] = []
264
364
  response: RecognitionResponse = None
265
365
  result: RecognitionResult = None
266
- self._stream_data.clear()
267
366
 
268
367
  try:
269
368
  audio_data: bytes = None
@@ -274,22 +373,33 @@ class Recognition(BaseApi):
274
373
  if not audio_data:
275
374
  break
276
375
  else:
277
- self._stream_data = self._stream_data + [audio_data]
376
+ self._stream_data.put(audio_data)
278
377
  else:
279
378
  raise InputDataRequired(
280
379
  'The supplied file was empty (zero bytes long)')
281
380
  f.close()
381
+ self._stop_stream_timestamp = time.time() * 1000
282
382
  except Exception as e:
283
383
  logger.error(e)
284
384
  raise e
285
385
 
286
- if self._stream_data is not None and len(self._stream_data) > 0:
386
+ if not self._stream_data.empty():
287
387
  self._running = True
288
388
  responses = self.__launch_request()
289
389
  for part in responses:
290
390
  if part.status_code == HTTPStatus.OK:
291
391
  if 'sentence' in part.output:
392
+ if 'text' in part.output['sentence'] and part.output['sentence']['text'] != '':
393
+ if (self._first_package_timestamp < 0):
394
+ self._first_package_timestamp = time.time() * 1000
395
+ logger.debug('first package delay {}'.format(
396
+ self._first_package_timestamp -
397
+ self._start_stream_timestamp))
292
398
  sentence = part.output['sentence']
399
+ logger.debug(
400
+ 'Recv Result [rid:{}]:{}, isEnd: {}'.format(
401
+ part.request_id, sentence,
402
+ RecognitionResult.is_sentence_end(sentence)))
293
403
  if RecognitionResult.is_sentence_end(sentence):
294
404
  sentences.append(sentence)
295
405
 
@@ -308,12 +418,16 @@ class Recognition(BaseApi):
308
418
  error_flag = True
309
419
  break
310
420
 
421
+ self._on_complete_timestamp = time.time() * 1000
422
+ logger.debug('last package delay {}'.format(
423
+ self.get_last_package_delay()))
424
+
311
425
  if error_flag:
312
426
  result = RecognitionResult(response)
313
427
  else:
314
428
  result = RecognitionResult(response, sentences, usages)
315
429
 
316
- self._stream_data.clear()
430
+ self._stream_data = Queue()
317
431
  self._recognition_once = False
318
432
  self._running = False
319
433
 
@@ -328,10 +442,12 @@ class Recognition(BaseApi):
328
442
  if self._running is False:
329
443
  raise InvalidParameter('Speech recognition has stopped.')
330
444
 
445
+ self._stop_stream_timestamp = time.time() * 1000
446
+
331
447
  self._running = False
332
448
  if self._worker is not None and self._worker.is_alive():
333
449
  self._worker.join()
334
- self._stream_data.clear()
450
+ self._stream_data = Queue()
335
451
  if self._silence_timer is not None and self._silence_timer.is_alive():
336
452
  self._silence_timer.cancel()
337
453
  self._silence_timer = None
@@ -347,12 +463,21 @@ class Recognition(BaseApi):
347
463
  if self._running is False:
348
464
  raise InvalidParameter('Speech recognition has stopped.')
349
465
 
350
- self._stream_data = self._stream_data + [buffer]
466
+ if (self._start_stream_timestamp < 0):
467
+ self._start_stream_timestamp = time.time() * 1000
468
+ logger.debug('send_audio_frame: {}'.format(len(buffer)))
469
+ self._stream_data.put(buffer)
470
+
471
+ def _tidy_kwargs(self):
472
+ for k in self._kwargs.copy():
473
+ if self._kwargs[k] is None:
474
+ self._kwargs.pop(k, None)
351
475
 
352
476
  def _input_stream_cycle(self):
353
477
  while self._running:
354
- while len(self._stream_data) == 0:
478
+ while self._stream_data.empty():
355
479
  if self._running:
480
+ time.sleep(0.01)
356
481
  continue
357
482
  else:
358
483
  break
@@ -365,16 +490,17 @@ class Recognition(BaseApi):
365
490
  self._silence_stop_timer)
366
491
  self._silence_timer.start()
367
492
 
368
- for frame in self._stream_data:
493
+ while not self._stream_data.empty():
494
+ frame = self._stream_data.get()
369
495
  yield bytes(frame)
370
- self._stream_data.clear()
371
496
 
372
497
  if self._recognition_once:
373
498
  self._running = False
374
499
 
375
500
  # drain all audio data when invoking stop().
376
501
  if self._recognition_once is False:
377
- for frame in self._stream_data:
502
+ while not self._stream_data.empty():
503
+ frame = self._stream_data.get()
378
504
  yield bytes(frame)
379
505
 
380
506
  def _silence_stop_timer(self):
@@ -386,4 +512,18 @@ class Recognition(BaseApi):
386
512
  self._silence_timer = None
387
513
  if self._worker is not None and self._worker.is_alive():
388
514
  self._worker.join()
389
- self._stream_data.clear()
515
+ self._stream_data = Queue()
516
+
517
+ def get_first_package_delay(self):
518
+ """First Package Delay is the time between start sending audio and receive first words package
519
+ """
520
+ return self._first_package_timestamp - self._start_stream_timestamp
521
+
522
+ def get_last_package_delay(self):
523
+ """Last Package Delay is the time between stop sending audio and receive last words package
524
+ """
525
+ return self._on_complete_timestamp - self._stop_stream_timestamp
526
+
527
+ # 获取上一个任务的taskId
528
+ def get_last_request_id(self):
529
+ return self.last_request_id