dashscope 1.8.0__py3-none-any.whl → 1.25.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. dashscope/__init__.py +61 -14
  2. dashscope/aigc/__init__.py +10 -3
  3. dashscope/aigc/chat_completion.py +282 -0
  4. dashscope/aigc/code_generation.py +145 -0
  5. dashscope/aigc/conversation.py +71 -12
  6. dashscope/aigc/generation.py +288 -16
  7. dashscope/aigc/image_synthesis.py +473 -31
  8. dashscope/aigc/multimodal_conversation.py +299 -14
  9. dashscope/aigc/video_synthesis.py +610 -0
  10. dashscope/api_entities/aiohttp_request.py +8 -5
  11. dashscope/api_entities/api_request_data.py +4 -2
  12. dashscope/api_entities/api_request_factory.py +68 -20
  13. dashscope/api_entities/base_request.py +20 -3
  14. dashscope/api_entities/chat_completion_types.py +344 -0
  15. dashscope/api_entities/dashscope_response.py +243 -15
  16. dashscope/api_entities/encryption.py +179 -0
  17. dashscope/api_entities/http_request.py +216 -62
  18. dashscope/api_entities/websocket_request.py +43 -34
  19. dashscope/app/__init__.py +5 -0
  20. dashscope/app/application.py +203 -0
  21. dashscope/app/application_response.py +246 -0
  22. dashscope/assistants/__init__.py +16 -0
  23. dashscope/assistants/assistant_types.py +175 -0
  24. dashscope/assistants/assistants.py +311 -0
  25. dashscope/assistants/files.py +197 -0
  26. dashscope/audio/__init__.py +4 -2
  27. dashscope/audio/asr/__init__.py +17 -1
  28. dashscope/audio/asr/asr_phrase_manager.py +203 -0
  29. dashscope/audio/asr/recognition.py +167 -27
  30. dashscope/audio/asr/transcription.py +107 -14
  31. dashscope/audio/asr/translation_recognizer.py +1006 -0
  32. dashscope/audio/asr/vocabulary.py +177 -0
  33. dashscope/audio/qwen_asr/__init__.py +7 -0
  34. dashscope/audio/qwen_asr/qwen_transcription.py +189 -0
  35. dashscope/audio/qwen_omni/__init__.py +11 -0
  36. dashscope/audio/qwen_omni/omni_realtime.py +524 -0
  37. dashscope/audio/qwen_tts/__init__.py +5 -0
  38. dashscope/audio/qwen_tts/speech_synthesizer.py +77 -0
  39. dashscope/audio/qwen_tts_realtime/__init__.py +10 -0
  40. dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py +355 -0
  41. dashscope/audio/tts/__init__.py +2 -0
  42. dashscope/audio/tts/speech_synthesizer.py +5 -0
  43. dashscope/audio/tts_v2/__init__.py +12 -0
  44. dashscope/audio/tts_v2/enrollment.py +179 -0
  45. dashscope/audio/tts_v2/speech_synthesizer.py +886 -0
  46. dashscope/cli.py +157 -37
  47. dashscope/client/base_api.py +652 -87
  48. dashscope/common/api_key.py +2 -0
  49. dashscope/common/base_type.py +135 -0
  50. dashscope/common/constants.py +13 -16
  51. dashscope/common/env.py +2 -0
  52. dashscope/common/error.py +58 -22
  53. dashscope/common/logging.py +2 -0
  54. dashscope/common/message_manager.py +2 -0
  55. dashscope/common/utils.py +276 -46
  56. dashscope/customize/__init__.py +0 -0
  57. dashscope/customize/customize_types.py +192 -0
  58. dashscope/customize/deployments.py +146 -0
  59. dashscope/customize/finetunes.py +234 -0
  60. dashscope/embeddings/__init__.py +5 -1
  61. dashscope/embeddings/batch_text_embedding.py +208 -0
  62. dashscope/embeddings/batch_text_embedding_response.py +65 -0
  63. dashscope/embeddings/multimodal_embedding.py +118 -10
  64. dashscope/embeddings/text_embedding.py +13 -1
  65. dashscope/{file.py → files.py} +19 -4
  66. dashscope/io/input_output.py +2 -0
  67. dashscope/model.py +11 -2
  68. dashscope/models.py +43 -0
  69. dashscope/multimodal/__init__.py +20 -0
  70. dashscope/multimodal/dialog_state.py +56 -0
  71. dashscope/multimodal/multimodal_constants.py +28 -0
  72. dashscope/multimodal/multimodal_dialog.py +648 -0
  73. dashscope/multimodal/multimodal_request_params.py +313 -0
  74. dashscope/multimodal/tingwu/__init__.py +10 -0
  75. dashscope/multimodal/tingwu/tingwu.py +80 -0
  76. dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
  77. dashscope/nlp/__init__.py +0 -0
  78. dashscope/nlp/understanding.py +64 -0
  79. dashscope/protocol/websocket.py +3 -0
  80. dashscope/rerank/__init__.py +0 -0
  81. dashscope/rerank/text_rerank.py +69 -0
  82. dashscope/resources/qwen.tiktoken +151643 -0
  83. dashscope/threads/__init__.py +26 -0
  84. dashscope/threads/messages/__init__.py +0 -0
  85. dashscope/threads/messages/files.py +113 -0
  86. dashscope/threads/messages/messages.py +220 -0
  87. dashscope/threads/runs/__init__.py +0 -0
  88. dashscope/threads/runs/runs.py +501 -0
  89. dashscope/threads/runs/steps.py +112 -0
  90. dashscope/threads/thread_types.py +665 -0
  91. dashscope/threads/threads.py +212 -0
  92. dashscope/tokenizers/__init__.py +7 -0
  93. dashscope/tokenizers/qwen_tokenizer.py +111 -0
  94. dashscope/tokenizers/tokenization.py +125 -0
  95. dashscope/tokenizers/tokenizer.py +45 -0
  96. dashscope/tokenizers/tokenizer_base.py +32 -0
  97. dashscope/utils/__init__.py +0 -0
  98. dashscope/utils/message_utils.py +838 -0
  99. dashscope/utils/oss_utils.py +243 -0
  100. dashscope/utils/param_utils.py +29 -0
  101. dashscope/version.py +3 -1
  102. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/METADATA +53 -50
  103. dashscope-1.25.6.dist-info/RECORD +112 -0
  104. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/WHEEL +1 -1
  105. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/entry_points.txt +0 -1
  106. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info/licenses}/LICENSE +2 -4
  107. dashscope/deployment.py +0 -129
  108. dashscope/finetune.py +0 -149
  109. dashscope-1.8.0.dist-info/RECORD +0 -49
  110. {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,14 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
1
3
  from dataclasses import dataclass
2
4
  from typing import List
3
5
 
4
- from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
6
+ from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
5
7
  DictMixin)
6
- from dashscope.client.base_api import BaseApi
8
+ from dashscope.client.base_api import BaseApi, BaseAioApi
7
9
  from dashscope.common.error import InputRequired, ModelRequired
8
10
  from dashscope.common.utils import _get_task_group_and_task
11
+ from dashscope.utils.oss_utils import preprocess_message_element
9
12
 
10
13
 
11
14
  @dataclass(init=False)
@@ -15,30 +18,34 @@ class MultiModalEmbeddingItemBase(DictMixin):
15
18
  def __init__(self, factor: float, **kwargs):
16
19
  super().__init__(factor=factor, **kwargs)
17
20
 
21
+
18
22
  @dataclass(init=False)
19
23
  class MultiModalEmbeddingItemText(MultiModalEmbeddingItemBase):
20
24
  text: str
21
-
22
- def __init__(self, text: str, factor: float, **kwargs):
25
+
26
+ def __init__(self, text: str, factor: float, **kwargs):
23
27
  super().__init__(factor, **kwargs)
24
28
  self.text = text
25
29
 
30
+
26
31
  @dataclass(init=False)
27
32
  class MultiModalEmbeddingItemImage(MultiModalEmbeddingItemBase):
28
33
  image: str
29
-
34
+
30
35
  def __init__(self, image: str, factor: float, **kwargs):
31
36
  super().__init__(factor, **kwargs)
32
37
  self.image = image
33
-
38
+
39
+
34
40
  @dataclass(init=False)
35
41
  class MultiModalEmbeddingItemAudio(MultiModalEmbeddingItemBase):
36
42
  audio: str
37
-
43
+
38
44
  def __init__(self, audio: str, factor: float, **kwargs):
39
45
  super().__init__(factor, **kwargs)
40
46
  self.audio = audio
41
-
47
+
48
+
42
49
  class MultiModalEmbedding(BaseApi):
43
50
  task = 'multimodal-embedding'
44
51
 
@@ -46,7 +53,11 @@ class MultiModalEmbedding(BaseApi):
46
53
  multimodal_embedding_one_peace_v1 = 'multimodal-embedding-one-peace-v1'
47
54
 
48
55
  @classmethod
49
- def call(cls, model: str, input: List[MultiModalEmbeddingItemBase],
56
+ def call(cls,
57
+ model: str,
58
+ input: List[MultiModalEmbeddingItemBase],
59
+ api_key: str = None,
60
+ workspace: str = None,
50
61
  **kwargs) -> DashScopeAPIResponse:
51
62
  """Get embedding multimodal contents..
52
63
 
@@ -54,8 +65,9 @@ class MultiModalEmbedding(BaseApi):
54
65
  model (str): The embedding model name.
55
66
  input (List[MultiModalEmbeddingElement]): The embedding elements,
56
67
  every element include data, modal, factor field.
68
+ workspace (str): The dashscope workspace id.
57
69
  **kwargs:
58
- auto_truncation(bool, `optional`): Automatically truncate
70
+ auto_truncation(bool, `optional`): Automatically truncate
59
71
  audio longer than 15 seconds or text longer than 70 words.
60
72
  Default to false(Too long input will result in failure).
61
73
 
@@ -67,6 +79,11 @@ class MultiModalEmbedding(BaseApi):
67
79
  if model is None or not model:
68
80
  raise ModelRequired('Model is required!')
69
81
  embedding_input = {}
82
+ has_upload = cls._preprocess_message_inputs(model, input, api_key)
83
+ if has_upload:
84
+ headers = kwargs.pop('headers', {})
85
+ headers['X-DashScope-OssResourceResolve'] = 'enable'
86
+ kwargs['headers'] = headers
70
87
  embedding_input['contents'] = input
71
88
  kwargs.pop('stream', False) # not support streaming output.
72
89
  task_group, function = _get_task_group_and_task(__name__)
@@ -75,4 +92,95 @@ class MultiModalEmbedding(BaseApi):
75
92
  task_group=task_group,
76
93
  task=MultiModalEmbedding.task,
77
94
  function=function,
95
+ api_key=api_key,
96
+ workspace=workspace,
78
97
  **kwargs)
98
+
99
@classmethod
def _preprocess_message_inputs(cls, model: str, input: List[dict],
                               api_key: str):
    """Run every non-scalar input element through the upload preprocessor.

    Example of the expected input::

        input = [{'factor': 1, 'text': 'hello'},
                 {'factor': 2, 'audio': ''},
                 {'factor': 3, 'image': ''}]

    Args:
        model (str): The model name, forwarded to the preprocessor.
        input (List[dict]): The embedding elements to inspect.
        api_key (str): The api key, forwarded to the preprocessor.

    Returns:
        bool: True when ``preprocess_message_element`` reported an upload
            for at least one element, otherwise False.
    """
    uploaded_any = False
    # The certificate returned for one element is handed to the next call
    # so that it can be reused instead of being requested again.
    certificate = None
    for item in input:
        # Scalars and raw byte strings are not message elements; skip them.
        if isinstance(item, (int, float, bool, str, bytes, bytearray)):
            continue
        uploaded, certificate = preprocess_message_element(
            model, item, api_key, certificate)
        if uploaded:
            uploaded_any = True
    return uploaded_any
116
+
117
+
118
class AioMultiModalEmbedding(BaseAioApi):
    """Asynchronous entry point for the ``multimodal-embedding`` task."""
    task = 'multimodal-embedding'

    class Models:
        multimodal_embedding_one_peace_v1 = 'multimodal-embedding-one-peace-v1'

    @classmethod
    async def call(cls,
                   model: str,
                   input: List[MultiModalEmbeddingItemBase],
                   api_key: str = None,
                   workspace: str = None,
                   **kwargs) -> DashScopeAPIResponse:
        """Get the embedding of multimodal contents.

        Args:
            model (str): The embedding model name.
            input (List[MultiModalEmbeddingElement]): The embedding elements,
                every element include data, modal, factor field.
            api_key (str, optional): The api key. Defaults to None.
            workspace (str): The dashscope workspace id.
            **kwargs:
                auto_truncation(bool, `optional`): Automatically truncate
                audio longer than 15 seconds or text longer than 70 words.
                Default to false(Too long input will result in failure).
                headers(dict, `optional`): Extra request headers. The dict
                passed by the caller is never modified.

        Raises:
            InputRequired: If ``input`` is None or empty.
            ModelRequired: If ``model`` is None or empty.

        Returns:
            DashScopeAPIResponse: The embedding result.
        """
        if not input:
            raise InputRequired('prompt is required!')
        if not model:
            raise ModelRequired('Model is required!')

        has_upload = cls._preprocess_message_inputs(model, input, api_key)
        if has_upload:
            # Build a fresh dict: tolerates ``headers=None`` and avoids
            # writing the extra header into the caller's own dict.
            caller_headers = kwargs.pop('headers', None) or {}
            kwargs['headers'] = {
                **caller_headers,
                'X-DashScope-OssResourceResolve': 'enable',
            }

        embedding_input = {'contents': input}
        kwargs.pop('stream', False)  # not support streaming output.
        task_group, function = _get_task_group_and_task(__name__)
        response = await super().call(
            model=model,
            input=embedding_input,
            task_group=task_group,
            # Use this class's own task name rather than reaching into the
            # synchronous sibling class.
            task=AioMultiModalEmbedding.task,
            function=function,
            api_key=api_key,
            workspace=workspace,
            **kwargs)
        return response

    @classmethod
    def _preprocess_message_inputs(cls, model: str, input: List[dict],
                                   api_key: str):
        """Run every non-scalar input element through the upload preprocessor.

        Example of the expected input::

            input = [{'factor': 1, 'text': 'hello'},
                     {'factor': 2, 'audio': ''},
                     {'factor': 3, 'image': ''}]

        Returns:
            bool: True when ``preprocess_message_element`` reported an upload
                for at least one element, otherwise False.
        """
        has_upload = False
        # The certificate returned for one element is passed to the next call.
        upload_certificate = None
        for elem in input:
            # Scalars and raw byte strings are not message elements; skip.
            if isinstance(elem, (int, float, bool, str, bytes, bytearray)):
                continue
            is_upload, upload_certificate = preprocess_message_element(
                model, elem, api_key, upload_certificate)
            if is_upload:
                has_upload = True
        return has_upload
@@ -1,3 +1,5 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
1
3
  from typing import List, Union
2
4
 
3
5
  from dashscope.api_entities.dashscope_response import DashScopeAPIResponse
@@ -11,9 +13,16 @@ class TextEmbedding(BaseApi):
11
13
 
12
14
  class Models:
13
15
  text_embedding_v1 = 'text-embedding-v1'
16
+ text_embedding_v2 = 'text-embedding-v2'
17
+ text_embedding_v3 = 'text-embedding-v3'
18
+ text_embedding_v4 = 'text-embedding-v4'
14
19
 
15
20
  @classmethod
16
- def call(cls, model: str, input: Union[str, List[str]],
21
+ def call(cls,
22
+ model: str,
23
+ input: Union[str, List[str]],
24
+ workspace: str = None,
25
+ api_key: str = None,
17
26
  **kwargs) -> DashScopeAPIResponse:
18
27
  """Get embedding of text input.
19
28
 
@@ -23,6 +32,7 @@ class TextEmbedding(BaseApi):
23
32
  can be a text or list of text or opened file object,
24
33
  if opened file object, will read all lines,
25
34
  one embedding per line.
35
+ workspace (str): The dashscope workspace id.
26
36
  **kwargs:
27
37
  text_type(str, `optional`): query or document.
28
38
 
@@ -41,4 +51,6 @@ class TextEmbedding(BaseApi):
41
51
  task_group=task_group,
42
52
  task=TextEmbedding.task,
43
53
  function=function,
54
+ api_key=api_key,
55
+ workspace=workspace,
44
56
  **kwargs)
@@ -1,3 +1,5 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
1
3
  import os
2
4
 
3
5
  from dashscope.api_entities.dashscope_response import DashScopeAPIResponse
@@ -8,7 +10,7 @@ from dashscope.common.error import InvalidFileFormat
8
10
  from dashscope.common.utils import is_validate_fine_tune_file
9
11
 
10
12
 
11
- class File(FileUploadMixin, ListMixin, DeleteMixin, GetMixin):
13
+ class Files(FileUploadMixin, ListMixin, DeleteMixin, GetMixin):
12
14
  SUB_PATH = 'files'
13
15
 
14
16
  @classmethod
@@ -17,6 +19,7 @@ class File(FileUploadMixin, ListMixin, DeleteMixin, GetMixin):
17
19
  purpose: str = FilePurpose.fine_tune,
18
20
  description: str = None,
19
21
  api_key: str = None,
22
+ workspace: str = None,
20
23
  **kwargs) -> DashScopeAPIResponse:
21
24
  """Upload file for model fine-tune or other tasks.
22
25
 
@@ -25,6 +28,7 @@ class File(FileUploadMixin, ListMixin, DeleteMixin, GetMixin):
25
28
  purpose (str): The purpose of the file[fine-tune|inference]
26
29
  description (str, optional): The file description message.
27
30
  api_key (str, optional): The api key. Defaults to None.
31
+ workspace (str): The dashscope workspace id.
28
32
 
29
33
  Returns:
30
34
  DashScopeAPIResponse: The upload information
@@ -39,6 +43,7 @@ class File(FileUploadMixin, ListMixin, DeleteMixin, GetMixin):
39
43
  descriptions=[description]
40
44
  if description is not None else None,
41
45
  api_key=api_key,
46
+ workspace=workspace,
42
47
  **kwargs)
43
48
 
44
49
  @classmethod
@@ -46,6 +51,7 @@ class File(FileUploadMixin, ListMixin, DeleteMixin, GetMixin):
46
51
  page=1,
47
52
  page_size=10,
48
53
  api_key: str = None,
54
+ workspace: str = None,
49
55
  **kwargs) -> DashScopeAPIResponse:
50
56
  """List uploaded files.
51
57
 
@@ -55,40 +61,49 @@ class File(FileUploadMixin, ListMixin, DeleteMixin, GetMixin):
55
61
  if None, will get by default rule(TODO: api key doc).
56
62
  page (int, optional): Page number. Defaults to 1.
57
63
  page_size (int, optional): Items per page. Defaults to 10.
64
+ workspace (str): The dashscope workspace id.
58
65
 
59
66
  Returns:
60
67
  DashScopeAPIResponse: The fine-tune jobs in the result.
61
68
  """
62
- return super().list(page, page_size, api_key, **kwargs)
69
+ return super().list(page,
70
+ page_size,
71
+ api_key,
72
+ workspace=workspace,
73
+ **kwargs)
63
74
 
64
75
  @classmethod
65
76
  def get(cls,
66
77
  file_id: str,
67
78
  api_key: str = None,
79
+ workspace: str = None,
68
80
  **kwargs) -> DashScopeAPIResponse:
69
81
  """Get the file info.
70
82
 
71
83
  Args:
72
84
  file_id (str): The file id.
73
85
  api_key (str, optional): The api key. Defaults to None.
86
+ workspace (str): The dashscope workspace id.
74
87
 
75
88
  Returns:
76
89
  DashScopeAPIResponse: The job info
77
90
  """
78
- return super().get(file_id, api_key, **kwargs)
91
+ return super().get(file_id, api_key, workspace=workspace, **kwargs)
79
92
 
80
93
  @classmethod
81
94
  def delete(cls,
82
95
  file_id: str,
83
96
  api_key: str = None,
97
+ workspace: str = None,
84
98
  **kwargs) -> DashScopeAPIResponse:
85
99
  """Delete uploaded file.
86
100
 
87
101
  Args:
88
102
  file_id (str): The file id want to delete.
89
103
  api_key (str, optional): The api key. Defaults to None.
104
+ workspace (str): The dashscope workspace id.
90
105
 
91
106
  Returns:
92
107
  DashScopeAPIResponse: Delete result.
93
108
  """
94
- return super().delete(file_id, api_key, **kwargs)
109
+ return super().delete(file_id, api_key, workspace=workspace, **kwargs)
@@ -1,3 +1,5 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
1
3
  import base64
2
4
  import io
3
5
  from typing import Generator
dashscope/model.py CHANGED
@@ -1,3 +1,5 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
1
3
  from dashscope.api_entities.dashscope_response import DashScopeAPIResponse
2
4
  from dashscope.client.base_api import GetMixin, ListMixin
3
5
 
@@ -9,23 +11,26 @@ class Model(ListMixin, GetMixin):
9
11
  def get(cls,
10
12
  name: str,
11
13
  api_key: str = None,
14
+ workspace: str = None,
12
15
  **kwargs) -> DashScopeAPIResponse:
13
16
  """Get the model information.
14
17
 
15
18
  Args:
16
19
  name (str): The model name.
17
20
  api_key (str, optional): The api key. Defaults to None.
21
+ workspace (str): The dashscope workspace id.
18
22
 
19
23
  Returns:
20
24
  DashScopeAPIResponse: The model information.
21
25
  """
22
- return super().get(name, api_key, **kwargs)
26
+ return super().get(name, api_key, workspace=workspace, **kwargs)
23
27
 
24
28
  @classmethod
25
29
  def list(cls,
26
30
  page=1,
27
31
  page_size=10,
28
32
  api_key: str = None,
33
+ workspace: str = None,
29
34
  **kwargs) -> DashScopeAPIResponse:
30
35
  """List models.
31
36
 
@@ -37,4 +42,8 @@ class Model(ListMixin, GetMixin):
37
42
  Returns:
38
43
  DashScopeAPIResponse: The models.
39
44
  """
40
- return super().list(api_key, page, page_size, **kwargs)
45
+ return super().list(api_key,
46
+ page,
47
+ page_size,
48
+ workspace=workspace,
49
+ **kwargs)
dashscope/models.py ADDED
@@ -0,0 +1,43 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ from dashscope.api_entities.dashscope_response import DashScopeAPIResponse
4
+ from dashscope.client.base_api import GetMixin, ListMixin
5
+
6
+
7
class Models(ListMixin, GetMixin):
    """Query model information under the ``models`` sub path."""
    SUB_PATH = 'models'

    @classmethod
    def get(cls,
            name: str,
            api_key: str = None,
            workspace: str = None,
            **kwargs) -> DashScopeAPIResponse:
        """Get the model information.

        Args:
            name (str): The model name.
            api_key (str, optional): The api key. Defaults to None.
            workspace (str): The dashscope workspace id.

        Returns:
            DashScopeAPIResponse: The model information.
        """
        # ``workspace`` is documented for this method, so it is declared
        # explicitly and forwarded by keyword.
        return super().get(name, api_key, workspace=workspace, **kwargs)

    @classmethod
    def list(cls,
             page=1,
             page_size=10,
             api_key: str = None,
             workspace: str = None,
             **kwargs) -> DashScopeAPIResponse:
        """List models.

        Args:
            page (int, optional): Page number. Defaults to 1.
            page_size (int, optional): Items per page. Defaults to 10.
            api_key (str, optional): The api key
            workspace (str): The dashscope workspace id.

        Returns:
            DashScopeAPIResponse: The models.
        """
        return super().list(page,
                            page_size,
                            api_key=api_key,
                            workspace=workspace,
                            **kwargs)
@@ -0,0 +1,20 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ from .tingwu import tingwu
4
+ from .tingwu.tingwu import TingWu
5
+ from .tingwu.tingwu_realtime import TingWuRealtime, TingWuRealtimeCallback
6
+
7
+ from .multimodal_dialog import MultiModalDialog, MultiModalCallback
8
+ from .dialog_state import DialogState
9
+ from .multimodal_constants import *
10
+ from .multimodal_request_params import *
11
+
12
# Names exported by ``from <this package> import *``. Names pulled in by the
# two star-imports above are importable from the package but not listed here.
__all__ = [
    'tingwu',
    'TingWu',
    'TingWuRealtime',
    'TingWuRealtimeCallback',
    'MultiModalDialog',
    'MultiModalCallback',
    'DialogState'
]
@@ -0,0 +1,56 @@
1
+ # dialog_state.py
2
+
3
+ from enum import Enum
4
+
5
+
6
class DialogState(Enum):
    """Dialog states that a conversational bot can be in.

    Attributes:
        IDLE: The bot is idle.
        LISTENING: The bot is listening to user input.
        THINKING: The bot is thinking.
        RESPONDING: The bot is generating or delivering a reply.
    """
    IDLE = 'Idle'
    LISTENING = 'Listening'
    THINKING = 'Thinking'
    RESPONDING = 'Responding'


class StateMachine:
    """State machine that tracks the bot's current dialog state.

    Attributes:
        current_state (DialogState): The current state.
    """

    def __init__(self):
        # A new state machine always starts in the IDLE state.
        self.current_state = DialogState.IDLE

    def change_state(self, new_state: 'str | DialogState') -> None:
        """Switch the current state to ``new_state``.

        Args:
            new_state: The target state, given either as a ``DialogState``
                member or as its string value (for example ``'Listening'``).

        Raises:
            ValueError: If ``new_state`` is not a valid dialog state. The
                current state is left unchanged in that case.
        """
        try:
            # Enum lookup accepts both a member and a member's value, so no
            # list of valid values has to be rebuilt on every call.
            self.current_state = DialogState(new_state)
        except ValueError:
            # Message text means "invalid state type"; kept as-is because
            # callers may match on it.
            raise ValueError("无效的状态类型") from None

    def get_current_state(self) -> DialogState:
        """Return the current state.

        Returns:
            DialogState: The current state.
        """
        return self.current_state
@@ -0,0 +1,28 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # multimodal conversation request directive
5
+
6
class RequestToRespondType:
    """String constants for the multimodal conversation request directive.

    NOTE(review): presumably these are the allowed values of the request's
    ``type`` field - confirm against the code that sends the directive.
    """
    TRANSCRIPT = 'transcript'
    PROMPT = 'prompt'
9
+
10
+
11
# multimodal conversation response directive
RESPONSE_NAME_TASK_STARTED = "task-started"
RESPONSE_NAME_RESULT_GENERATED = "result-generated"
RESPONSE_NAME_TASK_FINISHED = "task-finished"

RESPONSE_NAME_TASK_FAILED = "TaskFailed"
RESPONSE_NAME_STARTED = "Started"
RESPONSE_NAME_STOPPED = "Stopped"
RESPONSE_NAME_STATE_CHANGED = "DialogStateChanged"
RESPONSE_NAME_REQUEST_ACCEPTED = "RequestAccepted"
RESPONSE_NAME_SPEECH_STARTED = "SpeechStarted"
RESPONSE_NAME_SPEECH_ENDED = "SpeechEnded"  # sent by the server when ASR detects the end of speech; optional event
RESPONSE_NAME_RESPONDING_STARTED = "RespondingStarted"  # AI voice response begins; the SDK should get ready to receive audio data from the server
RESPONSE_NAME_RESPONDING_ENDED = "RespondingEnded"  # AI voice response has ended
RESPONSE_NAME_SPEECH_CONTENT = "SpeechContent"  # text recognized from the user's speech; streamed, each message carries the full text
RESPONSE_NAME_RESPONDING_CONTENT = "RespondingContent"  # text output by the system; streamed, each message carries the full text
RESPONSE_NAME_ERROR = "Error"  # the server reported an error during the dialog
RESPONSE_NAME_HEART_BEAT = "HeartBeat"  # heartbeat message