local-coze 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_coze/__init__.py +110 -0
- local_coze/cli/__init__.py +3 -0
- local_coze/cli/chat.py +126 -0
- local_coze/cli/cli.py +34 -0
- local_coze/cli/constants.py +7 -0
- local_coze/cli/db.py +81 -0
- local_coze/cli/embedding.py +193 -0
- local_coze/cli/image.py +162 -0
- local_coze/cli/knowledge.py +195 -0
- local_coze/cli/search.py +198 -0
- local_coze/cli/utils.py +41 -0
- local_coze/cli/video.py +191 -0
- local_coze/cli/video_edit.py +888 -0
- local_coze/cli/voice.py +351 -0
- local_coze/core/__init__.py +25 -0
- local_coze/core/client.py +253 -0
- local_coze/core/config.py +58 -0
- local_coze/core/exceptions.py +67 -0
- local_coze/database/__init__.py +29 -0
- local_coze/database/client.py +170 -0
- local_coze/database/migration.py +342 -0
- local_coze/embedding/__init__.py +31 -0
- local_coze/embedding/client.py +350 -0
- local_coze/embedding/models.py +130 -0
- local_coze/image/__init__.py +19 -0
- local_coze/image/client.py +110 -0
- local_coze/image/models.py +163 -0
- local_coze/knowledge/__init__.py +19 -0
- local_coze/knowledge/client.py +148 -0
- local_coze/knowledge/models.py +45 -0
- local_coze/llm/__init__.py +25 -0
- local_coze/llm/client.py +317 -0
- local_coze/llm/models.py +48 -0
- local_coze/memory/__init__.py +14 -0
- local_coze/memory/client.py +176 -0
- local_coze/s3/__init__.py +12 -0
- local_coze/s3/client.py +580 -0
- local_coze/s3/models.py +18 -0
- local_coze/search/__init__.py +19 -0
- local_coze/search/client.py +183 -0
- local_coze/search/models.py +57 -0
- local_coze/video/__init__.py +17 -0
- local_coze/video/client.py +347 -0
- local_coze/video/models.py +39 -0
- local_coze/video_edit/__init__.py +23 -0
- local_coze/video_edit/examples.py +340 -0
- local_coze/video_edit/frame_extractor.py +176 -0
- local_coze/video_edit/models.py +362 -0
- local_coze/video_edit/video_edit.py +631 -0
- local_coze/voice/__init__.py +17 -0
- local_coze/voice/asr.py +82 -0
- local_coze/voice/models.py +86 -0
- local_coze/voice/tts.py +94 -0
- local_coze-0.0.1.dist-info/METADATA +636 -0
- local_coze-0.0.1.dist-info/RECORD +58 -0
- local_coze-0.0.1.dist-info/WHEEL +4 -0
- local_coze-0.0.1.dist-info/entry_points.txt +3 -0
- local_coze-0.0.1.dist-info/licenses/LICENSE +21 -0
local_coze/voice/asr.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from typing import Dict, Optional, Tuple
|
|
2
|
+
|
|
3
|
+
from coze_coding_utils.runtime_ctx.context import Context
|
|
4
|
+
from cozeloop.decorator import observe
|
|
5
|
+
|
|
6
|
+
from ..core.client import BaseClient
|
|
7
|
+
from ..core.config import Config
|
|
8
|
+
from ..core.exceptions import APIError, ValidationError
|
|
9
|
+
from .models import ASRRequest, ASRResponse
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ASRClient(BaseClient):
    """Client for the speech-recognition (ASR) "flash" endpoint."""

    def __init__(
        self,
        config: Optional[Config] = None,
        ctx: Optional[Context] = None,
        custom_headers: Optional[Dict[str, str]] = None,
        verbose: bool = False,
    ):
        """Forward all arguments to ``BaseClient`` and cache the configured base URL."""
        super().__init__(config, ctx, custom_headers, verbose)
        self.base_url = self.config.base_url

    @observe
    def recognize(
        self,
        uid: Optional[str] = None,
        url: Optional[str] = None,
        base64_data: Optional[str] = None,
    ) -> Tuple[str, dict]:
        """
        Recognize the speech content of an audio file.

        Audio requirements:
        - duration <= 2 hours
        - size <= 100MB
        - supported encodings: WAV/MP3/OGG OPUS

        Args:
            uid: Unique user identifier.
            url: URL of the audio file (provide this or ``base64_data``).
            base64_data: Base64-encoded audio data (provide this or ``url``).

        Returns:
            Tuple[str, dict]: The recognized text and the full decoded
            response body. The text is ``""`` when the body carries no
            ``result.text`` value.

        Raises:
            ValidationError: If neither ``url`` nor ``base64_data`` is given.
            APIError: If the ``X-Api-Status-Code`` response header is not
                ``"20000000"``, or if the response body cannot be decoded.
        """
        if not (url or base64_data):
            raise ValidationError(
                "必须提供 url 或 base64_data 其中之一", field="url/base64_data"
            )

        request = ASRRequest(uid=uid, url=url, base64_data=base64_data)

        response = self._request_with_response(
            method="POST",
            url=f"{self.base_url}/api/v3/auc/bigmodel/recognize/flash",
            json=request.to_api_request(),
        )

        # The service reports its own outcome in response headers; a missing
        # status header falls back to "0", which is treated as a failure.
        status_code = response.headers.get("X-Api-Status-Code", "0")
        message = response.headers.get("X-Api-Message", "")

        if status_code != "20000000":
            raise APIError(
                f"ASR 识别失败,状态码: {status_code}, 错误信息: {message}",
                code=status_code,
                status_code=response.status_code,
            )

        response.raise_for_status()

        try:
            data = response.json()
        except Exception as e:
            # Chain the original error so the root cause stays in the traceback.
            raise APIError(
                f"响应解析失败: {str(e)}",
                status_code=response.status_code,
            ) from e

        # `or` guards against an explicit null "result"/"text" in the body,
        # which dict.get's default argument would not cover.
        result = data.get("result") or {}
        text = result.get("text") or ""

        return text, data
|
|
local_coze/voice/models.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from typing import Optional, Literal
|
|
2
|
+
from pydantic import BaseModel, Field
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class TTSConfig:
    """Default values used for text-to-speech request parameters."""

    # Speaker (voice) identifier used when the caller does not pick one.
    DEFAULT_SPEAKER: str = "zh_female_xiaohe_uranus_bigtts"

    # Output container/codec; TTSRequest accepts "pcm", "mp3" or "ogg_opus".
    DEFAULT_AUDIO_FORMAT: str = "mp3"

    # Output sample rate (presumably in Hz -- confirm against the API docs).
    DEFAULT_SAMPLE_RATE: int = 24000

    # Speech-rate and loudness adjustments; TTSRequest bounds both to -50..100.
    DEFAULT_SPEECH_RATE: int = 0
    DEFAULT_LOUDNESS_RATE: int = 0
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TTSRequest(BaseModel):
    # Validated parameters for one text-to-speech call.
    # (Documented with comments rather than a class docstring so the
    # generated model schema is left exactly as it was.)

    # Unique user identifier (required).
    uid: str = Field(..., description="用户唯一标识")

    # Input to synthesize: plain text or SSML. Both are optional at the model
    # level; when both are set, to_api_request() sends only the text.
    text: Optional[str] = Field(
        default=None,
        description="合成文本",
    )
    ssml: Optional[str] = Field(
        default=None,
        description="SSML格式文本",
    )

    # Voice identifier.
    speaker: str = Field(
        default=TTSConfig.DEFAULT_SPEAKER,
        description="音色标识",
    )

    # Output audio encoding, restricted to the listed literals.
    audio_format: Literal["pcm", "mp3", "ogg_opus"] = Field(
        default=TTSConfig.DEFAULT_AUDIO_FORMAT,
        description="音频格式",
    )

    # Output sample rate, restricted to the listed literals.
    sample_rate: Literal[8000, 16000, 22050, 24000, 32000, 44100, 48000] = Field(
        default=TTSConfig.DEFAULT_SAMPLE_RATE,
        description="采样率",
    )

    # Speech-rate adjustment, validated to the inclusive range -50..100.
    speech_rate: int = Field(
        default=TTSConfig.DEFAULT_SPEECH_RATE,
        ge=-50,
        le=100,
        description="语速",
    )

    # Loudness adjustment, validated to the inclusive range -50..100.
    loudness_rate: int = Field(
        default=TTSConfig.DEFAULT_LOUDNESS_RATE,
        ge=-50,
        le=100,
        description="音量",
    )

    def to_api_request(self) -> dict:
        """Build the nested request payload for the TTS endpoint.

        Returns:
            dict: ``{"user": {"uid": ...}, "req_params": {...}}`` where
            ``req_params`` holds the speaker, the audio parameters and then
            either ``"text"`` (preferred when non-empty) or ``"ssml"``.
            Neither key is added when both values are empty.
        """
        audio_params = {
            "format": self.audio_format,
            "sample_rate": self.sample_rate,
            "speech_rate": self.speech_rate,
            "loudness_rate": self.loudness_rate,
        }
        req_params = {
            "speaker": self.speaker,
            "audio_params": audio_params,
        }

        # Text wins over SSML; only one of the two is ever sent.
        if self.text:
            req_params["text"] = self.text
        elif self.ssml:
            req_params["ssml"] = self.ssml

        return {
            "user": {"uid": self.uid},
            "req_params": req_params,
        }
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class ASRRequest(BaseModel):
    # Parameters for one speech-recognition call.
    # (Documented with comments rather than a class docstring so the
    # generated model schema is left exactly as it was.)

    # Unique user identifier; optional, sent as-is (possibly None).
    uid: Optional[str] = Field(
        default=None,
        description="用户唯一标识",
    )

    # Audio source: a URL or Base64-encoded data.
    url: Optional[str] = Field(
        default=None,
        description="音频文件URL",
    )
    base64_data: Optional[str] = Field(
        default=None,
        description="音频文件Base64编码",
    )

    def to_api_request(self) -> dict:
        """Build the request payload for the ASR endpoint.

        Returns:
            dict: ``{"user": {"uid": ...}, "audio": {...}}``. The ``audio``
            mapping holds ``"url"`` when a URL is set, otherwise ``"data"``
            when Base64 data is set, and is empty when neither is set.
        """
        # The URL takes precedence when both sources are supplied.
        if self.url:
            audio = {"url": self.url}
        elif self.base64_data:
            audio = {"data": self.base64_data}
        else:
            audio = {}

        return {
            "user": {"uid": self.uid},
            "audio": audio,
        }
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class ASRResponse(BaseModel):
    # Shape of a speech-recognition result.
    # (Documented with comments rather than a class docstring so the
    # generated model schema is left exactly as it was.)

    # Recognized text (required).
    text: str = Field(
        ...,
        description="识别结果文本",
    )

    # Audio duration in milliseconds, per the field description.
    duration: Optional[int] = Field(
        default=None,
        description="音频时长(毫秒)",
    )

    # Detailed recognition results; element structure is not defined here.
    utterances: Optional[list] = Field(
        default=None,
        description="详细识别结果",
    )
|
local_coze/voice/tts.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import json
|
|
3
|
+
from typing import Dict, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
from coze_coding_utils.runtime_ctx.context import Context
|
|
6
|
+
from cozeloop.decorator import observe
|
|
7
|
+
|
|
8
|
+
from ..core.client import BaseClient
|
|
9
|
+
from ..core.config import Config
|
|
10
|
+
from ..core.exceptions import APIError, ValidationError
|
|
11
|
+
from .models import TTSConfig, TTSRequest
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TTSClient(BaseClient):
    """Client for the streaming ("unidirectional") text-to-speech endpoint."""

    def __init__(
        self,
        config: Optional[Config] = None,
        ctx: Optional[Context] = None,
        custom_headers: Optional[Dict[str, str]] = None,
        verbose: bool = False,
    ):
        """Forward all arguments to ``BaseClient`` and cache the configured base URL."""
        super().__init__(config, ctx, custom_headers, verbose)
        self.base_url = self.config.base_url

    @observe
    def synthesize(
        self,
        uid: str,
        text: Optional[str] = None,
        ssml: Optional[str] = None,
        speaker: str = TTSConfig.DEFAULT_SPEAKER,
        audio_format: str = TTSConfig.DEFAULT_AUDIO_FORMAT,
        sample_rate: int = TTSConfig.DEFAULT_SAMPLE_RATE,
        speech_rate: int = TTSConfig.DEFAULT_SPEECH_RATE,
        loudness_rate: int = TTSConfig.DEFAULT_LOUDNESS_RATE,
    ) -> Tuple[str, int]:
        """Synthesize speech from text or SSML and return where the audio lives.

        The response is consumed line by line. Lines carrying Base64 audio
        only contribute to the byte count; the decoded audio itself is not
        kept. Reading stops at the first line whose code is ``20000000``.

        Args:
            uid: Unique user identifier.
            text: Plain text to synthesize (provide this or ``ssml``).
            ssml: SSML markup to synthesize (provide this or ``text``).
            speaker: Voice identifier.
            audio_format: Output audio format.
            sample_rate: Output sample rate.
            speech_rate: Speech-rate adjustment.
            loudness_rate: Loudness adjustment.

        Returns:
            Tuple[str, int]: The audio URL reported by the final line (``""``
            if none was reported) and the total size in bytes of the decoded
            audio chunks.

        Raises:
            ValidationError: If neither ``text`` nor ``ssml`` is given.
            APIError: If the service reports a positive error code, a line is
                not valid JSON, or any other error occurs while reading the
                stream.
        """
        if not (text or ssml):
            raise ValidationError("必须提供 text 或 ssml 其中之一", field="text/ssml")

        request = TTSRequest(
            uid=uid,
            text=text,
            ssml=ssml,
            speaker=speaker,
            audio_format=audio_format,
            sample_rate=sample_rate,
            speech_rate=speech_rate,
            loudness_rate=loudness_rate,
        )

        response = self._request_stream(
            method="POST",
            url=f"{self.base_url}/api/v3/tts/unidirectional",
            json=request.to_api_request(),
            headers={"Connection": "keep-alive"},
        )

        line_prefix = "data:"

        try:
            audio_uri = None
            total_audio_size = 0

            for chunk in response.iter_lines(decode_unicode=False):
                if not chunk:
                    continue

                # Strip only a leading "data:" marker; a blanket replace()
                # would also rewrite occurrences inside the JSON payload.
                chunk_str = chunk.decode("utf-8").lstrip()
                if chunk_str.startswith(line_prefix):
                    chunk_str = chunk_str[len(line_prefix):]
                data = json.loads(chunk_str)

                code = data.get("code", 0)

                if code == 0 and "data" in data and data["data"]:
                    # Only the size is reported, so the decoded bytes are
                    # counted and then discarded instead of being buffered.
                    total_audio_size += len(base64.b64decode(data["data"]))

                elif code == 20000000:
                    # Final line: it may carry the URL of the finished audio.
                    if "url" in data and data["url"]:
                        audio_uri = data["url"]
                    break

                elif code > 0:
                    raise APIError(
                        f"合成音频失败: {data.get('message', '')}",
                        code=str(code),
                    )

            return audio_uri or "", total_audio_size

        except APIError:
            # Let service-reported failures through untouched so their error
            # code is not lost by the generic handler below.
            raise
        except json.JSONDecodeError as e:
            raise APIError(f"响应解析失败: {str(e)}") from e
        except Exception as e:
            raise APIError(f"合成异常: {str(e)}") from e
        finally:
            response.close()
|