shellus-voice2text 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,77 @@
1
+ Metadata-Version: 2.4
2
+ Name: shellus-voice2text
3
+ Version: 1.0.1
4
+ Summary: 火山引擎语音识别 CLI 工具
5
+ Requires-Python: >=3.7
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: requests
8
+ Requires-Dist: boto3
9
+
10
+ # voice2text
11
+
12
+ 火山引擎语音识别 CLI 工具,支持智能缓存和并发处理。
13
+
14
+ ## 安装
15
+
16
+ ```bash
17
+ pip install shellus-voice2text
18
+ ```
19
+
20
+ ## 使用
21
+
22
+ ```bash
23
+ # 单文件处理
24
+ voice2text audio.mp3
25
+
26
+ # 批量处理(自动生成 .srt 文件)
27
+ voice2text *.mp3
28
+
29
+ # 输出纯文本
30
+ voice2text audio.mp3 --txt
31
+
32
+ # 输出 LRC 歌词
33
+ voice2text audio.mp3 --lrc
34
+
35
+ # 输出原始 JSON
36
+ voice2text audio.mp3 --json
37
+
38
+ # 控制并发数
39
+ voice2text *.mp3 --max-workers 5
40
+ ```
41
+
42
+ ## 特性
43
+
44
+ - **智能缓存**:相同文件自动跳过,避免重复识别
45
+ - **并发处理**:支持多文件并发,默认3个
46
+ - **自动输出**:完成后自动生成对应格式文件
47
+ - **断点续传**:失败任务可重新运行,已完成的直接跳过
48
+
49
+ ## 配置
50
+
51
+ 首次使用运行交互式初始化:
52
+
53
+ ```bash
54
+ voice2text init
55
+ ```
56
+
57
+ 配置保存在 `~/.voice2text/config.json`,包含:
58
+
59
+ ```json
60
+ {
61
+ "app_key": "your_app_key",
62
+ "access_key": "your_access_key",
63
+ "resource_id": "volc.seedasr.auc",
64
+ "s3_endpoint": "https://tos-cn-guangzhou.volces.com",
65
+ "s3_bucket": "your_bucket",
66
+ "s3_access_key": "your_s3_key",
67
+ "s3_secret_key": "your_s3_secret",
68
+ "max_concurrent_tasks": "3"
69
+ }
70
+ ```
71
+
72
+ 环境变量可覆盖配置文件:`VOLC_APP_KEY`, `VOLC_ACCESS_KEY`, `VOLC_RESOURCE_ID`, `S3_ENDPOINT`, `S3_BUCKET`, `S3_ACCESS_KEY`, `S3_SECRET_KEY`, `MAX_CONCURRENT_TASKS`。
73
+
74
+ ## API 文档
75
+
76
+ - [大模型录音文件识别 API](https://www.volcengine.com/docs/6561/1354868)
77
+ - [火山引擎控制台](https://console.volcengine.com/speech/service/8)
@@ -0,0 +1,68 @@
1
+ # voice2text
2
+
3
+ 火山引擎语音识别 CLI 工具,支持智能缓存和并发处理。
4
+
5
+ ## 安装
6
+
7
+ ```bash
8
+ pip install shellus-voice2text
9
+ ```
10
+
11
+ ## 使用
12
+
13
+ ```bash
14
+ # 单文件处理
15
+ voice2text audio.mp3
16
+
17
+ # 批量处理(自动生成 .srt 文件)
18
+ voice2text *.mp3
19
+
20
+ # 输出纯文本
21
+ voice2text audio.mp3 --txt
22
+
23
+ # 输出 LRC 歌词
24
+ voice2text audio.mp3 --lrc
25
+
26
+ # 输出原始 JSON
27
+ voice2text audio.mp3 --json
28
+
29
+ # 控制并发数
30
+ voice2text *.mp3 --max-workers 5
31
+ ```
32
+
33
+ ## 特性
34
+
35
+ - **智能缓存**:相同文件自动跳过,避免重复识别
36
+ - **并发处理**:支持多文件并发,默认3个
37
+ - **自动输出**:完成后自动生成对应格式文件
38
+ - **断点续传**:失败任务可重新运行,已完成的直接跳过
39
+
40
+ ## 配置
41
+
42
+ 首次使用运行交互式初始化:
43
+
44
+ ```bash
45
+ voice2text init
46
+ ```
47
+
48
+ 配置保存在 `~/.voice2text/config.json`,包含:
49
+
50
+ ```json
51
+ {
52
+ "app_key": "your_app_key",
53
+ "access_key": "your_access_key",
54
+ "resource_id": "volc.seedasr.auc",
55
+ "s3_endpoint": "https://tos-cn-guangzhou.volces.com",
56
+ "s3_bucket": "your_bucket",
57
+ "s3_access_key": "your_s3_key",
58
+ "s3_secret_key": "your_s3_secret",
59
+ "max_concurrent_tasks": "3"
60
+ }
61
+ ```
62
+
63
+ 环境变量可覆盖配置文件:`VOLC_APP_KEY`, `VOLC_ACCESS_KEY`, `VOLC_RESOURCE_ID`, `S3_ENDPOINT`, `S3_BUCKET`, `S3_ACCESS_KEY`, `S3_SECRET_KEY`, `MAX_CONCURRENT_TASKS`。
64
+
65
+ ## API 文档
66
+
67
+ - [大模型录音文件识别 API](https://www.volcengine.com/docs/6561/1354868)
68
+ - [火山引擎控制台](https://console.volcengine.com/speech/service/8)
@@ -0,0 +1,17 @@
1
+ [build-system]
2
+ requires = ["setuptools>=45", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "shellus-voice2text"
7
+ version = "1.0.1"
8
+ description = "火山引擎语音识别 CLI 工具"
9
+ readme = "README.md"
10
+ requires-python = ">=3.7"
11
+ dependencies = [
12
+ "requests",
13
+ "boto3",
14
+ ]
15
+
16
+ [project.scripts]
17
+ voice2text = "volc_asr.cli:main"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,6 @@
1
+ from setuptools import setup, find_packages
2
+
3
+ setup(
4
+ name="shellus-voice2text",
5
+ packages=find_packages(),
6
+ )
@@ -0,0 +1,77 @@
1
+ Metadata-Version: 2.4
2
+ Name: shellus-voice2text
3
+ Version: 1.0.1
4
+ Summary: 火山引擎语音识别 CLI 工具
5
+ Requires-Python: >=3.7
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: requests
8
+ Requires-Dist: boto3
9
+
10
+ # voice2text
11
+
12
+ 火山引擎语音识别 CLI 工具,支持智能缓存和并发处理。
13
+
14
+ ## 安装
15
+
16
+ ```bash
17
+ pip install shellus-voice2text
18
+ ```
19
+
20
+ ## 使用
21
+
22
+ ```bash
23
+ # 单文件处理
24
+ voice2text audio.mp3
25
+
26
+ # 批量处理(自动生成 .srt 文件)
27
+ voice2text *.mp3
28
+
29
+ # 输出纯文本
30
+ voice2text audio.mp3 --txt
31
+
32
+ # 输出 LRC 歌词
33
+ voice2text audio.mp3 --lrc
34
+
35
+ # 输出原始 JSON
36
+ voice2text audio.mp3 --json
37
+
38
+ # 控制并发数
39
+ voice2text *.mp3 --max-workers 5
40
+ ```
41
+
42
+ ## 特性
43
+
44
+ - **智能缓存**:相同文件自动跳过,避免重复识别
45
+ - **并发处理**:支持多文件并发,默认3个
46
+ - **自动输出**:完成后自动生成对应格式文件
47
+ - **断点续传**:失败任务可重新运行,已完成的直接跳过
48
+
49
+ ## 配置
50
+
51
+ 首次使用运行交互式初始化:
52
+
53
+ ```bash
54
+ voice2text init
55
+ ```
56
+
57
+ 配置保存在 `~/.voice2text/config.json`,包含:
58
+
59
+ ```json
60
+ {
61
+ "app_key": "your_app_key",
62
+ "access_key": "your_access_key",
63
+ "resource_id": "volc.seedasr.auc",
64
+ "s3_endpoint": "https://tos-cn-guangzhou.volces.com",
65
+ "s3_bucket": "your_bucket",
66
+ "s3_access_key": "your_s3_key",
67
+ "s3_secret_key": "your_s3_secret",
68
+ "max_concurrent_tasks": "3"
69
+ }
70
+ ```
71
+
72
+ 环境变量可覆盖配置文件:`VOLC_APP_KEY`, `VOLC_ACCESS_KEY`, `VOLC_RESOURCE_ID`, `S3_ENDPOINT`, `S3_BUCKET`, `S3_ACCESS_KEY`, `S3_SECRET_KEY`, `MAX_CONCURRENT_TASKS`。
73
+
74
+ ## API 文档
75
+
76
+ - [大模型录音文件识别 API](https://www.volcengine.com/docs/6561/1354868)
77
+ - [火山引擎控制台](https://console.volcengine.com/speech/service/8)
@@ -0,0 +1,14 @@
1
+ README.md
2
+ pyproject.toml
3
+ setup.py
4
+ shellus_voice2text.egg-info/PKG-INFO
5
+ shellus_voice2text.egg-info/SOURCES.txt
6
+ shellus_voice2text.egg-info/dependency_links.txt
7
+ shellus_voice2text.egg-info/entry_points.txt
8
+ shellus_voice2text.egg-info/requires.txt
9
+ shellus_voice2text.egg-info/top_level.txt
10
+ volc_asr/__init__.py
11
+ volc_asr/__main__.py
12
+ volc_asr/cache.py
13
+ volc_asr/cli.py
14
+ volc_asr/core.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ voice2text = volc_asr.cli:main
@@ -0,0 +1,2 @@
1
+ requests
2
+ boto3
@@ -0,0 +1 @@
1
+ # volc_asr package
@@ -0,0 +1,4 @@
1
+ from volc_asr.cli import main
2
+
3
+ if __name__ == "__main__":
4
+ main()
@@ -0,0 +1,66 @@
1
+ import hashlib
2
+ import json
3
+ from pathlib import Path
4
+ from datetime import datetime
5
+
6
+ CACHE_DIR = Path.home() / ".voice2text"
7
+ TASKS_FILE = CACHE_DIR / "tasks.json"
8
+ RESULTS_DIR = CACHE_DIR / "results"
9
+
10
def compute_file_hash(filepath):
    """Return the cache key for *filepath*.

    The key is the SHA-256 hex digest of ``"<resolved path>:<content digest>"``,
    where the content digest is the SHA-256 hex digest of the file's bytes.
    """
    resolved = Path(filepath).resolve()

    # Digest the contents in 8 KiB reads so the whole file is never in memory.
    content_digest = hashlib.sha256()
    with resolved.open("rb") as stream:
        while True:
            block = stream.read(8192)
            if not block:
                break
            content_digest.update(block)

    # Fold the resolved path into the key together with the content digest.
    key_material = "{}:{}".format(resolved, content_digest.hexdigest())
    return hashlib.sha256(key_material.encode()).hexdigest()
24
+
25
def _ensure_cache_dir():
    """Create the cache layout if any part of it is missing.

    Creates ``CACHE_DIR`` (with parents) and ``RESULTS_DIR``, and seeds
    ``TASKS_FILE`` with an empty task table when the file does not exist.
    """
    for directory, with_parents in ((CACHE_DIR, True), (RESULTS_DIR, False)):
        directory.mkdir(parents=with_parents, exist_ok=True)

    if TASKS_FILE.exists():
        return
    empty_table = {"tasks": {}}
    TASKS_FILE.write_text(json.dumps(empty_table, ensure_ascii=False))
31
+
32
def _load_tasks():
    """Load the task table from ``TASKS_FILE``.

    The cache layout is created first, so the file always exists by the time
    it is read.

    Returns:
        The decoded JSON document (a dict holding a ``"tasks"`` mapping).
    """
    _ensure_cache_dir()
    try:
        raw = TASKS_FILE.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Written by an earlier version that used the platform's locale
        # encoding; read it the same way so an existing cache stays usable.
        raw = TASKS_FILE.read_text()
    return json.loads(raw)


def _save_tasks(data):
    """Write the task table *data* to ``TASKS_FILE`` as UTF-8 JSON.

    The JSON is produced with ``ensure_ascii=False``, so file names and error
    messages may contain non-ASCII text. The encoding is pinned to UTF-8
    because the locale default cannot represent that text on every platform.
    """
    TASKS_FILE.write_text(
        json.dumps(data, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
40
+
41
def get_task(file_hash):
    """Return the stored task record for *file_hash*, or ``None`` if absent."""
    task_table = _load_tasks()["tasks"]
    return task_table.get(file_hash)
45
+
46
import threading

# save_task is called from several worker threads at once (the CLI runs
# process_file in a ThreadPoolExecutor). The lock keeps one thread's
# load -> modify -> write cycle from overwriting another thread's update.
_SAVE_TASK_LOCK = threading.Lock()


def save_task(file_hash, task_data):
    """Persist the state of one task immediately.

    Args:
        file_hash: Key of the task in the task table.
        task_data: Fields to store. The dict is copied; the caller's object
            is not modified.

    The stored record always gets a fresh ``"updated"`` timestamp. A new
    record gets ``"created"`` set to now; when an existing record is replaced
    its ``"created"`` value is carried over, unless *task_data* supplies one.
    """
    now = datetime.now().isoformat(timespec="seconds")
    record = dict(task_data)

    with _SAVE_TASK_LOCK:
        data = _load_tasks()
        previous = data["tasks"].get(file_hash)
        if previous is None:
            record["created"] = now
        elif "created" in previous:
            # Status updates pass a fresh dict without "created"; keep the
            # original creation time instead of dropping it.
            record.setdefault("created", previous["created"])
        record["updated"] = now
        data["tasks"][file_hash] = record
        _save_tasks(data)
54
+
55
def get_result(request_id):
    """Read the cached API response for *request_id*.

    Returns:
        The decoded JSON document, or ``None`` when no result file exists.
    """
    result_file = RESULTS_DIR / f"{request_id}.json"
    if not result_file.exists():
        return None
    try:
        raw = result_file.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Written by an earlier version that used the platform's locale
        # encoding; read it the same way so old cache entries stay usable.
        raw = result_file.read_text()
    return json.loads(raw)


def save_result(request_id, result):
    """Store the API response *result* for *request_id* in the results cache.

    The JSON is produced with ``ensure_ascii=False`` and recognition results
    contain non-ASCII text, so the file is written as UTF-8 explicitly rather
    than in the locale's default encoding.
    """
    _ensure_cache_dir()
    result_file = RESULTS_DIR / f"{request_id}.json"
    result_file.write_text(
        json.dumps(result, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
@@ -0,0 +1,183 @@
1
+ """voice2text 统一批量处理入口"""
2
+
3
+ import argparse
4
+ import json
5
+ import os
6
+ import sys
7
+ import time
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+ from pathlib import Path
10
+
11
+ from volc_asr.cache import compute_file_hash, get_task, save_task, get_result, save_result, CACHE_DIR
12
+ from volc_asr.core import load_config, CONFIG_FILE, prepare_audio_url, submit_task, query_result, format_result
13
+
14
+
15
def process_file(filepath, config, fmt, force=False):
    """Process one file: cache lookup -> submit -> poll -> store the result.

    Args:
        filepath: Path of the audio file to recognize.
        config: Settings dict. ``app_key``, ``access_key`` and ``resource_id``
            are read here; the whole dict is passed to ``prepare_audio_url``.
        fmt: Requested output format. Not used inside this function.
        force: When true, skip the cache lookup and always submit a new task.

    Returns:
        A dict with ``"file"`` and ``"status"``. Completed entries also carry
        ``"result"`` and ``"from_cache"``; failed entries carry ``"error"``.
        Failures of hashing, upload, submission and polling are reported this
        way rather than raised, so one bad file cannot abort a whole batch.
    """

    def _failed(error):
        # Report the failure on stderr and build the result entry for it.
        print(f"[失败] {path.name}: {error}", file=sys.stderr)
        return {"file": str(path), "status": "failed", "error": str(error)}

    path = Path(filepath)
    try:
        file_hash = compute_file_hash(filepath)
    except OSError as e:
        # Missing or unreadable file: report it like any other failure.
        return _failed(e)

    app_key = config["app_key"]
    access_key = config["access_key"]
    resource_id = config["resource_id"]

    # Cache lookup: reuse a completed result, or resume a pending task.
    request_id = None
    if not force:
        task = get_task(file_hash)
        if task and task.get("status") == "completed":
            result = get_result(task["request_id"])
            if result:
                print(f"[缓存] {path.name}", file=sys.stderr)
                return {"file": str(path), "status": "completed", "result": result, "from_cache": True}

        if task and task.get("status") == "pending" and task.get("request_id"):
            request_id = task["request_id"]
            print(f"[继续] {path.name} ({request_id[:8]}...)", file=sys.stderr)

    if not request_id:
        # Obtain a URL for the audio, then submit the recognition task.
        try:
            audio_url = prepare_audio_url(str(path), config)
            request_id = submit_task(audio_url, app_key, access_key, resource_id)
        except Exception as e:
            return _failed(e)

        # Record the task right away so an interrupted run can resume it.
        save_task(file_hash, {
            "file": str(path),
            "request_id": request_id,
            "status": "pending",
        })

        label = "[重新提交]" if force else "[提交]"
        print(f"{label} {path.name} ({request_id[:8]}...)", file=sys.stderr)

    # Poll until the service reports a final state.
    while True:
        time.sleep(3)
        try:
            code, body = query_result(request_id, app_key, access_key, resource_id)
        except Exception as e:
            # The query itself failed. The task stays "pending" in the cache,
            # so running the command again resumes polling this request id.
            return _failed(e)

        if code == "20000000":
            save_result(request_id, body)
            save_task(file_hash, {
                "file": str(path),
                "request_id": request_id,
                "status": "completed",
            })
            print(f"[完成] {path.name}", file=sys.stderr)
            return {"file": str(path), "status": "completed", "result": body, "from_cache": False}

        if code in ("20000001", "20000002"):
            # These codes keep the loop polling.
            continue

        error = f"[{code}] {body}"
        save_task(file_hash, {
            "file": str(path),
            "request_id": request_id,
            "status": "failed",
            "error": error,
        })
        return _failed(error)
93
+
94
+
95
def init_config():
    """Interactively create or update the configuration file.

    Each setting is prompted for in turn. The previously saved value, or the
    built-in default when nothing was saved, is shown in brackets and kept
    when the answer is empty. The answers are written to ``CONFIG_FILE``.
    """
    print("voice2text 配置初始化\n")

    # (config key, prompt label, built-in default)
    fields = [
        ("app_key", "VOLC App Key", None),
        ("access_key", "VOLC Access Key", None),
        ("resource_id", "Resource ID", "volc.seedasr.auc"),
        ("s3_endpoint", "S3 Endpoint", "https://tos-cn-guangzhou.volces.com"),
        ("s3_bucket", "S3 Bucket", None),
        ("s3_access_key", "S3 Access Key", None),
        ("s3_secret_key", "S3 Secret Key", None),
        ("max_concurrent_tasks", "最大并发数", "3"),
    ]

    # Start from whatever configuration is already on disk.
    existing = json.loads(CONFIG_FILE.read_text()) if CONFIG_FILE.exists() else {}

    config = {}
    for key, label, default in fields:
        current = existing.get(key, default)
        hint = f" [{current}]" if current else ""
        answer = input(f" {label}{hint}: ").strip()
        config[key] = answer or current or ""

    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    CONFIG_FILE.write_text(json.dumps(config, ensure_ascii=False, indent=2))
    print(f"\n配置已保存到 {CONFIG_FILE}")
128
+
129
+
130
def main():
    """Command-line entry point.

    ``voice2text init`` runs the interactive configuration. Otherwise the
    given files are processed concurrently, one output file is written next
    to each successfully recognized input, and a summary goes to stderr.
    """
    # The "init" subcommand is handled before argparse sees the arguments.
    if len(sys.argv) >= 2 and sys.argv[1] == "init":
        init_config()
        return

    parser = argparse.ArgumentParser(description="voice2text - 火山引擎语音识别工具",
                                     epilog="子命令: voice2text init 交互式初始化配置")
    parser.add_argument("files", nargs="+", help="音频文件路径")
    parser.add_argument("--srt", action="store_true", help="输出SRT字幕 (默认)")
    parser.add_argument("--txt", action="store_true", help="输出纯文本")
    parser.add_argument("--json", action="store_true", help="输出原始JSON")
    parser.add_argument("--lrc", action="store_true", help="输出LRC歌词")
    parser.add_argument("--force", action="store_true", help="忽略缓存,强制重新识别")
    parser.add_argument("--max-workers", type=int, default=None, help="最大并发数 (默认3)")
    args = parser.parse_args()

    try:
        config = load_config()
    except ValueError as e:
        print(f"错误: {e}", file=sys.stderr)
        sys.exit(1)

    # Reject a non-numeric, zero or negative worker count with a clear message
    # instead of letting int() or ThreadPoolExecutor raise a traceback.
    try:
        max_workers = args.max_workers or int(config.get("max_concurrent_tasks", "3"))
        if max_workers < 1:
            raise ValueError(max_workers)
    except ValueError:
        print("错误: 最大并发数必须为正整数", file=sys.stderr)
        sys.exit(1)

    # Precedence when several format flags are given: json, txt, lrc, then srt.
    fmt = "json" if args.json else "txt" if args.txt else "lrc" if args.lrc else "srt"

    # Process the files concurrently.
    results = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {
            executor.submit(process_file, name, config, fmt, args.force): name
            for name in args.files
        }

        for future in as_completed(futures):
            try:
                results.append(future.result())
            except Exception as e:
                # A worker that raised must not discard the results of the
                # other files; record it as a failed entry and carry on.
                failed_path = Path(futures[future])
                print(f"[失败] {failed_path.name}: {e}", file=sys.stderr)
                results.append({"file": str(failed_path), "status": "failed", "error": str(e)})

    # Write one output file next to each successfully recognized input.
    ext = {"srt": ".srt", "txt": ".txt", "json": ".json", "lrc": ".lrc"}[fmt]
    for r in results:
        if r["status"] == "completed":
            out_path = Path(r["file"]).with_suffix(ext)
            out_path.write_text(format_result(r["result"], fmt), encoding="utf-8")

    # Summary.
    total = len(results)
    completed = sum(1 for r in results if r["status"] == "completed")
    failed = total - completed
    print(f"\n完成 {completed}/{total}" + (f" | 失败 {failed}" if failed else ""), file=sys.stderr)
180
+
181
+
182
+ if __name__ == "__main__":
183
+ main()
@@ -0,0 +1,176 @@
1
+ """火山引擎 ASR API 核心逻辑"""
2
+
3
+ import json
4
+ import os
5
+ import uuid
6
+ from pathlib import Path
7
+
8
+ import boto3
9
+ import requests
10
+
11
+ from volc_asr.cache import CACHE_DIR
12
+
13
+ SUBMIT_URL = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit"
14
+ QUERY_URL = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/query"
15
+ CONFIG_FILE = CACHE_DIR / "config.json"
16
+
17
+
18
def load_config():
    """Build the effective configuration.

    Values come from ``CONFIG_FILE`` when it exists, are overridden by any
    non-empty environment variable listed below, and finally receive defaults
    for ``resource_id`` and ``max_concurrent_tasks``.

    Raises:
        ValueError: If ``app_key`` or ``access_key`` is still missing or empty.
    """
    config = json.loads(CONFIG_FILE.read_text()) if CONFIG_FILE.exists() else {}

    # Config key -> name of the environment variable that overrides it.
    env_map = {
        "app_key": "VOLC_APP_KEY",
        "access_key": "VOLC_ACCESS_KEY",
        "resource_id": "VOLC_RESOURCE_ID",
        "s3_endpoint": "S3_ENDPOINT",
        "s3_bucket": "S3_BUCKET",
        "s3_access_key": "S3_ACCESS_KEY",
        "s3_secret_key": "S3_SECRET_KEY",
        "max_concurrent_tasks": "MAX_CONCURRENT_TASKS",
    }
    from_env = {key: os.getenv(env_key) for key, env_key in env_map.items()}
    config.update({key: value for key, value in from_env.items() if value})

    # Defaults for settings that neither source provided.
    for key, fallback in (("resource_id", "volc.seedasr.auc"), ("max_concurrent_tasks", "3")):
        config.setdefault(key, fallback)

    has_credentials = config.get("app_key") and config.get("access_key")
    if not has_credentials:
        raise ValueError("请运行 voice2text init 初始化配置,或设置 VOLC_APP_KEY 和 VOLC_ACCESS_KEY 环境变量")

    return config
48
+
49
+
50
def make_headers(app_key, access_key, resource_id, request_id):
    """Build the HTTP headers used for a request to the ASR API."""
    headers = {"Content-Type": "application/json"}
    headers["X-Api-App-Key"] = app_key
    headers["X-Api-Access-Key"] = access_key
    headers["X-Api-Resource-Id"] = resource_id
    headers["X-Api-Request-Id"] = request_id
    return headers
58
+
59
+
60
def guess_format(url):
    """Guess the audio format name from *url*.

    The extension of the URL's path component is checked first, so a host
    name or query string that happens to contain ``.mp3`` cannot shadow the
    real extension. If the path has no recognized extension, the whole URL is
    searched for ``.mp3``, ``.wav``, ``.ogg`` and ``.m4a`` in that order.

    Returns:
        One of ``"mp3"``, ``"wav"``, ``"ogg"``, ``"m4a"``; ``"mp3"`` when
        nothing matches.
    """
    from urllib.parse import urlsplit

    formats = ("mp3", "wav", "ogg", "m4a")
    lower = url.lower()

    try:
        url_path = urlsplit(lower).path
    except ValueError:
        # Malformed URL; rely on the substring search below.
        url_path = ""
    _, dot, ext = url_path.rpartition(".")
    if dot and ext in formats:
        return ext

    # Fallback: the extension may only appear elsewhere, e.g. in a query
    # parameter such as "?file=a.wav".
    for fmt in formats:
        if f".{fmt}" in lower:
            return fmt
    return "mp3"
66
+
67
+
68
def prepare_audio_url(audio_input, config):
    """Turn an audio input into a URL.

    An ``http://`` or ``https://`` input is returned unchanged. A local file
    is uploaded to the configured S3-compatible bucket under a random object
    key, and a presigned GET URL for that object is returned.

    Raises:
        FileNotFoundError: If the local file does not exist.
        ValueError: If any of the four S3 settings is missing or empty.
    """
    if audio_input.startswith(("http://", "https://")):
        return audio_input

    local_file = Path(audio_input)
    if not local_file.exists():
        raise FileNotFoundError(f"文件不存在: {audio_input}")

    endpoint, bucket, access_key, secret_key = (
        config.get(name)
        for name in ("s3_endpoint", "s3_bucket", "s3_access_key", "s3_secret_key")
    )
    if not (endpoint and bucket and access_key and secret_key):
        raise ValueError("本地文件需配置 s3_endpoint, s3_bucket, s3_access_key, s3_secret_key")

    client_config = boto3.session.Config(s3={'addressing_style': 'virtual'})
    client = boto3.client(
        "s3",
        endpoint_url=endpoint,
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        region_name="cn-guangzhou",
        config=client_config,
    )

    # Random object name that keeps the original file extension.
    object_key = uuid.uuid4().hex + local_file.suffix
    client.upload_file(str(local_file), bucket, object_key)
    return client.generate_presigned_url(
        "get_object",
        Params={"Bucket": bucket, "Key": object_key},
        ExpiresIn=86400,
    )
95
+
96
+
97
def submit_task(audio_url, app_key, access_key, resource_id):
    """Submit a recognition task and return its request id.

    A fresh UUID is generated as the request id and sent in the request
    headers; the same id is used later to query the result.

    Raises:
        RuntimeError: If the ``X-Api-Status-Code`` response header is not
            ``"20000000"``.
        requests.RequestException: On connection errors or when the request
            times out.
    """
    request_id = str(uuid.uuid4())
    headers = make_headers(app_key, access_key, resource_id, request_id)
    headers["X-Api-Sequence"] = "-1"

    payload = {
        "user": {"uid": "volc_asr_cli"},
        "audio": {"format": guess_format(audio_url), "url": audio_url},
        "request": {
            "model_name": "bigmodel",
            "enable_itn": True,
            "enable_punc": True,
            "show_utterances": True,
        },
    }

    # requests waits indefinitely unless a timeout is given; bound the call
    # so a stalled connection cannot hang a worker forever.
    resp = requests.post(SUBMIT_URL, json=payload, headers=headers, timeout=30)
    status_code = resp.headers.get("X-Api-Status-Code", "")
    message = resp.headers.get("X-Api-Message", "")

    if status_code != "20000000":
        raise RuntimeError(f"提交失败: [{status_code}] {message}")

    return request_id
122
+
123
+
124
def query_result(request_id, app_key, access_key, resource_id):
    """Query the state of a submitted task.

    Returns:
        A ``(status_code, body)`` tuple: the ``X-Api-Status-Code`` response
        header (``""`` when absent) and the decoded JSON body (``{}`` when
        the response body is empty).

    Raises:
        requests.RequestException: On connection errors or when the request
            times out.
    """
    headers = make_headers(app_key, access_key, resource_id, request_id)
    # This is called repeatedly while polling; without a timeout a single
    # stalled connection would block the polling loop forever.
    resp = requests.post(QUERY_URL, json={}, headers=headers, timeout=30)
    status_code = resp.headers.get("X-Api-Status-Code", "")
    body = resp.json() if resp.text.strip() else {}
    return status_code, body
131
+
132
+
133
def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is first rounded to whole milliseconds and then split with
    integer arithmetic. Truncating ``(seconds % 1) * 1000`` instead loses a
    millisecond for values such as 8.2, whose float representation is
    slightly below the decimal value.
    """
    total_ms = int(round(seconds * 1000))
    total_s, ms = divmod(total_ms, 1000)
    total_m, s = divmod(total_s, 60)
    h, m = divmod(total_m, 60)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
139
+
140
+
141
def to_srt(utterances):
    """Render *utterances* as SRT subtitle text.

    Each utterance needs ``start_time`` and ``end_time`` (divided by 1000
    before formatting) and ``text``. Cues are numbered from 1 and each cue is
    followed by an empty line.
    """
    pieces = []
    for index, seg in enumerate(utterances, 1):
        begin = seg["start_time"] / 1000
        finish = seg["end_time"] / 1000
        pieces.extend([
            str(index),
            f"{format_time(begin)} --> {format_time(finish)}",
            seg["text"],
            "",
        ])
    return "\n".join(pieces)
151
+
152
+
153
def to_lrc(utterances):
    """Render *utterances* as LRC lyric lines ``[MM:SS.cc]text``.

    Each utterance needs ``start_time`` (in milliseconds) and ``text``. The
    start time is rounded to whole centiseconds before it is split into
    minutes and seconds, so a value such as 59996 ms becomes ``[01:00.00]``
    rather than the invalid ``[00:60.00]``.
    """
    lines = []
    for seg in utterances:
        total_cs = int(round(seg["start_time"] / 10))
        minutes, remainder = divmod(total_cs, 6000)
        secs, centis = divmod(remainder, 100)
        lines.append(f"[{minutes:02d}:{secs:02d}.{centis:02d}]{seg['text']}")
    return "\n".join(lines)
161
+
162
+
163
def format_result(body, fmt="srt"):
    """Return the API response *body* rendered in the requested format.

    ``fmt`` is one of ``srt`` / ``txt`` / ``json`` / ``lrc``; any other value
    is treated as ``srt``. ``json`` dumps the whole body. ``txt`` returns the
    result's ``text``. ``lrc`` and ``srt`` render the result's ``utterances``
    and fall back to the plain ``text`` when there are none.
    """
    if fmt == "json":
        return json.dumps(body, ensure_ascii=False, indent=2)

    result = body.get("result", {})
    if fmt == "txt":
        return result.get("text", "")

    utterances = result.get("utterances", [])
    if not utterances:
        # No segment timing available; plain text is the best we can do.
        return result.get("text", "")

    render = to_lrc if fmt == "lrc" else to_srt
    return render(utterances)