shellus-voice2text 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shellus_voice2text-1.0.1/PKG-INFO +77 -0
- shellus_voice2text-1.0.1/README.md +68 -0
- shellus_voice2text-1.0.1/pyproject.toml +17 -0
- shellus_voice2text-1.0.1/setup.cfg +4 -0
- shellus_voice2text-1.0.1/setup.py +6 -0
- shellus_voice2text-1.0.1/shellus_voice2text.egg-info/PKG-INFO +77 -0
- shellus_voice2text-1.0.1/shellus_voice2text.egg-info/SOURCES.txt +14 -0
- shellus_voice2text-1.0.1/shellus_voice2text.egg-info/dependency_links.txt +1 -0
- shellus_voice2text-1.0.1/shellus_voice2text.egg-info/entry_points.txt +2 -0
- shellus_voice2text-1.0.1/shellus_voice2text.egg-info/requires.txt +2 -0
- shellus_voice2text-1.0.1/shellus_voice2text.egg-info/top_level.txt +1 -0
- shellus_voice2text-1.0.1/volc_asr/__init__.py +1 -0
- shellus_voice2text-1.0.1/volc_asr/__main__.py +4 -0
- shellus_voice2text-1.0.1/volc_asr/cache.py +66 -0
- shellus_voice2text-1.0.1/volc_asr/cli.py +183 -0
- shellus_voice2text-1.0.1/volc_asr/core.py +176 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: shellus-voice2text
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: 火山引擎语音识别 CLI 工具
|
|
5
|
+
Requires-Python: >=3.7
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: requests
|
|
8
|
+
Requires-Dist: boto3
|
|
9
|
+
|
|
10
|
+
# voice2text
|
|
11
|
+
|
|
12
|
+
火山引擎语音识别 CLI 工具,支持智能缓存和并发处理。
|
|
13
|
+
|
|
14
|
+
## 安装
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install shellus-voice2text
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## 使用
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
# 单文件处理
|
|
24
|
+
voice2text audio.mp3
|
|
25
|
+
|
|
26
|
+
# 批量处理(自动生成 .srt 文件)
|
|
27
|
+
voice2text *.mp3
|
|
28
|
+
|
|
29
|
+
# 输出纯文本
|
|
30
|
+
voice2text audio.mp3 --txt
|
|
31
|
+
|
|
32
|
+
# 输出 LRC 歌词
|
|
33
|
+
voice2text audio.mp3 --lrc
|
|
34
|
+
|
|
35
|
+
# 输出原始 JSON
|
|
36
|
+
voice2text audio.mp3 --json
|
|
37
|
+
|
|
38
|
+
# 控制并发数
|
|
39
|
+
voice2text *.mp3 --max-workers 5
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## 特性
|
|
43
|
+
|
|
44
|
+
- **智能缓存**:相同文件自动跳过,避免重复识别
|
|
45
|
+
- **并发处理**:支持多文件并发,默认3个
|
|
46
|
+
- **自动输出**:完成后自动生成对应格式文件
|
|
47
|
+
- **断点续传**:失败任务可重新运行,已完成的直接跳过
|
|
48
|
+
|
|
49
|
+
## 配置
|
|
50
|
+
|
|
51
|
+
首次使用运行交互式初始化:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
voice2text init
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
配置保存在 `~/.voice2text/config.json`,包含:
|
|
58
|
+
|
|
59
|
+
```json
|
|
60
|
+
{
|
|
61
|
+
"app_key": "your_app_key",
|
|
62
|
+
"access_key": "your_access_key",
|
|
63
|
+
"resource_id": "volc.seedasr.auc",
|
|
64
|
+
"s3_endpoint": "https://tos-cn-guangzhou.volces.com",
|
|
65
|
+
"s3_bucket": "your_bucket",
|
|
66
|
+
"s3_access_key": "your_s3_key",
|
|
67
|
+
"s3_secret_key": "your_s3_secret",
|
|
68
|
+
"max_concurrent_tasks": "3"
|
|
69
|
+
}
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
环境变量可覆盖配置文件:`VOLC_APP_KEY`, `VOLC_ACCESS_KEY`, `VOLC_RESOURCE_ID`, `S3_ENDPOINT`, `S3_BUCKET`, `S3_ACCESS_KEY`, `S3_SECRET_KEY`, `MAX_CONCURRENT_TASKS`。
|
|
73
|
+
|
|
74
|
+
## API 文档
|
|
75
|
+
|
|
76
|
+
- [大模型录音文件识别 API](https://www.volcengine.com/docs/6561/1354868)
|
|
77
|
+
- [火山引擎控制台](https://console.volcengine.com/speech/service/8)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# voice2text
|
|
2
|
+
|
|
3
|
+
火山引擎语音识别 CLI 工具,支持智能缓存和并发处理。
|
|
4
|
+
|
|
5
|
+
## 安装
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install shellus-voice2text
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## 使用
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# 单文件处理
|
|
15
|
+
voice2text audio.mp3
|
|
16
|
+
|
|
17
|
+
# 批量处理(自动生成 .srt 文件)
|
|
18
|
+
voice2text *.mp3
|
|
19
|
+
|
|
20
|
+
# 输出纯文本
|
|
21
|
+
voice2text audio.mp3 --txt
|
|
22
|
+
|
|
23
|
+
# 输出 LRC 歌词
|
|
24
|
+
voice2text audio.mp3 --lrc
|
|
25
|
+
|
|
26
|
+
# 输出原始 JSON
|
|
27
|
+
voice2text audio.mp3 --json
|
|
28
|
+
|
|
29
|
+
# 控制并发数
|
|
30
|
+
voice2text *.mp3 --max-workers 5
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## 特性
|
|
34
|
+
|
|
35
|
+
- **智能缓存**:相同文件自动跳过,避免重复识别
|
|
36
|
+
- **并发处理**:支持多文件并发,默认3个
|
|
37
|
+
- **自动输出**:完成后自动生成对应格式文件
|
|
38
|
+
- **断点续传**:失败任务可重新运行,已完成的直接跳过
|
|
39
|
+
|
|
40
|
+
## 配置
|
|
41
|
+
|
|
42
|
+
首次使用运行交互式初始化:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
voice2text init
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
配置保存在 `~/.voice2text/config.json`,包含:
|
|
49
|
+
|
|
50
|
+
```json
|
|
51
|
+
{
|
|
52
|
+
"app_key": "your_app_key",
|
|
53
|
+
"access_key": "your_access_key",
|
|
54
|
+
"resource_id": "volc.seedasr.auc",
|
|
55
|
+
"s3_endpoint": "https://tos-cn-guangzhou.volces.com",
|
|
56
|
+
"s3_bucket": "your_bucket",
|
|
57
|
+
"s3_access_key": "your_s3_key",
|
|
58
|
+
"s3_secret_key": "your_s3_secret",
|
|
59
|
+
"max_concurrent_tasks": "3"
|
|
60
|
+
}
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
环境变量可覆盖配置文件:`VOLC_APP_KEY`, `VOLC_ACCESS_KEY`, `VOLC_RESOURCE_ID`, `S3_ENDPOINT`, `S3_BUCKET`, `S3_ACCESS_KEY`, `S3_SECRET_KEY`, `MAX_CONCURRENT_TASKS`。
|
|
64
|
+
|
|
65
|
+
## API 文档
|
|
66
|
+
|
|
67
|
+
- [大模型录音文件识别 API](https://www.volcengine.com/docs/6561/1354868)
|
|
68
|
+
- [火山引擎控制台](https://console.volcengine.com/speech/service/8)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=45", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "shellus-voice2text"
|
|
7
|
+
version = "1.0.1"
|
|
8
|
+
description = "火山引擎语音识别 CLI 工具"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.7"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"requests",
|
|
13
|
+
"boto3",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.scripts]
|
|
17
|
+
voice2text = "volc_asr.cli:main"
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: shellus-voice2text
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: 火山引擎语音识别 CLI 工具
|
|
5
|
+
Requires-Python: >=3.7
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: requests
|
|
8
|
+
Requires-Dist: boto3
|
|
9
|
+
|
|
10
|
+
# voice2text
|
|
11
|
+
|
|
12
|
+
火山引擎语音识别 CLI 工具,支持智能缓存和并发处理。
|
|
13
|
+
|
|
14
|
+
## 安装
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install shellus-voice2text
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## 使用
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
# 单文件处理
|
|
24
|
+
voice2text audio.mp3
|
|
25
|
+
|
|
26
|
+
# 批量处理(自动生成 .srt 文件)
|
|
27
|
+
voice2text *.mp3
|
|
28
|
+
|
|
29
|
+
# 输出纯文本
|
|
30
|
+
voice2text audio.mp3 --txt
|
|
31
|
+
|
|
32
|
+
# 输出 LRC 歌词
|
|
33
|
+
voice2text audio.mp3 --lrc
|
|
34
|
+
|
|
35
|
+
# 输出原始 JSON
|
|
36
|
+
voice2text audio.mp3 --json
|
|
37
|
+
|
|
38
|
+
# 控制并发数
|
|
39
|
+
voice2text *.mp3 --max-workers 5
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## 特性
|
|
43
|
+
|
|
44
|
+
- **智能缓存**:相同文件自动跳过,避免重复识别
|
|
45
|
+
- **并发处理**:支持多文件并发,默认3个
|
|
46
|
+
- **自动输出**:完成后自动生成对应格式文件
|
|
47
|
+
- **断点续传**:失败任务可重新运行,已完成的直接跳过
|
|
48
|
+
|
|
49
|
+
## 配置
|
|
50
|
+
|
|
51
|
+
首次使用运行交互式初始化:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
voice2text init
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
配置保存在 `~/.voice2text/config.json`,包含:
|
|
58
|
+
|
|
59
|
+
```json
|
|
60
|
+
{
|
|
61
|
+
"app_key": "your_app_key",
|
|
62
|
+
"access_key": "your_access_key",
|
|
63
|
+
"resource_id": "volc.seedasr.auc",
|
|
64
|
+
"s3_endpoint": "https://tos-cn-guangzhou.volces.com",
|
|
65
|
+
"s3_bucket": "your_bucket",
|
|
66
|
+
"s3_access_key": "your_s3_key",
|
|
67
|
+
"s3_secret_key": "your_s3_secret",
|
|
68
|
+
"max_concurrent_tasks": "3"
|
|
69
|
+
}
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
环境变量可覆盖配置文件:`VOLC_APP_KEY`, `VOLC_ACCESS_KEY`, `VOLC_RESOURCE_ID`, `S3_ENDPOINT`, `S3_BUCKET`, `S3_ACCESS_KEY`, `S3_SECRET_KEY`, `MAX_CONCURRENT_TASKS`。
|
|
73
|
+
|
|
74
|
+
## API 文档
|
|
75
|
+
|
|
76
|
+
- [大模型录音文件识别 API](https://www.volcengine.com/docs/6561/1354868)
|
|
77
|
+
- [火山引擎控制台](https://console.volcengine.com/speech/service/8)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
setup.py
|
|
4
|
+
shellus_voice2text.egg-info/PKG-INFO
|
|
5
|
+
shellus_voice2text.egg-info/SOURCES.txt
|
|
6
|
+
shellus_voice2text.egg-info/dependency_links.txt
|
|
7
|
+
shellus_voice2text.egg-info/entry_points.txt
|
|
8
|
+
shellus_voice2text.egg-info/requires.txt
|
|
9
|
+
shellus_voice2text.egg-info/top_level.txt
|
|
10
|
+
volc_asr/__init__.py
|
|
11
|
+
volc_asr/__main__.py
|
|
12
|
+
volc_asr/cache.py
|
|
13
|
+
volc_asr/cli.py
|
|
14
|
+
volc_asr/core.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
volc_asr
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# volc_asr package
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import json
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
|
|
6
|
+
CACHE_DIR = Path.home() / ".voice2text"
|
|
7
|
+
TASKS_FILE = CACHE_DIR / "tasks.json"
|
|
8
|
+
RESULTS_DIR = CACHE_DIR / "results"
|
|
9
|
+
|
|
10
|
+
def compute_file_hash(filepath):
    """Return the cache key for *filepath*.

    The key is the SHA-256 hex digest of ``"<resolved path>:<content digest>"``,
    where the content digest is the SHA-256 hex digest of the file's bytes.
    """
    resolved = Path(filepath).resolve()

    # Read the file in 8192-byte pieces instead of loading it in one go.
    digest = hashlib.sha256()
    with resolved.open("rb") as stream:
        while True:
            piece = stream.read(8192)
            if not piece:
                break
            digest.update(piece)

    # Fold the resolved path and the content digest into one identifier.
    key_material = "{}:{}".format(resolved, digest.hexdigest())
    return hashlib.sha256(key_material.encode()).hexdigest()
|
|
24
|
+
|
|
25
|
+
def _ensure_cache_dir():
    """Create the cache layout if it is missing.

    Makes ``CACHE_DIR`` and ``RESULTS_DIR`` and, when ``TASKS_FILE`` does not
    exist yet, seeds it with an empty task table.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    RESULTS_DIR.mkdir(exist_ok=True)
    if TASKS_FILE.exists():
        return
    empty_table = {"tasks": {}}
    TASKS_FILE.write_text(json.dumps(empty_table, ensure_ascii=False))
|
|
31
|
+
|
|
32
|
+
def _load_tasks():
    """Read ``TASKS_FILE`` and return its parsed JSON content."""
    # Make sure the file exists before it is read.
    _ensure_cache_dir()
    raw = TASKS_FILE.read_text()
    return json.loads(raw)
|
|
36
|
+
|
|
37
|
+
def _save_tasks(data):
    """Overwrite ``TASKS_FILE`` with *data* serialized as indented JSON."""
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    TASKS_FILE.write_text(serialized)
|
|
40
|
+
|
|
41
|
+
def get_task(file_hash):
    """Return the stored task record for *file_hash*, or ``None`` if absent."""
    task_table = _load_tasks()["tasks"]
    return task_table.get(file_hash)
|
|
45
|
+
|
|
46
|
+
def save_task(file_hash, task_data):
    """Store *task_data* as the record for *file_hash* and write it to disk.

    ``task_data`` is updated in place: ``updated`` is always set to the
    current local time; ``created`` is set when the record is new.  When a
    record already exists, its ``created`` value is carried over (unless the
    caller supplied one), so the creation time survives later status updates
    instead of being dropped when the record is replaced.

    Args:
        file_hash: Key of the task, as produced by ``compute_file_hash``.
        task_data: Dict describing the task; it replaces any previous record.
    """
    data = _load_tasks()
    now = datetime.now().isoformat(timespec="seconds")

    previous = data["tasks"].get(file_hash)
    if previous is None:
        task_data["created"] = now
    elif "created" in previous:
        # Keep the original creation time across updates.
        task_data.setdefault("created", previous["created"])

    task_data["updated"] = now
    data["tasks"][file_hash] = task_data
    _save_tasks(data)
|
|
54
|
+
|
|
55
|
+
def get_result(request_id):
    """Return the cached API result for *request_id*, or ``None``.

    The result is read from ``RESULTS_DIR/<request_id>.json``; ``None`` is
    returned when that file does not exist.
    """
    cached = RESULTS_DIR / f"{request_id}.json"
    if not cached.exists():
        return None
    return json.loads(cached.read_text())
|
|
61
|
+
|
|
62
|
+
def save_result(request_id, result):
    """Write *result* as indented JSON to ``RESULTS_DIR/<request_id>.json``."""
    _ensure_cache_dir()
    serialized = json.dumps(result, ensure_ascii=False, indent=2)
    target = RESULTS_DIR / f"{request_id}.json"
    target.write_text(serialized)
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""voice2text 统一批量处理入口"""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
import time
|
|
8
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from volc_asr.cache import compute_file_hash, get_task, save_task, get_result, save_result, CACHE_DIR
|
|
12
|
+
from volc_asr.core import load_config, CONFIG_FILE, prepare_audio_url, submit_task, query_result, format_result
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def process_file(filepath, config, fmt, force=False):
    """Process one file: check the cache, submit, poll, and store the result.

    Args:
        filepath: Path of the audio file.
        config: Configuration mapping.  ``app_key``, ``access_key`` and
            ``resource_id`` are read here; the whole mapping is also passed
            to ``prepare_audio_url``.
        fmt: Requested output format.  Not used inside this function; kept
            so existing callers keep working.
        force: When true, ignore cached tasks and submit a new one.

    Returns:
        A dict with ``file`` and ``status`` (``"completed"`` or ``"failed"``).
        Completed entries also carry ``result`` and ``from_cache``; failed
        entries carry ``error``.
    """
    path = Path(filepath)

    # Hashing opens the file, so a missing or unreadable file surfaces here.
    # Report it as a failure of this item instead of letting the exception
    # escape the worker and abort the whole batch.
    try:
        file_hash = compute_file_hash(filepath)
    except OSError as e:
        print(f"[失败] {path.name}: {e}", file=sys.stderr)
        return {"file": str(path), "status": "failed", "error": str(e)}

    app_key = config["app_key"]
    access_key = config["access_key"]
    resource_id = config["resource_id"]

    # Cache lookup (skipped entirely when force is set).
    request_id = None
    if not force:
        task = get_task(file_hash)
        status = task.get("status") if task else None
        if status == "completed":
            result = get_result(task["request_id"])
            if result:
                print(f"[缓存] {path.name}", file=sys.stderr)
                return {"file": str(path), "status": "completed", "result": result, "from_cache": True}
            # Completed task without a stored result: fall through and resubmit.
        elif status == "pending" and task.get("request_id"):
            # A previous run already submitted this file; resume polling.
            request_id = task["request_id"]
            print(f"[继续] {path.name} ({request_id[:8]}...)", file=sys.stderr)

    if not request_id:
        # Obtain a URL for the audio.
        try:
            audio_url = prepare_audio_url(str(path), config)
        except Exception as e:
            print(f"[失败] {path.name}: {e}", file=sys.stderr)
            return {"file": str(path), "status": "failed", "error": str(e)}

        # Submit the recognition task.
        try:
            request_id = submit_task(audio_url, app_key, access_key, resource_id)
        except Exception as e:
            print(f"[失败] {path.name}: {e}", file=sys.stderr)
            return {"file": str(path), "status": "failed", "error": str(e)}

        # Record the task right away so a later run can resume it.
        save_task(file_hash, {
            "file": str(path),
            "request_id": request_id,
            "status": "pending",
        })

        label = "[重新提交]" if force else "[提交]"
        print(f"{label} {path.name} ({request_id[:8]}...)", file=sys.stderr)

    # Poll every 3 seconds until the service reports a final status.
    while True:
        time.sleep(3)
        try:
            code, body = query_result(request_id, app_key, access_key, resource_id)
        except Exception as e:
            # The stored task is left as "pending" so re-running the command
            # resumes polling; only this item is reported as failed.
            print(f"[失败] {path.name}: {e}", file=sys.stderr)
            return {"file": str(path), "status": "failed", "error": str(e)}

        if code == "20000000":
            save_result(request_id, body)
            save_task(file_hash, {
                "file": str(path),
                "request_id": request_id,
                "status": "completed",
            })
            print(f"[完成] {path.name}", file=sys.stderr)
            return {"file": str(path), "status": "completed", "result": body, "from_cache": False}

        if code in ("20000001", "20000002"):
            # Not finished yet; keep polling.
            continue

        error = f"[{code}] {body}"
        save_task(file_hash, {
            "file": str(path),
            "request_id": request_id,
            "status": "failed",
            "error": error,
        })
        print(f"[失败] {path.name}: {error}", file=sys.stderr)
        return {"file": str(path), "status": "failed", "error": error}
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def init_config():
    """Interactively create or update the configuration file.

    Prompts for every known setting, showing the stored value (or the
    built-in default) in brackets; an empty answer keeps that value.  The
    answers are written to ``CONFIG_FILE`` as JSON.

    An existing file that cannot be parsed, or that does not contain a JSON
    object, is ignored so that this command can be used to repair it.  After
    writing, the file's permissions are restricted to the owner on a
    best-effort basis because it stores access keys.
    """
    print("voice2text 配置初始化\n")

    # (config key, prompt label, default value)
    fields = [
        ("app_key", "VOLC App Key", None),
        ("access_key", "VOLC Access Key", None),
        ("resource_id", "Resource ID", "volc.seedasr.auc"),
        ("s3_endpoint", "S3 Endpoint", "https://tos-cn-guangzhou.volces.com"),
        ("s3_bucket", "S3 Bucket", None),
        ("s3_access_key", "S3 Access Key", None),
        ("s3_secret_key", "S3 Secret Key", None),
        ("max_concurrent_tasks", "最大并发数", "3"),
    ]

    # Load the current configuration, if there is a usable one.
    existing = {}
    if CONFIG_FILE.exists():
        try:
            existing = json.loads(CONFIG_FILE.read_text())
        except ValueError:
            # Covers invalid JSON and undecodable bytes: start from scratch.
            existing = {}
        if not isinstance(existing, dict):
            existing = {}

    config = {}
    for key, label, default in fields:
        current = existing.get(key, default)
        prompt = f" {label}"
        if current:
            prompt += f" [{current}]"
        prompt += ": "
        val = input(prompt).strip()
        config[key] = val if val else (current or "")

    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    CONFIG_FILE.write_text(json.dumps(config, ensure_ascii=False, indent=2))
    try:
        # The file contains secrets; keep other users from reading it.
        CONFIG_FILE.chmod(0o600)
    except OSError:
        # Best effort only: not every platform/filesystem supports this.
        pass
    print(f"\n配置已保存到 {CONFIG_FILE}")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def main():
    """Command-line entry point.

    ``voice2text init`` runs the interactive configuration.  Otherwise the
    given files are processed concurrently with ``process_file``, an output
    file in the selected format is written next to each completed input, and
    a summary is printed to stderr.

    A worker that raises is recorded as a failed item instead of aborting the
    run, so output files for the other inputs are still written.
    """
    # The "init" sub-command is handled before argument parsing.
    if len(sys.argv) >= 2 and sys.argv[1] == "init":
        init_config()
        return

    parser = argparse.ArgumentParser(description="voice2text - 火山引擎语音识别工具",
                                     epilog="子命令: voice2text init 交互式初始化配置")
    parser.add_argument("files", nargs="+", help="音频文件路径")
    parser.add_argument("--srt", action="store_true", help="输出SRT字幕 (默认)")
    parser.add_argument("--txt", action="store_true", help="输出纯文本")
    parser.add_argument("--json", action="store_true", help="输出原始JSON")
    parser.add_argument("--lrc", action="store_true", help="输出LRC歌词")
    parser.add_argument("--force", action="store_true", help="忽略缓存,强制重新识别")
    parser.add_argument("--max-workers", type=int, default=None, help="最大并发数 (默认3)")
    args = parser.parse_args()

    try:
        config = load_config()
    except ValueError as e:
        print(f"错误: {e}", file=sys.stderr)
        sys.exit(1)

    max_workers = args.max_workers or int(config.get("max_concurrent_tasks", "3"))
    fmt = "json" if args.json else "txt" if args.txt else "lrc" if args.lrc else "srt"

    # Process the files concurrently.
    results = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {
            executor.submit(process_file, f, config, fmt, args.force): f
            for f in args.files
        }

        for future in as_completed(futures):
            try:
                results.append(future.result())
            except Exception as e:
                # Keep one failing worker from discarding everyone else's work.
                source = Path(futures[future])
                print(f"[失败] {source.name}: {e}", file=sys.stderr)
                results.append({"file": str(source), "status": "failed", "error": str(e)})

    # Write one output file per completed input.
    ext = {"srt": ".srt", "txt": ".txt", "json": ".json", "lrc": ".lrc"}[fmt]
    for r in results:
        if r["status"] == "completed":
            out_path = Path(r["file"]).with_suffix(ext)
            out_path.write_text(format_result(r["result"], fmt), encoding="utf-8")

    # Summary.
    total = len(results)
    completed = sum(1 for r in results if r["status"] == "completed")
    failed = total - completed
    print(f"\n完成 {completed}/{total}" + (f" | 失败 {failed}" if failed else ""), file=sys.stderr)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
if __name__ == "__main__":
|
|
183
|
+
main()
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""火山引擎 ASR API 核心逻辑"""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import uuid
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import boto3
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
from volc_asr.cache import CACHE_DIR
|
|
12
|
+
|
|
13
|
+
SUBMIT_URL = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit"
|
|
14
|
+
QUERY_URL = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/query"
|
|
15
|
+
CONFIG_FILE = CACHE_DIR / "config.json"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def load_config():
    """Build the effective configuration.

    Starts from the JSON in ``CONFIG_FILE`` (when the file exists), lets
    non-empty environment variables override individual keys, then fills in
    defaults for ``resource_id`` and ``max_concurrent_tasks``.

    Raises:
        ValueError: If ``app_key`` or ``access_key`` is missing or empty.
    """
    config = json.loads(CONFIG_FILE.read_text()) if CONFIG_FILE.exists() else {}

    # (config key, environment variable that overrides it)
    overrides = (
        ("app_key", "VOLC_APP_KEY"),
        ("access_key", "VOLC_ACCESS_KEY"),
        ("resource_id", "VOLC_RESOURCE_ID"),
        ("s3_endpoint", "S3_ENDPOINT"),
        ("s3_bucket", "S3_BUCKET"),
        ("s3_access_key", "S3_ACCESS_KEY"),
        ("s3_secret_key", "S3_SECRET_KEY"),
        ("max_concurrent_tasks", "MAX_CONCURRENT_TASKS"),
    )
    for key, env_name in overrides:
        value = os.getenv(env_name)
        if value:
            config[key] = value

    # Defaults apply only to keys that are still absent.
    for key, fallback in (("resource_id", "volc.seedasr.auc"), ("max_concurrent_tasks", "3")):
        config.setdefault(key, fallback)

    if not (config.get("app_key") and config.get("access_key")):
        raise ValueError("请运行 voice2text init 初始化配置,或设置 VOLC_APP_KEY 和 VOLC_ACCESS_KEY 环境变量")

    return config
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def make_headers(app_key, access_key, resource_id, request_id):
    """Return the HTTP headers sent with the submit and query requests."""
    pairs = (
        ("Content-Type", "application/json"),
        ("X-Api-App-Key", app_key),
        ("X-Api-Access-Key", access_key),
        ("X-Api-Resource-Id", resource_id),
        ("X-Api-Request-Id", request_id),
    )
    return dict(pairs)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def guess_format(url):
    """Guess the audio format name from *url*.

    The extension of the URL's path component is checked first, so text in
    the host name or query string (for example ``my.mp3.example.com`` or a
    signed query) cannot shadow the real file extension.  If the path has no
    known extension, the whole lowercased URL is scanned for ``.mp3``,
    ``.wav``, ``.ogg`` or ``.m4a`` as before.

    Returns:
        One of ``"mp3"``, ``"wav"``, ``"ogg"``, ``"m4a"``; ``"mp3"`` when
        nothing matches.
    """
    from urllib.parse import urlsplit

    known = ("mp3", "wav", "ogg", "m4a")
    lower = url.lower()

    try:
        url_path = urlsplit(lower).path
    except ValueError:
        # Malformed URL: rely on the substring scan below.
        url_path = ""
    suffix = os.path.splitext(url_path)[1].lstrip(".")
    if suffix in known:
        return suffix

    # Fallback: extension appears elsewhere, e.g. ".../get?file=a.ogg".
    for fmt in known:
        if f".{fmt}" in lower:
            return fmt
    return "mp3"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def prepare_audio_url(audio_input, config):
    """Turn *audio_input* into a URL.

    An ``http://`` or ``https://`` input is returned unchanged.  A local file
    is uploaded to the configured S3 bucket under a random object name (the
    original suffix is kept) and a presigned ``get_object`` URL with
    ``ExpiresIn=86400`` is returned.

    Raises:
        FileNotFoundError: If the local file does not exist.
        ValueError: If any of ``s3_endpoint``, ``s3_bucket``,
            ``s3_access_key`` or ``s3_secret_key`` is missing from *config*.
    """
    if audio_input.startswith("http://") or audio_input.startswith("https://"):
        return audio_input

    local_file = Path(audio_input)
    if not local_file.exists():
        raise FileNotFoundError(f"文件不存在: {audio_input}")

    endpoint = config.get("s3_endpoint")
    bucket = config.get("s3_bucket")
    s3_key_id = config.get("s3_access_key")
    s3_secret = config.get("s3_secret_key")
    if not (endpoint and bucket and s3_key_id and s3_secret):
        raise ValueError("本地文件需配置 s3_endpoint, s3_bucket, s3_access_key, s3_secret_key")

    # NOTE(review): the region is fixed to "cn-guangzhou" even though the
    # endpoint is configurable -- confirm this works for other endpoints.
    client_options = {
        "endpoint_url": endpoint,
        "aws_access_key_id": s3_key_id,
        "aws_secret_access_key": s3_secret,
        "region_name": "cn-guangzhou",
        "config": boto3.session.Config(s3={'addressing_style': 'virtual'}),
    }
    client = boto3.client("s3", **client_options)

    object_key = uuid.uuid4().hex + local_file.suffix
    client.upload_file(str(local_file), bucket, object_key)
    return client.generate_presigned_url(
        "get_object",
        Params={"Bucket": bucket, "Key": object_key},
        ExpiresIn=86400,
    )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def submit_task(audio_url, app_key, access_key, resource_id, timeout=30):
    """Submit a recognition task and return its request id.

    A fresh UUID is generated as the request id and sent in the request
    headers; the outcome is read from the ``X-Api-Status-Code`` and
    ``X-Api-Message`` response headers.

    Args:
        audio_url: URL of the audio to recognize.
        app_key: Value for the ``X-Api-App-Key`` header.
        access_key: Value for the ``X-Api-Access-Key`` header.
        resource_id: Value for the ``X-Api-Resource-Id`` header.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        The request id used for this submission.

    Raises:
        RuntimeError: If the status header is not ``"20000000"``.
        requests.exceptions.RequestException: On network errors or timeout.
    """
    request_id = str(uuid.uuid4())
    headers = make_headers(app_key, access_key, resource_id, request_id)
    headers["X-Api-Sequence"] = "-1"

    payload = {
        "user": {"uid": "volc_asr_cli"},
        "audio": {"format": guess_format(audio_url), "url": audio_url},
        "request": {
            "model_name": "bigmodel",
            "enable_itn": True,
            "enable_punc": True,
            "show_utterances": True,
        },
    }

    resp = requests.post(SUBMIT_URL, json=payload, headers=headers, timeout=timeout)
    status_code = resp.headers.get("X-Api-Status-Code", "")
    message = resp.headers.get("X-Api-Message", "")

    if status_code != "20000000":
        raise RuntimeError(f"提交失败: [{status_code}] {message}")

    return request_id
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def query_result(request_id, app_key, access_key, resource_id, timeout=30):
    """Query the state of a submitted task.

    Args:
        request_id: Request id returned by ``submit_task``.
        app_key: Value for the ``X-Api-App-Key`` header.
        access_key: Value for the ``X-Api-Access-Key`` header.
        resource_id: Value for the ``X-Api-Resource-Id`` header.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        ``(status_code, body)``: the ``X-Api-Status-Code`` response header
        (``""`` when absent) and the decoded JSON body.  ``body`` is ``{}``
        when the response body is empty or is not valid JSON.

    Raises:
        requests.exceptions.RequestException: On network errors or timeout.
    """
    headers = make_headers(app_key, access_key, resource_id, request_id)
    resp = requests.post(QUERY_URL, json={}, headers=headers, timeout=timeout)
    status_code = resp.headers.get("X-Api-Status-Code", "")

    body = {}
    if resp.text.strip():
        try:
            body = resp.json()
        except ValueError:
            # Non-JSON payload: let the caller decide based on the status
            # code instead of failing while decoding.
            body = {}
    return status_code, body
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def format_time(seconds):
    """Format *seconds* as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is first converted to a whole number of milliseconds by
    rounding.  Splitting the float directly and truncating loses a
    millisecond for values such as 1.001, whose fractional part is slightly
    below 0.001 in binary floating point.
    """
    total_ms = int(round(seconds * 1000))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def to_srt(utterances):
    """Render *utterances* as SRT text.

    Each utterance contributes four lines: a 1-based index, the time range
    built with ``format_time`` from ``start_time``/``end_time`` divided by
    1000, the ``text``, and an empty separator line.
    """
    pieces = []
    for index, seg in enumerate(utterances, 1):
        begin = seg["start_time"] / 1000
        finish = seg["end_time"] / 1000
        time_range = f"{format_time(begin)} --> {format_time(finish)}"
        pieces.extend((str(index), time_range, seg["text"], ""))
    return "\n".join(pieces)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def to_lrc(utterances):
    """Render *utterances* as LRC lines of the form ``[mm:ss.xx]text``.

    ``start_time`` is divided by 1000 in the original formatting, i.e. it is
    handled as milliseconds.  The timestamp is computed with integer
    arithmetic on centiseconds, so a value just below a minute boundary
    (e.g. 59999) carries into the minutes field (``[01:00.00]``) instead of
    being rounded by float formatting to the invalid ``[00:60.00]``.
    """
    lines = []
    for seg in utterances:
        # Round to the nearest centisecond, then split with carry.
        total_cs = (int(seg["start_time"]) + 5) // 10
        minutes, remainder = divmod(total_cs, 6000)
        seconds, centis = divmod(remainder, 100)
        lines.append(f"[{minutes:02d}:{seconds:02d}.{centis:02d}]{seg['text']}")
    return "\n".join(lines)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def format_result(body, fmt="srt"):
    """Render the API response *body* as a string in the requested format.

    ``"json"`` dumps the whole body as indented JSON; ``"txt"`` returns the
    result text; ``"lrc"`` uses ``to_lrc``; any other value is treated as
    SRT and uses ``to_srt``.  For LRC and SRT the plain result text is
    returned when the result has no utterances.
    """
    if fmt == "json":
        return json.dumps(body, ensure_ascii=False, indent=2)

    result = body.get("result", {})
    if fmt == "txt":
        return result.get("text", "")

    utterances = result.get("utterances", [])
    if not utterances:
        # Nothing to time-stamp: fall back to the plain text.
        return result.get("text", "")

    renderer = to_lrc if fmt == "lrc" else to_srt
    return renderer(utterances)
|