@sdamarketing/qwen-tts-client 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +17 -0
- package/LICENSE +21 -0
- package/README.md +100 -0
- package/deploy/openclaw/profiles/README.md +17 -0
- package/deploy/openclaw/profiles/remote-api/openclaw-node.env.example +22 -0
- package/deploy/openclaw/profiles/remote-api/openclaw.tts.json5 +25 -0
- package/docs/01-production-setup.md +65 -0
- package/docs/02-npm-ci-token.md +59 -0
- package/docs/README.md +11 -0
- package/package.json +48 -0
- package/scripts/install_qwen_tts_client.sh +184 -0
- package/scripts/qwen_tts_proxy_opus.sh +26 -0
- package/scripts/qwen_tts_runtime.py +310 -0
package/.env.example
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
CENTRAL_TTS_BASE_URL=https://qwen-tts-118.tailf26c2b.ts.net
|
|
2
|
+
CENTRAL_TTS_API_KEY=change_me_same_value_as_server_tts_api_key
|
|
3
|
+
SMOKE_TEXT=Проверка подключения Qwen TTS Client к удаленному серверу.
|
|
4
|
+
CENTRAL_TTS_TARGET=voice-note
|
|
5
|
+
CENTRAL_TTS_TIMEOUT_SEC=120
|
|
6
|
+
CENTRAL_TTS_RETRIES=2
|
|
7
|
+
CENTRAL_TTS_RETRY_BACKOFF_MS=350
|
|
8
|
+
# When the server returns WAV/MP3/etc. but OpenClaw expects *.opus, runtime transcodes via ffmpeg:
|
|
9
|
+
CENTRAL_TTS_FFMPEG_TIMEOUT_SEC=120
|
|
10
|
+
# Set to 1 only if CENTRAL_TTS_BASE_URL must be reached via HTTP_PROXY/HTTPS_PROXY (default: direct, no proxy).
|
|
11
|
+
CENTRAL_TTS_USE_SYSTEM_PROXY=
|
|
12
|
+
|
|
13
|
+
# Optional OpenClaw 2026.4.25+ hints
|
|
14
|
+
CENTRAL_TTS_VOICE=
|
|
15
|
+
CENTRAL_TTS_MODEL=
|
|
16
|
+
CENTRAL_TTS_PERSONA=
|
|
17
|
+
CENTRAL_TTS_SESSION_HINTS_JSON=
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 sdamarketing
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# Qwen TTS Client
|
|
2
|
+
|
|
3
|
+
Production-клиент для OpenClaw `tts-local-cli`, который проксирует синтез на удаленный `Qwen TTS Server`.
|
|
4
|
+
|
|
5
|
+
Базовый путь:
|
|
6
|
+
|
|
7
|
+
- `OpenClaw (tts-local-cli)`
|
|
8
|
+
- `qwen_tts_proxy_opus.sh` (тонкий launcher)
|
|
9
|
+
- `qwen_tts_runtime.py` (валидация/retry/diagnostics)
|
|
10
|
+
- `Qwen TTS Server /tts`
|
|
11
|
+
- возврат `OGG/Opus` или `WAV` (по настройке сервера)
|
|
12
|
+
|
|
13
|
+
## Что делает проект
|
|
14
|
+
|
|
15
|
+
- интерактивно запрашивает endpoint и API key удаленного TTS;
|
|
16
|
+
- генерирует `~/.openclaw/qwen_tts_client.env` с retry/timeout/hints настройками;
|
|
17
|
+
- настраивает `~/.openclaw/openclaw.json` под `tts-local-cli` без удаления других providers;
|
|
18
|
+
- мигрирует legacy-ключи (`audioAsVoice`, `textLimit`) в `maxTextLength`;
|
|
19
|
+
- выполняет smoke-test и проверяет выходной аудиофайл.
|
|
20
|
+
|
|
21
|
+
## Быстрый запуск
|
|
22
|
+
|
|
23
|
+
### Установка из npm (npmjs.com)
|
|
24
|
+
|
|
25
|
+
После `npm run publish:all` пакет доступен на обоих реестрах. С npmjs.com ставится **без** `.npmrc`:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
npm install -g @sdamarketing/qwen-tts-client
|
|
29
|
+
qwen-tts-install
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Установка из GitHub Packages
|
|
33
|
+
|
|
34
|
+
Скопируй `.npmrc.github.example` в `~/.npmrc` и задай `GITHUB_TOKEN` (`read:packages`), либо:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
npm install -g @sdamarketing/qwen-tts-client --registry=https://npm.pkg.github.com
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Публикация в оба реестра (maintainers)
|
|
41
|
+
|
|
42
|
+
Один tarball, два реестра (имя и версия совпадают):
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
# npmjs.com — нужен OTP или NPM_TOKEN с publish
|
|
46
|
+
npm run publish:npm
|
|
47
|
+
|
|
48
|
+
# GitHub Packages — GITHUB_TOKEN с write:packages
|
|
49
|
+
export NODE_AUTH_TOKEN="$GITHUB_TOKEN"
|
|
50
|
+
npm run publish:github
|
|
51
|
+
|
|
52
|
+
# или оба подряд
|
|
53
|
+
npm run publish:all
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
На npmjs.com для scoped-пакета обязателен `--access public` (уже в скрипте `publish:npm`).
|
|
57
|
+
|
|
58
|
+
CI: при Release workflow `.github/workflows/publish.yml` публикует в npmjs и GitHub Packages **параллельно**.
|
|
59
|
+
|
|
60
|
+
**npmjs.com:** Trusted Publisher на пакете (workflow `publish.yml`, OIDC) — секрет `NPM_TOKEN` не нужен. Ошибки `403`/`EOTP` — см. [docs/02-npm-ci-token.md](docs/02-npm-ci-token.md).
|
|
61
|
+
|
|
62
|
+
Прокси для ручного smoke-test:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
qwen-tts-proxy "Проверка TTS" /tmp/qwen-tts-smoke.ogg
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Установка из git
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
git clone https://github.com/sdamarketing/qwen-tts-client.git
|
|
72
|
+
cd qwen-tts-client
|
|
73
|
+
chmod +x ./scripts/install_qwen_tts_client.sh
|
|
74
|
+
./scripts/install_qwen_tts_client.sh
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
После установки проверь в чате OpenClaw:
|
|
78
|
+
|
|
79
|
+
- `/tts status`
|
|
80
|
+
- `/tts latest`
|
|
81
|
+
- `/tts chat on`
|
|
82
|
+
- `/tts persona off`
|
|
83
|
+
|
|
84
|
+
## Новые возможности OpenClaw 2026.4.25+
|
|
85
|
+
|
|
86
|
+
Клиент совместим с расширенным TTS workflow:
|
|
87
|
+
|
|
88
|
+
- chat-level override: `/tts chat on|off|default`;
|
|
89
|
+
- ручная озвучка последнего ответа: `/tts latest`;
|
|
90
|
+
- persona override: `/tts persona <id>|off`;
|
|
91
|
+
- per-agent/per-channel overlays через OpenClaw-конфиг (`agents.list[].tts`, `channels.<channel>.accounts.<id>.tts`).
|
|
92
|
+
|
|
93
|
+
## Структура
|
|
94
|
+
|
|
95
|
+
- `scripts/install_qwen_tts_client.sh` — интерактивный установщик;
|
|
96
|
+
- `scripts/qwen_tts_proxy_opus.sh` — shell launcher для Local CLI;
|
|
97
|
+
- `scripts/qwen_tts_runtime.py` — runtime транспорт в удаленный `/tts`;
|
|
98
|
+
- `deploy/openclaw/profiles/remote-api` — готовый профиль OpenClaw для remote TTS;
|
|
99
|
+
- `.env.example` — шаблон клиентских переменных;
|
|
100
|
+
- `docs/` — production runbook клиента.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# OpenClaw Profile (Remote API)
|
|
2
|
+
|
|
3
|
+
Этот проект использует один production-профиль клиента:
|
|
4
|
+
|
|
5
|
+
- `remote-api` — OpenClaw не запускает локальный TTS и получает финальный `OGG/Opus` от удаленного сервера.
|
|
6
|
+
|
|
7
|
+
## Файлы
|
|
8
|
+
|
|
9
|
+
- `remote-api/openclaw.tts.json5` — фрагмент для `messages.tts`;
|
|
10
|
+
- `remote-api/openclaw-node.env.example` — пример переменных окружения gateway.
|
|
11
|
+
|
|
12
|
+
## Принцип
|
|
13
|
+
|
|
14
|
+
- `tts-local-cli` активируется как primary provider;
|
|
15
|
+
- `qwen_tts_proxy_opus.sh` запускает `qwen_tts_runtime.py`;
|
|
16
|
+
- runtime делает HTTPS вызов в удаленный `/tts` и поддерживает retry/timeout/diagnostics;
|
|
17
|
+
- локальная генерация отсутствует; локальная конверсия (транскодирование в Opus через `ffmpeg`) выполняется только если сервер вернул не Opus-in-Ogg.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# OpenClaw gateway runtime
|
|
2
|
+
OPENCLAW_GATEWAY_PORT=18789
|
|
3
|
+
OPENCLAW_DISABLE_BONJOUR=1
|
|
4
|
+
OPENCLAW_NO_RESPAWN=1
|
|
5
|
+
NODE_COMPILE_CACHE=/var/tmp/openclaw-compile-cache
|
|
6
|
+
NODE_OPTIONS=--dns-result-order=ipv4first
|
|
7
|
+
|
|
8
|
+
# Remote Qwen TTS endpoint
|
|
9
|
+
CENTRAL_TTS_BASE_URL=https://qwen-tts-118.tailf26c2b.ts.net
|
|
10
|
+
CENTRAL_TTS_API_KEY=change_me_same_value_as_server_tts_api_key
|
|
11
|
+
CENTRAL_TTS_TARGET=voice-note
|
|
12
|
+
CENTRAL_TTS_TIMEOUT_SEC=120
|
|
13
|
+
CENTRAL_TTS_RETRIES=2
|
|
14
|
+
CENTRAL_TTS_RETRY_BACKOFF_MS=350
|
|
15
|
+
CENTRAL_TTS_FFMPEG_TIMEOUT_SEC=120
|
|
16
|
+
CENTRAL_TTS_USE_SYSTEM_PROXY=
|
|
17
|
+
|
|
18
|
+
# Optional OpenClaw 2026.4.25+ hints
|
|
19
|
+
CENTRAL_TTS_VOICE=
|
|
20
|
+
CENTRAL_TTS_MODEL=
|
|
21
|
+
CENTRAL_TTS_PERSONA=
|
|
22
|
+
CENTRAL_TTS_SESSION_HINTS_JSON=
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
// Merge this fragment into ~/.openclaw/openclaw.json; replace /home/alekhm in the paths below with your own home directory.
|
|
3
|
+
messages: {
|
|
4
|
+
tts: {
|
|
5
|
+
enabled: true,
|
|
6
|
+
provider: "tts-local-cli",
|
|
7
|
+
auto: "off",
|
|
8
|
+
persona: "",
|
|
9
|
+
personas: {},
|
|
10
|
+
maxTextLength: 1500,
|
|
11
|
+
providers: {
|
|
12
|
+
"tts-local-cli": {
|
|
13
|
+
enabled: true,
|
|
14
|
+
command: "/home/alekhm/.openclaw/bin/qwen_tts_proxy_opus.sh",
|
|
15
|
+
args: ["{{Text}}", "{{OutputPath}}"],
|
|
16
|
+
outputFormat: "opus",
|
|
17
|
+
timeoutMs: 120000,
|
|
18
|
+
env: {
|
|
19
|
+
QWEN_TTS_RUNTIME_SCRIPT: "/home/alekhm/.openclaw/bin/qwen_tts_runtime.py"
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# 01. Production Setup
|
|
2
|
+
|
|
3
|
+
## Цель
|
|
4
|
+
|
|
5
|
+
Подключить OpenClaw-клиент к удаленному `Qwen TTS Server` через `tts-local-cli`, без локальной генерации.
|
|
6
|
+
|
|
7
|
+
## Предпосылки
|
|
8
|
+
|
|
9
|
+
- установлен и хотя бы один раз запущен OpenClaw (должен существовать `~/.openclaw/openclaw.json`);
|
|
10
|
+
- есть рабочий endpoint сервера `/tts`;
|
|
11
|
+
- есть API key сервера.
|
|
12
|
+
|
|
13
|
+
## Установка
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
cd products/qwen-tts-client
|
|
17
|
+
chmod +x ./scripts/install_qwen_tts_client.sh
|
|
18
|
+
./scripts/install_qwen_tts_client.sh
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Скрипт:
|
|
22
|
+
|
|
23
|
+
- создаёт `~/.openclaw/qwen_tts_client.env`;
|
|
24
|
+
- устанавливает `qwen_tts_proxy_opus.sh` и `qwen_tts_runtime.py` в `~/.openclaw/bin`;
|
|
25
|
+
- патчит `~/.openclaw/openclaw.json` на `tts-local-cli` без удаления остальных providers;
|
|
26
|
+
- чистит legacy-ключи `audioAsVoice` / `textLimit`;
|
|
27
|
+
- выполняет smoke-test.
|
|
28
|
+
|
|
29
|
+
## Проверка в OpenClaw
|
|
30
|
+
|
|
31
|
+
- `/tts status`
|
|
32
|
+
- `/tts latest`
|
|
33
|
+
- `/tts chat on`
|
|
34
|
+
- `/tts persona off`
|
|
35
|
+
- отправить тестовое сообщение для озвучки
|
|
36
|
+
|
|
37
|
+
Если после обновления OpenClaw видишь ошибку `Unrecognized keys: "audioAsVoice", "textLimit"`, значит, в `messages.tts` остались старые ключи. Удали их и оставь `maxTextLength`.
|
|
38
|
+
|
|
39
|
+
Если `/tts status` показывает лимит 1500, а синтез падает с **`max 600`**, в `~/.openclaw/openclaw.json` в `messages.tts` всё ещё стоит `maxTextLength: 600`. Поставь **не меньше 1500** (или перезапусти актуальный `install_qwen_tts_client.sh` — он поднимет лимит минимум до 1500, не урезая более высокий, если ты его задавал).
|
|
40
|
+
|
|
41
|
+
## Ручной профиль (без установщика)
|
|
42
|
+
|
|
43
|
+
Если нужно применить настройки вручную:
|
|
44
|
+
|
|
45
|
+
- взять фрагмент `deploy/openclaw/profiles/remote-api/openclaw.tts.json5`;
|
|
46
|
+
- использовать пример env `deploy/openclaw/profiles/remote-api/openclaw-node.env.example`.
|
|
47
|
+
|
|
48
|
+
## Архитектура
|
|
49
|
+
|
|
50
|
+
`OpenClaw` -> `tts-local-cli` -> `qwen_tts_proxy_opus.sh` -> `qwen_tts_runtime.py` -> `https://<server>/tts` -> `OGG/Opus | WAV`
|
|
51
|
+
|
|
52
|
+
OpenClaw для `outputFormat: opus` передаёт путь вида `…/speech.opus` и **считает формат по расширению**: если записать туда WAV, озвучка падает с `provider_error`. `qwen_tts_runtime.py` проверяет сигнатуру `OpusHead` в Ogg и при необходимости **транскодирует в Opus через `ffmpeg`**. На gateway/host должен быть доступен `ffmpeg`, если сервер отдаёт не Opus-in-Ogg.
|
|
53
|
+
|
|
54
|
+
Если `tts-local-cli` стабильно падает с `provider_error`, а с ноутбука `curl` до `/tts` работает: у процесса gateway часто заданы `HTTP_PROXY`/`HTTPS_PROXY`. Рантайм по умолчанию **не** использует системный прокси для `CENTRAL_TTS_BASE_URL` (Tailscale и приватные URL так не ломаются). Если TTS доступен **только** через прокси — выставь `CENTRAL_TTS_USE_SYSTEM_PROXY=1` в `qwen_tts_client.env`.
|
|
55
|
+
|
|
56
|
+
Если в логах видно **`503` / `no tunnel here`** на URL вида `*.lhr.life` / `trycloudflare.com` — временный туннель к машине с TTS **умер**; подними туннель заново или переключи `CENTRAL_TTS_BASE_URL` на постоянный хост (например Tailscale `*.ts.net`), затем перезапусти gateway.
|
|
57
|
+
|
|
58
|
+
## Расширенный payload `/tts`
|
|
59
|
+
|
|
60
|
+
`qwen_tts_runtime.py` отправляет backward-compatible тело:
|
|
61
|
+
|
|
62
|
+
- обязательно: `text`
|
|
63
|
+
- опционально: `voice`, `model`, `persona`, `target`, `requestId`, `sessionHints`
|
|
64
|
+
|
|
65
|
+
Старый контракт (только `text`) продолжает работать.
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Публикация на npmjs.com из GitHub Actions
|
|
2
|
+
|
|
3
|
+
## Ошибки в CI
|
|
4
|
+
|
|
5
|
+
| Код | Причина |
|
|
6
|
+
|-----|---------|
|
|
7
|
+
| `403` + bypass 2fa | `NPM_TOKEN` — не Automation / без bypass 2FA |
|
|
8
|
+
| `EOTP` | В workflow передан `NPM_TOKEN` типа **Publish** — npm требует OTP, в CI его нет |
|
|
9
|
+
|
|
10
|
+
**Рекомендуемый способ:** [npm Trusted Publishers](https://docs.npmjs.com/trusted-publishers) (OIDC). Секрет `NPM_TOKEN` для publish **не нужен**.
|
|
11
|
+
|
|
12
|
+
## Trusted Publisher (рекомендуется)
|
|
13
|
+
|
|
14
|
+
Один раз на npmjs.com (под аккаунтом с правом publish `@sdamarketing/*`):
|
|
15
|
+
|
|
16
|
+
1. Открой настройки пакета `@sdamarketing/qwen-tts-client` → **Publishing access** → **Trusted publishing**
|
|
17
|
+
(или org: https://www.npmjs.com/settings/sdamarketing/packages)
|
|
18
|
+
2. **Add GitHub Actions trusted publisher**
|
|
19
|
+
- Repository: `sdamarketing/qwen-tts-client`
|
|
20
|
+
- Workflow filename: `publish.yml`
|
|
21
|
+
- Environment: пусто (если не используете GitHub Environment)
|
|
22
|
+
3. В GitHub **удали секрет `NPM_TOKEN`** (если есть) — иначе старый токен может мешать.
|
|
23
|
+
4. Re-run workflow **Publish package**.
|
|
24
|
+
|
|
25
|
+
Workflow уже настроен: `id-token: write`, `npm publish --provenance`, **без** `NODE_AUTH_TOKEN`.
|
|
26
|
+
|
|
27
|
+
Первый publish scoped-пакета иногда делают локально один раз:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
npm run publish:npm -- --otp=123456
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
После появления пакета на npmjs — дальнейшие версии через CI + Trusted Publisher.
|
|
34
|
+
|
|
35
|
+
## Запасной вариант: NPM_TOKEN
|
|
36
|
+
|
|
37
|
+
Если Trusted Publisher не используете:
|
|
38
|
+
|
|
39
|
+
1. [Access Tokens](https://www.npmjs.com/settings/~/tokens) → Classic **Automation** (не Publish)
|
|
40
|
+
или Granular с **Bypass two-factor authentication for automation**
|
|
41
|
+
2. Секрет `NPM_TOKEN` в GitHub Actions
|
|
42
|
+
3. В workflow **временно** вернуть:
|
|
43
|
+
|
|
44
|
+
```yaml
|
|
45
|
+
env:
|
|
46
|
+
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Не смешивайте Trusted Publisher и Publish-токен в одном job — будет `EOTP` или `403`.
|
|
50
|
+
|
|
51
|
+
## После смены версии
|
|
52
|
+
|
|
53
|
+
Если версия уже на registry — подними `version` в `package.json` перед повторным publish.
|
|
54
|
+
|
|
55
|
+
## Локально с 2FA
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
npm run publish:npm -- --otp=123456
|
|
59
|
+
```
|
package/docs/README.md
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Qwen TTS Client Docs
|
|
2
|
+
|
|
3
|
+
Документация по отдельному production-проекту `Qwen TTS Client`.
|
|
4
|
+
|
|
5
|
+
- [01. Production Setup](01-production-setup.md)
- [02. Публикация на npmjs.com из GitHub Actions](02-npm-ci-token.md)
|
|
6
|
+
|
|
7
|
+
Клиент ориентирован на OpenClaw `2026.4.25+` и учитывает:
|
|
8
|
+
|
|
9
|
+
- `messages.tts.providers` как основную схему;
|
|
10
|
+
- новые команды `/tts latest`, `/tts chat`, `/tts persona`;
|
|
11
|
+
- совместимость с per-agent/per-channel overrides без агрессивной зачистки `openclaw.json`.
|
package/package.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@sdamarketing/qwen-tts-client",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "OpenClaw tts-local-cli client for remote Qwen TTS Server (OGG/Opus proxy)",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"openclaw",
|
|
7
|
+
"tts",
|
|
8
|
+
"qwen",
|
|
9
|
+
"opus",
|
|
10
|
+
"voice"
|
|
11
|
+
],
|
|
12
|
+
"homepage": "https://github.com/sdamarketing/qwen-tts-client#readme",
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/sdamarketing/qwen-tts-client/issues"
|
|
15
|
+
},
|
|
16
|
+
"repository": {
|
|
17
|
+
"type": "git",
|
|
18
|
+
"url": "git+https://github.com/sdamarketing/qwen-tts-client.git"
|
|
19
|
+
},
|
|
20
|
+
"license": "MIT",
|
|
21
|
+
"author": "sdamarketing",
|
|
22
|
+
"type": "commonjs",
|
|
23
|
+
"files": [
|
|
24
|
+
"scripts/",
|
|
25
|
+
"deploy/",
|
|
26
|
+
"docs/",
|
|
27
|
+
".env.example",
|
|
28
|
+
"README.md"
|
|
29
|
+
],
|
|
30
|
+
"bin": {
|
|
31
|
+
"qwen-tts-proxy": "scripts/qwen_tts_proxy_opus.sh",
|
|
32
|
+
"qwen-tts-install": "scripts/install_qwen_tts_client.sh"
|
|
33
|
+
},
|
|
34
|
+
"scripts": {
|
|
35
|
+
"prepack": "chmod +x scripts/qwen_tts_proxy_opus.sh scripts/install_qwen_tts_client.sh",
|
|
36
|
+
"prepublishOnly": "npm run pack:check",
|
|
37
|
+
"pack:check": "npm pack --dry-run",
|
|
38
|
+
"publish:npm": "npm publish --access public --registry https://registry.npmjs.org",
|
|
39
|
+
"publish:github": "npm publish --registry https://npm.pkg.github.com",
|
|
40
|
+
"publish:all": "npm run publish:npm && npm run publish:github"
|
|
41
|
+
},
|
|
42
|
+
"engines": {
|
|
43
|
+
"node": ">=18"
|
|
44
|
+
},
|
|
45
|
+
"publishConfig": {
|
|
46
|
+
"access": "public"
|
|
47
|
+
}
|
|
48
|
+
}
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
|
5
|
+
OPENCLAW_HOME="${OPENCLAW_HOME:-$HOME/.openclaw}"
|
|
6
|
+
CLIENT_ENV_PATH="${OPENCLAW_HOME}/qwen_tts_client.env"
|
|
7
|
+
PROXY_SCRIPT_SOURCE="${ROOT_DIR}/scripts/qwen_tts_proxy_opus.sh"
|
|
8
|
+
PROXY_SCRIPT_TARGET="${OPENCLAW_HOME}/bin/qwen_tts_proxy_opus.sh"
|
|
9
|
+
RUNTIME_SCRIPT_SOURCE="${ROOT_DIR}/scripts/qwen_tts_runtime.py"
|
|
10
|
+
RUNTIME_SCRIPT_TARGET="${OPENCLAW_HOME}/bin/qwen_tts_runtime.py"
|
|
11
|
+
DEFAULT_BASE_URL="https://qwen-tts-118.tailf26c2b.ts.net"
|
|
12
|
+
|
|
13
|
+
# Print a blank line followed by a "==> <title>" banner for the step named in $1.
print_step() {
  printf '\n==> %s\n' "$1"
}
|
|
17
|
+
|
|
18
|
+
# Print $1 quoted with Python's shlex.quote so it can be stored as a single
# shell-style word on the right-hand side of KEY=value in the env file.
env_quote() {
  local raw_value="$1"
  python3 -c 'import shlex,sys; print(shlex.quote(sys.argv[1]))' "${raw_value}"
}
|
|
21
|
+
|
|
22
|
+
# Ask for a value on the terminal and print the answer on stdout.
#   $1 - label shown in the prompt
#   $2 - default, shown in brackets and used when the answer is empty
prompt_value() {
  local label="$1" default_value="$2" answer
  read -r -p "${label} [${default_value}]: " answer
  echo "${answer:-${default_value}}"
}
|
|
32
|
+
|
|
33
|
+
print_step "Qwen TTS Client setup"
echo "Root: ${ROOT_DIR}"
echo "OpenClaw home: ${OPENCLAW_HOME}"

BASE_URL="$(prompt_value "CENTRAL_TTS_BASE_URL" "${DEFAULT_BASE_URL}")"
if [[ "${BASE_URL}" != http://* && "${BASE_URL}" != https://* ]]; then
  echo "ERROR: CENTRAL_TTS_BASE_URL must start with http:// or https://" >&2
  exit 1
fi

# -s keeps the API key off the terminal; the bare echo restores the newline.
read -r -s -p "CENTRAL_TTS_API_KEY: " API_KEY
echo
if [[ -z "${API_KEY}" ]]; then
  echo "ERROR: CENTRAL_TTS_API_KEY cannot be empty" >&2
  exit 1
fi

SMOKE_TEXT="$(prompt_value "Smoke test text" "Проверка удаленного TTS клиента.")"
DEFAULT_TARGET="$(prompt_value "CENTRAL_TTS_TARGET (voice-note|audio-file|telephony)" "voice-note")"
DEFAULT_TIMEOUT_SEC="$(prompt_value "CENTRAL_TTS_TIMEOUT_SEC" "120")"
DEFAULT_RETRIES="$(prompt_value "CENTRAL_TTS_RETRIES" "2")"
DEFAULT_BACKOFF_MS="$(prompt_value "CENTRAL_TTS_RETRY_BACKOFF_MS" "350")"

print_step "Writing ${CLIENT_ENV_PATH}"
# The file stores the API key. Create (or empty) it under a restrictive umask and
# force mode 600 BEFORE the secret is written, so it is never readable by other
# users - a chmod issued only after the write leaves a short exposure window.
(umask 077 && : > "${CLIENT_ENV_PATH}")
chmod 600 "${CLIENT_ENV_PATH}"
cat > "${CLIENT_ENV_PATH}" <<EOF
CENTRAL_TTS_BASE_URL=$(env_quote "${BASE_URL}")
CENTRAL_TTS_API_KEY=$(env_quote "${API_KEY}")
SMOKE_TEXT=$(env_quote "${SMOKE_TEXT}")
CENTRAL_TTS_TARGET=$(env_quote "${DEFAULT_TARGET}")
CENTRAL_TTS_TIMEOUT_SEC=$(env_quote "${DEFAULT_TIMEOUT_SEC}")
CENTRAL_TTS_RETRIES=$(env_quote "${DEFAULT_RETRIES}")
CENTRAL_TTS_RETRY_BACKOFF_MS=$(env_quote "${DEFAULT_BACKOFF_MS}")
CENTRAL_TTS_FFMPEG_TIMEOUT_SEC=$(env_quote "${DEFAULT_TIMEOUT_SEC}")
CENTRAL_TTS_USE_SYSTEM_PROXY=
# Optional hints for OpenClaw 2026.4.25+ flow
CENTRAL_TTS_VOICE=
CENTRAL_TTS_MODEL=
CENTRAL_TTS_PERSONA=
CENTRAL_TTS_SESSION_HINTS_JSON=
EOF
|
|
74
|
+
|
|
75
|
+
print_step "Installing runtime scripts"
mkdir -p "${OPENCLAW_HOME}/bin"
cp "${PROXY_SCRIPT_SOURCE}" "${PROXY_SCRIPT_TARGET}"
cp "${RUNTIME_SCRIPT_SOURCE}" "${RUNTIME_SCRIPT_TARGET}"
chmod +x "${PROXY_SCRIPT_TARGET}"
chmod +x "${RUNTIME_SCRIPT_TARGET}"

print_step "Configuring OpenClaw TTS provider"
# Patch messages.tts in openclaw.json: register and select the tts-local-cli
# provider, keep all other providers, raise maxTextLength to at least 1500 and
# drop the legacy audioAsVoice / textLimit keys.
python3 - "${OPENCLAW_HOME}" "${PROXY_SCRIPT_TARGET}" "${RUNTIME_SCRIPT_TARGET}" <<'PY'
import json
import pathlib
import sys

openclaw_home = pathlib.Path(sys.argv[1])
proxy_script = sys.argv[2]
runtime_script = sys.argv[3]
config_path = openclaw_home / "openclaw.json"


def fail(message):
    """Print an ERROR line to stderr and stop the installer step."""
    print(f"ERROR: {message}", file=sys.stderr)
    sys.exit(1)


def child_object(parent, key, dotted):
    """Return parent[key], creating {} if absent; refuse non-object values."""
    node = parent.setdefault(key, {})
    if not isinstance(node, dict):
        fail(f"{dotted} in {config_path} must be a JSON object")
    return node


if not config_path.exists():
    fail(f"{config_path} not found. Run OpenClaw at least once first.")

# Read as UTF-8 explicitly: the locale default may be ASCII (LANG=C, services).
try:
    data = json.loads(config_path.read_text(encoding="utf-8"))
except ValueError as exc:  # JSONDecodeError and UnicodeDecodeError
    fail(f"cannot parse {config_path}: {exc}")
if not isinstance(data, dict):
    fail(f"top level of {config_path} must be a JSON object")

messages = child_object(data, "messages", "messages")
tts = child_object(messages, "tts", "messages.tts")
providers = child_object(tts, "providers", "messages.tts.providers")

providers["tts-local-cli"] = {
    "enabled": True,
    "command": proxy_script,
    "args": ["{{Text}}", "{{OutputPath}}"],
    "outputFormat": "opus",
    "timeoutMs": 120000,
    "env": {
        "QWEN_TTS_RUNTIME_SCRIPT": runtime_script,
        "QWEN_TTS_CLIENT_ENV": str(openclaw_home / "qwen_tts_client.env"),
    },
}

tts["enabled"] = True
tts["provider"] = "tts-local-cli"
tts.setdefault("auto", "off")
tts.setdefault("persona", "")
tts.setdefault("personas", {})
try:
    _cur_max = int(tts.get("maxTextLength", 0))
except (TypeError, ValueError):
    _cur_max = 0
# OpenClaw UI defaults to 1500; old installers left 600 and caused "max 600" with longer /tts text.
tts["maxTextLength"] = max(_cur_max, 1500)
tts.pop("audioAsVoice", None)
tts.pop("textLimit", None)

# Serialize and encode BEFORE opening the file: write_text() truncates first, so
# an encoding error under a non-UTF-8 locale would leave an empty config behind.
payload = (json.dumps(data, ensure_ascii=False, indent=2) + "\n").encode("utf-8")
config_path.write_bytes(payload)
print(f"updated {config_path}")
PY
|
|
131
|
+
|
|
132
|
+
print_step "Normalizing local TTS preferences"
# settings/tts.json is moved aside (renamed to a timestamped backup) when it is
# unreadable / not a JSON object, or when its "tts" object has no "providers"
# key; otherwise it is left untouched.
python3 - "${OPENCLAW_HOME}" <<'PY'
import json
import pathlib
import sys
import time

openclaw_home = pathlib.Path(sys.argv[1])
prefs_path = openclaw_home / "settings" / "tts.json"
if not prefs_path.exists():
    print("no local tts prefs found")
    raise SystemExit(0)


def move_aside(reason):
    """Rename the prefs file to tts.json.bak.<reason>.<unix-time>; return the new path."""
    backup = prefs_path.with_name(f"tts.json.bak.{reason}.{int(time.time())}")
    prefs_path.rename(backup)
    return backup


try:
    prefs = json.loads(prefs_path.read_text(encoding="utf-8"))
except (OSError, ValueError):  # unreadable, undecodable or malformed JSON
    prefs = None

# A valid JSON document that is not an object (list, null, ...) cannot carry a
# "tts" section; treat it like invalid prefs instead of crashing on .get().
if not isinstance(prefs, dict):
    print(f"moved invalid prefs to {move_aside('invalid')}")
    raise SystemExit(0)

tts = prefs.get("tts", {})
if isinstance(tts, dict) and "providers" not in tts:
    print(f"moved partial tts override prefs to {move_aside('partial-override')}")
else:
    print("local tts prefs left unchanged")
PY
|
|
161
|
+
|
|
162
|
+
print_step "Restarting gateway service (if available)"
# Detect the user unit without a `producer | grep -q` pipeline: under
# `set -o pipefail` grep -q may exit early, the producer then dies with SIGPIPE
# and the whole condition turns false even though the unit exists.
HAS_GATEWAY_UNIT=0
if command -v systemctl >/dev/null 2>&1; then
  GATEWAY_UNIT_FILES="$(systemctl --user list-unit-files || true)"
  if grep -Eq '^openclaw-gateway\.service([[:space:]]|$)' <<<"${GATEWAY_UNIT_FILES}"; then
    HAS_GATEWAY_UNIT=1
  fi
fi

if [[ "${HAS_GATEWAY_UNIT}" -eq 1 ]]; then
  # Best effort: a failed restart must not abort the installer.
  systemctl --user restart openclaw-gateway.service || true
  systemctl --user is-active openclaw-gateway.service || true
elif command -v launchctl >/dev/null 2>&1; then
  launchctl kickstart -k "gui/$(id -u)/ai.openclaw.gateway" || true
fi
|
|
169
|
+
|
|
170
|
+
if ! command -v ffmpeg >/dev/null 2>&1; then
  echo "WARN: ffmpeg not in PATH. If the TTS server returns WAV/MP3 instead of Opus-in-Ogg, install ffmpeg (e.g. apt install ffmpeg)." >&2
fi

print_step "Running smoke test"
# Write into a private, freshly created directory: a fixed name in shared /tmp can
# already exist and belong to another user, which makes the smoke test fail.
SMOKE_DIR="$(mktemp -d "${TMPDIR:-/tmp}/qwen-tts-client-smoke.XXXXXX")"
TMP_OUT="${SMOKE_DIR}/qwen-tts-client-smoke.ogg"
QWEN_TTS_CLIENT_ENV="${CLIENT_ENV_PATH}" "${PROXY_SCRIPT_TARGET}" "${SMOKE_TEXT}" "${TMP_OUT}"
ls -lh "${TMP_OUT}"
# `file` is optional tooling; its absence must not fail the installer.
file "${TMP_OUT}" || true

echo
echo "Done."
echo "Client env: ${CLIENT_ENV_PATH}"
echo "Proxy script: ${PROXY_SCRIPT_TARGET}"
echo "Next: /tts status"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Thin launcher: validates <text> <output_path>, locates qwen_tts_runtime.py
# (QWEN_TTS_RUNTIME_SCRIPT or the directory of this script, symlinks resolved)
# and replaces itself with `python3 <runtime> <text> <output_path>`.
set -euo pipefail

TEXT="${1:-}"
OUT="${2:-}"

if [[ -z "${TEXT}" || -z "${OUT}" ]]; then
  echo "usage: qwen_tts_proxy_opus.sh <text> <output_path>" >&2
  exit 2
fi

# Print the absolute directory of the file $1 points to, following symlinks;
# relative link targets are resolved against the directory holding the link.
resolve_script_dir() {
  local source_path="$1" link_dir
  while [[ -L "${source_path}" ]]; do
    link_dir="$(cd "$(dirname "${source_path}")" && pwd)"
    source_path="$(readlink "${source_path}")"
    if [[ "${source_path}" != /* ]]; then
      source_path="${link_dir}/${source_path}"
    fi
  done
  cd "$(dirname "${source_path}")" && pwd
}

SCRIPT_DIR="$(resolve_script_dir "${BASH_SOURCE[0]}")"
RUNTIME_SCRIPT="${QWEN_TTS_RUNTIME_SCRIPT:-${SCRIPT_DIR}/qwen_tts_runtime.py}"

if [[ ! -f "${RUNTIME_SCRIPT}" ]]; then
  echo "runtime script not found: ${RUNTIME_SCRIPT}" >&2
  exit 1
fi

exec python3 "${RUNTIME_SCRIPT}" "${TEXT}" "${OUT}"
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import socket
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
import tempfile
|
|
9
|
+
import time
|
|
10
|
+
import urllib.error
|
|
11
|
+
import urllib.request
|
|
12
|
+
import uuid
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _normalize_text(value: str) -> str:
    """Return *value* with anything that cannot be encoded as UTF-8 replaced.

    The string is round-tripped through UTF-8 with ``errors="replace"``, so
    unencodable code points (e.g. lone surrogates) become ``?`` instead of
    raising when the text is encoded later.
    """
    return value.encode("utf-8", errors="replace").decode("utf-8", errors="replace")


def _load_env_file(path: Path) -> dict[str, str]:
    """Parse a ``KEY=value`` env file into a dict.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
    Values that start with a quote are decoded with shell quoting rules, which
    is the inverse of the ``shlex.quote`` call the installer uses when it
    writes the file (so ``'it'"'"'s'`` is read back as ``it's``). Every other
    value keeps the legacy treatment: surrounding quote characters stripped.

    Args:
        path: Location of the env file.

    Returns:
        Mapping of variable name to normalized value; empty when the file
        does not exist.
    """
    import shlex  # function-scope import keeps the module import block untouched

    data: dict[str, str] = {}
    if not path.exists():
        return data
    # Stay resilient to mixed encodings (undecodable bytes become U+FFFD) and
    # drop a leading UTF-8 BOM so it cannot become part of the first key.
    content = path.read_bytes().decode("utf-8-sig", errors="replace")
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        value = value.strip()
        cleaned = value.strip("'").strip('"')  # legacy fallback
        if value[:1] in ("'", '"'):
            try:
                tokens = shlex.split(value)
            except ValueError:  # unbalanced quotes: keep the legacy result
                tokens = []
            # Only trust the shell parse when it yields exactly one word.
            if len(tokens) == 1:
                cleaned = tokens[0]
        data[key.strip()] = _normalize_text(cleaned)
    return data
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _env(name: str, default: str = "") -> str:
    """Return environment variable *name* (or *default*), stripped and UTF-8-normalized."""
    raw_value = os.environ.get(name, default)
    trimmed = raw_value.strip()
    return _normalize_text(trimmed)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _positive_int(value: str, default: int) -> int:
    """Parse *value* as an integer, falling back to *default*.

    *default* is returned when *value* is not a valid integer literal or when
    the parsed number is zero or negative.
    """
    try:
        parsed = int(value)
    except ValueError:
        return default
    if parsed <= 0:
        return default
    return parsed
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _truthy_env(name: str) -> bool:
    """Return True when env var *name* is one of 1/true/yes/on (case-insensitive)."""
    flag = _env(name, "")
    return flag.lower() in ("1", "true", "yes", "on")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _build_url_opener() -> urllib.request.OpenerDirector:
    """
    Build the urllib opener used for TTS requests.

    Gateway processes often inherit HTTP_PROXY/HTTPS_PROXY and urllib honors them by
    default, which breaks private / Tailscale TTS URLs. An empty ProxyHandler is
    installed to bypass system proxies unless CENTRAL_TTS_USE_SYSTEM_PROXY is truthy.
    """
    extra_handlers = []
    if not _truthy_env("CENTRAL_TTS_USE_SYSTEM_PROXY"):
        extra_handlers.append(urllib.request.ProxyHandler({}))
    return urllib.request.build_opener(*extra_handlers)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _write_atomic(path: Path, payload: bytes) -> None:
    """Write *payload* to *path* through a temp file in the same directory.

    The parent directory is created if needed. The data goes to a
    ``.qwen-tts-*`` temp file next to the target and is then moved over *path*
    with ``os.replace``, so readers never see a partially written file.

    If writing or replacing fails, the temp file is removed before the
    exception propagates (previously it was left behind in the output
    directory).

    Args:
        path: Final destination.
        payload: Bytes to store.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    temp_name = None
    try:
        with tempfile.NamedTemporaryFile(
            dir=str(path.parent), prefix=".qwen-tts-", delete=False
        ) as temp_file:
            temp_name = temp_file.name
            temp_file.write(payload)
        os.replace(temp_name, str(path))
    except BaseException:
        # Best-effort cleanup; the original error is what the caller must see.
        if temp_name is not None:
            try:
                os.unlink(temp_name)
            except OSError:
                pass
        raise
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _content_type_base(headers) -> str:
    """Return the lower-cased media type from ``Content-Type``, without parameters.

    *headers* only needs a ``.get(name)`` method; a missing or empty header
    yields ``""``.
    """
    header_value = headers.get("Content-Type") or ""
    media_type, _sep, _params = header_value.strip().partition(";")
    return media_type.strip().lower()
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _is_opus_in_ogg(content: bytes) -> bool:
    """Return True if *content* starts with ``OggS`` and has ``OpusHead`` in its first 128 KiB."""
    if not content.startswith(b"OggS"):
        return False
    # Bounded search: the marker must lie entirely within the first 131072 bytes.
    return content.find(b"OpusHead", 0, 131072) != -1
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _is_riff_wave(content: bytes) -> bool:
    """Return True if *content* carries a RIFF header whose form type is ``WAVE``."""
    if len(content) < 12:
        return False
    return content.startswith(b"RIFF") and content[8:12] == b"WAVE"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _looks_json_error(content: bytes) -> bool:
    """Return True if, ignoring leading whitespace, *content* begins with ``{``."""
    return content.lstrip().startswith(b"{")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _http_audio_acceptable(content_type: str, content: bytes) -> bool:
    """Decide whether an HTTP response body can be treated as audio.

    JSON and HTML content types are always rejected. ``audio/*`` and the
    generic binary types (including an absent content type, ``""``) are
    accepted only when the body is non-empty and does not look like a JSON
    object. Any other content type is rejected.
    """
    if content_type.startswith(("application/json", "text/html")):
        return False
    generic_binary = ("application/ogg", "application/octet-stream", "binary/octet-stream", "")
    if not (content_type.startswith("audio/") or content_type in generic_binary):
        return False
    return bool(content) and not _looks_json_error(content)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _ffmpeg_to_opus_file(payload: bytes, out_path: Path, timeout_sec: int) -> None:
    """Transcode *payload* to Opus with ffmpeg and move the result to *out_path*.

    The audio is fed to ffmpeg on stdin and encoded with ``libopus`` at 64k
    into a hidden scratch file next to *out_path*, which then replaces
    *out_path*.  The scratch file is removed if it is still present afterwards.

    Raises:
        RuntimeError: if ffmpeg is not on PATH, exits non-zero, or produces a
            missing or empty output file.
    """
    ffmpeg_bin = shutil.which("ffmpeg")
    if not ffmpeg_bin:
        raise RuntimeError(
            "TTS response is not Opus-in-Ogg; install `ffmpeg` to transcode (e.g. apt install ffmpeg)."
        )

    out_path.parent.mkdir(parents=True, exist_ok=True)
    scratch = out_path.with_name(f".qwen-tts-ffmpeg-{uuid.uuid4().hex[:10]}.opus")
    command = [
        ffmpeg_bin,
        "-hide_banner",
        "-loglevel", "error",
        "-y",
        "-i", "pipe:0",
        "-c:a", "libopus",
        "-b:a", "64k",
        str(scratch),
    ]
    try:
        # The timeout is never allowed to drop below five seconds.
        result = subprocess.run(
            command,
            input=payload,
            capture_output=True,
            timeout=max(timeout_sec, 5),
        )
        if result.returncode != 0:
            stderr_text = (result.stderr or b"").decode("utf-8", errors="replace").strip()
            raise RuntimeError(f"ffmpeg transcode failed (exit {result.returncode}): {stderr_text[:800]}")
        if not scratch.exists() or scratch.stat().st_size == 0:
            raise RuntimeError("ffmpeg produced empty output")
        os.replace(str(scratch), str(out_path))
    finally:
        # Best-effort cleanup; after a successful replace the scratch file is gone.
        if scratch.exists():
            try:
                scratch.unlink()
            except OSError:
                pass
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _ensure_output_audio(content: bytes, content_type: str, out_path: Path, transcode_timeout: int) -> None:
    """Write *content* to *out_path*, transcoding to Opus when the extension demands it.

    OpenClaw tts-local-cli picks the format from the OUTPUT FILE EXTENSION only.
    For voice-note it expects real Opus-in-Ogg at *.opus; writing WAV bytes
    there causes provider_error.

    For any extension other than ``.opus`` / ``.ogg`` the bytes are written
    unchanged.  For those two extensions, Opus-in-Ogg is written unchanged and
    every other recognised audio payload goes through ffmpeg.

    Raises:
        RuntimeError: if the body looks like JSON, or if neither the bytes nor
            the content type identify it as audio.
    """
    needs_opus = out_path.suffix.lower() in (".opus", ".ogg")
    if not needs_opus or _is_opus_in_ogg(content):
        _write_atomic(out_path, content)
        return

    if _looks_json_error(content) and not _is_riff_wave(content):
        preview = content[:800].decode("utf-8", errors="replace")
        raise RuntimeError(f"server returned JSON instead of audio: {preview}")

    wav_like = _is_riff_wave(content) or content_type in ("audio/wav", "audio/x-wav", "audio/wave")
    # MPEG audio frames start with an 11-bit sync word: 0xFF then the top three bits set.
    mp3_like = content_type in ("audio/mpeg", "audio/mp3") or (
        len(content) >= 2 and content[0:1] == b"\xff" and (content[1] & 0xE0) == 0xE0
    )
    # An Ogg container that is not Opus (that case was handled above).
    ogg_like = content[:4] == b"OggS"
    generic_audio = content_type.startswith("audio/") or content_type in (
        "application/ogg",
        "application/octet-stream",
        "binary/octet-stream",
    )

    if not (wav_like or mp3_like or ogg_like or generic_audio):
        raise RuntimeError(f"cannot map TTS payload to Opus file (content-type={content_type!r}, {len(content)} bytes)")
    _ffmpeg_to_opus_file(content, out_path, transcode_timeout)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _build_payload(text: str, request_id: str) -> dict[str, object]:
    """Assemble the JSON body for the TTS request.

    The body always carries ``text``, ``requestId``, ``target`` (from
    ``CENTRAL_TTS_TARGET``, default ``voice-note``) and ``sessionHints``.
    Session hints start from the client source, runtime and hostname and are
    extended or overridden by the JSON object in
    ``CENTRAL_TTS_SESSION_HINTS_JSON``; invalid JSON or a non-object value is
    ignored.  ``voice``, ``model`` and ``persona`` are added only when their
    environment variables are set.
    """
    session_hints: dict[str, str] = {
        "source": "qwen-tts-client",
        "runtime": "local-cli",
        "host": socket.gethostname(),
    }

    raw_hints = _env("CENTRAL_TTS_SESSION_HINTS_JSON")
    if raw_hints:
        try:
            decoded = json.loads(raw_hints)
        except json.JSONDecodeError:
            # Malformed hints are optional metadata; the request proceeds without them.
            decoded = None
        if isinstance(decoded, dict):
            session_hints.update(
                {_normalize_text(str(name)): _normalize_text(str(hint)) for name, hint in decoded.items()}
            )

    payload: dict[str, object] = {
        "text": _normalize_text(text),
        "requestId": _normalize_text(request_id),
        "target": _env("CENTRAL_TTS_TARGET", "voice-note"),
        "sessionHints": session_hints,
    }

    optional_fields = (
        ("voice", "CENTRAL_TTS_VOICE"),
        ("model", "CENTRAL_TTS_MODEL"),
        ("persona", "CENTRAL_TTS_PERSONA"),
    )
    for field_name, env_name in optional_fields:
        configured = _env(env_name)
        if configured:
            payload[field_name] = configured
    return payload
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def main() -> int:
    """CLI entry point: synthesize ``argv[1]`` remotely and write audio to ``argv[2]``.

    Settings are read from the process environment, with missing keys filled
    in from the env file named by ``QWEN_TTS_CLIENT_ENV`` (default
    ``~/.openclaw/qwen_tts_client.env``).  The text is POSTed as JSON to
    ``<CENTRAL_TTS_BASE_URL>/tts`` and the response is written to the output
    path, transcoded when required.

    Failed attempts are retried up to ``CENTRAL_TTS_RETRIES`` times with a
    linearly growing delay.  HTTP 4xx responses other than 408, 425 and 429
    end the loop immediately: they report a client-side problem (bad API key,
    malformed request) that an identical retry cannot fix.

    Returns:
        0 on success, 1 when configuration is missing or every attempt
        failed, 2 when the command line is incomplete.
    """
    text = sys.argv[1] if len(sys.argv) > 1 else ""
    out_path = Path(sys.argv[2]) if len(sys.argv) > 2 else None
    default_env = Path.home() / ".openclaw" / "qwen_tts_client.env"
    env_path = Path(_env("QWEN_TTS_CLIENT_ENV", str(default_env)))
    if not text or out_path is None:
        print("usage: qwen_tts_runtime.py <text> <output_path>", file=sys.stderr)
        return 2

    # setdefault: variables already set in the process win over the env file.
    for key, value in _load_env_file(env_path).items():
        os.environ.setdefault(key, value)

    base_url = _env("CENTRAL_TTS_BASE_URL")
    api_key = _env("CENTRAL_TTS_API_KEY")
    if not base_url or not api_key:
        print(f"CENTRAL_TTS_BASE_URL or CENTRAL_TTS_API_KEY missing in {env_path}", file=sys.stderr)
        return 1

    timeout_sec = max(_positive_int(_env("CENTRAL_TTS_TIMEOUT_SEC", "120"), 120), 5)
    max_retries = _positive_int(_env("CENTRAL_TTS_RETRIES", "2"), 2)
    retry_backoff_ms = _positive_int(_env("CENTRAL_TTS_RETRY_BACKOFF_MS", "350"), 350)
    # Loop-invariant: the ffmpeg timeout falls back to the HTTP timeout.
    transcode_timeout = max(
        _positive_int(_env("CENTRAL_TTS_FFMPEG_TIMEOUT_SEC", str(timeout_sec)), timeout_sec),
        5,
    )
    request_id = f"qtts-{uuid.uuid4().hex[:12]}"
    payload = _build_payload(text=text, request_id=request_id)
    request_url = f"{base_url.rstrip('/')}/tts"
    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    url_opener = _build_url_opener()

    last_error = "unknown error"
    for attempt in range(max_retries + 1):
        started_at = time.perf_counter()
        retryable = True
        request = urllib.request.Request(
            request_url,
            method="POST",
            headers={
                "Content-Type": "application/json",
                "X-API-Key": api_key,
                "X-Request-Id": request_id,
            },
            data=body,
        )
        try:
            with url_opener.open(request, timeout=timeout_sec) as response:
                content = response.read()
                content_type = _content_type_base(response.headers)
            if not content:
                raise RuntimeError("server returned empty audio payload")
            if not _http_audio_acceptable(content_type, content):
                raise RuntimeError(f"unexpected content-type: {content_type or '(missing)'}")
            _ensure_output_audio(content, content_type, out_path, transcode_timeout)
            elapsed_ms = (time.perf_counter() - started_at) * 1000
            out_size = out_path.stat().st_size if out_path.exists() else 0
            print(
                f"[qwen-tts-runtime] request_id={request_id} attempt={attempt + 1} status=ok "
                f"latency_ms={elapsed_ms:.2f} bytes_in={len(content)} bytes_out={out_size} "
                f"content_type={content_type or '(missing)'}",
                file=sys.stderr,
            )
            return 0
        except urllib.error.HTTPError as exc:
            error_body = exc.read().decode("utf-8", errors="ignore")
            last_error = f"http {exc.code}: {error_body}"
            # 4xx means the request itself is wrong; only timeout / too-early /
            # rate-limit responses can plausibly succeed when repeated.
            retryable = exc.code in (408, 425, 429) or not 400 <= exc.code < 500
        except Exception as exc:  # pragma: no cover
            last_error = str(exc)

        elapsed_ms = (time.perf_counter() - started_at) * 1000
        print(
            f"[qwen-tts-runtime] request_id={request_id} attempt={attempt + 1} status=retry "
            f"latency_ms={elapsed_ms:.2f} error={last_error}",
            file=sys.stderr,
        )
        if not retryable or attempt >= max_retries:
            break
        # Linear backoff: the delay grows with the attempt number.
        time.sleep((retry_backoff_ms / 1000.0) * (attempt + 1))

    hint = ""
    if not _truthy_env("CENTRAL_TTS_USE_SYSTEM_PROXY"):
        le = last_error.lower()
        if any(s in le for s in ("timed out", "connection refused", "unreachable", "name or service not known", "nodename", "tunnel", "proxy")):
            hint = " | hint: TTS uses direct TCP (no HTTP_PROXY); set CENTRAL_TTS_USE_SYSTEM_PROXY=1 if you need a proxy."
    print(f"[qwen-tts-runtime] request_id={request_id} status=failed error={last_error}{hint}", file=sys.stderr)
    return 1
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    sys.exit(main())
|