@sdamarketing/qwen-tts-client 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example ADDED
@@ -0,0 +1,17 @@
1
+ CENTRAL_TTS_BASE_URL=https://qwen-tts-118.tailf26c2b.ts.net
2
+ CENTRAL_TTS_API_KEY=change_me_same_value_as_server_tts_api_key
3
+ SMOKE_TEXT=Проверка подключения Qwen TTS Client к удаленному серверу.
4
+ CENTRAL_TTS_TARGET=voice-note
5
+ CENTRAL_TTS_TIMEOUT_SEC=120
6
+ CENTRAL_TTS_RETRIES=2
7
+ CENTRAL_TTS_RETRY_BACKOFF_MS=350
8
+ # When the server returns WAV/MP3/etc. but OpenClaw expects *.opus, runtime transcodes via ffmpeg:
9
+ CENTRAL_TTS_FFMPEG_TIMEOUT_SEC=120
10
+ # Set to 1 only if CENTRAL_TTS_BASE_URL must be reached via HTTP_PROXY/HTTPS_PROXY (default: direct, no proxy).
11
+ CENTRAL_TTS_USE_SYSTEM_PROXY=
12
+
13
+ # Optional OpenClaw 2026.4.25+ hints
14
+ CENTRAL_TTS_VOICE=
15
+ CENTRAL_TTS_MODEL=
16
+ CENTRAL_TTS_PERSONA=
17
+ CENTRAL_TTS_SESSION_HINTS_JSON=
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 sdamarketing
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,100 @@
1
+ # Qwen TTS Client
2
+
3
+ Production-клиент для OpenClaw `tts-local-cli`, который проксирует синтез на удаленный `Qwen TTS Server`.
4
+
5
+ Базовый путь:
6
+
7
+ - `OpenClaw (tts-local-cli)`
8
+ - `qwen_tts_proxy_opus.sh` (тонкий launcher)
9
+ - `qwen_tts_runtime.py` (валидация/retry/diagnostics)
10
+ - `Qwen TTS Server /tts`
11
+ - возврат `OGG/Opus` или `WAV` (по настройке сервера)
12
+
13
+ ## Что делает проект
14
+
15
+ - интерактивно запрашивает endpoint и API key удаленного TTS;
16
+ - генерирует `~/.openclaw/qwen_tts_client.env` с retry/timeout/hints настройками;
17
+ - настраивает `~/.openclaw/openclaw.json` под `tts-local-cli` без удаления других providers;
18
+ - удаляет legacy-ключи (`audioAsVoice`, `textLimit`) из `messages.tts` и выставляет `maxTextLength` не ниже 1500;
19
+ - выполняет smoke-test и проверяет выходной аудиофайл.
20
+
21
+ ## Быстрый запуск
22
+
23
+ ### Установка из npm (npmjs.com)
24
+
25
+ После `npm run publish:all` пакет доступен на обоих реестрах. С npmjs.com ставится **без** `.npmrc`:
26
+
27
+ ```bash
28
+ npm install -g @sdamarketing/qwen-tts-client
29
+ qwen-tts-install
30
+ ```
31
+
32
+ ### Установка из GitHub Packages
33
+
34
+ Скопируй `.npmrc.github.example` в `~/.npmrc` и задай `GITHUB_TOKEN` (`read:packages`), либо:
35
+
36
+ ```bash
37
+ npm install -g @sdamarketing/qwen-tts-client --registry=https://npm.pkg.github.com
38
+ ```
39
+
40
+ ### Публикация в оба реестра (maintainers)
41
+
42
+ Один tarball, два реестра (имя и версия совпадают):
43
+
44
+ ```bash
45
+ # npmjs.com — нужен OTP или NPM_TOKEN с publish
46
+ npm run publish:npm
47
+
48
+ # GitHub Packages — GITHUB_TOKEN с write:packages
49
+ export NODE_AUTH_TOKEN="$GITHUB_TOKEN"
50
+ npm run publish:github
51
+
52
+ # или оба подряд
53
+ npm run publish:all
54
+ ```
55
+
56
+ На npmjs.com для scoped-пакета обязателен `--access public` (уже в скрипте `publish:npm`).
57
+
58
+ CI: при Release workflow `.github/workflows/publish.yml` публикует в npmjs и GitHub Packages **параллельно**.
59
+
60
+ **npmjs.com:** Trusted Publisher на пакете (workflow `publish.yml`, OIDC) — секрет `NPM_TOKEN` не нужен. Ошибки `403`/`EOTP` — см. [docs/02-npm-ci-token.md](docs/02-npm-ci-token.md).
61
+
62
+ Прокси для ручного smoke-test:
63
+
64
+ ```bash
65
+ qwen-tts-proxy "Проверка TTS" /tmp/qwen-tts-smoke.ogg
66
+ ```
67
+
68
+ ### Установка из git
69
+
70
+ ```bash
71
+ git clone https://github.com/sdamarketing/qwen-tts-client.git
72
+ cd qwen-tts-client
73
+ chmod +x ./scripts/install_qwen_tts_client.sh
74
+ ./scripts/install_qwen_tts_client.sh
75
+ ```
76
+
77
+ После установки проверь в чате OpenClaw:
78
+
79
+ - `/tts status`
80
+ - `/tts latest`
81
+ - `/tts chat on`
82
+ - `/tts persona off`
83
+
84
+ ## Новые возможности OpenClaw 2026.4.25+
85
+
86
+ Клиент совместим с расширенным TTS workflow:
87
+
88
+ - chat-level override: `/tts chat on|off|default`;
89
+ - ручная озвучка последнего ответа: `/tts latest`;
90
+ - persona override: `/tts persona <id>|off`;
91
+ - per-agent/per-channel overlays через OpenClaw-конфиг (`agents.list[].tts`, `channels.<channel>.accounts.<id>.tts`).
92
+
93
+ ## Структура
94
+
95
+ - `scripts/install_qwen_tts_client.sh` — интерактивный установщик;
96
+ - `scripts/qwen_tts_proxy_opus.sh` — shell launcher для Local CLI;
97
+ - `scripts/qwen_tts_runtime.py` — runtime транспорт в удаленный `/tts`;
98
+ - `deploy/openclaw/profiles/remote-api` — готовый профиль OpenClaw для remote TTS;
99
+ - `.env.example` — шаблон клиентских переменных;
100
+ - `docs/` — production runbook клиента.
@@ -0,0 +1,17 @@
1
+ # OpenClaw Profile (Remote API)
2
+
3
+ Этот проект использует один production-профиль клиента:
4
+
5
+ - `remote-api` — OpenClaw не запускает локальный TTS и получает финальный `OGG/Opus` от удаленного сервера.
6
+
7
+ ## Файлы
8
+
9
+ - `remote-api/openclaw.tts.json5` — фрагмент для `messages.tts`;
10
+ - `remote-api/openclaw-node.env.example` — пример переменных окружения gateway.
11
+
12
+ ## Принцип
13
+
14
+ - `tts-local-cli` активируется как primary provider;
15
+ - `qwen_tts_proxy_opus.sh` запускает `qwen_tts_runtime.py`;
16
+ - runtime делает HTTPS вызов в удаленный `/tts` и поддерживает retry/timeout/diagnostics;
17
+ - локальная генерация отсутствует; локальная конверсия в Opus (через `ffmpeg`) выполняется только если сервер вернул не Opus-in-Ogg.
@@ -0,0 +1,22 @@
1
+ # OpenClaw gateway runtime
2
+ OPENCLAW_GATEWAY_PORT=18789
3
+ OPENCLAW_DISABLE_BONJOUR=1
4
+ OPENCLAW_NO_RESPAWN=1
5
+ NODE_COMPILE_CACHE=/var/tmp/openclaw-compile-cache
6
+ NODE_OPTIONS=--dns-result-order=ipv4first
7
+
8
+ # Remote Qwen TTS endpoint
9
+ CENTRAL_TTS_BASE_URL=https://qwen-tts-118.tailf26c2b.ts.net
10
+ CENTRAL_TTS_API_KEY=change_me_same_value_as_server_tts_api_key
11
+ CENTRAL_TTS_TARGET=voice-note
12
+ CENTRAL_TTS_TIMEOUT_SEC=120
13
+ CENTRAL_TTS_RETRIES=2
14
+ CENTRAL_TTS_RETRY_BACKOFF_MS=350
15
+ CENTRAL_TTS_FFMPEG_TIMEOUT_SEC=120
16
+ CENTRAL_TTS_USE_SYSTEM_PROXY=
17
+
18
+ # Optional OpenClaw 2026.4.25+ hints
19
+ CENTRAL_TTS_VOICE=
20
+ CENTRAL_TTS_MODEL=
21
+ CENTRAL_TTS_PERSONA=
22
+ CENTRAL_TTS_SESSION_HINTS_JSON=
@@ -0,0 +1,25 @@
1
+ {
2
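+ // Merge this fragment into ~/.openclaw/openclaw.json. The /home/alekhm/... paths below are examples: replace them with the location of your own ~/.openclaw/bin.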
3
+ messages: {
4
+ tts: {
5
+ enabled: true,
6
+ provider: "tts-local-cli",
7
+ auto: "off",
8
+ persona: "",
9
+ personas: {},
10
+ maxTextLength: 1500,
11
+ providers: {
12
+ "tts-local-cli": {
13
+ enabled: true,
14
+ command: "/home/alekhm/.openclaw/bin/qwen_tts_proxy_opus.sh",
15
+ args: ["{{Text}}", "{{OutputPath}}"],
16
+ outputFormat: "opus",
17
+ timeoutMs: 120000,
18
+ env: {
19
+ QWEN_TTS_RUNTIME_SCRIPT: "/home/alekhm/.openclaw/bin/qwen_tts_runtime.py"
20
+ }
21
+ }
22
+ }
23
+ }
24
+ }
25
+ }
@@ -0,0 +1,65 @@
1
+ # 01. Production Setup
2
+
3
+ ## Цель
4
+
5
+ Подключить OpenClaw-клиент к удаленному `Qwen TTS Server` через `tts-local-cli`, без локальной генерации.
6
+
7
+ ## Предпосылки
8
+
9
+ - установлен и хотя бы один раз запущен OpenClaw (должен существовать `~/.openclaw/openclaw.json`);
10
+ - есть рабочий endpoint сервера `/tts`;
11
+ - есть API key сервера.
12
+
13
+ ## Установка
14
+
15
+ ```bash
16
+ cd products/qwen-tts-client
17
+ chmod +x ./scripts/install_qwen_tts_client.sh
18
+ ./scripts/install_qwen_tts_client.sh
19
+ ```
20
+
21
+ Скрипт:
22
+
23
+ - создаёт `~/.openclaw/qwen_tts_client.env`;
24
+ - устанавливает `qwen_tts_proxy_opus.sh` и `qwen_tts_runtime.py` в `~/.openclaw/bin`;
25
+ - патчит `~/.openclaw/openclaw.json` на `tts-local-cli` без удаления остальных providers;
26
+ - чистит legacy-ключи `audioAsVoice` / `textLimit`;
27
+ - выполняет smoke-test.
28
+
29
+ ## Проверка в OpenClaw
30
+
31
+ - `/tts status`
32
+ - `/tts latest`
33
+ - `/tts chat on`
34
+ - `/tts persona off`
35
+ - отправить тестовое сообщение для озвучки
36
+
37
+ Если после обновления OpenClaw видишь ошибку `Unrecognized keys: "audioAsVoice", "textLimit"`, значит, в `messages.tts` остались старые ключи. Удали их и оставь `maxTextLength`.
38
+
39
+ Если `/tts status` показывает лимит 1500, а синтез падает с **`max 600`**, в `~/.openclaw/openclaw.json` в `messages.tts` всё ещё стоит `maxTextLength: 600`. Поставь **не меньше 1500** (или перезапусти актуальный `install_qwen_tts_client.sh` — он поднимет лимит минимум до 1500, не урезая более высокий, если ты его задавал).
40
+
41
+ ## Ручной профиль (без установщика)
42
+
43
+ Если нужно применить настройки вручную:
44
+
45
+ - взять фрагмент `deploy/openclaw/profiles/remote-api/openclaw.tts.json5`;
46
+ - использовать пример env `deploy/openclaw/profiles/remote-api/openclaw-node.env.example`.
47
+
48
+ ## Архитектура
49
+
50
+ `OpenClaw` -> `tts-local-cli` -> `qwen_tts_proxy_opus.sh` -> `qwen_tts_runtime.py` -> `https://<server>/tts` -> `OGG/Opus | WAV`
51
+
52
+ OpenClaw для `outputFormat: opus` передаёт путь вида `…/speech.opus` и **считает формат по расширению**: если записать туда WAV, озвучка падает с `provider_error`. `qwen_tts_runtime.py` проверяет сигнатуру `OpusHead` в Ogg и при необходимости **транскодирует в Opus через `ffmpeg`**. На gateway/host должен быть доступен `ffmpeg`, если сервер отдаёт не Opus-in-Ogg.
53
+
54
+ Если `tts-local-cli` стабильно падает с `provider_error`, а с ноутбука `curl` до `/tts` работает: у процесса gateway часто заданы `HTTP_PROXY`/`HTTPS_PROXY`. Рантайм по умолчанию **не** использует системный прокси для `CENTRAL_TTS_BASE_URL` (Tailscale и приватные URL так не ломаются). Если TTS доступен **только** через прокси — выставь `CENTRAL_TTS_USE_SYSTEM_PROXY=1` в `qwen_tts_client.env`.
55
+
56
+ Если в логах видно **`503` / `no tunnel here`** на URL вида `*.lhr.life` / `trycloudflare.com` — временный туннель к машине с TTS **умер**; подними туннель заново или переключи `CENTRAL_TTS_BASE_URL` на постоянный хост (например Tailscale `*.ts.net`), затем перезапусти gateway.
57
+
58
+ ## Расширенный payload `/tts`
59
+
60
+ `qwen_tts_runtime.py` отправляет backward-compatible тело:
61
+
62
+ - обязательно: `text`
63
+ - опционально: `voice`, `model`, `persona`, `target`, `requestId`, `sessionHints`
64
+
65
+ Старый контракт (только `text`) продолжает работать.
@@ -0,0 +1,59 @@
1
+ # Публикация на npmjs.com из GitHub Actions
2
+
3
+ ## Ошибки в CI
4
+
5
+ | Код | Причина |
6
+ |-----|---------|
7
+ | `403` + bypass 2fa | `NPM_TOKEN` — не Automation / без bypass 2FA |
8
+ | `EOTP` | В workflow передан `NPM_TOKEN` типа **Publish** — npm требует OTP, в CI его нет |
9
+
10
+ **Рекомендуемый способ:** [npm Trusted Publishers](https://docs.npmjs.com/trusted-publishers) (OIDC). Секрет `NPM_TOKEN` для publish **не нужен**.
11
+
12
+ ## Trusted Publisher (рекомендуется)
13
+
14
+ Один раз на npmjs.com (под аккаунтом с правом publish `@sdamarketing/*`):
15
+
16
+ 1. Открой настройки пакета `@sdamarketing/qwen-tts-client` → **Publishing access** → **Trusted publishing**
17
+ (или org: https://www.npmjs.com/settings/sdamarketing/packages)
18
+ 2. **Add GitHub Actions trusted publisher**
19
+ - Repository: `sdamarketing/qwen-tts-client`
20
+ - Workflow filename: `publish.yml`
21
+ - Environment: пусто (если не используете GitHub Environment)
22
+ 3. В GitHub **удали секрет `NPM_TOKEN`** (если есть) — иначе старый токен может мешать.
23
+ 4. Re-run workflow **Publish package**.
24
+
25
+ Workflow уже настроен: `id-token: write`, `npm publish --provenance`, **без** `NODE_AUTH_TOKEN`.
26
+
27
+ Первый publish scoped-пакета иногда делают локально один раз:
28
+
29
+ ```bash
30
+ npm run publish:npm -- --otp=123456
31
+ ```
32
+
33
+ После появления пакета на npmjs — дальнейшие версии через CI + Trusted Publisher.
34
+
35
+ ## Запасной вариант: NPM_TOKEN
36
+
37
+ Если Trusted Publisher не используете:
38
+
39
+ 1. [Access Tokens](https://www.npmjs.com/settings/~/tokens) → Classic **Automation** (не Publish)
40
+ или Granular с **Bypass two-factor authentication for automation**
41
+ 2. Секрет `NPM_TOKEN` в GitHub Actions
42
+ 3. В workflow **временно** вернуть:
43
+
44
+ ```yaml
45
+ env:
46
+ NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
47
+ ```
48
+
49
+ Не смешивайте Trusted Publisher и Publish-токен в одном job — будет `EOTP` или `403`.
50
+
51
+ ## После смены версии
52
+
53
+ Если версия уже на registry — подними `version` в `package.json` перед повторным publish.
54
+
55
+ ## Локально с 2FA
56
+
57
+ ```bash
58
+ npm run publish:npm -- --otp=123456
59
+ ```
package/docs/README.md ADDED
@@ -0,0 +1,11 @@
1
+ # Qwen TTS Client Docs
2
+
3
+ Документация по отдельному production-проекту `Qwen TTS Client`.
4
+
5
+ - [01. Production Setup](01-production-setup.md)
6
+
7
+ Клиент ориентирован на OpenClaw `2026.4.25+` и учитывает:
8
+
9
+ - `messages.tts.providers` как основную схему;
10
+ - новые команды `/tts latest`, `/tts chat`, `/tts persona`;
11
+ - compatibility с per-agent/per-channel overrides без агрессивной зачистки `openclaw.json`.
package/package.json ADDED
@@ -0,0 +1,48 @@
1
+ {
2
+ "name": "@sdamarketing/qwen-tts-client",
3
+ "version": "1.0.0",
4
+ "description": "OpenClaw tts-local-cli client for remote Qwen TTS Server (OGG/Opus proxy)",
5
+ "keywords": [
6
+ "openclaw",
7
+ "tts",
8
+ "qwen",
9
+ "opus",
10
+ "voice"
11
+ ],
12
+ "homepage": "https://github.com/sdamarketing/qwen-tts-client#readme",
13
+ "bugs": {
14
+ "url": "https://github.com/sdamarketing/qwen-tts-client/issues"
15
+ },
16
+ "repository": {
17
+ "type": "git",
18
+ "url": "git+https://github.com/sdamarketing/qwen-tts-client.git"
19
+ },
20
+ "license": "MIT",
21
+ "author": "sdamarketing",
22
+ "type": "commonjs",
23
+ "files": [
24
+ "scripts/",
25
+ "deploy/",
26
+ "docs/",
27
+ ".env.example",
28
+ "README.md"
29
+ ],
30
+ "bin": {
31
+ "qwen-tts-proxy": "scripts/qwen_tts_proxy_opus.sh",
32
+ "qwen-tts-install": "scripts/install_qwen_tts_client.sh"
33
+ },
34
+ "scripts": {
35
+ "prepack": "chmod +x scripts/qwen_tts_proxy_opus.sh scripts/install_qwen_tts_client.sh",
36
+ "prepublishOnly": "npm run pack:check",
37
+ "pack:check": "npm pack --dry-run",
38
+ "publish:npm": "npm publish --access public --registry https://registry.npmjs.org",
39
+ "publish:github": "npm publish --registry https://npm.pkg.github.com",
40
+ "publish:all": "npm run publish:npm && npm run publish:github"
41
+ },
42
+ "engines": {
43
+ "node": ">=18"
44
+ },
45
+ "publishConfig": {
46
+ "access": "public"
47
+ }
48
+ }
@@ -0,0 +1,184 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
5
+ OPENCLAW_HOME="${OPENCLAW_HOME:-$HOME/.openclaw}"
6
+ CLIENT_ENV_PATH="${OPENCLAW_HOME}/qwen_tts_client.env"
7
+ PROXY_SCRIPT_SOURCE="${ROOT_DIR}/scripts/qwen_tts_proxy_opus.sh"
8
+ PROXY_SCRIPT_TARGET="${OPENCLAW_HOME}/bin/qwen_tts_proxy_opus.sh"
9
+ RUNTIME_SCRIPT_SOURCE="${ROOT_DIR}/scripts/qwen_tts_runtime.py"
10
+ RUNTIME_SCRIPT_TARGET="${OPENCLAW_HOME}/bin/qwen_tts_runtime.py"
11
+ DEFAULT_BASE_URL="https://qwen-tts-118.tailf26c2b.ts.net"
12
+
13
+ print_step() {
14
+ echo
15
+ echo "==> $1"
16
+ }
17
+
18
+ env_quote() {
19
+ python3 -c 'import shlex,sys; print(shlex.quote(sys.argv[1]))' "$1"
20
+ }
21
+
22
+ prompt_value() {
23
+ local label="$1"
24
+ local default_value="$2"
25
+ local result
26
+ read -r -p "${label} [${default_value}]: " result
27
+ if [[ -z "${result}" ]]; then
28
+ result="${default_value}"
29
+ fi
30
+ echo "${result}"
31
+ }
32
+
33
+ print_step "Qwen TTS Client setup"
34
+ echo "Root: ${ROOT_DIR}"
35
+ echo "OpenClaw home: ${OPENCLAW_HOME}"
36
+
37
+ BASE_URL="$(prompt_value "CENTRAL_TTS_BASE_URL" "${DEFAULT_BASE_URL}")"
38
+ if [[ "${BASE_URL}" != http://* && "${BASE_URL}" != https://* ]]; then
39
+ echo "ERROR: CENTRAL_TTS_BASE_URL must start with http:// or https://" >&2
40
+ exit 1
41
+ fi
42
+
43
+ read -r -s -p "CENTRAL_TTS_API_KEY: " API_KEY
44
+ echo
45
+ if [[ -z "${API_KEY}" ]]; then
46
+ echo "ERROR: CENTRAL_TTS_API_KEY cannot be empty" >&2
47
+ exit 1
48
+ fi
49
+
50
+ SMOKE_TEXT="$(prompt_value "Smoke test text" "Проверка удаленного TTS клиента.")"
51
+ DEFAULT_TARGET="$(prompt_value "CENTRAL_TTS_TARGET (voice-note|audio-file|telephony)" "voice-note")"
52
+ DEFAULT_TIMEOUT_SEC="$(prompt_value "CENTRAL_TTS_TIMEOUT_SEC" "120")"
53
+ DEFAULT_RETRIES="$(prompt_value "CENTRAL_TTS_RETRIES" "2")"
54
+ DEFAULT_BACKOFF_MS="$(prompt_value "CENTRAL_TTS_RETRY_BACKOFF_MS" "350")"
55
+
56
+ print_step "Writing ${CLIENT_ENV_PATH}"
57
+ cat > "${CLIENT_ENV_PATH}" <<EOF
58
+ CENTRAL_TTS_BASE_URL=$(env_quote "${BASE_URL}")
59
+ CENTRAL_TTS_API_KEY=$(env_quote "${API_KEY}")
60
+ SMOKE_TEXT=$(env_quote "${SMOKE_TEXT}")
61
+ CENTRAL_TTS_TARGET=$(env_quote "${DEFAULT_TARGET}")
62
+ CENTRAL_TTS_TIMEOUT_SEC=$(env_quote "${DEFAULT_TIMEOUT_SEC}")
63
+ CENTRAL_TTS_RETRIES=$(env_quote "${DEFAULT_RETRIES}")
64
+ CENTRAL_TTS_RETRY_BACKOFF_MS=$(env_quote "${DEFAULT_BACKOFF_MS}")
65
+ CENTRAL_TTS_FFMPEG_TIMEOUT_SEC=$(env_quote "${DEFAULT_TIMEOUT_SEC}")
66
+ CENTRAL_TTS_USE_SYSTEM_PROXY=
67
+ # Optional hints for OpenClaw 2026.4.25+ flow
68
+ CENTRAL_TTS_VOICE=
69
+ CENTRAL_TTS_MODEL=
70
+ CENTRAL_TTS_PERSONA=
71
+ CENTRAL_TTS_SESSION_HINTS_JSON=
72
+ EOF
73
+ chmod 600 "${CLIENT_ENV_PATH}"
74
+
75
+ print_step "Installing runtime scripts"
76
+ mkdir -p "${OPENCLAW_HOME}/bin"
77
+ cp "${PROXY_SCRIPT_SOURCE}" "${PROXY_SCRIPT_TARGET}"
78
+ cp "${RUNTIME_SCRIPT_SOURCE}" "${RUNTIME_SCRIPT_TARGET}"
79
+ chmod +x "${PROXY_SCRIPT_TARGET}"
80
+ chmod +x "${RUNTIME_SCRIPT_TARGET}"
81
+
82
+ print_step "Configuring OpenClaw TTS provider"
83
+ python3 - "${OPENCLAW_HOME}" "${PROXY_SCRIPT_TARGET}" "${RUNTIME_SCRIPT_TARGET}" <<'PY'
84
+ import json
85
+ import pathlib
86
+ import sys
87
+
88
+ openclaw_home = pathlib.Path(sys.argv[1])
89
+ proxy_script = sys.argv[2]
90
+ runtime_script = sys.argv[3]
91
+ config_path = openclaw_home / "openclaw.json"
92
+
93
+ if not config_path.exists():
94
+ print(f"ERROR: {config_path} not found. Run OpenClaw at least once first.", file=sys.stderr)
95
+ sys.exit(1)
96
+
97
+ data = json.loads(config_path.read_text())
98
+ messages = data.setdefault("messages", {})
99
+ tts = messages.setdefault("tts", {})
100
+ providers = tts.setdefault("providers", {})
101
+
102
+ providers["tts-local-cli"] = {
103
+ "enabled": True,
104
+ "command": proxy_script,
105
+ "args": ["{{Text}}", "{{OutputPath}}"],
106
+ "outputFormat": "opus",
107
+ "timeoutMs": 120000,
108
+ "env": {
109
+ "QWEN_TTS_RUNTIME_SCRIPT": runtime_script,
110
+ "QWEN_TTS_CLIENT_ENV": str(openclaw_home / "qwen_tts_client.env"),
111
+ },
112
+ }
113
+
114
+ tts["enabled"] = True
115
+ tts["provider"] = "tts-local-cli"
116
+ tts.setdefault("auto", "off")
117
+ tts.setdefault("persona", "")
118
+ tts.setdefault("personas", {})
119
+ try:
120
+ _cur_max = int(tts.get("maxTextLength", 0))
121
+ except (TypeError, ValueError):
122
+ _cur_max = 0
123
+ # OpenClaw UI defaults to 1500; old installers left 600 and caused "max 600" with longer /tts text.
124
+ tts["maxTextLength"] = max(_cur_max, 1500)
125
+ tts.pop("audioAsVoice", None)
126
+ tts.pop("textLimit", None)
127
+
128
+ config_path.write_text(json.dumps(data, ensure_ascii=False, indent=2) + "\n")
129
+ print(f"updated {config_path}")
130
+ PY
131
+
132
+ print_step "Normalizing local TTS preferences"
133
+ python3 - "${OPENCLAW_HOME}" <<'PY'
134
+ import json
135
+ import pathlib
136
+ import time
137
+
138
+ openclaw_home = pathlib.Path(__import__("sys").argv[1])
139
+ prefs_path = openclaw_home / "settings" / "tts.json"
140
+ if not prefs_path.exists():
141
+ print("no local tts prefs found")
142
+ raise SystemExit(0)
143
+
144
+ try:
145
+ prefs = json.loads(prefs_path.read_text())
146
+ except Exception:
147
+ backup = prefs_path.with_name(f"tts.json.bak.invalid.{int(time.time())}")
148
+ prefs_path.rename(backup)
149
+ print(f"moved invalid prefs to {backup}")
150
+ raise SystemExit(0)
151
+
152
+ tts = prefs.get("tts", {})
153
+ partial_override = isinstance(tts, dict) and "providers" not in tts
154
+ if partial_override:
155
+ backup = prefs_path.with_name(f"tts.json.bak.partial-override.{int(time.time())}")
156
+ prefs_path.rename(backup)
157
+ print(f"moved partial tts override prefs to {backup}")
158
+ else:
159
+ print("local tts prefs left unchanged")
160
+ PY
161
+
162
+ print_step "Restarting gateway service (if available)"
163
+ if command -v systemctl >/dev/null 2>&1 && systemctl --user list-unit-files | awk '{print $1}' | grep -qx "openclaw-gateway.service"; then
164
+ systemctl --user restart openclaw-gateway.service || true
165
+ systemctl --user is-active openclaw-gateway.service || true
166
+ elif command -v launchctl >/dev/null 2>&1; then
167
+ launchctl kickstart -k "gui/$(id -u)/ai.openclaw.gateway" || true
168
+ fi
169
+
170
+ if ! command -v ffmpeg >/dev/null 2>&1; then
171
+ echo "WARN: ffmpeg not in PATH. If the TTS server returns WAV/MP3 instead of Opus-in-Ogg, install ffmpeg (e.g. apt install ffmpeg)." >&2
172
+ fi
173
+
174
+ print_step "Running smoke test"
175
+ TMP_OUT="/tmp/qwen-tts-client-smoke.ogg"
176
+ QWEN_TTS_CLIENT_ENV="${CLIENT_ENV_PATH}" "${PROXY_SCRIPT_TARGET}" "${SMOKE_TEXT}" "${TMP_OUT}"
177
+ ls -lh "${TMP_OUT}"
178
+ file "${TMP_OUT}" || true
179
+
180
+ echo
181
+ echo "Done."
182
+ echo "Client env: ${CLIENT_ENV_PATH}"
183
+ echo "Proxy script: ${PROXY_SCRIPT_TARGET}"
184
+ echo "Next: /tts status"
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ TEXT="${1:-}"
5
+ OUT="${2:-}"
6
+
7
+ if [[ -z "${TEXT}" || -z "${OUT}" ]]; then
8
+ echo "usage: qwen_tts_proxy_opus.sh <text> <output_path>" >&2
9
+ exit 2
10
+ fi
11
+
12
+ _script_source="${BASH_SOURCE[0]}"
13
+ while [[ -L "${_script_source}" ]]; do
14
+ _link_dir="$(cd "$(dirname "${_script_source}")" && pwd)"
15
+ _script_source="$(readlink "${_script_source}")"
16
+ [[ "${_script_source}" != /* ]] && _script_source="${_link_dir}/${_script_source}"
17
+ done
18
+ SCRIPT_DIR="$(cd "$(dirname "${_script_source}")" && pwd)"
19
+ RUNTIME_SCRIPT="${QWEN_TTS_RUNTIME_SCRIPT:-${SCRIPT_DIR}/qwen_tts_runtime.py}"
20
+
21
+ if [[ ! -f "${RUNTIME_SCRIPT}" ]]; then
22
+ echo "runtime script not found: ${RUNTIME_SCRIPT}" >&2
23
+ exit 1
24
+ fi
25
+
26
+ exec python3 "${RUNTIME_SCRIPT}" "${TEXT}" "${OUT}"
@@ -0,0 +1,310 @@
1
+ #!/usr/bin/env python3
2
+ import json
3
+ import os
4
+ import shutil
5
+ import socket
6
+ import subprocess
7
+ import sys
8
+ import tempfile
9
+ import time
10
+ import urllib.error
11
+ import urllib.request
12
+ import uuid
13
+ from pathlib import Path
14
+
15
+
16
+ def _normalize_text(value: str) -> str:
17
+ return value.encode("utf-8", errors="replace").decode("utf-8", errors="replace")
18
+
19
+
20
+ def _load_env_file(path: Path) -> dict[str, str]:
21
+ data: dict[str, str] = {}
22
+ if not path.exists():
23
+ return data
24
+ raw = path.read_bytes()
25
+ try:
26
+ content = raw.decode("utf-8")
27
+ except UnicodeDecodeError:
28
+ # Keep runtime resilient on hosts where env was edited with mixed encodings.
29
+ content = raw.decode("utf-8", errors="replace")
30
+ for raw_line in content.splitlines():
31
+ line = raw_line.strip()
32
+ if not line or line.startswith("#") or "=" not in line:
33
+ continue
34
+ key, value = line.split("=", 1)
35
+ cleaned = value.strip().strip("'").strip('"')
36
+ data[key.strip()] = _normalize_text(cleaned)
37
+ return data
38
+
39
+
40
def _env(name: str, default: str = "") -> str:
    """Read environment variable *name*, trimmed and UTF-8 normalized.

    *default* is used (and trimmed the same way) when the variable is unset.
    """
    raw_value = os.environ.get(name, default)
    return _normalize_text(raw_value.strip())
42
+
43
+
44
+ def _positive_int(value: str, default: int) -> int:
45
+ try:
46
+ num = int(value)
47
+ return num if num > 0 else default
48
+ except ValueError:
49
+ return default
50
+
51
+
52
def _truthy_env(name: str) -> bool:
    """Return True when environment variable *name* holds an "enabled" value.

    Accepted values, case-insensitively: ``1``, ``true``, ``yes``, ``on``.
    """
    flag = _env(name, "").lower()
    return flag in ("1", "true", "yes", "on")
54
+
55
+
56
def _build_url_opener() -> urllib.request.OpenerDirector:
    """Build the opener used for TTS requests.

    urllib picks up HTTP_PROXY/HTTPS_PROXY by default, which gateway
    processes often inherit and which breaks private / Tailscale TTS URLs.
    Proxies are therefore switched off with an empty ``ProxyHandler`` unless
    ``CENTRAL_TTS_USE_SYSTEM_PROXY`` is set to a truthy value.
    """
    handlers = []
    if not _truthy_env("CENTRAL_TTS_USE_SYSTEM_PROXY"):
        # An empty mapping means "no proxies", overriding the environment.
        handlers.append(urllib.request.ProxyHandler({}))
    return urllib.request.build_opener(*handlers)
64
+
65
+
66
+ def _write_atomic(path: Path, payload: bytes) -> None:
67
+ path.parent.mkdir(parents=True, exist_ok=True)
68
+ with tempfile.NamedTemporaryFile(dir=str(path.parent), prefix=".qwen-tts-", delete=False) as temp_file:
69
+ temp_file.write(payload)
70
+ temp_name = temp_file.name
71
+ os.replace(temp_name, str(path))
72
+
73
+
74
+ def _content_type_base(headers) -> str:
75
+ raw = (headers.get("Content-Type") or "").strip()
76
+ return raw.split(";", 1)[0].strip().lower()
77
+
78
+
79
+ def _is_opus_in_ogg(content: bytes) -> bool:
80
+ if len(content) < 4 or content[:4] != b"OggS":
81
+ return False
82
+ scan = min(len(content), 131072)
83
+ return b"OpusHead" in content[:scan]
84
+
85
+
86
+ def _is_riff_wave(content: bytes) -> bool:
87
+ return len(content) >= 12 and content[:4] == b"RIFF" and content[8:12] == b"WAVE"
88
+
89
+
90
+ def _looks_json_error(content: bytes) -> bool:
91
+ stripped = content.lstrip()
92
+ return bool(stripped) and stripped[:1] == b"{"
93
+
94
+
95
def _http_audio_acceptable(content_type: str, content: bytes) -> bool:
    """Decide whether an HTTP response can be treated as audio.

    JSON and HTML responses are always rejected. ``audio/*``, the generic
    binary types and a missing content type are accepted provided the body
    is non-empty and does not look like a JSON object. Any other content
    type is rejected.
    """
    if content_type.startswith(("application/json", "text/html")):
        return False
    audio_like = content_type.startswith("audio/") or content_type in (
        "application/ogg",
        "application/octet-stream",
        "binary/octet-stream",
        "",
    )
    if not audio_like:
        return False
    return bool(content) and not _looks_json_error(content)
103
+
104
+
105
def _ffmpeg_to_opus_file(payload: bytes, out_path: Path, timeout_sec: int) -> None:
    """Transcode *payload* to Opus with ffmpeg and store it at *out_path*.

    The audio is piped to ffmpeg on stdin and encoded with ``libopus`` at
    64k into a scratch file beside the destination, which is moved into
    place only after ffmpeg succeeded and produced a non-empty file.

    Args:
        payload: Encoded audio bytes in any format ffmpeg can probe.
        out_path: Destination file; its parent directory is created.
        timeout_sec: ffmpeg time limit in seconds (at least 5 is enforced).

    Raises:
        RuntimeError: If ffmpeg is not on PATH, exits non-zero, or leaves an
            empty or missing output file.
    """
    ffmpeg_bin = shutil.which("ffmpeg")
    if not ffmpeg_bin:
        raise RuntimeError(
            "TTS response is not Opus-in-Ogg; install `ffmpeg` to transcode (e.g. apt install ffmpeg)."
        )
    out_path.parent.mkdir(parents=True, exist_ok=True)
    scratch = out_path.with_name(f".qwen-tts-ffmpeg-{uuid.uuid4().hex[:10]}.opus")
    command = [
        ffmpeg_bin,
        "-hide_banner",
        "-loglevel",
        "error",
        "-y",
        "-i",
        "pipe:0",
        "-c:a",
        "libopus",
        "-b:a",
        "64k",
        str(scratch),
    ]
    try:
        result = subprocess.run(
            command,
            input=payload,
            capture_output=True,
            timeout=max(timeout_sec, 5),
        )
        if result.returncode != 0:
            stderr_text = (result.stderr or b"").decode("utf-8", errors="replace").strip()
            raise RuntimeError(f"ffmpeg transcode failed (exit {result.returncode}): {stderr_text[:800]}")
        if not scratch.exists() or scratch.stat().st_size == 0:
            raise RuntimeError("ffmpeg produced empty output")
        os.replace(str(scratch), str(out_path))
    finally:
        # After a successful move the scratch file is gone; otherwise remove
        # whatever ffmpeg left behind (best effort).
        if scratch.exists():
            try:
                scratch.unlink()
            except OSError:
                pass
145
+
146
+
147
def _ensure_output_audio(content: bytes, content_type: str, out_path: Path, transcode_timeout: int) -> None:
    """Store the TTS response at *out_path* in the format its extension implies.

    OpenClaw tts-local-cli derives the format from the OUTPUT FILE EXTENSION
    only. For voice-note it expects real Opus-in-Ogg at ``*.opus``; writing
    WAV bytes there causes provider_error. For ``.opus`` / ``.ogg``
    destinations, payloads that are not already Opus-in-Ogg are therefore
    transcoded with ffmpeg; any other extension receives the bytes as-is.

    Raises:
        RuntimeError: If the payload looks like JSON, cannot be recognised
            as audio, or transcoding fails.
    """
    if out_path.suffix.lower() not in (".opus", ".ogg"):
        _write_atomic(out_path, content)
        return

    if _is_opus_in_ogg(content):
        _write_atomic(out_path, content)
        return

    is_wave = _is_riff_wave(content)
    if _looks_json_error(content) and not is_wave:
        preview = content[:800].decode("utf-8", errors="replace")
        raise RuntimeError(f"server returned JSON instead of audio: {preview}")

    # MPEG audio frames begin with an 11-bit sync word: 0xFF, then top 3 bits set.
    has_mpeg_sync = len(content) >= 2 and content[0:1] == b"\xff" and (content[1] & 0xE0) == 0xE0
    # ``audio/*`` already covers the WAV and MP3 content types.
    transcodable = (
        is_wave
        or has_mpeg_sync
        or content[:4] == b"OggS"
        or content_type.startswith("audio/")
        or content_type in ("application/ogg", "application/octet-stream", "binary/octet-stream")
    )
    if not transcodable:
        raise RuntimeError(f"cannot map TTS payload to Opus file (content-type={content_type!r}, {len(content)} bytes)")

    _ffmpeg_to_opus_file(content, out_path, transcode_timeout)
190
+
191
+
192
def _build_payload(text: str, request_id: str) -> dict[str, object]:
    """Assemble the JSON body for the remote ``/tts`` endpoint.

    ``text``, ``requestId``, ``target`` and ``sessionHints`` are always
    present. ``voice``, ``model`` and ``persona`` are added only when the
    matching ``CENTRAL_TTS_*`` variable is non-empty. Extra session hints
    come from ``CENTRAL_TTS_SESSION_HINTS_JSON`` when it holds a JSON object;
    invalid JSON or any other JSON type is ignored.
    """
    session_hints: dict[str, str] = {
        "source": "qwen-tts-client",
        "runtime": "local-cli",
        "host": socket.gethostname(),
    }
    raw_hints = _env("CENTRAL_TTS_SESSION_HINTS_JSON")
    if raw_hints:
        try:
            decoded = json.loads(raw_hints)
        except json.JSONDecodeError:
            decoded = None
        if isinstance(decoded, dict):
            # Hints are flattened to strings; user keys may override defaults.
            session_hints.update(
                {_normalize_text(str(key)): _normalize_text(str(value)) for key, value in decoded.items()}
            )

    payload: dict[str, object] = {
        "text": _normalize_text(text),
        "requestId": _normalize_text(request_id),
        "target": _env("CENTRAL_TTS_TARGET", "voice-note"),
        "sessionHints": session_hints,
    }
    optional_fields = (
        ("voice", "CENTRAL_TTS_VOICE"),
        ("model", "CENTRAL_TTS_MODEL"),
        ("persona", "CENTRAL_TTS_PERSONA"),
    )
    for field, variable in optional_fields:
        chosen = _env(variable)
        if chosen:
            payload[field] = chosen
    return payload
221
+
222
+
223
def main() -> int:
    """Synthesize ``argv[1]`` through the remote ``/tts`` endpoint into ``argv[2]``.

    Settings are read from the process environment; keys that are not set
    there are filled in from the env file named by ``QWEN_TTS_CLIENT_ENV``
    (default ``~/.openclaw/qwen_tts_client.env``). The POST is attempted
    ``CENTRAL_TTS_RETRIES + 1`` times with a linearly growing pause between
    attempts, and one diagnostic line per attempt is written to stderr.

    Returns:
        0 on success, 1 on missing configuration or when every attempt
        failed, 2 on wrong command-line usage.
    """
    text = sys.argv[1] if len(sys.argv) > 1 else ""
    out_path = Path(sys.argv[2]) if len(sys.argv) > 2 else None
    default_env = Path.home() / ".openclaw" / "qwen_tts_client.env"
    env_path = Path(_env("QWEN_TTS_CLIENT_ENV", str(default_env)))
    if not text or out_path is None:
        print("usage: qwen_tts_runtime.py <text> <output_path>", file=sys.stderr)
        return 2

    # Values already present in the process environment win over the file.
    for key, value in _load_env_file(env_path).items():
        os.environ.setdefault(key, value)

    base_url = _env("CENTRAL_TTS_BASE_URL")
    api_key = _env("CENTRAL_TTS_API_KEY")
    if not base_url or not api_key:
        print(f"CENTRAL_TTS_BASE_URL or CENTRAL_TTS_API_KEY missing in {env_path}", file=sys.stderr)
        return 1

    timeout_sec = max(_positive_int(_env("CENTRAL_TTS_TIMEOUT_SEC", "120"), 120), 5)
    # Zero is a legitimate retry count ("try exactly once"), so this setting
    # is parsed here rather than with _positive_int(), which maps 0 to the
    # default. Invalid or negative values still fall back to 2.
    try:
        max_retries = int(_env("CENTRAL_TTS_RETRIES", "2"))
    except ValueError:
        max_retries = 2
    if max_retries < 0:
        max_retries = 2
    retry_backoff_ms = _positive_int(_env("CENTRAL_TTS_RETRY_BACKOFF_MS", "350"), 350)
    # Loop-invariant: the ffmpeg limit defaults to the HTTP timeout.
    transcode_timeout = max(
        _positive_int(_env("CENTRAL_TTS_FFMPEG_TIMEOUT_SEC", str(timeout_sec)), timeout_sec),
        5,
    )
    request_id = f"qtts-{uuid.uuid4().hex[:12]}"
    payload = _build_payload(text=text, request_id=request_id)
    request_url = f"{base_url.rstrip('/')}/tts"
    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    url_opener = _build_url_opener()

    last_error = "unknown error"
    for attempt in range(0, max_retries + 1):
        started_at = time.perf_counter()
        request = urllib.request.Request(
            request_url,
            method="POST",
            headers={
                "Content-Type": "application/json",
                "X-API-Key": api_key,
                "X-Request-Id": request_id,
            },
            data=body,
        )
        try:
            with url_opener.open(request, timeout=timeout_sec) as response:
                content = response.read()
                content_type = _content_type_base(response.headers)
            if not content:
                raise RuntimeError("server returned empty audio payload")
            if not _http_audio_acceptable(content_type, content):
                raise RuntimeError(f"unexpected content-type: {content_type or '(missing)'}")
            _ensure_output_audio(content, content_type, out_path, transcode_timeout)
            elapsed_ms = (time.perf_counter() - started_at) * 1000
            out_size = out_path.stat().st_size if out_path.exists() else 0
            print(
                f"[qwen-tts-runtime] request_id={request_id} attempt={attempt + 1} status=ok "
                f"latency_ms={elapsed_ms:.2f} bytes_in={len(content)} bytes_out={out_size} "
                f"content_type={content_type or '(missing)'}",
                file=sys.stderr,
            )
            return 0
        except urllib.error.HTTPError as exc:
            error_body = exc.read().decode("utf-8", errors="ignore")
            last_error = f"http {exc.code}: {error_body}"
        except Exception as exc:  # pragma: no cover
            # Any transport, validation or transcode failure counts as one
            # failed attempt; the message is kept for the final report.
            last_error = str(exc)

        elapsed_ms = (time.perf_counter() - started_at) * 1000
        print(
            f"[qwen-tts-runtime] request_id={request_id} attempt={attempt + 1} status=retry "
            f"latency_ms={elapsed_ms:.2f} error={last_error}",
            file=sys.stderr,
        )
        if attempt < max_retries:
            # Linear backoff: 1x, 2x, 3x ... the configured delay.
            time.sleep((retry_backoff_ms / 1000.0) * (attempt + 1))

    hint = ""
    if not _truthy_env("CENTRAL_TTS_USE_SYSTEM_PROXY"):
        le = last_error.lower()
        if any(s in le for s in ("timed out", "connection refused", "unreachable", "name or service not known", "nodename", "tunnel", "proxy")):
            hint = " | hint: TTS uses direct TCP (no HTTP_PROXY); set CENTRAL_TTS_USE_SYSTEM_PROXY=1 if you need a proxy."
    print(f"[qwen-tts-runtime] request_id={request_id} status=failed error={last_error}{hint}", file=sys.stderr)
    return 1
307
+
308
+
309
+ if __name__ == "__main__":
310
+ raise SystemExit(main())