openclaw-stt 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,84 @@
1
+ # OpenClaw Speech Input
2
+
3
+ OpenClaw 语音输入插件 — 通过浏览器麦克风进行语音识别后,与 OpenClaw 对话。
4
+
5
+ ## 功能特性
6
+
7
+ | 功能 | 说明 |
8
+ |------|------|
9
+ | 语音识别 | 浏览器原生 Web Speech API,实时语音转文字 |
10
+ | 自动认证 | 自动注入 Gateway Token,无需手动配置 |
11
+ | 多语言支持 | 支持中文、英文等多种语言识别 |
12
+
13
+ ## 安装
14
+
15
+ 安装插件:
16
+
17
+ ```bash
18
+ openclaw plugins install openclaw-stt
19
+ ```
20
+
21
+ 启用 HTTP API 服务(Chat Completions 端点):
22
+
23
+ ```bash
24
+ openclaw config set gateway.http.endpoints.chatCompletions.enabled true
25
+ ```
26
+
27
+ 重启 Gateway:
28
+
29
+ ```bash
30
+ openclaw gateway restart
31
+ ```
32
+
33
+ ## 使用方法
34
+
35
+ ### 命令打开
36
+
37
+ 在任意聊天窗口输入:
38
+
39
+ ```
40
+ /stt
41
+ ```
42
+
43
+ ### 手动打开
44
+
45
+ 浏览器访问:`http://127.0.0.1:18789/plugin/openclaw-stt/`
46
+
47
+ ### 使用流程
48
+
49
+ 1. **授权麦克风** — 首次使用需允许浏览器访问麦克风
50
+ 2. **点击麦克风按钮或长按空格键** — 开始语音输入
51
+ 3. **说话** — 语音自动识别为文字
52
+ 4. **发送** — 再次点击按钮或按 Enter 发送
53
+ 5. **接收回复** — AI 回复以文字显示
54
+
55
+ ## 技术架构
56
+
57
+ ### HTTP 路由
58
+
59
+ | 路径 | 说明 |
60
+ |------|------|
61
+ | `/plugin/openclaw-stt/` | 语音输入 Web 界面 |
62
+ | `/plugin/openclaw-stt/chat` | 聊天代理接口(POST) |
63
+
64
+ ## 常见问题
65
+
66
+ ### 语音识别不工作
67
+
68
+ - 浏览器不支持 —— 推荐使用 Edge、Chrome 或 Safari(Chrome 的语音识别依赖 Google 在线服务,在中国大陆需通过代理访问)
69
+ - 未授权麦克风 —— 检查浏览器地址栏左侧的麦克风图标
70
+
71
+ ### 无法连接 Gateway
72
+
73
+ - 确认 Gateway 正在运行:`openclaw gateway status`
74
+
75
+ ### 401 未授权错误
76
+
77
+ - 重启 Gateway:`openclaw gateway restart`
78
+
79
+ ## 卸载
80
+
81
+ ```bash
82
+ openclaw plugins uninstall openclaw-stt
83
+ openclaw gateway restart
84
+ ```
package/index.ts ADDED
@@ -0,0 +1,206 @@
1
+ import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
2
+ import { emptyPluginConfigSchema, runPluginCommandWithTimeout } from "openclaw/plugin-sdk";
3
+ import { readFileSync } from "node:fs";
4
+ import { join } from "node:path";
5
+
6
+ // ─── Constants ───────────────────────────────────────────────────────────────
7
+
8
+ const PLUGIN_ID = "stt"; // Plugin id/name; also embedded in the chat proxy's session key. NOTE(review): openclaw.plugin.json declares id "openclaw-stt" — confirm which id is intended.
9
+ const ROUTE_PATH = "/plugin/stt/"; // Route that serves the voice-input page; the chat proxy is registered at ROUTE_PATH + "chat". NOTE(review): the README documents /plugin/openclaw-stt/ — confirm.
10
+ const CHAT_URI = "/v1/chat/completions"; // Path on the upstream server that the chat proxy forwards request bodies to.
11
+ const COMMAND_TIMEOUT_MS = 5000; // Timeout in milliseconds passed to runPluginCommandWithTimeout when launching the browser.
12
+
13
+ // ─── Platform helpers ─────────────────────────────────────────────────────────
14
+
15
/**
 * Builds the argv used to open `url` in the system default browser.
 *
 * The launcher is chosen from `process.platform`:
 * - `win32`  → `cmd /c start "" <url>`
 * - `darwin` → `open <url>`
 * - anything else → `xdg-open <url>`
 *
 * @param url - Address to open.
 * @returns The executable name followed by its arguments.
 */
function getOpenBrowserCommand(url: string): string[] {
  const platform = process.platform;

  if (platform === "win32") {
    // Going through cmd's `start` avoids PowerShell execution-policy issues.
    return ["cmd", "/c", "start", "", url];
  }

  const launcher = platform === "darwin" ? "open" : "xdg-open";
  return [launcher, url];
}
29
+
30
/**
 * Turns a configured bind value into a host that can be placed in a URL and
 * opened by a browser.
 *
 * - empty / `"loopback"` / `"0.0.0.0"` → `127.0.0.1`
 * - `"::"` (optionally bracketed) → `[::1]`
 * - any other value containing `:` is treated as an IPv6 literal and bracketed
 * - everything else is returned unchanged
 *
 * NOTE(review): if `gateway.bind` can also hold other mode keywords, they are
 * passed through verbatim here, exactly as before — confirm against the
 * Gateway configuration reference.
 */
function normalizeBindHost(bind: string | undefined): string {
  const raw = (bind ?? "").trim();
  if (raw === "" || raw === "loopback" || raw === "0.0.0.0") {
    return "127.0.0.1";
  }

  const bare = raw.replace(/^\[(.*)\]$/, "$1");
  if (bare === "::") {
    return "[::1]";
  }
  return bare.includes(":") ? `[${bare}]` : raw;
}

/**
 * Resolves the base URL (scheme, host and port, without a trailing slash)
 * under which the plugin's pages are opened in a browser.
 *
 * Resolution order:
 * 1. When `gateway.mode` is `"remote"` and `gateway.remote.url` is set, that
 *    URL wins. A `ws://` / `wss://` scheme is rewritten to `http://` /
 *    `https://` because a browser cannot navigate to a WebSocket URL, and all
 *    trailing slashes are removed.
 *    NOTE(review): this assumes the remote Gateway serves HTTP on the same
 *    host and port as its WebSocket endpoint — confirm.
 * 2. Otherwise a local URL is assembled from `gateway.tls.enabled`,
 *    `gateway.bind` and `gateway.port`, falling back to
 *    `http://127.0.0.1:18789`.
 *
 * @param api - Plugin API whose `config.gateway` section is read.
 * @returns The base URL, e.g. `http://127.0.0.1:18789`.
 */
function getGatewayFullUrl(api: OpenClawPluginApi): string {
  const gatewayCfg = api.config.gateway;

  const remoteUrl = gatewayCfg?.mode === "remote" ? gatewayCfg?.remote?.url : undefined;
  if (remoteUrl) {
    return remoteUrl
      .trim()
      .replace(/^ws(s?):\/\//i, "http$1://")
      .replace(/\/+$/, "");
  }

  const protocol = gatewayCfg?.tls?.enabled ? "https" : "http";
  const host = normalizeBindHost(gatewayCfg?.bind);

  // `||` rather than `??` on purpose: a port of 0 is not a usable address, so
  // it falls back to the default local Gateway port as well.
  const port = gatewayCfg?.port || 18789;

  return `${protocol}://${host}:${port}`;
}
51
+
52
+ // ─── Plugin definition ────────────────────────────────────────────────────────
53
+
54
/** Largest request body, in bytes, that the chat proxy buffers before answering 413. */
const MAX_CHAT_BODY_BYTES = 1024 * 1024;

/**
 * Buffers a request stream and decodes it as UTF-8 in a single step.
 *
 * Chunks are joined as bytes before decoding, so a multi-byte character that
 * happens to be split across two chunks is not corrupted.
 *
 * @param stream - The incoming request (consumed as an async iterable of chunks).
 * @param maxBytes - Maximum number of bytes to buffer.
 * @returns The decoded body, or `undefined` when more than `maxBytes` arrived.
 */
async function readBodyAsText(
  stream: AsyncIterable<unknown>,
  maxBytes: number,
): Promise<string | undefined> {
  const parts: Buffer[] = [];
  let received = 0;

  for await (const chunk of stream) {
    const part =
      typeof chunk === "string" ? Buffer.from(chunk, "utf-8") : Buffer.from(chunk as Uint8Array);
    received += part.length;
    // Past the limit the stream is still drained, but nothing more is kept in memory.
    if (received <= maxBytes) {
      parts.push(part);
    }
  }

  return received > maxBytes ? undefined : Buffer.concat(parts).toString("utf-8");
}

/**
 * Derives the origin of the server end of a connection from its socket.
 *
 * Only server-side socket properties are used; nothing comes from request
 * headers, which are controlled by the client.
 *
 * @param socket - The request's socket (may be missing).
 * @param fallbackPort - Port to use when the socket does not report one.
 * @returns An origin such as `http://127.0.0.1:18789`.
 */
function localOriginOf(
  socket: { localAddress?: string; localPort?: number; encrypted?: boolean } | undefined,
  fallbackPort: number,
): string {
  const scheme = socket?.encrypted ? "https" : "http";
  const port = socket?.localPort || fallbackPort;

  // Unwrap IPv4-mapped IPv6 addresses ("::ffff:127.0.0.1") and bracket real IPv6 literals.
  let host = (socket?.localAddress ?? "").replace(/^::ffff:/i, "") || "127.0.0.1";
  if (host.includes(":")) {
    host = `[${host}]`;
  }

  return `${scheme}://${host}:${port}`;
}

/**
 * Voice-input plugin definition.
 *
 * `register` wires up three things on the host API:
 * - the `stt` command, which tries to open the voice-input page in the local
 *   default browser and replies with a link to it;
 * - an HTTP route at `ROUTE_PATH` that serves `web.html`;
 * - an HTTP route at `ROUTE_PATH + "chat"` that forwards a POSTed JSON body to
 *   `CHAT_URI`, adding the configured gateway token and fixed session/agent
 *   headers, and streams the upstream response back.
 */
const voiceInputPlugin = {
  id: PLUGIN_ID,
  name: PLUGIN_ID,
  description: "语音输入插件 — 通过浏览器麦克风进行语音识别并与 OpenClaw 对话",
  configSchema: emptyPluginConfigSchema(),

  /**
   * Registers the command and both HTTP routes.
   *
   * @param api - Host plugin API used for configuration, logging and registration.
   */
  register(api: OpenClawPluginApi): void {
    // Read the HTML once at registration time rather than on every request.
    // NOTE(review): package.json declares "type": "module"; `__dirname` only
    // exists if the host loads this file through a CommonJS-compatible loader.
    // If it does not, the lookup throws and the stub page below is served.
    let htmlTemplate: string;
    try {
      htmlTemplate = readFileSync(join(__dirname, "web.html"), "utf-8");
    } catch (err) {
      api.logger.error(`[${api.name}] Failed to read web.html: ${String(err)}`);
      // Serve a stub so a missing asset does not prevent registration.
      htmlTemplate = `<!doctype html><html><body><p>Plugin asset missing: web.html</p></body></html>`;
    }

    // ── Command: stt ──────────────────────────────────────────────────────────
    api.registerCommand({
      name: "stt",
      description: "打开语音输入页面",
      // NOTE(review): with requireAuth disabled, any sender can make the host
      // launch a browser process — confirm this is intended.
      requireAuth: false,

      handler: async (_ctx) => {
        const webUrl = `${getGatewayFullUrl(api)}${ROUTE_PATH}`;

        api.logger.info(`[${api.name}] Opening voice-input page: ${webUrl}`);

        // Opening the browser is best-effort: failures are logged and the
        // reply below still gives the user a link to open manually.
        try {
          const result = await runPluginCommandWithTimeout({
            argv: getOpenBrowserCommand(webUrl),
            timeoutMs: COMMAND_TIMEOUT_MS,
          });
          api.logger.info(`[${api.name}] Browser open result: ${JSON.stringify(result)}`);
        } catch (err) {
          const message = err instanceof Error ? err.message : String(err);
          api.logger.warn(`[${api.name}] Could not auto-open browser: ${message}`);
        }

        return {
          text: `正在打开语音输入界面,如果长时间未响应,您也可以手动[打开语音输入界面](${ROUTE_PATH})。`,
        };
      },
    });

    // ── HTTP route: serve the voice-input UI ──────────────────────────────────
    api.registerHttpRoute({
      path: ROUTE_PATH,
      auth: "plugin",
      match: "exact",

      handler: async (_, res) => {
        res.writeHead(200, {
          "Content-Type": "text/html; charset=utf-8",
          // Allow microphone/camera access from the plugin's own origin only
          "Permissions-Policy": "microphone=(self), camera=(self)",
          // Basic clickjacking protection
          "X-Frame-Options": "SAMEORIGIN",
          // Prevent MIME-type sniffing
          "X-Content-Type-Options": "nosniff",
          // Never cache the page. (No token is written into the HTML here;
          // the gateway token is only added server-side by the chat proxy.)
          "Cache-Control": "no-store",
        });
        res.end(htmlTemplate);

        return true;
      },
    });

    // ── HTTP route: proxy chat completions (SSE) ─────────────────────────────
    // NOTE(review): this handler performs no caller check of its own and adds
    // the gateway token to the forwarded request. Confirm that `auth: "plugin"`
    // does not leave the chat endpoint open to anyone who can reach this route.
    api.registerHttpRoute({
      path: `${ROUTE_PATH}chat`,
      auth: "plugin",
      match: "exact",

      handler: async (req, res) => {
        if (req.method !== "POST") {
          res.writeHead(405).end("Method Not Allowed");
          return true;
        }

        const bodyText = await readBodyAsText(req, MAX_CHAT_BODY_BYTES);
        if (bodyText === undefined) {
          res.writeHead(413).end("Payload Too Large");
          return true;
        }

        let clientBody: unknown;
        try {
          clientBody = JSON.parse(bodyText);
        } catch {
          res.writeHead(400).end("Invalid JSON");
          return true;
        }

        const token = (api.config.gateway?.auth?.token as string | undefined) ?? "";

        // The upstream address comes from the server side of this connection,
        // never from the Host header: that header is client-controlled, and a
        // forged value would send the bearer token below to an arbitrary host.
        const targetUrl = `${localOriginOf(req.socket, api.config.gateway?.port || 18789)}${CHAT_URI}`;

        // All auth and routing headers are set here from server-side values;
        // nothing is taken from the client request.
        const headers: Record<string, string> = {
          "Content-Type": "application/json",
          "x-openclaw-session-key": `agent:main:${PLUGIN_ID}`,
          "x-openclaw-agent-id": "main",
        };
        if (token) {
          headers["Authorization"] = `Bearer ${token}`;
        }

        // Cancel the upstream request as soon as the client connection closes,
        // so an idle stream is not kept open for a client that has left.
        const upstreamAbort = new AbortController();
        res.once("close", () => upstreamAbort.abort());

        try {
          // The parsed body is re-serialised and forwarded without changing its structure.
          const upstreamRes = await fetch(targetUrl, {
            method: "POST",
            headers,
            body: JSON.stringify(clientBody),
            signal: upstreamAbort.signal,
          });

          // Pass through the upstream status code and content type.
          res.writeHead(upstreamRes.status, {
            "Content-Type": upstreamRes.headers.get("content-type") || "text/event-stream",
            "Cache-Control": "no-cache",
            Connection: "keep-alive",
          });

          if (!upstreamRes.body) {
            res.end();
            return true;
          }

          // Stream the upstream body to the client chunk by chunk.
          for await (const chunk of upstreamRes.body) {
            if (res.destroyed) break;
            res.write(chunk);
          }
          res.end();
        } catch (err) {
          // The client is already gone (this includes our own abort): nothing to send.
          if (res.destroyed) {
            return true;
          }

          const msg = err instanceof Error ? err.message : "Proxy error";
          api.logger.warn(`[${api.name}] Chat proxy request failed: ${msg}`);
          if (!res.headersSent) {
            res.writeHead(502, { "Content-Type": "application/json" });
            res.end(JSON.stringify({ error: msg }));
          } else {
            // Headers are already out, so the only way to signal failure is to drop the connection.
            res.destroy();
          }
        }

        return true;
      },
    });
  },
};
205
+
206
+ export default voiceInputPlugin;
package/openclaw.plugin.json ADDED
@@ -0,0 +1,15 @@
1
+ {
2
+ "id": "openclaw-stt",
3
+ "name": "OpenClaw Speech To Text",
4
+ "description": "OpenClaw 语音输入插件 - 通过浏览器麦克风进行语音识别后,与 OpenClaw 对话",
5
+ "version": "1.0.0",
6
+ "author": "Haoyue",
7
+ "configSchema": {
8
+ "type": "object",
9
+ "additionalProperties": false,
10
+ "properties": {}
11
+ },
12
+ "openclaw": {
13
+ "extensions": ["./index.ts"]
14
+ }
15
+ }
package/package.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "openclaw-stt",
3
+ "version": "1.0.0",
4
+ "description": "OpenClaw 语音输入插件 - 通过浏览器麦克风进行语音识别后,与 OpenClaw 对话",
5
+ "keywords": [
6
+ "openclaw",
7
+ "speech",
8
+ "STT"
9
+ ],
10
+ "license": "MIT",
11
+ "author": "Haoyue",
12
+ "files": [
13
+ "index.ts",
14
+ "web.html",
15
+ "openclaw.plugin.json",
16
+ "package.json",
17
+ "tsconfig.json"
18
+ ],
19
+ "type": "module",
20
+ "main": "index.ts",
21
+ "publishConfig": {
22
+ "access": "public",
23
+ "registry": "https://registry.npmjs.org/"
24
+ },
25
+ "devDependencies": {
26
+ "@types/node": "^25.2.0",
27
+ "typescript": "^5.3.0"
28
+ },
29
+ "peerDependencies": {
30
+ "openclaw": ">=2026.3.11"
31
+ },
32
+ "openclaw": {
33
+ "extensions": [
34
+ "./index.ts"
35
+ ],
36
+ "installDependencies": true
37
+ }
38
+ }
package/tsconfig.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "NodeNext",
5
+ "moduleResolution": "NodeNext",
6
+ "declaration": true,
7
+ "outDir": "./dist",
8
+ "rootDir": ".",
9
+ "strict": true,
10
+ "esModuleInterop": true,
11
+ "skipLibCheck": true,
12
+ "resolveJsonModule": true
13
+ },
14
+ "include": ["index.ts", "src/**/*.ts"],
15
+ "exclude": ["node_modules", "dist"]
16
+ }