@cheeko-ai/esp32-voice 2026.2.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,110 @@
1
+ import type { OpenClawConfig } from "openclaw/plugin-sdk";
2
+ import { DEFAULT_ACCOUNT_ID, normalizeAccountId } from "openclaw/plugin-sdk";
3
+ import type { Esp32VoiceAccountConfig, ResolvedEsp32VoiceAccount } from "./types.js";
4
+
5
/**
 * Collect the IDs of every ESP32 Voice account present in the config.
 *
 * The channel section itself counts as the default account when it carries a
 * `deviceToken` or a `deviceId`; every key under `accounts` contributes its
 * normalized form. Duplicates are collapsed, keeping the first occurrence.
 *
 * @param cfg - The loaded OpenClaw configuration.
 * @returns Unique account IDs (default account first when present), or an
 *   empty array when there is no `channels.esp32voice` section.
 */
export function listEsp32VoiceAccountIds(cfg: OpenClawConfig): string[] {
  const channelSection = cfg.channels?.esp32voice as Esp32VoiceAccountConfig & {
    accounts?: Record<string, Esp32VoiceAccountConfig>;
  };
  if (!channelSection) {
    return [];
  }

  // Base-level credentials make the section itself the "default" account.
  const hasBaseAccount = Boolean(channelSection.deviceToken || channelSection.deviceId);

  // Named accounts are reported under their normalized IDs.
  const namedIds = channelSection.accounts
    ? Object.keys(channelSection.accounts).map((key) => normalizeAccountId(key))
    : [];

  const ordered = hasBaseAccount ? [DEFAULT_ACCOUNT_ID, ...namedIds] : namedIds;
  return Array.from(new Set(ordered));
}
32
+
33
/**
 * Resolve the account ID to use when the caller does not name one.
 *
 * Preference order:
 *  1. `DEFAULT_ACCOUNT_ID`, whenever it is among the configured accounts
 *     (regardless of where it appears in the list);
 *  2. otherwise the first configured account;
 *  3. otherwise `DEFAULT_ACCOUNT_ID` as a fallback when nothing is configured.
 *
 * @param cfg - The loaded OpenClaw configuration.
 * @returns The default account ID.
 */
export function resolveDefaultEsp32VoiceAccountId(cfg: OpenClawConfig): string {
  const ids = listEsp32VoiceAccountIds(cfg);
  // A "default" entry defined only under `accounts` is not guaranteed to be
  // first in the list, so look for it explicitly instead of trusting ids[0].
  if (ids.includes(DEFAULT_ACCOUNT_ID)) {
    return DEFAULT_ACCOUNT_ID;
  }
  return ids[0] ?? DEFAULT_ACCOUNT_ID;
}
40
+
41
/** Return the first candidate that is non-empty after trimming, else `undefined`. */
function firstNonEmptyEsp32VoiceValue(
  ...candidates: Array<string | undefined>
): string | undefined {
  for (const candidate of candidates) {
    const trimmed = candidate?.trim();
    if (trimmed) {
      return trimmed;
    }
  }
  return undefined;
}

/**
 * Look up the per-account override block for an already-normalized account ID.
 *
 * An exact key match wins; otherwise the first key whose normalized form equals
 * `accountId` is used, so accounts written in the config with a key that is not
 * in normalized form are still found.
 */
function findEsp32VoiceAccountOverrides(
  accounts: Record<string, Esp32VoiceAccountConfig> | undefined,
  accountId: string,
): Esp32VoiceAccountConfig | undefined {
  if (!accounts) {
    return undefined;
  }
  const exact = accounts[accountId];
  if (exact) {
    return exact;
  }
  const matchingKey = Object.keys(accounts).find((key) => normalizeAccountId(key) === accountId);
  return matchingKey === undefined ? undefined : accounts[matchingKey];
}

/**
 * Resolve a fully populated account config for a given account ID.
 *
 * Layering, lowest to highest precedence for plain fields:
 * base-level `channels.esp32voice` values, then the matching `accounts` entry.
 * For the device token, STT/TTS API keys and the TTS voice ID, an environment
 * variable is consulted only when the merged config value is missing or blank:
 *  - `ESP32_VOICE_DEVICE_TOKEN`
 *  - `DEEPGRAM_API_KEY`
 *  - `ELEVENLABS_API_KEY`, then `XI_API_KEY`
 *  - `ELEVENLABS_VOICE_ID`
 *
 * @param params.cfg - The loaded OpenClaw configuration.
 * @param params.accountId - Account to resolve; defaults to `DEFAULT_ACCOUNT_ID`.
 * @returns The resolved account, including `deviceTokenSource` telling whether
 *   the token came from config, the environment, or nowhere.
 */
export function resolveEsp32VoiceAccount(params: {
  cfg: OpenClawConfig;
  accountId?: string;
}): ResolvedEsp32VoiceAccount {
  const { cfg, accountId: rawAccountId } = params;
  const accountId = normalizeAccountId(rawAccountId ?? DEFAULT_ACCOUNT_ID);

  const section = cfg.channels?.esp32voice as Esp32VoiceAccountConfig & {
    accounts?: Record<string, Esp32VoiceAccountConfig>;
  };

  const base: Esp32VoiceAccountConfig = section ?? {};
  const perAccount = findEsp32VoiceAccountOverrides(section?.accounts, accountId) ?? {};

  // Per-account fields override base fields.
  const merged: Esp32VoiceAccountConfig = { ...base, ...perAccount };

  // Device token: config first, environment second; remember which one won.
  const configToken = firstNonEmptyEsp32VoiceValue(merged.deviceToken);
  const envToken = configToken
    ? undefined
    : firstNonEmptyEsp32VoiceValue(process.env.ESP32_VOICE_DEVICE_TOKEN);
  const deviceToken = configToken ?? envToken;
  const deviceTokenSource: "config" | "env" | "none" = configToken
    ? "config"
    : envToken
      ? "env"
      : "none";

  // Provider credentials and voice: config value, then environment fallback(s).
  const sttApiKey = firstNonEmptyEsp32VoiceValue(merged.sttApiKey, process.env.DEEPGRAM_API_KEY);
  const ttsApiKey = firstNonEmptyEsp32VoiceValue(
    merged.ttsApiKey,
    process.env.ELEVENLABS_API_KEY,
    process.env.XI_API_KEY,
  );
  const ttsVoiceId = firstNonEmptyEsp32VoiceValue(
    merged.ttsVoiceId,
    process.env.ELEVENLABS_VOICE_ID,
  );

  return {
    accountId,
    name: merged.name,
    // Accounts are enabled unless explicitly switched off.
    enabled: merged.enabled !== false,
    deviceToken,
    deviceTokenSource,
    // Fall back to the account ID when no (non-empty) device ID is configured.
    deviceId: merged.deviceId || accountId,
    sttProvider: merged.sttProvider ?? "deepgram",
    sttApiKey,
    sttModel: merged.sttModel,
    ttsProvider: merged.ttsProvider ?? "elevenlabs",
    ttsApiKey,
    ttsVoiceId,
    ttsModel: merged.ttsModel,
    maxResponseLength: merged.maxResponseLength ?? 500,
    voiceOptimized: merged.voiceOptimized !== false,
    language: merged.language ?? "en",
    config: merged,
  };
}
package/src/channel.ts ADDED
@@ -0,0 +1,270 @@
1
+ import {
2
+ buildChannelConfigSchema,
3
+ DEFAULT_ACCOUNT_ID,
4
+ formatPairingApproveHint,
5
+ normalizeAccountId,
6
+ setAccountEnabledInConfigSection,
7
+ deleteAccountFromConfigSection,
8
+ applyAccountNameToChannelSection,
9
+ type ChannelPlugin,
10
+ } from "openclaw/plugin-sdk";
11
+ import { Esp32VoiceConfigSchema } from "./config-schema.js";
12
+ import {
13
+ listEsp32VoiceAccountIds,
14
+ resolveDefaultEsp32VoiceAccountId,
15
+ resolveEsp32VoiceAccount,
16
+ type ResolvedEsp32VoiceAccount,
17
+ } from "./accounts.js";
18
+ import { monitorEsp32VoiceProvider } from "./monitor.js";
19
+ import { getEsp32VoiceRuntime } from "./runtime.js";
20
+ import { esp32VoiceOnboardingAdapter } from "./onboarding.js";
21
+
22
/**
 * Static descriptive metadata for the ESP32 Voice channel (labels, docs
 * location, ordering). Spread into the plugin's `meta` field.
 */
const meta = {
  id: "esp32voice",
  label: "ESP32 Voice",
  selectionLabel: "ESP32 Voice (plugin)",
  detailLabel: "ESP32 Voice Device",
  docsPath: "/channels/esp32-voice",
  docsLabel: "esp32-voice",
  blurb: "ESP32 IoT voice device — speech-to-text-to-speech via HTTP.",
  systemImage: "waveform",
  order: 90,
  quickstartAllowFrom: false,
} as const;

/** Maximum number of message characters echoed into the outbound log line. */
const OUTBOUND_LOG_PREVIEW_CHARS = 100;

/**
 * Canonical form of a device identifier for this channel: trimmed and
 * lower-cased. Shared by allowlist, pairing and target normalization so all
 * three compare IDs the same way.
 */
const normalizeEsp32DeviceEntry = (raw: string): string => raw.trim().toLowerCase();

/**
 * Channel plugin definition for ESP32 voice devices.
 *
 * Wires account resolution, DM security policy, pairing, outbound delivery,
 * status reporting, setup and gateway start-up into the OpenClaw plugin SDK's
 * `ChannelPlugin` contract.
 */
export const esp32VoicePlugin: ChannelPlugin<ResolvedEsp32VoiceAccount> = {
  id: "esp32voice",
  meta: {
    ...meta,
  },
  capabilities: {
    chatTypes: ["direct"],
    reactions: false,
    threads: false,
    media: false,
  },
  reload: { configPrefixes: ["channels.esp32voice"] },
  configSchema: buildChannelConfigSchema(Esp32VoiceConfigSchema),
  config: {
    listAccountIds: (cfg) => listEsp32VoiceAccountIds(cfg),
    resolveAccount: (cfg, accountId) => resolveEsp32VoiceAccount({ cfg, accountId }),
    defaultAccountId: (cfg) => resolveDefaultEsp32VoiceAccountId(cfg),
    setAccountEnabled: ({ cfg, accountId, enabled }) =>
      setAccountEnabledInConfigSection({
        cfg,
        sectionKey: "esp32voice",
        accountId,
        enabled,
        allowTopLevel: true,
      }),
    deleteAccount: ({ cfg, accountId }) =>
      deleteAccountFromConfigSection({
        cfg,
        sectionKey: "esp32voice",
        accountId,
        clearBaseFields: ["deviceToken", "deviceId", "name"],
      }),
    // An account is considered configured once a device token is resolved
    // (from config or environment).
    isConfigured: (account) => Boolean(account.deviceToken),
    describeAccount: (account) => ({
      accountId: account.accountId,
      name: account.name,
      enabled: account.enabled,
      configured: Boolean(account.deviceToken),
      deviceTokenSource: account.deviceTokenSource,
      deviceId: account.deviceId,
      language: account.language,
      voiceOptimized: account.voiceOptimized,
      maxResponseLength: account.maxResponseLength,
    }),
    resolveAllowFrom: ({ cfg, accountId }) =>
      resolveEsp32VoiceAccount({ cfg, accountId }).config.allowFrom ?? [],
    // Normalize exactly like `security.normalizeEntry` / `pairing.normalizeAllowEntry`
    // so formatted allowlist entries compare equal to normalized sender IDs;
    // entries that are empty after trimming are dropped.
    formatAllowFrom: ({ allowFrom }) =>
      allowFrom.map((entry) => normalizeEsp32DeviceEntry(String(entry))).filter(Boolean),
  },
  security: {
    resolveDmPolicy: ({ cfg, accountId, account }) => {
      const resolvedAccountId = accountId ?? account.accountId ?? DEFAULT_ACCOUNT_ID;
      // Point at the per-account config path only when that account block exists;
      // otherwise the policy lives at the channel's top level.
      const useAccountPath = Boolean(cfg.channels?.esp32voice?.accounts?.[resolvedAccountId]);
      const basePath = useAccountPath
        ? `channels.esp32voice.accounts.${resolvedAccountId}.`
        : "channels.esp32voice.";
      return {
        policy: account.config.dmPolicy ?? "pairing",
        allowFrom: account.config.allowFrom ?? [],
        policyPath: `${basePath}dmPolicy`,
        allowFromPath: basePath,
        approveHint: formatPairingApproveHint("esp32voice"),
        normalizeEntry: normalizeEsp32DeviceEntry,
      };
    },
    collectWarnings: () => [],
  },
  pairing: {
    idLabel: "esp32DeviceId",
    normalizeAllowEntry: normalizeEsp32DeviceEntry,
    notifyApproval: async ({ id }) => {
      console.log(`[esp32voice] Device ${id} approved for pairing`);
    },
  },
  outbound: {
    deliveryMode: "direct",
    chunker: (text, limit) => getEsp32VoiceRuntime().channel.text.chunkMarkdownText(text, limit),
    chunkerMode: "markdown",
    textChunkLimit: 500,
    resolveTarget: ({ to }) => {
      const trimmed = to?.trim();
      if (!trimmed) {
        return {
          ok: false,
          error: new Error("Delivering to ESP32 Voice requires --to <deviceId>"),
        };
      }
      return { ok: true, to: trimmed };
    },
    sendText: async ({ to, text }) => {
      // Outbound to ESP32 is handled via the HTTP response (synchronous).
      // This is for CLI `openclaw message send --channel esp32voice` support:
      // the message is only logged here, nothing is pushed to the device.
      const preview =
        text.length > OUTBOUND_LOG_PREVIEW_CHARS
          ? `${text.slice(0, OUTBOUND_LOG_PREVIEW_CHARS)}...`
          : text;
      console.log(`[esp32voice] Outbound message to ${to}: ${preview}`);
      return {
        channel: "esp32voice" as const,
        ok: true,
        messageId: `esp32-${Date.now()}`,
      };
    },
  },
  messaging: {
    normalizeTarget: normalizeEsp32DeviceEntry,
    targetResolver: {
      // Accepts "esp32" followed by any run of letters, digits, "_" or "-".
      looksLikeId: (id) => /^esp32[a-z0-9_-]*$/i.test(id),
      hint: "<deviceId>",
    },
  },
  // Agent prompt: add voice-optimization context so the AI knows responses
  // will be converted to speech on the device.
  agentPrompt: {
    systemPromptSuffix: () =>
      [
        "",
        "## ESP32 Voice Channel Context",
        "The user is communicating via an ESP32 IoT voice device.",
        "Your responses will be converted to speech (TTS) and played through a speaker.",
        "Keep responses concise, conversational, and natural for spoken delivery.",
        "Avoid markdown formatting, code blocks, lists, and URLs — they don't translate well to speech.",
        "Aim for 1-3 sentences unless the user asks for detail.",
        "",
      ].join("\n"),
  },
  status: {
    defaultRuntime: {
      accountId: DEFAULT_ACCOUNT_ID,
      running: false,
      connected: false,
      lastConnectedAt: null,
      lastDisconnect: null,
      lastStartAt: null,
      lastStopAt: null,
      lastError: null,
    },
    // Every field falls back to an explicit "unknown/off" value when the
    // snapshot does not carry it.
    buildChannelSummary: ({ snapshot }) => ({
      configured: snapshot.configured ?? false,
      deviceTokenSource: snapshot.deviceTokenSource ?? "none",
      running: snapshot.running ?? false,
      connected: snapshot.connected ?? false,
      lastStartAt: snapshot.lastStartAt ?? null,
      lastStopAt: snapshot.lastStopAt ?? null,
      lastError: snapshot.lastError ?? null,
      deviceId: snapshot.deviceId ?? null,
    }),
    buildAccountSnapshot: ({ account, runtime }) => ({
      accountId: account.accountId,
      name: account.name,
      enabled: account.enabled,
      configured: Boolean(account.deviceToken),
      deviceTokenSource: account.deviceTokenSource,
      deviceId: account.deviceId,
      language: account.language,
      running: runtime?.running ?? false,
      connected: runtime?.connected ?? false,
      lastStartAt: runtime?.lastStartAt ?? null,
      lastStopAt: runtime?.lastStopAt ?? null,
      lastError: runtime?.lastError ?? null,
      lastInboundAt: runtime?.lastInboundAt ?? null,
      lastOutboundAt: runtime?.lastOutboundAt ?? null,
    }),
  },
  setup: {
    resolveAccountId: ({ accountId }) => normalizeAccountId(accountId),
    applyAccountName: ({ cfg, accountId, name }) =>
      applyAccountNameToChannelSection({
        cfg,
        channelKey: "esp32voice",
        accountId,
        name,
      }),
    validateInput: () => {
      // No token required — the WebSocket voice pipeline does not need a
      // pre-configured device token. Devices authenticate via OTP pairing
      // at runtime. The onboarding wizard handles full setup interactively.
      return null;
    },
    applyAccountConfig: ({ cfg, accountId, input }) => {
      const token = input.botToken ?? input.token;
      const namedConfig = applyAccountNameToChannelSection({
        cfg,
        channelKey: "esp32voice",
        accountId,
        name: input.name,
      });

      // The default account is stored directly on the channel section.
      if (accountId === DEFAULT_ACCOUNT_ID) {
        return {
          ...namedConfig,
          channels: {
            ...namedConfig.channels,
            esp32voice: {
              ...namedConfig.channels?.esp32voice,
              enabled: true,
              ...(token ? { deviceToken: token } : {}),
            },
          },
        };
      }

      // Named accounts live under `accounts.<accountId>`; the channel itself
      // is enabled as well so the account can start.
      return {
        ...namedConfig,
        channels: {
          ...namedConfig.channels,
          esp32voice: {
            ...namedConfig.channels?.esp32voice,
            enabled: true,
            accounts: {
              ...namedConfig.channels?.esp32voice?.accounts,
              [accountId]: {
                ...namedConfig.channels?.esp32voice?.accounts?.[accountId],
                enabled: true,
                ...(token ? { deviceToken: token } : {}),
              },
            },
          },
        },
      };
    },
  },
  onboarding: esp32VoiceOnboardingAdapter,
  gateway: {
    startAccount: async (ctx) => {
      const account = ctx.account;
      // Publish the static identity fields before the monitor starts reporting.
      ctx.setStatus({
        accountId: account.accountId,
        deviceId: account.deviceId,
        deviceTokenSource: account.deviceTokenSource,
      });
      ctx.log?.info(`[${account.accountId}] starting ESP32 Voice channel`);
      return monitorEsp32VoiceProvider({
        accountId: account.accountId,
        config: ctx.cfg,
        runtime: getEsp32VoiceRuntime(),
        abortSignal: ctx.abortSignal,
        statusSink: (patch) => ctx.setStatus({ accountId: ctx.accountId, ...patch }),
      });
    },
  },
};
@@ -0,0 +1,37 @@
1
+ import { DmPolicySchema } from "openclaw/plugin-sdk";
2
+ import { z } from "zod";
3
+
4
/**
 * Field definitions shared by the channel-level config and each entry under
 * `accounts`. Every field is optional; several carry a schema-level default.
 */
const esp32VoiceAccountShape = {
  name: z.string().optional(),
  enabled: z.boolean().optional(),

  // Auth
  deviceToken: z.string().optional(),
  deviceId: z.string().optional(),

  // Security
  dmPolicy: DmPolicySchema.optional().default("pairing"),
  allowFrom: z.array(z.string()).optional(),

  // STT
  sttProvider: z.string().optional().default("deepgram"),
  sttApiKey: z.string().optional(),
  sttModel: z.string().optional(),

  // TTS
  ttsProvider: z.string().optional().default("elevenlabs"),
  ttsApiKey: z.string().optional(),
  ttsVoiceId: z.string().optional(),
  ttsModel: z.string().optional(),

  // Voice pipeline
  maxResponseLength: z.number().int().positive().optional().default(500),
  voiceOptimized: z.boolean().optional().default(true),
  language: z.string().optional().default("en"),
};

/** Strict object schema for a single ESP32 Voice account (unknown keys are rejected). */
const Esp32VoiceAccountSchemaBase = z.object(esp32VoiceAccountShape).strict();
34
+
35
/** Map of account key to (optional) per-account config block. */
const Esp32VoiceAccountsSchema = z.record(z.string(), Esp32VoiceAccountSchemaBase.optional());

/**
 * Schema for the whole `channels.esp32voice` section: the account fields at
 * the top level plus an optional `accounts` map of per-account blocks.
 */
export const Esp32VoiceConfigSchema = Esp32VoiceAccountSchemaBase.extend({
  accounts: Esp32VoiceAccountsSchema.optional(),
});
@@ -0,0 +1,173 @@
1
+ /**
2
+ * Device OTP (One-Time Password) pairing system.
3
+ *
4
+ * During onboarding, the user gets a 6-digit OTP code displayed in their terminal.
5
+ * The ESP32 device sends this OTP during its initial hello handshake to pair itself.
6
+ * Once verified, the device is added to the trusted devices list in config.
7
+ */
8
+
9
+ import crypto from "node:crypto";
10
+
11
/** An OTP that has been issued and not yet redeemed or expired. */
interface PendingOtp {
  code: string;
  /** When set (and non-blank), only this device may redeem the code. */
  deviceId?: string;
  createdAt: number;
  expiresAt: number;
}

/** A device that completed pairing (or was loaded from config). */
interface PairedDevice {
  deviceId: string;
  deviceToken: string;
  /** ISO timestamp of pairing, or the literal "config" for entries loaded from config. */
  pairedAt: string;
  name?: string;
}

const OTP_LENGTH = 6;
const OTP_EXPIRY_MS = 5 * 60 * 1000; // 5 minutes

class DeviceOtpManager {
  /** Pending OTPs waiting for device activation, keyed by code. */
  private readonly pendingOtps = new Map<string, PendingOtp>();

  /** Active paired devices (in-memory, synced to config), keyed by device ID. */
  private readonly pairedDevices = new Map<string, PairedDevice>();

  /**
   * Generate a new OTP for device pairing.
   *
   * The code is `OTP_LENGTH` decimal digits with no leading zero and is
   * guaranteed not to collide with another OTP that is still pending.
   *
   * @param deviceId - Optional device the OTP is reserved for. When given,
   *   only that device can redeem the code.
   * @returns The OTP code to display to the user.
   */
  generateOtp(deviceId?: string): string {
    // Drop expired entries first so stale codes neither linger nor block reuse.
    this.cleanupExpired();

    const lowerBound = 10 ** (OTP_LENGTH - 1);
    const upperBound = 10 ** OTP_LENGTH; // exclusive, per crypto.randomInt

    // Re-draw on collision: overwriting a live OTP would silently invalidate
    // (or re-bind) a code another user is still looking at.
    let code: string;
    do {
      code = crypto.randomInt(lowerBound, upperBound).toString();
    } while (this.pendingOtps.has(code));

    const issuedAt = Date.now();
    this.pendingOtps.set(code, {
      code,
      deviceId,
      createdAt: issuedAt,
      expiresAt: issuedAt + OTP_EXPIRY_MS,
    });

    console.log(`[device-otp] Generated OTP: ${code} (expires in 5 minutes)`);
    return code;
  }

  /**
   * Verify an OTP from a device and complete pairing.
   *
   * A code is consumed only on success (or when found expired). A blank
   * device ID, or a device ID that differs from the one the OTP was reserved
   * for, is rejected without consuming the code.
   *
   * @param code - The OTP code sent by the device.
   * @param deviceId - The device's self-reported ID.
   * @returns The device token to use for future authentication, or null if invalid.
   */
  verifyOtp(code: string, deviceId: string): { deviceToken: string; paired: PairedDevice } | null {
    this.cleanupExpired();

    const pending = this.pendingOtps.get(code);
    if (!pending) {
      console.warn(`[device-otp] Invalid OTP: ${code}`);
      return null;
    }

    if (Date.now() > pending.expiresAt) {
      this.pendingOtps.delete(code);
      console.warn(`[device-otp] Expired OTP: ${code}`);
      return null;
    }

    if (typeof deviceId !== "string" || deviceId.trim() === "") {
      console.warn("[device-otp] Rejected pairing attempt without a device ID");
      return null;
    }

    // Enforce the reservation made at generation time. IDs are compared
    // trimmed and case-insensitively.
    const reservedFor = pending.deviceId?.trim().toLowerCase();
    if (reservedFor && reservedFor !== deviceId.trim().toLowerCase()) {
      console.warn(`[device-otp] OTP is reserved for a different device than "${deviceId}"`);
      return null;
    }

    // OTP is valid — generate a permanent device token
    const deviceToken = crypto.randomBytes(32).toString("hex");

    const paired: PairedDevice = {
      deviceId,
      deviceToken,
      pairedAt: new Date().toISOString(),
    };

    this.pairedDevices.set(deviceId, paired);
    this.pendingOtps.delete(code);

    console.log(`[device-otp] Device "${deviceId}" paired successfully`);
    return { deviceToken, paired };
  }

  /**
   * Check if a device is paired.
   */
  isPaired(deviceId: string): boolean {
    return this.pairedDevices.has(deviceId);
  }

  /**
   * Authenticate a device by its token.
   *
   * Empty tokens never authenticate, and comparison uses
   * `crypto.timingSafeEqual` so timing does not reveal how much of a token
   * matched.
   *
   * @returns The device ID if valid, null otherwise.
   */
  authenticateToken(token: string): string | null {
    if (typeof token !== "string" || token.length === 0) {
      return null;
    }
    const presented = Buffer.from(token, "utf8");
    for (const [deviceId, paired] of this.pairedDevices) {
      const stored = Buffer.from(paired.deviceToken, "utf8");
      // timingSafeEqual throws on length mismatch, so gate on length first.
      if (stored.length === presented.length && crypto.timingSafeEqual(stored, presented)) {
        return deviceId;
      }
    }
    return null;
  }

  /**
   * Get all paired devices.
   */
  listPairedDevices(): PairedDevice[] {
    return [...this.pairedDevices.values()];
  }

  /**
   * Remove a paired device.
   *
   * @returns true when the device was paired and has been removed.
   */
  unpairDevice(deviceId: string): boolean {
    return this.pairedDevices.delete(deviceId);
  }

  /**
   * Load paired devices from config (call on startup).
   *
   * Entries without a non-empty string token are skipped: registering them
   * would create a device that an empty token could authenticate as.
   */
  loadFromConfig(devices: Record<string, { deviceToken: string; name?: string }>): void {
    for (const [deviceId, config] of Object.entries(devices)) {
      const token: unknown = config?.deviceToken;
      if (typeof token !== "string" || token.length === 0) {
        console.warn(`[device-otp] Skipping device "${deviceId}" from config: missing device token`);
        continue;
      }
      this.pairedDevices.set(deviceId, {
        deviceId,
        deviceToken: token,
        pairedAt: "config",
        name: config.name,
      });
    }
    console.log(`[device-otp] Loaded ${this.pairedDevices.size} paired device(s) from config`);
  }

  /**
   * Export paired devices for writing to config.
   *
   * `name` is included only when it is set to a non-empty value.
   */
  exportForConfig(): Record<string, { deviceToken: string; name?: string }> {
    const result: Record<string, { deviceToken: string; name?: string }> = {};
    for (const [deviceId, paired] of this.pairedDevices) {
      result[deviceId] = {
        deviceToken: paired.deviceToken,
        ...(paired.name ? { name: paired.name } : {}),
      };
    }
    return result;
  }

  /** Remove every pending OTP whose expiry time has passed. */
  private cleanupExpired(): void {
    const now = Date.now();
    for (const [code, otp] of this.pendingOtps) {
      if (now > otp.expiresAt) {
        this.pendingOtps.delete(code);
      }
    }
  }
}

/** Global device OTP manager. */
export const deviceOtpManager = new DeviceOtpManager();