npm - @cheeko-ai/esp32-voice - Versions diffs - 2026.2.21 - Mend

@cheeko-ai/esp32-voice 2026.2.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/NPM_PUBLISH_READINESS.md +299 -0
package/README.md +226 -0
package/TODO.md +418 -0
package/index.ts +128 -0
package/openclaw.plugin.json +9 -0
package/package.json +62 -0
package/src/accounts.ts +110 -0
package/src/channel.ts +270 -0
package/src/config-schema.ts +37 -0
package/src/device/device-otp.ts +173 -0
package/src/http-handler.ts +154 -0
package/src/monitor.ts +124 -0
package/src/onboarding.ts +575 -0
package/src/runtime.ts +14 -0
package/src/stt/deepgram.ts +215 -0
package/src/stt/stt-provider.ts +107 -0
package/src/stt/stt-registry.ts +71 -0
package/src/tts/elevenlabs.ts +215 -0
package/src/tts/tts-provider.ts +111 -0
package/src/tts/tts-registry.ts +71 -0
package/src/types.ts +136 -0
package/src/voice/voice-endpoint.ts +296 -0
package/src/voice/voice-session.ts +1041 -0

package/src/http-handler.ts ADDED Viewed

@@ -0,0 +1,154 @@
+import { createHash, timingSafeEqual } from "node:crypto";
+import type { IncomingMessage, ServerResponse } from "node:http";
+import type { OpenClawConfig } from "openclaw/plugin-sdk";
+import { resolveEsp32VoiceAccount, listEsp32VoiceAccountIds } from "./accounts.js";
+import type {
+  Esp32VoiceInboundMessage,
+  Esp32VoiceOutboundResponse,
+  ResolvedEsp32VoiceAccount,
+} from "./types.js";
+/**
+ * Authenticate an incoming ESP32 HTTP request.
+ *
+ * Checks the `Authorization: Bearer <token>` header against all configured
+ * device account tokens. Only enabled accounts with a non-empty device token
+ * are considered, and the first matching account wins.
+ *
+ * @param req - Incoming HTTP request; only its `authorization` header is read.
+ * @param cfg - Configuration used to list and resolve the ESP32 voice accounts.
+ * @returns The matching account, or `null` when the header is missing,
+ *   malformed, or matches no enabled account.
+ */
+export function authenticateEsp32Request(
+  req: IncomingMessage,
+  cfg: OpenClawConfig,
+): ResolvedEsp32VoiceAccount | null {
+  const authHeader = req.headers.authorization?.trim();
+  if (!authHeader) {
+    return null;
+  }
+  // The scheme name is matched case-insensitively; everything after it is the token.
+  const token = /^Bearer\s+(.+)$/i.exec(authHeader)?.[1]?.trim();
+  if (!token) {
+    return null;
+  }
+  // Secrets are compared as fixed-length SHA-256 digests with timingSafeEqual so
+  // the comparison time does not depend on how many leading characters match,
+  // and so tokens of different lengths can be compared without throwing.
+  const digest = (value: string): Buffer => createHash("sha256").update(value).digest();
+  const presentedDigest = digest(token);
+  // Check all configured accounts for a matching device token.
+  for (const accountId of listEsp32VoiceAccountIds(cfg)) {
+    const account = resolveEsp32VoiceAccount({ cfg, accountId });
+    const { deviceToken } = account;
+    if (
+      account.enabled &&
+      typeof deviceToken === "string" &&
+      deviceToken !== "" &&
+      timingSafeEqual(digest(deviceToken), presentedDigest)
+    ) {
+      return account;
+    }
+  }
+  return null;
+}
+/**
+ * Parse the JSON body from an ESP32 POST request.
+ *
+ * Buffers the request stream (up to 64 KB), parses it as JSON and checks that
+ * it carries a non-empty string `text` field. `deviceId`, `language` and
+ * `sessionId` are kept only when they are strings; every kept string is trimmed.
+ *
+ * @param req - Incoming HTTP request whose body is read by this function.
+ * @returns A promise that never rejects: `{ ok: true, data }` on success,
+ *   otherwise `{ ok: false, error }` with a human-readable reason.
+ */
+export async function parseEsp32RequestBody(req: IncomingMessage): Promise<{
+  ok: boolean;
+  data?: Esp32VoiceInboundMessage;
+  error?: string;
+}> {
+  const maxBodySize = 64 * 1024; // 64 KB max body
+  const trimmedString = (value: unknown): string | undefined =>
+    typeof value === "string" ? value.trim() : undefined;
+  return new Promise((resolve) => {
+    const chunks: Buffer[] = [];
+    let totalSize = 0;
+    // Several stream events can report the same failure (for example "error"
+    // followed by "close"); only the first outcome is delivered to the caller.
+    let settled = false;
+    const settle = (result: {
+      ok: boolean;
+      data?: Esp32VoiceInboundMessage;
+      error?: string;
+    }): void => {
+      if (settled) {
+        return;
+      }
+      settled = true;
+      resolve(result);
+    };
+    req.on("data", (chunk: Buffer) => {
+      if (settled) {
+        return;
+      }
+      totalSize += chunk.length;
+      if (totalSize > maxBodySize) {
+        settle({ ok: false, error: "Request body too large (max 64 KB)" });
+        // NOTE(review): destroying the request also destroys its socket, so the
+        // caller may be unable to deliver a response for this case — confirm
+        // that dropping the connection is the intended behavior.
+        req.destroy();
+        return;
+      }
+      chunks.push(chunk);
+    });
+    req.on("end", () => {
+      if (settled) {
+        return;
+      }
+      let parsed: unknown;
+      try {
+        parsed = JSON.parse(Buffer.concat(chunks).toString("utf-8"));
+      } catch {
+        settle({ ok: false, error: "Invalid JSON in request body" });
+        return;
+      }
+      // JSON.parse can yield any JSON value; only objects can carry a "text" field.
+      const fields =
+        typeof parsed === "object" && parsed !== null
+          ? (parsed as Record<string, unknown>)
+          : undefined;
+      const text = trimmedString(fields?.text);
+      if (fields === undefined || text === undefined) {
+        settle({
+          ok: false,
+          error: 'Invalid request body: expected JSON with "text" field',
+        });
+        return;
+      }
+      if (!text) {
+        settle({ ok: false, error: "Empty text field" });
+        return;
+      }
+      settle({
+        ok: true,
+        data: {
+          text,
+          deviceId: trimmedString(fields.deviceId),
+          language: trimmedString(fields.language),
+          sessionId: trimmedString(fields.sessionId),
+        },
+      });
+    });
+    req.on("error", (err) => {
+      settle({ ok: false, error: `Request error: ${err.message}` });
+    });
+    // Safety net: if the connection goes away without "end" or "error" being
+    // emitted, resolve anyway so the caller is never left awaiting forever.
+    req.on("close", () => {
+      settle({ ok: false, error: "Request closed before the body was fully received" });
+    });
+  });
+}
+/**
+ * Write `body` to the ESP32 device as a JSON HTTP response.
+ *
+ * The payload is serialized once up front so that `Content-Length` can be set
+ * to its exact size in bytes before the response is ended.
+ *
+ * @param res - Response object to write the headers and payload to.
+ * @param status - HTTP status code to send.
+ * @param body - Value serialized with `JSON.stringify` as the response body.
+ */
+export function sendEsp32Response(
+  res: ServerResponse,
+  status: number,
+  body: Esp32VoiceOutboundResponse,
+): void {
+  const payload = JSON.stringify(body);
+  const headers = {
+    "Content-Type": "application/json",
+    // Byte length rather than string length: non-ASCII text takes more bytes.
+    "Content-Length": Buffer.byteLength(payload),
+    // ESP32 may not handle CORS but include for debugging from browsers.
+    "Access-Control-Allow-Origin": "*",
+  };
+  res.writeHead(status, headers);
+  res.end(payload);
+}
+/**
+ * Truncate response text to the configured maximum length.
+ *
+ * Strategy, in order:
+ * 1. Text that already fits (or a limit that is not a finite number) is
+ *    returned unchanged.
+ * 2. If a sentence ends in the second half of the allowed window, cut right
+ *    after that sentence's punctuation (no ellipsis is added).
+ * 3. Otherwise cut at the last space when it lies beyond 70% of the limit, or
+ *    hard-cut, and append "…". One character is reserved for the ellipsis so
+ *    the result never exceeds `maxLength`.
+ *
+ * @param text - Text to shorten.
+ * @param maxLength - Maximum number of UTF-16 code units in the result.
+ * @returns The original or shortened text; `""` when `maxLength` is zero or negative.
+ */
+export function truncateForVoice(text: string, maxLength: number): string {
+  // NaN (e.g. an unset limit at runtime) or Infinity means there is no limit to enforce.
+  if (!Number.isFinite(maxLength) || text.length <= maxLength) {
+    return text;
+  }
+  const limit = Math.floor(maxLength);
+  if (limit <= 0) {
+    return "";
+  }
+  const ellipsis = "…";
+  const sentenceBreaks = [". ", "! ", "? ", ".\n", "!\n", "?\n"];
+  // Take the first `end` code units, backing off by one when the cut would
+  // separate a surrogate pair and leave a lone high surrogate in the output.
+  const sliceWholeCharacters = (end: number): string => {
+    const piece = text.slice(0, end);
+    const last = piece.charCodeAt(piece.length - 1);
+    const next = text.charCodeAt(end);
+    const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
+    return splitsPair ? piece.slice(0, -1) : piece;
+  };
+  // Try to break at the last sentence boundary within the limit.
+  const window = sliceWholeCharacters(limit);
+  const lastSentenceEnd = Math.max(...sentenceBreaks.map((mark) => window.lastIndexOf(mark)));
+  if (lastSentenceEnd > limit * 0.5) {
+    // +1 keeps the punctuation mark and drops the whitespace that follows it.
+    return window.slice(0, lastSentenceEnd + 1).trim();
+  }
+  // Fall back to word boundary, leaving room for the ellipsis.
+  const body = sliceWholeCharacters(limit - ellipsis.length);
+  const lastSpace = body.lastIndexOf(" ");
+  if (lastSpace > limit * 0.7) {
+    return body.slice(0, lastSpace).trim() + ellipsis;
+  }
+  return body.trim() + ellipsis;
+}

package/src/monitor.ts ADDED Viewed

@@ -0,0 +1,124 @@
+import type { OpenClawConfig } from "openclaw/plugin-sdk";
+import type { ResolvedEsp32VoiceAccount } from "./types.js";
+import {
+  authenticateEsp32Request,
+  parseEsp32RequestBody,
+  sendEsp32Response,
+  truncateForVoice,
+} from "./http-handler.js";
+import { getEsp32VoiceRuntime } from "./runtime.js";
+/**
+ * Arguments accepted by `monitorEsp32VoiceProvider`.
+ *
+ * Of these, the monitor function in this file reads only `statusSink` and
+ * `abortSignal`.
+ */
+type MonitorParams = {
+  /** Receives partial status objects describing the channel's lifecycle state. */
+  statusSink: (patch: Record<string, unknown>) => void;
+  /** Signal whose "abort" event marks the channel as stopped. */
+  abortSignal: AbortSignal;
+  /** Identifier of the account being monitored. */
+  accountId: string;
+  /** OpenClaw configuration object. */
+  config: OpenClawConfig;
+  /** Runtime handle, typed as whatever `getEsp32VoiceRuntime` returns. */
+  runtime: ReturnType<typeof getEsp32VoiceRuntime>;
+};
+/**
+ * Start lifecycle tracking for the ESP32 Voice channel.
+ *
+ * This function does not register the HTTP route itself; it reports the
+ * channel as running through `statusSink` and reports it as stopped once
+ * `abortSignal` aborts (immediately, if the signal has already aborted).
+ *
+ * The endpoint announced in the log is:
+ *   POST /__openclaw__/esp32-voice/message
+ *
+ * Intended flow around that endpoint:
+ * 1. ESP32 captures audio → runs STT → gets text
+ * 2. ESP32 POSTs { text, deviceId?, sessionId? } with Bearer token
+ * 3. Gateway authenticates, routes to agent, waits for response
+ * 4. Gateway responds with { ok, text, sessionId }
+ * 5. ESP32 receives text → runs TTS → plays audio
+ *
+ * @param params - Monitor arguments; only `statusSink` and `abortSignal` are used.
+ */
+export function monitorEsp32VoiceProvider(params: MonitorParams): void {
+  const { statusSink, abortSignal } = params;
+  statusSink({
+    running: true,
+    connected: true,
+    lastStartAt: new Date().toISOString(),
+  });
+  console.log(`[esp32voice] Channel ready. ESP32 devices can POST to the Gateway HTTP endpoint.`);
+  console.log(
+    `[esp32voice] Endpoint: POST /__openclaw__/esp32-voice/message`,
+  );
+  // The HTTP handler is registered via the plugin's registerHttpRoute.
+  // The monitor just tracks lifecycle state.
+  const markStopped = (): void => {
+    statusSink({
+      running: false,
+      connected: false,
+      lastStopAt: new Date().toISOString(),
+    });
+    console.log("[esp32voice] Channel stopped.");
+  };
+  // Runtime guard kept for callers that bypass the type checker.
+  if (!abortSignal) {
+    return;
+  }
+  // An already-aborted signal never fires "abort" again, so a listener alone
+  // would leave the status stuck at running.
+  if (abortSignal.aborted) {
+    markStopped();
+    return;
+  }
+  // A signal aborts at most once; `once` lets the listener be released afterwards.
+  abortSignal.addEventListener("abort", markStopped, { once: true });
+}
+/**
+ * Process an inbound ESP32 message through the agent and return the response.
+ *
+ * Steps performed here:
+ * 1. Sends the transcribed text to the OpenClaw agent via the runtime's
+ *    `channel.processInboundMessage`
+ * 2. Waits for the agent response
+ * 3. Truncates the response to `account.maxResponseLength` for voice output
+ * 4. Returns the response text for TTS on the device
+ *
+ * Errors thrown while processing are logged and turned into an
+ * `{ ok: false, error }` result instead of being rethrown.
+ *
+ * @param params - `text` is the transcribed input, `account` is the resolved
+ *   account of the caller, `deviceId` is used as the message sender, and the
+ *   optional `sessionId` is echoed back in the result.
+ * @returns `{ ok: true, text, sessionId }` on success (the caller's
+ *   `sessionId` takes precedence over the agent's), otherwise
+ *   `{ ok: false, error, sessionId }`.
+ */
+export async function processEsp32Message(params: {
+  text: string;
+  account: ResolvedEsp32VoiceAccount;
+  deviceId: string;
+  sessionId?: string;
+}): Promise<{ ok: boolean; text?: string; error?: string; sessionId?: string }> {
+  const { text, account, deviceId, sessionId } = params;
+  const runtime = getEsp32VoiceRuntime();
+  try {
+    // The text is forwarded unchanged even when account.voiceOptimized is set.
+    // NOTE(review): the voice-optimization context is expected to be added by
+    // the channel's agentPrompt adapter, which is not visible here — confirm.
+    // Route the message through the OpenClaw agent via the runtime.
+    const result = await runtime.channel.processInboundMessage({
+      channel: "esp32voice",
+      from: deviceId,
+      text,
+      accountId: account.accountId,
+    });
+    // A missing result and an empty response text are both treated as "no answer".
+    if (!result?.text) {
+      return {
+        ok: false,
+        error: "No response from agent",
+        sessionId,
+      };
+    }
+    // Truncate for voice output.
+    const responseText = truncateForVoice(result.text, account.maxResponseLength);
+    return {
+      ok: true,
+      text: responseText,
+      sessionId: sessionId ?? result.sessionId,
+    };
+  } catch (err) {
+    const errorMessage = err instanceof Error ? err.message : "Unknown error";
+    console.error(`[esp32voice] Error processing message from ${deviceId}: ${errorMessage}`);
+    return {
+      ok: false,
+      error: `Processing error: ${errorMessage}`,
+      sessionId,
+    };
+  }
+}