npm - @madeinoz67/voice-server - Versions diffs - 0.1.3 - Mend

@madeinoz67/voice-server 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

package/.claude/commands/speckit.analyze.md +184 -0
package/.claude/commands/speckit.checklist.md +294 -0
package/.claude/commands/speckit.clarify.md +181 -0
package/.claude/commands/speckit.constitution.md +82 -0
package/.claude/commands/speckit.implement.md +135 -0
package/.claude/commands/speckit.plan.md +89 -0
package/.claude/commands/speckit.specify.md +258 -0
package/.claude/commands/speckit.tasks.md +137 -0
package/.claude/commands/speckit.taskstoissues.md +30 -0
package/.claude/settings.local.json +23 -0
package/.codanna/settings.toml +384 -0
package/.env.development +18 -0
package/.env.example +30 -0
package/.github/codeql/config.yml +13 -0
package/.github/codeql.yml +30 -0
package/.github/dependabot.yml +11 -0
package/.github/workflows/ci.yml +308 -0
package/.specify/memory/constitution.md +223 -0
package/.specify/scripts/bash/check-prerequisites.sh +166 -0
package/.specify/scripts/bash/common.sh +156 -0
package/.specify/scripts/bash/create-new-feature.sh +297 -0
package/.specify/scripts/bash/setup-plan.sh +61 -0
package/.specify/scripts/bash/update-agent-context.sh +799 -0
package/.specify/templates/agent-file-template.md +28 -0
package/.specify/templates/checklist-template.md +40 -0
package/.specify/templates/plan-template.md +106 -0
package/.specify/templates/spec-template.md +115 -0
package/.specify/templates/tasks-template.md +261 -0
package/AGENTPERSONALITIES.md +233 -0
package/ATTRIBUTION.md +70 -0
package/CHANGELOG.md +90 -0
package/CLAUDE.md +50 -0
package/Formula/madeinoz-voice-server.rb +106 -0
package/README.md +451 -0
package/bun.lock +212 -0
package/cliff.toml +67 -0
package/docs/KOKORO_VOICES.md +152 -0
package/docs/MIGRATION.md +267 -0
package/docs/VOICE_EXAMPLES.md +283 -0
package/docs/VOICE_GUIDE.md +227 -0
package/docs/VOICE_QUICK_REF.md +157 -0
package/docs/agent-voices.md +114 -0
package/docs/api.md +336 -0
package/docs/assets/voice-server-architecture.png +0 -0
package/docs/assets/voice-server-header.png +0 -0
package/docs/assets/voice-server-pack-logo.png +0 -0
package/docs/index.md +60 -0
package/eslint.config.js +42 -0
package/mkdocs.yml +55 -0
package/package.json +28 -0
package/reports/MLX_AUDIO_EVALUATION.md +302 -0
package/reports/agent/2026-02-06-20-51-mlx-audio-qwen-tts-investigation.md +613 -0
package/reports/agent/2026-02-06-Qwen3-TTS-API-Specification.md +446 -0
package/reports/agent/2026-02-07-python-backend-removal-plan.md +790 -0
package/scripts/generate-reference.ts +139 -0
package/specs/001-qwen-tts/checklists/requirements.md +50 -0
package/specs/001-qwen-tts/contracts/api.yaml +305 -0
package/specs/001-qwen-tts/data-model.md +197 -0
package/specs/001-qwen-tts/plan.md +236 -0
package/specs/001-qwen-tts/quickstart.md +306 -0
package/specs/001-qwen-tts/research.md +194 -0
package/specs/001-qwen-tts/spec.md +135 -0
package/specs/001-qwen-tts/tasks.md +305 -0
package/src/ts/constants/KOKORO_VOICES.ts +141 -0
package/src/ts/middleware/cors.ts +153 -0
package/src/ts/middleware/rate-limiter.ts +200 -0
package/src/ts/models/health.ts +45 -0
package/src/ts/models/notification.ts +69 -0
package/src/ts/models/pronunciation.ts +39 -0
package/src/ts/models/tts.ts +54 -0
package/src/ts/models/voice-config.ts +82 -0
package/src/ts/server.ts +460 -0
package/src/ts/services/mlx-tts-client.ts +337 -0
package/src/ts/services/pronunciation.ts +209 -0
package/src/ts/services/prosody-translator.ts +130 -0
package/src/ts/services/voice-loader.ts +214 -0
package/src/ts/utils/logger.ts +144 -0
package/src/ts/utils/text-sanitizer.ts +118 -0
package/tests/integration/api.test.ts +210 -0
package/tests/mocks/index.ts +152 -0
package/tests/ts/server.test.ts +11 -0
package/tests/unit/middleware/cors.test.ts +146 -0
package/tests/unit/models/validation.test.ts +332 -0
package/tests/unit/services/pronunciation.test.ts +171 -0
package/tests/unit/services/prosody-translator.test.ts +142 -0
package/tsconfig.json +25 -0

package/src/ts/services/voice-loader.ts ADDED Viewed

@@ -0,0 +1,214 @@
+/**
+ * Voice Configuration Loader
+ * Parses AGENTPERSONALITIES.md and caches voice configurations
+ * Supports numeric voice IDs (1-54) mapped to Kokoro voices
+ */
+import { homedir } from "node:os";
+import type { VoiceConfig } from "@/models/voice-config.js";
+import { logger } from "@/utils/logger.js";
+import { getKokoroVoice, getVoiceInfo, KOKORO_VOICES, type KokoroVoice } from "@/constants/KOKORO_VOICES.js";
+/**
+ * Voice registry entry from AGENTPERSONALITIES.md
+ *
+ * Shape of a single voice as declared in the personalities file. Nothing in
+ * this module constructs these entries yet: loadVoices() still returns a
+ * hard-coded map (see the TODO there).
+ */
+interface VoiceRegistryEntry {
+  name: string;
+  voice_id: string;
+  characteristics: string[];
+  description: string;
+  // Optional per-voice tuning. The field names match the tuning fields set on
+  // the VoiceConfig objects built in loadVoices().
+  prosody?: {
+    stability?: number;
+    similarity_boost?: number;
+    style?: number;
+    speed?: number;
+    use_speaker_boost?: boolean;
+  };
+}
+/**
+ * Parsed AGENTPERSONALITIES.md structure
+ *
+ * Wraps the voice registry (entries keyed by string) together with the
+ * declared default voice.
+ */
+interface AgentPersonalities {
+  voice_mappings: {
+    voice_registry: Record<string, VoiceRegistryEntry>;
+    default: string;
+    default_voice_id: string;
+  };
+}
+/**
+ * Voice loader cache
+ *
+ * In-memory snapshot kept by VoiceLoaderService between loadVoices() calls.
+ */
+interface VoiceCache {
+  // Voice configurations keyed by voice ID
+  voices: Map<string, VoiceConfig>;
+  // ID returned by getDefaultVoiceId()
+  defaultVoiceId: string;
+  // Date.now() value (epoch milliseconds) recorded when the cache was filled
+  lastLoaded: number;
+}
+/**
+ * Voice loader service
+ * Loads and caches voice configurations from AGENTPERSONALITIES.md
+ * Supports numeric voice IDs (1-54) for Kokoro voices
+ *
+ * The Markdown parser is not implemented yet: loadVoices() currently serves a
+ * fixed set of built-in voices and reuses them for CACHE_TTL_MS.
+ */
+class VoiceLoaderService {
+  /** Maximum age of the cached voice map before it is rebuilt (5 minutes). */
+  private static readonly CACHE_TTL_MS = 5 * 60 * 1000;
+  private cache: VoiceCache | null = null;
+  private readonly cachePath: string;
+  private readonly personalitiesPath: string;
+  constructor() {
+    // os.homedir() honours $HOME when it is set and otherwise asks the OS, so
+    // the paths can never start with the literal string "undefined".
+    const home = homedir();
+    // Path to AGENTPERSONALITIES.md in PAI skills
+    this.personalitiesPath = `${home}/.claude/skills/Agents/AgentPersonalities.md`;
+    this.cachePath = `${home}/.claude/skills/Agents/Data/Traits.yaml`;
+  }
+  /**
+   * Resolve voice_id to Kokoro voice name
+   * @param voiceId - Numeric voice ID (1-54) or string identifier
+   * @returns Kokoro voice preset name (e.g., "af_heart")
+   */
+  resolveKokoroVoice(voiceId: string): string {
+    return getKokoroVoice(voiceId);
+  }
+  /**
+   * Get voice information for a numeric ID
+   * @param voiceId - Numeric voice ID (1-54)
+   * @returns Voice info or undefined
+   */
+  getVoiceInfo(voiceId: string): KokoroVoice | undefined {
+    // Unqualified call: this is the imported helper, not this method.
+    return getVoiceInfo(voiceId);
+  }
+  /**
+   * Get all available Kokoro voices
+   * @returns Array of all Kokoro voice information
+   */
+  getAllKokoroVoices(): readonly KokoroVoice[] {
+    // Statically imported (this file is an ES module), so the value is
+    // type-checked against the declared return type instead of being `any`.
+    return KOKORO_VOICES;
+  }
+  /**
+   * Load voice configurations from AGENTPERSONALITIES.md
+   *
+   * Returns the cached map while it is younger than CACHE_TTL_MS; otherwise
+   * rebuilds it and refreshes the cache.
+   * @returns Map of voice ID to configuration; an empty map if loading fails
+   */
+  async loadVoices(): Promise<Map<string, VoiceConfig>> {
+    // Return cached voices if available and recent (< 5 minutes old)
+    if (this.cache && Date.now() - this.cache.lastLoaded < VoiceLoaderService.CACHE_TTL_MS) {
+      logger.debug("Using cached voice configurations");
+      return this.cache.voices;
+    }
+    logger.info("Loading voice configurations from AGENTPERSONALITIES.md");
+    try {
+      // For now, return a basic voice mapping
+      // TODO: Implement full AGENTPERSONALITIES.md parser
+      const voices = new Map<string, VoiceConfig>();
+      // Default voices based on Traits.yaml
+      voices.set("marrvin", {
+        voice_id: "marrvin",
+        voice_name: "Default",
+        description: "Default voice",
+        type: "built-in",
+        stability: 0.5,
+        similarity_boost: 0.75,
+        style: 0.0,
+        speed: 1.0,
+        use_speaker_boost: true,
+      });
+      voices.set("marlin", {
+        voice_id: "marlin",
+        voice_name: "Marlin",
+        description: "Alternative voice",
+        type: "built-in",
+        stability: 0.6,
+        similarity_boost: 0.75,
+        style: 0.1,
+        speed: 1.0,
+        use_speaker_boost: true,
+      });
+      voices.set("daniel", {
+        voice_id: "daniel",
+        voice_name: "Daniel",
+        description: "Daniel voice",
+        type: "built-in",
+        stability: 0.7,
+        similarity_boost: 0.85,
+        style: 0.1,
+        speed: 0.95,
+        use_speaker_boost: true,
+      });
+      // Cache the voices
+      this.cache = {
+        voices,
+        defaultVoiceId: "marrvin",
+        lastLoaded: Date.now(),
+      };
+      logger.info(`Loaded ${voices.size} voice configurations`);
+      return voices;
+    } catch (error) {
+      // Catch variables are `unknown` under strict mode: narrow, don't assert.
+      const cause = error instanceof Error ? error : new Error(String(error));
+      logger.error("Failed to load voice configurations", cause);
+      // Return empty map on error
+      return new Map();
+    }
+  }
+  /**
+   * Get a specific voice configuration by ID
+   * @param voiceId - Key into the loaded voice map
+   * @returns The matching configuration, or undefined when the ID is unknown
+   */
+  async getVoice(voiceId: string): Promise<VoiceConfig | undefined> {
+    const voices = await this.loadVoices();
+    return voices.get(voiceId);
+  }
+  /**
+   * Get the default voice ID
+   * @returns The cached default, or "marrvin" when no cache could be built
+   */
+  async getDefaultVoiceId(): Promise<string> {
+    if (!this.cache) {
+      await this.loadVoices();
+    }
+    return this.cache?.defaultVoiceId || "marrvin";
+  }
+  /**
+   * Get all available voice IDs
+   * @returns Keys of the loaded voice map, in insertion order
+   */
+  async getAvailableVoices(): Promise<string[]> {
+    const voices = await this.loadVoices();
+    return Array.from(voices.keys());
+  }
+  /**
+   * Clear the voice cache (force reload on next access)
+   */
+  clearCache(): void {
+    logger.debug("Clearing voice cache");
+    this.cache = null;
+  }
+}
+/**
+ * Module-wide voice loader; stays null until getVoiceLoader() is first called
+ */
+let voiceLoaderInstance: VoiceLoaderService | null = null;
+/**
+ * Return the shared voice loader service, constructing it on first use
+ */
+export function getVoiceLoader(): VoiceLoaderService {
+  voiceLoaderInstance ??= new VoiceLoaderService();
+  return voiceLoaderInstance;
+}
+/**
+ * Export types for use in other modules
+ */
+export type { VoiceRegistryEntry, AgentPersonalities, VoiceCache };

package/src/ts/utils/logger.ts ADDED Viewed

@@ -0,0 +1,144 @@
+/**
+ * Structured logger utility
+ * Provides consistent logging across the application
+ */
+/**
+ * Log level enumeration
+ *
+ * The numeric values are ordered by severity: log() compares them with `<`
+ * against the configured level to decide whether a message is dropped.
+ */
+export enum LogLevel {
+  DEBUG = 0,
+  INFO = 1,
+  WARN = 2,
+  ERROR = 3,
+}
+/**
+ * Log entry structure
+ *
+ * One fully assembled log record, as passed to formatLogEntry().
+ */
+export interface LogEntry {
+  level: LogLevel;
+  message: string;
+  timestamp: Date;
+  // Optional structured data; serialized with JSON.stringify when printed
+  context?: Record<string, unknown>;
+  // Optional error; its stack (or message) is appended on a new line
+  error?: Error;
+}
+/**
+ * Logger configuration
+ */
+export interface LoggerConfig {
+  // Minimum level that is emitted; lower levels are discarded
+  level: LogLevel;
+  // When true, each line starts with the entry's ISO-8601 timestamp
+  includeTimestamp: boolean;
+  // When true, non-empty context objects are appended as JSON
+  includeContext: boolean;
+}
+/**
+ * Current logger configuration
+ *
+ * Module-level state: logger.setLevel() mutates it and logger.configure()
+ * replaces it with a merged copy.
+ */
+let config: LoggerConfig = {
+  level: LogLevel.INFO,
+  includeTimestamp: true,
+  includeContext: true,
+};
+/**
+ * Upper-case label printed for each log level
+ */
+const LEVEL_LABELS: Record<LogLevel, string> = {
+  [LogLevel.DEBUG]: "DEBUG",
+  [LogLevel.INFO]: "INFO",
+  [LogLevel.WARN]: "WARN",
+  [LogLevel.ERROR]: "ERROR",
+};
+/**
+ * Translate a log level into the label shown inside the square brackets
+ */
+function formatLevel(level: LogLevel): string {
+  return LEVEL_LABELS[level];
+}
+/**
+ * Format log entry for output
+ *
+ * Produces "<ISO timestamp> [LEVEL] message {context-json}", with the pieces
+ * joined by single spaces. The timestamp and the context are included only
+ * when the current logger configuration enables them, and the context only
+ * when it has at least one key. If an error is attached, a newline and its
+ * stack (or its message when no stack exists) are appended.
+ *
+ * Context that JSON.stringify cannot serialize (circular references, BigInt
+ * values) is replaced by a placeholder so that a logging call never throws
+ * into the code that issued it.
+ */
+function formatLogEntry(entry: LogEntry): string {
+  const parts: string[] = [];
+  if (config.includeTimestamp) {
+    parts.push(entry.timestamp.toISOString());
+  }
+  parts.push(`[${formatLevel(entry.level)}]`);
+  parts.push(entry.message);
+  if (entry.context && config.includeContext && Object.keys(entry.context).length > 0) {
+    let serialized: string;
+    try {
+      serialized = JSON.stringify(entry.context);
+    } catch {
+      // Logging must stay best-effort: keep the message, drop the payload.
+      serialized = "[unserializable context]";
+    }
+    parts.push(serialized);
+  }
+  if (entry.error) {
+    parts.push(`\n${entry.error.stack || entry.error.message}`);
+  }
+  return parts.join(" ");
+}
+/**
+ * Core logging function
+ *
+ * Discards entries below the configured minimum level; otherwise formats the
+ * entry and writes it to the console method that matches its level.
+ */
+function log(level: LogLevel, message: string, context?: Record<string, unknown>, error?: Error): void {
+  const belowThreshold = level < config.level;
+  if (belowThreshold) {
+    return;
+  }
+  const line = formatLogEntry({ level, message, timestamp: new Date(), context, error });
+  if (level === LogLevel.ERROR) {
+    console.error(line);
+  } else if (level === LogLevel.WARN) {
+    console.warn(line);
+  } else if (level === LogLevel.DEBUG || level === LogLevel.INFO) {
+    console.log(line);
+  }
+}
+/**
+ * Logger API
+ *
+ * One method per level, plus setLevel/configure to adjust the module-wide
+ * configuration. Note that error() takes the Error before the context.
+ */
+export const logger = {
+  debug: (message: string, context?: Record<string, unknown>): void => log(LogLevel.DEBUG, message, context),
+  info: (message: string, context?: Record<string, unknown>): void => log(LogLevel.INFO, message, context),
+  warn: (message: string, context?: Record<string, unknown>): void => log(LogLevel.WARN, message, context),
+  error: (message: string, error?: Error, context?: Record<string, unknown>): void =>
+    log(LogLevel.ERROR, message, context, error),
+  // Changes the threshold on the current configuration object in place
+  setLevel: (level: LogLevel): void => {
+    config.level = level;
+  },
+  // Replaces the configuration with a copy that has the given fields overridden
+  configure: (newConfig: Partial<LoggerConfig>): void => {
+    config = Object.assign({}, config, newConfig);
+  },
+};
+export default logger;

package/src/ts/utils/text-sanitizer.ts ADDED Viewed

@@ -0,0 +1,118 @@
+/**
+ * Text sanitization utility
+ * Input validation and sanitization for TTS text
+ */
+/**
+ * Sanitization options
+ *
+ * Every field is optional; sanitizeText() fills omitted fields from
+ * DEFAULT_OPTIONS.
+ */
+export interface SanitizeOptions {
+  /** Maximum length (default: 500); 0 disables truncation because the limit is only applied when truthy */
+  maxLength?: number;
+  /** Remove HTML tags (default: true); also enables the script-tag rejection in sanitizeText() */
+  stripHtml?: boolean;
+  /** Remove shell metacharacters (default: true) */
+  stripShellChars?: boolean;
+  /** Normalize whitespace (default: true) */
+  normalizeWhitespace?: boolean;
+}
+/**
+ * Default sanitization options
+ *
+ * Spread underneath the caller's options, so any field the caller sets wins.
+ */
+const DEFAULT_OPTIONS: SanitizeOptions = {
+  maxLength: 500,
+  stripHtml: true,
+  stripShellChars: true,
+  normalizeWhitespace: true,
+};
+/**
+ * HTML tag patterns to remove
+ *
+ * We strip all angle brackets rather than trying to parse HTML tags,
+ * which avoids incomplete multi-character sanitization issues.
+ *
+ * Only the brackets are removed: tag names and attributes stay in the text
+ * (for example "<b>hi</b>" becomes "bhi/b").
+ */
+const HTML_TAG_PATTERN = /[<>]/g;
+/**
+ * Shell metacharacter patterns to remove
+ *
+ * Matches $ ` ' " \ ; | & ( ) < and >. Quotes and apostrophes are included,
+ * so ordinary prose is affected too ("don't" becomes "dont").
+ */
+const SHELL_CHARS_PATTERN = /[\$`'"\\;|&()<>]/g;
+/**
+ * Whitespace normalization pattern
+ *
+ * Matches any run of whitespace (spaces, tabs, newlines) so it can be
+ * collapsed to a single space.
+ */
+const WHITESPACE_PATTERN = /\s+/g;
+/**
+ * Script tag detection (conservative)
+ *
+ * Detects any <script ...> or </script ...> tag, including variants with
+ * whitespace around the tag name or attributes within the tag, such as `</script >`
+ * or `</script foo="bar">`.
+ *
+ * Case-insensitive, and deliberately without the `g` flag so that repeated
+ * .test() calls carry no lastIndex state between inputs.
+ */
+const SCRIPT_PATTERN = /<\s*script\b[^>]*>|<\s*\/\s*script\b[^>]*>/i;
+/**
+ * Sanitize text for TTS input
+ *
+ * Removes potentially dangerous content and normalizes text. Steps run in
+ * this order, each gated by its option:
+ *   1. reject input containing a script tag (when stripHtml is on)
+ *   2. strip angle brackets (stripHtml)
+ *   3. strip shell metacharacters (stripShellChars)
+ *   4. collapse whitespace runs to one space and trim (normalizeWhitespace)
+ *   5. truncate to maxLength UTF-16 code units and trim again
+ *
+ * @param text - Raw input text
+ * @param options - Overrides for the default sanitization options
+ * @returns The sanitized text, at most maxLength code units long
+ * @throws Error when stripHtml is enabled and the text contains a script tag
+ */
+export function sanitizeText(text: string, options: SanitizeOptions = {}): string {
+  const opts = { ...DEFAULT_OPTIONS, ...options };
+  let result = text;
+  // Check for script tags first (security)
+  if (opts.stripHtml && SCRIPT_PATTERN.test(result)) {
+    throw new Error("Text contains script tags and was rejected");
+  }
+  // Remove HTML tags
+  if (opts.stripHtml) {
+    result = result.replace(HTML_TAG_PATTERN, "");
+  }
+  // Remove shell metacharacters
+  if (opts.stripShellChars) {
+    result = result.replace(SHELL_CHARS_PATTERN, "");
+  }
+  // Normalize whitespace
+  if (opts.normalizeWhitespace) {
+    result = result.replace(WHITESPACE_PATTERN, " ").trim();
+  }
+  // Enforce max length (a maxLength of 0 or undefined means "no limit")
+  if (opts.maxLength && result.length > opts.maxLength) {
+    let end = opts.maxLength;
+    // substring() counts UTF-16 code units. If the cut would land between the
+    // two halves of a surrogate pair (e.g. an emoji), back up one unit so the
+    // output never ends in a lone high surrogate.
+    const lastUnit = result.charCodeAt(end - 1);
+    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
+      end -= 1;
+    }
+    result = result.substring(0, end).trim();
+  }
+  return result;
+}
+/**
+ * Validate text is safe for TTS
+ *
+ * Text is valid when it contains at least one non-whitespace character and
+ * no script tag.
+ */
+export function isValidText(text: string): boolean {
+  const hasContent = Boolean(text) && text.trim().length > 0;
+  // Dangerous pattern check: any <script> or </script> tag disqualifies the text
+  return hasContent && !SCRIPT_PATTERN.test(text);
+}
+/**
+ * Sanitize title for macOS notification
+ *
+ * Applies the default sanitization rules with a 100-character limit.
+ */
+export function sanitizeTitle(title: string): string {
+  const titleOptions: SanitizeOptions = { maxLength: 100 };
+  return sanitizeText(title, titleOptions);
+}
+/**
+ * Sanitize message for TTS
+ *
+ * Applies the default sanitization rules with a 500-character limit.
+ */
+export function sanitizeMessage(message: string): string {
+  const messageOptions: SanitizeOptions = { maxLength: 500 };
+  return sanitizeText(message, messageOptions);
+}

package/tests/integration/api.test.ts ADDED Viewed

@@ -0,0 +1,210 @@
+/**
+ * Integration tests for API endpoints
+ *
+ * Note: These tests require MLX-audio to be installed.
+ * They will be skipped if MLX-audio is not available.
+ */
+import { describe, test, expect, beforeAll, afterAll } from "bun:test";
+// Test server configuration
+// The suite spawns its own server bound to the loopback address.
+const SERVER_HOST = "127.0.0.1";
+const SERVER_PORT = 8899; // Use different port for tests
+// Base URL that every request in this file is sent to
+const SERVER_URL = `http://${SERVER_HOST}:${SERVER_PORT}`;
+// Handle of the spawned server; assigned in beforeAll and killed in afterAll
+let serverProcess: ReturnType<typeof Bun.spawn> | null = null;
+// Detect MLX-audio by asking `which` to locate the `mlx_tts` executable.
+// Resolves to false when the lookup exits non-zero or cannot be spawned.
+async function checkMLXAudio(): Promise<boolean> {
+  try {
+    const lookup = Bun.spawn(["which", "mlx_tts"], { stdout: "pipe", stderr: "pipe" });
+    await lookup.exited;
+    const found = lookup.exitCode === 0;
+    return found;
+  } catch {
+    return false;
+  }
+}
+// Evaluated once at module load; decides which suites below are registered
+const hasMLXAudio = await checkMLXAudio();
+// End-to-end suite: starts the real server on SERVER_PORT and talks to it over
+// HTTP. Registered as skipped when MLX-audio is not installed; the test runner
+// reports that itself, so the title does not claim the suite was skipped.
+describe.skipIf(!hasMLXAudio)("Voice Server API Integration Tests", () => {
+  beforeAll(async () => {
+    // Start test server
+    serverProcess = Bun.spawn({
+      cmd: ["bun", "run", "src/ts/server.ts"],
+      env: {
+        ...process.env,
+        PORT: SERVER_PORT.toString(),
+        NODE_ENV: "test",
+      },
+      stdout: "pipe",
+      stderr: "pipe",
+    });
+    // Wait for server to be ready: poll /health up to 50 times, 100 ms apart
+    let retries = 50;
+    while (retries > 0) {
+      try {
+        const response = await fetch(`${SERVER_URL}/health`);
+        if (response.ok) {
+          break;
+        }
+      } catch {
+        // Server not ready yet
+      }
+      await new Promise(resolve => setTimeout(resolve, 100));
+      retries--;
+    }
+    // retries only reaches 0 when every poll failed (a success breaks earlier)
+    if (retries === 0) {
+      throw new Error("Server failed to start");
+    }
+  }, 10000);
+  afterAll(() => {
+    if (serverProcess) {
+      serverProcess.kill();
+    }
+  });
+  describe("GET /health", () => {
+    test("should return health status", async () => {
+      const response = await fetch(`${SERVER_URL}/health`);
+      expect(response.status).toBe(200);
+      const data = await response.json();
+      expect(data.status).toBe("healthy");
+      expect(data.port).toBe(SERVER_PORT);
+      expect(data.voice_system).toBeDefined();
+    });
+  });
+  describe("POST /notify", () => {
+    test("should accept valid notification request", async () => {
+      const response = await fetch(`${SERVER_URL}/notify`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          message: "Test notification",
+          voice_id: "marrvin",
+        }),
+      });
+      expect(response.status).toBe(200);
+      const data = await response.json();
+      expect(data.status).toBe("success");
+    });
+    test("should reject missing message", async () => {
+      const response = await fetch(`${SERVER_URL}/notify`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ voice_id: "marrvin" }),
+      });
+      expect(response.status).toBe(400);
+    });
+    test("should reject missing voice_id", async () => {
+      const response = await fetch(`${SERVER_URL}/notify`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ message: "Test" }),
+      });
+      expect(response.status).toBe(400);
+    });
+    test("should reject invalid JSON", async () => {
+      const response = await fetch(`${SERVER_URL}/notify`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: "invalid json",
+      });
+      expect(response.status).toBe(400);
+    });
+  });
+  describe("POST /tts", () => {
+    test("should accept valid TTS request", async () => {
+      const response = await fetch(`${SERVER_URL}/tts`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          text: "Hello world",
+          voice_id: "marrvin",
+        }),
+      });
+      // Note: This may fail if MLX-audio is not configured
+      // Status could be 200 (success) or 500 (MLX not available)
+      expect([200, 500]).toContain(response.status);
+    });
+    test("should reject missing text", async () => {
+      const response = await fetch(`${SERVER_URL}/tts`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ voice_id: "marrvin" }),
+      });
+      expect(response.status).toBe(400);
+    });
+    test("should reject missing voice_id", async () => {
+      const response = await fetch(`${SERVER_URL}/tts`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ text: "Hello" }),
+      });
+      expect(response.status).toBe(400);
+    });
+    test("should reject empty text", async () => {
+      const response = await fetch(`${SERVER_URL}/tts`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          text: "",
+          voice_id: "marrvin",
+        }),
+      });
+      expect(response.status).toBe(400);
+    });
+  });
+  describe("CORS", () => {
+    test("should include CORS headers for allowed origins", async () => {
+      const response = await fetch(`${SERVER_URL}/health`, {
+        headers: { Origin: "http://localhost:3000" },
+      });
+      // Headers.get() yields null (never undefined) for an absent header, so
+      // toBeDefined() could not fail; assert against null instead.
+      expect(response.headers.get("Access-Control-Allow-Methods")).not.toBeNull();
+    });
+    test("should handle OPTIONS preflight", async () => {
+      const response = await fetch(`${SERVER_URL}/notify`, {
+        method: "OPTIONS",
+        headers: {
+          Origin: "http://localhost:3000",
+          "Access-Control-Request-Method": "POST",
+        },
+      });
+      expect(response.status).toBe(204);
+    });
+  });
+});
+// If MLX-audio is not available, add a placeholder test so this file still
+// reports one passing test while the real suite above is skipped.
+// The condition is negated: the placeholder must run exactly when the main
+// suite does not.
+describe.if(!hasMLXAudio)("Integration tests", () => {
+  test("placeholder - MLX-audio is not available", () => {
+    expect(true).toBe(true);
+  });
+});