npm - @kajidog/mcp-tts-voicevox - Versions diffs - 0.6.1 → 0.7.1 - Mend

@kajidog/mcp-tts-voicevox 0.6.1 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.js (changed)

@@ -429,7 +429,7 @@ var init_dist = __esm({
             });
             if (!chunk) {
               if (i === 1) {
-                await new Promise((resolve) => setTimeout(resolve));
+                await new Promise((resolve2) => setTimeout(resolve2));
                 maxReadCount = 3;
                 continue;
               }
@@ -576,7 +576,7 @@ var init_dist = __esm({
 // src/index.ts
 import { readFileSync as readFileSync2 } from "fs";
-import { dirname as dirname2, join as join2 } from "path";
+import { dirname as dirname3, join as join4 } from "path";
 import { fileURLToPath as fileURLToPath2 } from "url";
 // ../../packages/mcp-core/dist/config.js
@@ -620,6 +620,12 @@ function parseBaseCliArgs(argv = process.argv.slice(2)) {
           i++;
         }
         break;
+      case "--api-key":
+        if (nextArg && !nextArg.startsWith("-")) {
+          config2.apiKey = nextArg;
+          i++;
+        }
+        break;
     }
   }
   return config2;
@@ -641,6 +647,9 @@ function parseBaseEnvVars(env = process.env) {
   if (env.MCP_ALLOWED_ORIGINS) {
     config2.allowedOrigins = env.MCP_ALLOWED_ORIGINS.split(",").map((o) => o.trim());
   }
+  if (env.MCP_API_KEY) {
+    config2.apiKey = env.MCP_API_KEY;
+  }
   return config2;
 }
 function filterUndefined(obj) {
@@ -1648,9 +1657,9 @@ var Context = class {
    * })
    * ```
    */
-  json = (object2, arg, headers) => {
+  json = (object3, arg, headers) => {
     return this.#newResponse(
-      JSON.stringify(object2),
+      JSON.stringify(object3),
       arg,
       setDefaultContentType("application/json", headers)
     );
@@ -2805,6 +2814,13 @@ function forbiddenError(message) {
     id: null
   };
 }
+/**
+ * Build the JSON-RPC 2.0 error envelope used for rejected (unauthorized) requests.
+ *
+ * @param {string} message - Text placed in `error.message`.
+ * @returns {{jsonrpc: string, error: {code: number, message: string}, id: null}}
+ *   Response body with error code -32001 and a null request id.
+ */
+function unauthorizedError(message) {
+  const error = { code: -32001, message };
+  return { jsonrpc: "2.0", error, id: null };
+}
 function validateOrigin(config2) {
   return async (c, next) => {
     const origin = c.req.header("Origin");
@@ -2847,6 +2863,22 @@ function validateHost(config2) {
     return next();
   };
 }
+/**
+ * Create middleware that enforces the configured API key on incoming requests.
+ *
+ * The key is taken from the `X-API-Key` header or, when that header is absent,
+ * from an `Authorization: Bearer <key>` header. The check is skipped when no
+ * key is configured and for `OPTIONS` requests.
+ *
+ * @param {{apiKey?: string}} config2 - Server configuration; a non-empty `apiKey` enables the check.
+ * @returns {Function} Async middleware `(c, next)` that answers 401 with a JSON-RPC
+ *   error body when the key is missing or wrong, and otherwise delegates to `next()`.
+ */
+function validateApiKey(config2) {
+  // timingSafeEqual throws on buffers of different length, so both sides are
+  // reduced to fixed-size SHA-256 digests before being compared.
+  const digest = (value) => createHash("sha256").update(value, "utf8").digest();
+  const isMatchingKey = (provided, expected) => typeof provided === "string" && typeof expected === "string" && timingSafeEqual(digest(provided), digest(expected));
+  return async (c, next) => {
+    if (!config2.apiKey || c.req.method === "OPTIONS") {
+      return next();
+    }
+    const xApiKey = c.req.header("X-API-Key");
+    const authorization = c.req.header("Authorization");
+    const bearerToken = authorization?.startsWith("Bearer ") ? authorization.slice(7).trim() : void 0;
+    const providedKey = xApiKey ?? bearerToken;
+    // Constant-time comparison: a plain `!==` on the secret can leak, through
+    // response timing, how many leading characters of a guess were correct.
+    if (!isMatchingKey(providedKey, config2.apiKey)) {
+      console.log("Rejected request with invalid API key");
+      return c.json(unauthorizedError("Unauthorized: Invalid API key"), { status: 401 });
+    }
+    return next();
+  };
+}
 function createHttpApp(options) {
   const { server: server2, config: config2, serverFactory, extraCorsHeaders = [], onSessionInitialized, onSessionClosed } = options;
   const transports = /* @__PURE__ */ new Map();
@@ -2912,6 +2944,8 @@ function createHttpApp(options) {
     "mcp-session-id",
     "Last-Event-ID",
     "mcp-protocol-version",
+    "X-API-Key",
+    "Authorization",
     ...extraCorsHeaders
   ];
   app.use("/mcp", cors({
@@ -2922,6 +2956,7 @@ function createHttpApp(options) {
   }));
   app.use("/mcp", validateOrigin(config2));
   app.use("/mcp", validateHost(config2));
+  app.use("/mcp", validateApiKey(config2));
   app.all("/mcp", handleMCP);
   app.get("/health", handleHealth);
   return app;
@@ -2969,7 +3004,7 @@ async function startHttpServer(options) {
         console.error(`Health check: http://${info.address}:${info.port}/health`);
       });
     }
-    await new Promise((resolve) => setTimeout(resolve, 1e3));
+    await new Promise((resolve2) => setTimeout(resolve2, 1e3));
     console.error("HTTP server startup completed");
   } catch (error) {
     console.error("HTTP server startup failed:", error);
@@ -3016,6 +3051,7 @@ async function launchServer(options) {
 }
 // src/config.ts
+import { join } from "path";
 var defaultConfig = {
   ...defaultBaseConfig,
   voicevoxUrl: "http://localhost:50021",
@@ -3028,7 +3064,15 @@ var defaultConfig = {
   restrictImmediate: false,
   restrictWaitForStart: false,
   restrictWaitForEnd: false,
+  playerDomain: "",
   autoPlay: true,
+  playerExportEnabled: true,
+  playerExportDir: join(process.cwd(), "voicevox-player-exports"),
+  playerCacheDir: join(process.cwd(), ".voicevox-player-cache"),
+  playerStateFile: join(process.cwd(), ".voicevox-player-cache", "player-state.json"),
+  playerAudioCacheEnabled: true,
+  playerAudioCacheTtlDays: 30,
+  playerAudioCacheMaxMb: 512,
   disabledTools: []
 };
 function parseCliArgs(argv = process.argv.slice(2)) {
@@ -3095,6 +3139,48 @@ function parseCliArgs(argv = process.argv.slice(2)) {
       case "--no-auto-play":
         config2.autoPlay = false;
         break;
+      case "--player-export":
+        config2.playerExportEnabled = true;
+        break;
+      case "--no-player-export":
+        config2.playerExportEnabled = false;
+        break;
+      case "--player-export-dir":
+        if (nextArg && !nextArg.startsWith("-")) {
+          config2.playerExportDir = nextArg;
+          i++;
+        }
+        break;
+      case "--player-cache-dir":
+        if (nextArg && !nextArg.startsWith("-")) {
+          config2.playerCacheDir = nextArg;
+          i++;
+        }
+        break;
+      case "--player-state-file":
+        if (nextArg && !nextArg.startsWith("-")) {
+          config2.playerStateFile = nextArg;
+          i++;
+        }
+        break;
+      case "--player-audio-cache":
+        config2.playerAudioCacheEnabled = true;
+        break;
+      case "--no-player-audio-cache":
+        config2.playerAudioCacheEnabled = false;
+        break;
+      case "--player-audio-cache-ttl-days":
+        if (nextArg && !nextArg.startsWith("-")) {
+          config2.playerAudioCacheTtlDays = Number(nextArg);
+          i++;
+        }
+        break;
+      case "--player-audio-cache-max-mb":
+        if (nextArg && !nextArg.startsWith("-")) {
+          config2.playerAudioCacheMaxMb = Number(nextArg);
+          i++;
+        }
+        break;
       case "--disable-tools":
         if (nextArg && !nextArg.startsWith("-")) {
           config2.disabledTools = nextArg.split(",").map((t) => t.trim());
@@ -3138,9 +3224,35 @@ function parseEnvVars(env = process.env) {
   if (env.VOICEVOX_RESTRICT_WAIT_FOR_END === "true") {
     config2.restrictWaitForEnd = true;
   }
+  if (env.VOICEVOX_PLAYER_DOMAIN) {
+    config2.playerDomain = env.VOICEVOX_PLAYER_DOMAIN;
+  }
   if (env.VOICEVOX_AUTO_PLAY !== void 0) {
     config2.autoPlay = env.VOICEVOX_AUTO_PLAY !== "false";
   }
+  if (env.VOICEVOX_PLAYER_EXPORT_ENABLED !== void 0) {
+    config2.playerExportEnabled = env.VOICEVOX_PLAYER_EXPORT_ENABLED !== "false";
+  }
+  if (env.VOICEVOX_PLAYER_EXPORT_DIR) {
+    config2.playerExportDir = env.VOICEVOX_PLAYER_EXPORT_DIR;
+  }
+  if (env.VOICEVOX_PLAYER_CACHE_DIR) {
+    config2.playerCacheDir = env.VOICEVOX_PLAYER_CACHE_DIR;
+  }
+  if (env.VOICEVOX_PLAYER_STATE_FILE) {
+    config2.playerStateFile = env.VOICEVOX_PLAYER_STATE_FILE;
+  }
+  if (env.VOICEVOX_PLAYER_AUDIO_CACHE_ENABLED !== void 0) {
+    config2.playerAudioCacheEnabled = env.VOICEVOX_PLAYER_AUDIO_CACHE_ENABLED !== "false";
+  }
+  if (env.VOICEVOX_PLAYER_AUDIO_CACHE_TTL_DAYS !== void 0) {
+    const ttlDays = Number(env.VOICEVOX_PLAYER_AUDIO_CACHE_TTL_DAYS);
+    if (Number.isFinite(ttlDays)) config2.playerAudioCacheTtlDays = ttlDays;
+  }
+  if (env.VOICEVOX_PLAYER_AUDIO_CACHE_MAX_MB !== void 0) {
+    const maxMb = Number(env.VOICEVOX_PLAYER_AUDIO_CACHE_MAX_MB);
+    if (Number.isFinite(maxMb)) config2.playerAudioCacheMaxMb = maxMb;
+  }
   if (env.VOICEVOX_DISABLED_TOOLS) {
     config2.disabledTools = env.VOICEVOX_DISABLED_TOOLS.split(",").map((t) => t.trim());
   }
@@ -3149,11 +3261,16 @@ function parseEnvVars(env = process.env) {
 function getConfig(argv, env) {
   const cliConfig = parseCliArgs(argv);
   const envConfig = parseEnvVars(env);
-  return {
+  const merged = {
     ...defaultConfig,
     ...filterUndefined(envConfig),
     ...filterUndefined(cliConfig)
   };
+  const isPlayerStateFileExplicit = envConfig.playerStateFile !== void 0 || cliConfig.playerStateFile !== void 0;
+  if (!isPlayerStateFileExplicit) {
+    merged.playerStateFile = join(merged.playerCacheDir, "player-state.json");
+  }
+  return merged;
 }
 // src/server.ts
@@ -3161,29 +3278,81 @@ import { VoicevoxClient } from "@kajidog/voicevox-client";
 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 // src/tools/player.ts
-import { randomUUID as randomUUID2 } from "crypto";
-import { readFileSync } from "fs";
-import { dirname, join } from "path";
+import { createHash, randomUUID as randomUUID2, timingSafeEqual } from "crypto";
+import { mkdirSync as mkdirSync2, readFileSync } from "fs";
+import { readdir, rename, stat, unlink, writeFile as writeFile2 } from "fs/promises";
+import { basename, dirname as dirname2, join as join3 } from "path";
 import { fileURLToPath } from "url";
 import { VoicevoxApi } from "@kajidog/voicevox-client";
 import { RESOURCE_MIME_TYPE, registerAppResource } from "@modelcontextprotocol/ext-apps/server";
+import * as z2 from "zod/v4";
+// src/tools/player-cache-utils.ts
+/**
+ * Translate the user-facing audio cache settings into a runtime policy.
+ *
+ * A `ttlDays` or `maxMb` of exactly 0 turns the disk cache off; a negative
+ * value removes the corresponding limit (reported as `null`).
+ *
+ * @param {{enabledFlag: boolean, ttlDays: number, maxMb: number}} input - Raw settings.
+ * @returns {{isDiskCacheEnabled: boolean, ttlMs: (number|null), maxBytes: (number|null)}}
+ *   Whether the disk cache is on, the TTL in milliseconds, and the size cap in bytes.
+ */
+function resolveAudioCachePolicy(input) {
+  const { enabledFlag, ttlDays, maxMb } = input;
+  const policy = {
+    isDiskCacheEnabled: enabledFlag && ttlDays !== 0 && maxMb !== 0,
+    ttlMs: null,
+    maxBytes: null
+  };
+  if (!(ttlDays < 0)) {
+    // days -> hours -> minutes -> seconds -> milliseconds
+    policy.ttlMs = ttlDays * 24 * 60 * 60 * 1e3;
+  }
+  if (!(maxMb < 0)) {
+    policy.maxBytes = maxMb * 1024 * 1024;
+  }
+  return policy;
+}
+/**
+ * Decide which cached files should be removed.
+ *
+ * Two passes: first every entry older than `ttlMs` is selected; then, if the
+ * remaining entries still exceed `maxBytes`, the oldest remaining entries are
+ * selected one by one until the total fits. A `null` limit skips its pass.
+ *
+ * @param {{entries: Array<{path: string, size: number, mtimeMs: number}>, now: number, ttlMs: (number|null), maxBytes: (number|null)}} input
+ *   Cache entries plus the current time and the limits to enforce.
+ * @returns {Set<string>} Paths selected for deletion.
+ */
+function planAudioCacheCleanup(input) {
+  const { entries, now, ttlMs, maxBytes } = input;
+  const doomed = /* @__PURE__ */ new Set();
+  if (ttlMs !== null) {
+    entries.filter((file) => now - file.mtimeMs > ttlMs).forEach((file) => doomed.add(file.path));
+  }
+  if (maxBytes === null) {
+    return doomed;
+  }
+  const survivors = entries.filter((file) => !doomed.has(file.path));
+  let remainingBytes = 0;
+  for (const file of survivors) {
+    remainingBytes += file.size;
+  }
+  if (!(remainingBytes > maxBytes)) {
+    return doomed;
+  }
+  // Evict from the oldest modification time upwards until the budget is met.
+  const evictionQueue = survivors.slice().sort((a, b) => a.mtimeMs - b.mtimeMs);
+  for (let i = 0; i < evictionQueue.length && !(remainingBytes <= maxBytes); i += 1) {
+    doomed.add(evictionQueue[i].path);
+    remainingBytes -= evictionQueue[i].size;
+  }
+  return doomed;
+}
+// src/tools/player-ui-tools.ts
+import { spawn, spawnSync } from "child_process";
+import { constants, accessSync } from "fs";
+import { mkdir, writeFile } from "fs/promises";
+import { dirname, join as join2, resolve } from "path";
 import * as z from "zod/v4";
 // src/tools/registration.ts
 import { registerAppTool } from "@modelcontextprotocol/ext-apps/server";
+// Prefix applied to registered tool names.
+var TOOL_PREFIX = "voicevox_";
+/**
+ * Return the registered name for a tool.
+ *
+ * Names that already begin with an underscore are returned unchanged; every
+ * other name gets `TOOL_PREFIX` prepended.
+ *
+ * @param {string} name - Tool name as written at the registration site.
+ * @returns {string} Name to register the tool under.
+ */
+function addToolPrefix(name) {
+  return name.startsWith("_") ? name : `${TOOL_PREFIX}${name}`;
+}
+/**
+ * Check whether a tool has been disabled, accepting either spelling of its name.
+ *
+ * @param {Set<string>} disabledTools - Names listed as disabled.
+ * @param {string} name - Tool name as written at the registration site.
+ * @returns {boolean} True when `name` or its `addToolPrefix` form is in `disabledTools`.
+ */
+function isToolDisabled(disabledTools, name) {
+  const acceptedSpellings = [name, addToolPrefix(name)];
+  return acceptedSpellings.some((spelling) => disabledTools.has(spelling));
+}
 function registerToolIfEnabled(server2, disabledTools, name, definition, handler) {
-  if (disabledTools.has(name)) {
-    console.error(`Tool "${name}" is disabled via configuration`);
+  const fullName = addToolPrefix(name);
+  if (isToolDisabled(disabledTools, name)) {
+    console.error(`Tool "${fullName}" is disabled via configuration`);
     return;
   }
-  server2.registerTool(name, definition, handler);
+  server2.registerTool(fullName, definition, handler);
 }
 function registerAppToolIfEnabled(server2, disabledTools, name, definition, handler) {
-  if (disabledTools.has(name)) {
-    console.error(`Tool "${name}" is disabled via configuration`);
+  const fullName = addToolPrefix(name);
+  if (isToolDisabled(disabledTools, name)) {
+    console.error(`Tool "${fullName}" is disabled via configuration`);
     return;
   }
-  registerAppTool(server2, name, definition, handler);
+  registerAppTool(server2, fullName, definition, handler);
 }
 // src/tools/utils.ts
@@ -3191,7 +3360,7 @@ var createErrorResponse = (error) => ({
   content: [
     {
       type: "text",
-      text: `\u30A8\u30E9\u30FC: ${error instanceof Error ? error.message : String(error)}`
+      text: `Error: ${error instanceof Error ? error.message : String(error)}`
     }
   ],
   isError: true
@@ -3239,115 +3408,157 @@ var processTextInput = async (voicevoxClient, text, speaker, speedScale, playbac
   });
 };
-// src/tools/player.ts
-var __dirname = typeof import.meta.dirname === "string" ? import.meta.dirname : dirname(fileURLToPath(import.meta.url));
-var playerHtml;
-try {
-  const htmlPath = join(__dirname, "mcp-app.html");
-  playerHtml = readFileSync(htmlPath, "utf-8");
-} catch {
+// src/tools/player-ui-tools.ts
+// Remembers the answer for each command name so it is probed at most once.
+var commandExistsCache = /* @__PURE__ */ new Map();
+/**
+ * Report whether an executable can be found, caching the answer per command.
+ *
+ * On Windows, "explorer" is reported as present without probing. Otherwise the
+ * command is looked up with `where` (Windows) or `which` (elsewhere) and counts
+ * as present when that lookup exits with status 0.
+ *
+ * @param {string} command - Executable name to look for.
+ * @returns {boolean} True when the command was found.
+ */
+function commandExists(command) {
+  if (!commandExistsCache.has(command)) {
+    const onWindows = process.platform === "win32";
+    let found;
+    if (onWindows && command === "explorer") {
+      found = true;
+    } else {
+      const probe = spawnSync(onWindows ? "where" : "which", [command], { stdio: "ignore" });
+      found = probe.status === 0;
+    }
+    commandExistsCache.set(command, found);
+  }
+  return commandExistsCache.get(command);
+}
+/**
+ * Report whether this host has a command for opening a directory in a file browser.
+ *
+ * Windows needs `explorer`, macOS needs `open`, and Linux needs `xdg-open`
+ * together with a `DISPLAY` or `WAYLAND_DISPLAY` environment variable. Any
+ * other platform yields false.
+ *
+ * @returns {boolean} True when the platform's opener command is available.
+ */
+function canOpenExplorer() {
+  switch (process.platform) {
+    case "win32":
+      return commandExists("explorer");
+    case "darwin":
+      return commandExists("open");
+    case "linux": {
+      const displayAvailable = Boolean(process.env.DISPLAY || process.env.WAYLAND_DISPLAY);
+      if (!displayAvailable) {
+        return false;
+      }
+      return commandExists("xdg-open");
+    }
+    default:
+      return false;
+  }
+}
+/**
+ * Report whether the current platform is one of those treated as able to show
+ * a directory chooser dialog (Windows and macOS).
+ *
+ * @returns {boolean} True on "win32" and "darwin", false elsewhere.
+ */
+function canChooseDirectoryDialog() {
+  const platformsWithDialog = ["win32", "darwin"];
+  return platformsWithDialog.includes(process.platform);
+}
+/**
+ * Turn arbitrary text into a short fragment that is safe inside a file name.
+ *
+ * The input is trimmed, characters in the set `<>:"/\|?*` and control
+ * characters are replaced with "_", whitespace runs collapse to a single "_",
+ * and the result is cut to at most 40 UTF-16 code units.
+ *
+ * @param {string} input - Text to sanitize.
+ * @param {string} fallback - Value returned when nothing is left after sanitizing.
+ * @returns {string} Sanitized fragment, or `fallback` when the fragment is empty.
+ */
+function sanitizeFilePart(input, fallback) {
+  let value = input.trim().replace(/[<>:"/\\|?*\x00-\x1f]/g, "_").replace(/\s+/g, "_").slice(0, 40);
+  // slice() counts UTF-16 code units, so the cut can land in the middle of a
+  // surrogate pair (e.g. an emoji). Drop a dangling high surrogate rather than
+  // emit a malformed string into a file name.
+  const lastUnit = value.charCodeAt(value.length - 1);
+  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
+    value = value.slice(0, -1);
+  }
+  return value.length > 0 ? value : fallback;
+}
+function openDirectoryInExplorer(directoryPath) {
   try {
-    const htmlPath = join(__dirname, "..", "..", "node_modules", "@kajidog", "player-ui", "dist", "mcp-app.html");
-    playerHtml = readFileSync(htmlPath, "utf-8");
+    const child = process.platform === "win32" ? spawn("explorer", [directoryPath], { detached: true, stdio: "ignore" }) : process.platform === "darwin" ? spawn("open", [directoryPath], { detached: true, stdio: "ignore" }) : spawn("xdg-open", [directoryPath], { detached: true, stdio: "ignore" });
+    child.unref();
+    return true;
   } catch {
-    console.error("Warning: player-ui HTML not found. Please build @kajidog/player-ui first.");
-    playerHtml = "<html><body><p>Player UI not available. Please build @kajidog/player-ui.</p></body></html>";
+    return false;
   }
 }
-var playerResourceUri = "ui://speak-player/player.html";
-var speakerCache = null;
-function registerPlayerTools(deps) {
-  const { server: server2, config: config2, disabledTools } = deps;
-  const playerVoicevoxApi = new VoicevoxApi(config2.voicevoxUrl);
-  const getSpeakerList = async () => {
-    if (speakerCache) return speakerCache;
-    try {
-      const speakers = await playerVoicevoxApi.getSpeakers();
-      speakerCache = speakers.flatMap(
-        (speaker) => speaker.styles.map((style) => ({
-          id: style.id,
-          name: style.name,
-          characterName: speaker.name,
-          uuid: speaker.speaker_uuid
-        }))
-      );
-      return speakerCache;
-    } catch {
-      return [];
+function showDirectoryPicker(defaultPath) {
+  return new Promise((resolve2) => {
+    if (process.platform === "win32") {
+      const defaultPathB64 = defaultPath ? Buffer.from(defaultPath).toString("base64") : "";
+      const psScript = `
+        Add-Type -AssemblyName System.Windows.Forms
+        $form = New-Object System.Windows.Forms.Form
+        $form.TopMost = $true
+        $form.ShowInTaskbar = $false
+        $form.WindowState = 'Minimized'
+        $dialog = New-Object System.Windows.Forms.FolderBrowserDialog
+        $dialog.Description = "Select Export Folder"
+        ${defaultPathB64 ? `$dialog.SelectedPath = [System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String("${defaultPathB64}"))` : ""}
+        $dialog.ShowNewFolderButton = $true
+        if ($dialog.ShowDialog($form) -eq [System.Windows.Forms.DialogResult]::OK) {
+            Write-Output $dialog.SelectedPath
+        }
+      `;
+      const child = spawn("powershell", ["-NoProfile", "-Command", psScript], { stdio: ["ignore", "pipe", "ignore"] });
+      let output = "";
+      child.stdout.on("data", (data) => {
+        output += data.toString();
+      });
+      child.on("close", () => {
+        const path = output.trim();
+        resolve2(path || null);
+      });
+    } else if (process.platform === "darwin") {
+      const script = `on run argv
+try
+  ${defaultPath ? "set defaultArg to item 1 of argv" : ""}
+  return POSIX path of (choose folder with prompt "Select Export Folder" ${defaultPath ? "default location POSIX file defaultArg" : ""})
+on error
+  return ""
+end try
+end run`;
+      const args = ["-e", script];
+      if (defaultPath) args.push(defaultPath);
+      const child = spawn("osascript", args, { stdio: ["ignore", "pipe", "ignore"] });
+      let output = "";
+      child.stdout.on("data", (data) => {
+        output += data.toString();
+      });
+      child.on("close", () => {
+        const path = output.trim();
+        resolve2(path || null);
+      });
+    } else {
+      resolve2(null);
     }
-  };
-  const getSpeakerName = async (speakerId) => {
-    const list = await getSpeakerList();
-    const found = list?.find((s) => s.id === speakerId);
-    return found ? `${found.characterName}\uFF08${found.name}\uFF09` : `Speaker ${speakerId}`;
-  };
+  });
+}
+/**
+ * Test whether a string is non-empty and consists solely of characters in the
+ * range ァ-ヶ plus the long-vowel mark ー.
+ *
+ * @param {string} input - Text to check.
+ * @returns {boolean} True when every character matches; false for the empty string.
+ */
+function isKatakana(input) {
+  const katakanaOnly = /^[ァ-ヶー]+$/;
+  return katakanaOnly.test(input);
+}
+/**
+ * Estimate an accent type value from a pronunciation string.
+ *
+ * Counts the characters of `pronunciation`, ignoring the long-vowel mark (ー)
+ * and the small kana ャ ュ ョ ァ ィ ゥ ェ ォ ヮ, and returns that count with a
+ * minimum of 1.
+ *
+ * @param {string} pronunciation - Pronunciation text (presumably katakana; verify against callers).
+ * @returns {number} The counted value, never less than 1.
+ */
+function estimateAccentType(pronunciation) {
+  const smallKana = /* @__PURE__ */ new Set(["\u30E3", "\u30E5", "\u30E7", "\u30A1", "\u30A3", "\u30A5", "\u30A7", "\u30A9", "\u30EE"]);
+  const counted = [...pronunciation].filter((char) => char !== "\u30FC" && !smallKana.has(char));
+  return Math.max(1, counted.length);
+}
+/**
+ * Flatten a user dictionary keyed by word UUID into a list of word records.
+ *
+ * Each record carries the key as `wordUuid` and renames the entry's
+ * `accent_type` field to `accentType`; `surface`, `pronunciation` and
+ * `priority` are copied as they are.
+ *
+ * @param {Object<string, {surface: *, pronunciation: *, accent_type: *, priority: *}>} dictionary
+ *   Dictionary object keyed by word UUID.
+ * @returns {Array<{wordUuid: string, surface: *, pronunciation: *, accentType: *, priority: *}>}
+ *   One record per dictionary entry, in the dictionary's own key order.
+ */
+function normalizeUserDictionaryWords(dictionary) {
+  const words = [];
+  for (const [wordUuid, entry] of Object.entries(dictionary)) {
+    const { surface, pronunciation, accent_type: accentType, priority } = entry;
+    words.push({ wordUuid, surface, pronunciation, accentType, priority });
+  }
+  return words;
+}
+// Schema for a single mora: its text, an optional/nullable consonant and
+// consonant length, a vowel with its length, and a pitch value.
+var moraSchema = z.object({
+  text: z.string(),
+  consonant: z.string().nullable().optional(),
+  consonant_length: z.number().nullable().optional(),
+  vowel: z.string(),
+  vowel_length: z.number(),
+  pitch: z.number()
+});
+// Schema for one accent phrase: a list of moras, an integer accent value, and
+// optional/nullable pause mora and interrogative flag.
+var accentPhraseSchema = z.object({
+  moras: z.array(moraSchema),
+  accent: z.number().int(),
+  pause_mora: moraSchema.nullable().optional(),
+  is_interrogative: z.boolean().nullable().optional()
+});
+// Schema for a full audio query: the accent phrases plus the numeric synthesis
+// parameters and output settings. `kana` and `pauseLengthScale` are optional.
+// NOTE(review): field names look like the VOICEVOX engine's audio query payload — confirm.
+var audioQuerySchema = z.object({
+  accent_phrases: z.array(accentPhraseSchema),
+  speedScale: z.number(),
+  pitchScale: z.number(),
+  intonationScale: z.number(),
+  volumeScale: z.number(),
+  prePhonemeLength: z.number(),
+  postPhonemeLength: z.number(),
+  outputSamplingRate: z.number(),
+  outputStereo: z.boolean(),
+  kana: z.string().optional(),
+  pauseLengthScale: z.number().optional()
+});
+function registerPlayerUITools(deps, shared) {
+  const { server: server2, disabledTools, config: config2 } = deps;
+  const {
+    playerVoicevoxApi,
+    playerResourceUri: playerResourceUri2,
+    synthesizeWithCache,
+    setSessionState: setSessionState2,
+    getSessionState: getSessionState2,
+    getSpeakerList
+  } = shared;
   const speakerIconCache = /* @__PURE__ */ new Map();
-  registerAppResource(
-    server2,
-    "VOICEVOX Player",
-    playerResourceUri,
-    {
-      description: "Audio player UI for VOICEVOX TTS",
-      mimeType: RESOURCE_MIME_TYPE
-    },
-    async () => ({
-      contents: [{ uri: playerResourceUri, mimeType: RESOURCE_MIME_TYPE, text: playerHtml }]
-    })
-  );
-  registerAppToolIfEnabled(
-    server2,
-    disabledTools,
-    "speak_player",
-    {
-      title: "Speak Player",
-      description: 'Convert text to speech and display an audio player in the UI. Audio is played in the browser, not on the server. Does not use the playback queue. Supports multi-speaker dialogue: prefix each line with speaker ID like "1:Hello\\n2:World".',
-      inputSchema: {
-        text: z.string().describe(
-          'Text to convert to speech. Supports multi-speaker dialogue format with speaker ID prefix per line: "1:Hello\\n2:World". Each line is synthesized with the specified speaker and played sequentially.'
-        ),
-        speaker: z.number().optional().describe("Speaker ID (optional)"),
-        speedScale: z.number().optional().describe("Playback speed (optional, default from environment)"),
-        autoPlay: z.boolean().optional().describe("Auto-play audio when loaded (default: true)")
-      },
-      annotations: {
-        readOnlyHint: true,
-        destructiveHint: false,
-        idempotentHint: false,
-        openWorldHint: true
-      },
-      _meta: { ui: { resourceUri: playerResourceUri } }
-    },
-    async ({
-      text,
-      speaker,
-      speedScale,
-      autoPlay
-    }, extra) => {
-      try {
-        const effectiveSpeaker = getEffectiveSpeaker(speaker, extra.sessionId) ?? config2.defaultSpeaker;
-        const speed = speedScale ?? config2.defaultSpeedScale;
-        const segments = parseStringInput(text);
-        const firstSegment = segments[0];
-        if (!firstSegment) {
-          throw new Error("\u30C6\u30AD\u30B9\u30C8\u304C\u7A7A\u3067\u3059");
-        }
-        const speakerId = firstSegment.speaker ?? effectiveSpeaker;
-        const speakerName = await getSpeakerName(speakerId);
-        const fullText = segments.map((s) => s.text).join(" ");
-        return {
-          content: [
-            {
-              type: "text",
-              text: `Voicevox Player started: ${speakerName} \u300C${fullText.slice(0, 50)}${fullText.length > 50 ? "..." : ""}\u300D`
-            }
-          ],
-          _meta: { viewUUID: randomUUID2() }
-        };
-      } catch (error) {
-        return createErrorResponse(error);
-      }
-    }
-  );
   registerAppToolIfEnabled(
     server2,
     disabledTools,
@@ -3357,7 +3568,7 @@ function registerPlayerTools(deps) {
       description: "Get speaker list for the player UI. This tool is only callable from the app UI.",
       _meta: {
         ui: {
-          resourceUri: playerResourceUri,
+          resourceUri: playerResourceUri2,
           visibility: ["app"]
         }
       }
@@ -3383,7 +3594,7 @@ function registerPlayerTools(deps) {
       },
       _meta: {
         ui: {
-          resourceUri: playerResourceUri,
+          resourceUri: playerResourceUri2,
           visibility: ["app"]
         }
       }
@@ -3406,87 +3617,234 @@ function registerPlayerTools(deps) {
       }
     }
   );
+  registerAppToolIfEnabled(
+    server2,
+    disabledTools,
+    "_save_player_state_for_player",
+    {
+      title: "Save Player State (Player)",
+      description: "Persist current player segments to server state without synthesizing audio. Only callable from the app UI.",
+      inputSchema: {
+        viewUUID: z.string().optional().describe("Player instance ID to associate this state with"),
+        segments: z.array(
+          z.object({
+            text: z.string(),
+            speaker: z.number(),
+            speedScale: z.number().optional(),
+            intonationScale: z.number().optional(),
+            volumeScale: z.number().optional(),
+            prePhonemeLength: z.number().optional(),
+            postPhonemeLength: z.number().optional(),
+            pauseLengthScale: z.number().optional(),
+            audioQuery: audioQuerySchema.optional(),
+            accentPhrases: z.array(accentPhraseSchema).optional()
+          })
+        ).describe("Full current player segment list to persist")
+      },
+      _meta: {
+        ui: {
+          resourceUri: playerResourceUri2,
+          visibility: ["app"]
+        }
+      }
+    },
+    async ({
+      viewUUID,
+      segments
+    }, extra) => {
+      try {
+        if (!segments || segments.length === 0) {
+          throw new Error("segments is required");
+        }
+        const stateKey = viewUUID ?? extra?.sessionId ?? "global";
+        const effectiveDefaultSpeaker = config2.defaultSpeaker;
+        const effectiveSpeed = config2.defaultSpeedScale;
+        const list = await getSpeakerList();
+        const speakerNameMap = /* @__PURE__ */ new Map();
+        for (const speakerId of [...new Set(segments.map((seg) => seg.speaker ?? effectiveDefaultSpeaker))]) {
+          const found = list.find((entry) => entry.id === speakerId);
+          speakerNameMap.set(speakerId, found ? `${found.characterName}\uFF08${found.name}\uFF09` : `Speaker ${speakerId}`);
+        }
+        setSessionState2(stateKey, {
+          segments: segments.map((seg) => {
+            const speakerId = seg.speaker ?? effectiveDefaultSpeaker;
+            return {
+              text: seg.text,
+              speaker: speakerId,
+              speakerName: speakerNameMap.get(speakerId) ?? `Speaker ${speakerId}`,
+              kana: seg.audioQuery?.kana,
+              speedScale: seg.speedScale ?? effectiveSpeed,
+              intonationScale: seg.intonationScale,
+              volumeScale: seg.volumeScale,
+              prePhonemeLength: seg.prePhonemeLength,
+              postPhonemeLength: seg.postPhonemeLength,
+              pauseLengthScale: seg.pauseLengthScale,
+              audioQuery: seg.audioQuery,
+              accentPhrases: seg.audioQuery?.accent_phrases ?? seg.accentPhrases
+            };
+          }),
+          updatedAt: Date.now()
+        });
+        return {
+          content: [{ type: "text", text: JSON.stringify({ ok: true, viewUUID: stateKey, count: segments.length }) }]
+        };
+      } catch (error) {
+        return createErrorResponse(error);
+      }
+    }
+  );
   registerAppToolIfEnabled(
     server2,
     disabledTools,
     "_resynthesize_for_player",
     {
       title: "Resynthesize (Player)",
-      description: "Re-synthesize audio with a different speaker. Only callable from the app UI.",
+      description: "Re-synthesize audio with a different speaker or updated parameters. Only callable from the app UI.",
       inputSchema: {
+        viewUUID: z.string().optional().describe("Player instance ID to associate this synthesis with"),
         text: z.string().describe("Text to re-synthesize"),
         speaker: z.number().optional().describe("Speaker ID (uses server default if omitted)"),
+        audioQuery: audioQuerySchema.optional().describe("Audio query to synthesize from (preferred over text parameters)"),
         speedScale: z.number().optional().describe("Playback speed (uses server default if omitted)"),
+        intonationScale: z.number().optional().describe("Intonation scale \u6291\u63DA (optional)"),
+        volumeScale: z.number().optional().describe("Volume scale \u97F3\u91CF (optional)"),
+        prePhonemeLength: z.number().optional().describe("Pre-phoneme silence length in seconds (optional)"),
+        postPhonemeLength: z.number().optional().describe("Post-phoneme silence length in seconds (optional)"),
+        pauseLengthScale: z.number().optional().describe("Pause length scale between phrases \u9593\u306E\u9577\u3055 (optional)"),
+        accentPhrases: z.array(accentPhraseSchema).optional().describe("Accent phrases override"),
         autoPlay: z.boolean().optional().describe("Auto-play audio when loaded (uses server config if omitted)"),
+        segmentIndex: z.number().int().min(0).optional().describe("Segment index for single-segment state update"),
+        persistState: z.boolean().optional().describe("Persist player state to server store (default: true)"),
         segments: z.array(
           z.object({
             text: z.string(),
-            speaker: z.number()
+            speaker: z.number(),
+            speedScale: z.number().optional(),
+            intonationScale: z.number().optional(),
+            volumeScale: z.number().optional(),
+            prePhonemeLength: z.number().optional(),
+            postPhonemeLength: z.number().optional(),
+            pauseLengthScale: z.number().optional(),
+            audioQuery: audioQuerySchema.optional(),
+            accentPhrases: z.array(accentPhraseSchema).optional()
           })
-        ).optional().describe("Multi-speaker segments to synthesize individually")
+        ).optional().describe("All current player segments \u2014 pass the full list to update server state")
       },
       _meta: {
         ui: {
-          resourceUri: playerResourceUri,
+          resourceUri: playerResourceUri2,
           visibility: ["app"]
         }
       }
     },
     async ({
+      viewUUID,
       text,
       speaker,
+      audioQuery,
       speedScale,
+      intonationScale,
+      volumeScale,
+      prePhonemeLength,
+      postPhonemeLength,
+      pauseLengthScale,
+      accentPhrases,
       autoPlay,
+      segmentIndex,
+      persistState,
       segments
-    }) => {
+    }, extra) => {
       try {
         const effectiveSpeed = speedScale ?? config2.defaultSpeedScale;
         const effectiveAutoPlay = autoPlay ?? config2.autoPlay;
+        const shouldPersistState = persistState !== false;
         const effectiveDefaultSpeaker = speaker ?? config2.defaultSpeaker;
-        if (segments && segments.length > 0) {
-          const results = await Promise.all(
-            segments.map(async (seg) => {
-              const segSpeaker = seg.speaker ?? effectiveDefaultSpeaker;
-              const audioQuery2 = await playerVoicevoxApi.generateQuery(seg.text, segSpeaker);
-              audioQuery2.speedScale = effectiveSpeed;
-              const audioData2 = await playerVoicevoxApi.synthesize(audioQuery2, segSpeaker);
-              const base64Audio2 = Buffer.from(audioData2).toString("base64");
-              const segSpeakerName = await getSpeakerName(segSpeaker);
+        const stateKey = viewUUID ?? extra?.sessionId ?? "global";
+        if (segments && segments.length > 0 && shouldPersistState) {
+          const list = await getSpeakerList();
+          const speakerNameMap = /* @__PURE__ */ new Map();
+          for (const speakerId of [...new Set(segments.map((seg) => seg.speaker ?? effectiveDefaultSpeaker))]) {
+            const found = list.find((entry) => entry.id === speakerId);
+            speakerNameMap.set(speakerId, found ? `${found.characterName}\uFF08${found.name}\uFF09` : `Speaker ${speakerId}`);
+          }
+          setSessionState2(stateKey, {
+            segments: segments.map((seg) => {
+              const speakerId = seg.speaker ?? effectiveDefaultSpeaker;
               return {
-                audioBase64: base64Audio2,
                 text: seg.text,
-                speaker: segSpeaker,
-                speakerName: segSpeakerName
+                speaker: speakerId,
+                speakerName: speakerNameMap.get(speakerId) ?? `Speaker ${speakerId}`,
+                kana: seg.audioQuery?.kana,
+                speedScale: seg.speedScale ?? effectiveSpeed,
+                intonationScale: seg.intonationScale,
+                volumeScale: seg.volumeScale,
+                prePhonemeLength: seg.prePhonemeLength,
+                postPhonemeLength: seg.postPhonemeLength,
+                pauseLengthScale: seg.pauseLengthScale,
+                audioQuery: seg.audioQuery,
+                accentPhrases: seg.audioQuery?.accent_phrases ?? seg.accentPhrases
               };
-            })
-          );
-          return {
-            content: [
-              {
-                type: "text",
-                text: JSON.stringify({
-                  segments: results,
-                  autoPlay: effectiveAutoPlay
-                })
-              }
-            ]
-          };
+            }),
+            updatedAt: Date.now()
+          });
+        }
+        const result = await synthesizeWithCache({
+          text,
+          speaker: effectiveDefaultSpeaker,
+          audioQuery,
+          speedScale: effectiveSpeed,
+          intonationScale,
+          volumeScale,
+          prePhonemeLength,
+          postPhonemeLength,
+          pauseLengthScale,
+          accentPhrases
+        });
+        if (shouldPersistState && segmentIndex !== void 0) {
+          const prev = getSessionState2(stateKey);
+          if (prev?.segments[segmentIndex]) {
+            const nextSegments = prev.segments.slice();
+            nextSegments[segmentIndex] = {
+              ...nextSegments[segmentIndex],
+              text: result.text,
+              speaker: result.speaker,
+              speakerName: result.speakerName,
+              kana: result.kana,
+              audioQuery: result.audioQuery,
+              accentPhrases: result.accentPhrases,
+              speedScale: result.speedScale,
+              intonationScale: result.intonationScale,
+              volumeScale: result.volumeScale,
+              prePhonemeLength: result.prePhonemeLength,
+              postPhonemeLength: result.postPhonemeLength,
+              pauseLengthScale: result.pauseLengthScale
+            };
+            setSessionState2(stateKey, {
+              segments: nextSegments,
+              updatedAt: Date.now()
+            });
+          }
         }
-        const audioQuery = await playerVoicevoxApi.generateQuery(text, effectiveDefaultSpeaker);
-        audioQuery.speedScale = effectiveSpeed;
-        const audioData = await playerVoicevoxApi.synthesize(audioQuery, effectiveDefaultSpeaker);
-        const base64Audio = Buffer.from(audioData).toString("base64");
-        const speakerName = await getSpeakerName(effectiveDefaultSpeaker);
         return {
           content: [
             {
               type: "text",
               text: JSON.stringify({
-                audioBase64: base64Audio,
-                text,
-                speaker: effectiveDefaultSpeaker,
-                speakerName,
-                autoPlay: effectiveAutoPlay
+                audioBase64: result.audioBase64,
+                text: result.text,
+                speaker: result.speaker,
+                speakerName: result.speakerName,
+                kana: result.kana,
+                audioQuery: result.audioQuery,
+                accentPhrases: result.accentPhrases,
+                speedScale: result.speedScale,
+                intonationScale: result.intonationScale,
+                volumeScale: result.volumeScale,
+                prePhonemeLength: result.prePhonemeLength,
+                postPhonemeLength: result.postPhonemeLength,
+                pauseLengthScale: result.pauseLengthScale,
+                autoPlay: effectiveAutoPlay,
+                viewUUID
               })
             }
           ]
@@ -3496,53 +3854,1172 @@ function registerPlayerTools(deps) {
       }
     }
   );
-}
-// src/tools/speak.ts
-import * as z2 from "zod/v4";
-function buildSpeakInputSchema(restrictions) {
-  const schema = {
-    text: z2.string().describe(
-      'Text split by line breaks (\\n). IMPORTANT: Each line = one speech unit (processed and played separately). Keep the FIRST LINE SHORT for quick playback start - audio begins as soon as the first line is synthesized. Example: "Hi!\\nThis is a longer explanation that follows." Optional speaker prefix per line: "1:Hello\\n2:World"'
-    ),
-    query: z2.string().optional().describe("Voice synthesis query"),
-    speaker: z2.number().optional().describe("Default speaker ID (optional)"),
-    speedScale: z2.number().optional().describe("Playback speed (optional, default from environment)")
-  };
-  if (!restrictions.immediate) {
-    schema.immediate = z2.boolean().optional().describe(
-      "If true, stops current playback and plays new audio immediately. If false, waits for current playback to finish. Default depends on environment variable."
-    );
-  }
-  if (!restrictions.waitForStart) {
-    schema.waitForStart = z2.boolean().optional().describe("Wait for playback to start (optional, default: false)");
-  }
-  if (!restrictions.waitForEnd) {
-    schema.waitForEnd = z2.boolean().optional().describe("Wait for playback to end (optional, default: false)");
-  }
-  return schema;
-}
-function registerSpeakTool(deps) {
-  const { server: server2, voicevoxClient, config: config2, disabledTools, restrictions } = deps;
-  registerToolIfEnabled(
+  registerAppToolIfEnabled(
     server2,
     disabledTools,
-    "speak",
+    "_get_user_dictionary_for_player",
     {
-      title: "Speak",
-      description: "Convert text to speech and play it. Text is split by line breaks (\\n) into separate speech units. Each line is processed as an independent audio segment.",
-      inputSchema: buildSpeakInputSchema(restrictions),
-      annotations: {
-        readOnlyHint: false,
-        destructiveHint: false,
-        idempotentHint: false,
-        openWorldHint: true
+      title: "Get User Dictionary (Player)",
+      description: "Get VOICEVOX user dictionary words for the dictionary manager UI.",
+      _meta: {
+        ui: {
+          resourceUri: playerResourceUri2,
+          visibility: ["app"]
+        }
       }
     },
-    async ({
-      text,
-      speaker,
-      query,
+    async () => {
+      try {
+        const dictionary = await playerVoicevoxApi.getUserDictionary();
+        return {
+          content: [{ type: "text", text: JSON.stringify({ words: normalizeUserDictionaryWords(dictionary) }) }]
+        };
+      } catch (error) {
+        return createErrorResponse(error);
+      }
+    }
+  );
+  registerAppToolIfEnabled(
+    server2,
+    disabledTools,
+    "_add_user_dictionary_word_for_player",
+    {
+      title: "Add User Dictionary Word (Player)",
+      description: "Add a word to VOICEVOX user dictionary.",
+      inputSchema: {
+        surface: z.string().describe("Word surface form"),
+        pronunciation: z.string().describe("Katakana reading"),
+        priority: z.number().int().min(0).max(10).optional().describe("Priority 0-10")
+      },
+      _meta: {
+        ui: {
+          resourceUri: playerResourceUri2,
+          visibility: ["app"]
+        }
+      }
+    },
+    async ({
+      surface,
+      pronunciation,
+      priority
+    }) => {
+      try {
+        const normalizedSurface = surface.trim();
+        const normalizedPronunciation = pronunciation.trim();
+        if (!normalizedSurface) throw new Error("surface is required");
+        if (!normalizedPronunciation) throw new Error("pronunciation is required");
+        if (!isKatakana(normalizedPronunciation)) throw new Error("pronunciation must be Katakana");
+        await playerVoicevoxApi.addUserDictionaryWord({
+          surface: normalizedSurface,
+          pronunciation: normalizedPronunciation,
+          accentType: estimateAccentType(normalizedPronunciation),
+          priority: priority ?? 5
+        });
+        const dictionary = await playerVoicevoxApi.getUserDictionary();
+        return {
+          content: [{ type: "text", text: JSON.stringify({ words: normalizeUserDictionaryWords(dictionary) }) }]
+        };
+      } catch (error) {
+        return createErrorResponse(error);
+      }
+    }
+  );
+  registerAppToolIfEnabled(
+    server2,
+    disabledTools,
+    "_update_user_dictionary_word_for_player",
+    {
+      title: "Update User Dictionary Word (Player)",
+      description: "Update a VOICEVOX user dictionary word.",
+      inputSchema: {
+        wordUuid: z.string().describe("Dictionary word UUID"),
+        surface: z.string().describe("Word surface form"),
+        pronunciation: z.string().describe("Katakana reading"),
+        priority: z.number().int().min(0).max(10).optional().describe("Priority 0-10")
+      },
+      _meta: {
+        ui: {
+          resourceUri: playerResourceUri2,
+          visibility: ["app"]
+        }
+      }
+    },
+    async ({
+      wordUuid,
+      surface,
+      pronunciation,
+      priority
+    }) => {
+      try {
+        const normalizedSurface = surface.trim();
+        const normalizedPronunciation = pronunciation.trim();
+        if (!wordUuid.trim()) throw new Error("wordUuid is required");
+        if (!normalizedSurface) throw new Error("surface is required");
+        if (!normalizedPronunciation) throw new Error("pronunciation is required");
+        if (!isKatakana(normalizedPronunciation)) throw new Error("pronunciation must be Katakana");
+        await playerVoicevoxApi.updateUserDictionaryWord({
+          wordUuid: wordUuid.trim(),
+          surface: normalizedSurface,
+          pronunciation: normalizedPronunciation,
+          accentType: estimateAccentType(normalizedPronunciation),
+          priority: priority ?? 5
+        });
+        const dictionary = await playerVoicevoxApi.getUserDictionary();
+        return {
+          content: [{ type: "text", text: JSON.stringify({ words: normalizeUserDictionaryWords(dictionary) }) }]
+        };
+      } catch (error) {
+        return createErrorResponse(error);
+      }
+    }
+  );
+  registerAppToolIfEnabled(
+    server2,
+    disabledTools,
+    "_delete_user_dictionary_word_for_player",
+    {
+      title: "Delete User Dictionary Word (Player)",
+      description: "Delete a VOICEVOX user dictionary word.",
+      inputSchema: {
+        wordUuid: z.string().describe("Dictionary word UUID")
+      },
+      _meta: {
+        ui: {
+          resourceUri: playerResourceUri2,
+          visibility: ["app"]
+        }
+      }
+    },
+    async ({ wordUuid }) => {
+      try {
+        const normalizedWordUuid = wordUuid.trim();
+        if (!normalizedWordUuid) throw new Error("wordUuid is required");
+        await playerVoicevoxApi.deleteUserDictionaryWord(normalizedWordUuid);
+        const dictionary = await playerVoicevoxApi.getUserDictionary();
+        return {
+          content: [{ type: "text", text: JSON.stringify({ words: normalizeUserDictionaryWords(dictionary) }) }]
+        };
+      } catch (error) {
+        return createErrorResponse(error);
+      }
+    }
+  );
+  registerAppToolIfEnabled(
+    server2,
+    disabledTools,
+    "_preview_dictionary_word_for_player",
+    {
+      title: "Preview Dictionary Word (Player)",
+      description: "Preview pronunciation with a random speaker.",
+      inputSchema: {
+        text: z.string().describe("Text to preview")
+      },
+      _meta: {
+        ui: {
+          resourceUri: playerResourceUri2,
+          visibility: ["app"]
+        }
+      }
+    },
+    async ({ text }) => {
+      try {
+        const normalizedText = text.trim();
+        if (!normalizedText) throw new Error("text is required");
+        const speakers = await getSpeakerList();
+        if (speakers.length === 0) throw new Error("No speakers available");
+        const randomSpeaker = speakers[Math.floor(Math.random() * speakers.length)];
+        const result = await synthesizeWithCache({
+          text: normalizedText,
+          speaker: randomSpeaker.id,
+          speedScale: config2.defaultSpeedScale
+        });
+        return {
+          content: [
+            {
+              type: "text",
+              text: JSON.stringify({
+                audioBase64: result.audioBase64,
+                speaker: result.speaker,
+                speakerName: result.speakerName,
+                kana: result.kana
+              })
+            }
+          ]
+        };
+      } catch (error) {
+        return createErrorResponse(error);
+      }
+    }
+  );
+  registerAppToolIfEnabled(
+    server2,
+    disabledTools,
+    "_get_export_capability_for_player",
+    {
+      title: "Get Export Capability (Player)",
+      description: "Return whether track export + folder open is available for player UI.",
+      _meta: {
+        ui: {
+          resourceUri: playerResourceUri2,
+          visibility: ["app"]
+        }
+      }
+    },
+    async () => {
+      const canExport = config2.playerExportEnabled;
+      const canChooseDirectory = canExport && canChooseDirectoryDialog();
+      const canOpenDirectory = canExport && canOpenExplorer();
+      return {
+        content: [
+          {
+            type: "text",
+            text: JSON.stringify({
+              available: canExport,
+              canChooseDirectory,
+              canOpenDirectory,
+              defaultOutputDir: config2.playerExportDir
+            })
+          }
+        ]
+      };
+    }
+  );
+  registerAppToolIfEnabled(
+    server2,
+    disabledTools,
+    "_select_directory_for_player",
+    {
+      title: "Select Export Directory (Player)",
+      description: "Open a native OS directory picker dialog, to be called from the player UI.",
+      inputSchema: {
+        defaultPath: z.string().optional().describe("Default directory path to show")
+      },
+      _meta: {
+        ui: {
+          resourceUri: playerResourceUri2,
+          visibility: ["app"]
+        }
+      }
+    },
+    async ({ defaultPath }) => {
+      try {
+        const selected = await showDirectoryPicker(defaultPath || config2.playerExportDir);
+        return {
+          content: [
+            {
+              type: "text",
+              text: JSON.stringify({ path: selected })
+            }
+          ]
+        };
+      } catch (error) {
+        return createErrorResponse(error);
+      }
+    }
+  );
+  registerAppToolIfEnabled(
+    server2,
+    disabledTools,
+    "_export_tracks_for_player",
+    {
+      title: "Export Tracks (Player)",
+      description: "Save player tracks as wav files and open the target folder in file explorer.",
+      inputSchema: {
+        outputDir: z.string().optional().describe("Output directory path (optional)"),
+        segments: z.array(
+          z.object({
+            audioBase64: z.string().describe("WAV data in base64"),
+            text: z.string().describe("Segment text"),
+            speaker: z.number().describe("Speaker ID"),
+            speakerName: z.string().describe("Speaker display name")
+          })
+        ).describe("Tracks to export")
+      },
+      _meta: {
+        ui: {
+          resourceUri: playerResourceUri2,
+          visibility: ["app"]
+        }
+      }
+    },
+    async ({
+      outputDir,
+      segments
+    }) => {
+      try {
+        if (!config2.playerExportEnabled) {
+          throw new Error("Track export is disabled by VOICEVOX_PLAYER_EXPORT_ENABLED=false");
+        }
+        if (!segments || segments.length === 0) {
+          throw new Error("No tracks to export");
+        }
+        const rawTarget = outputDir?.trim() || config2.playerExportDir;
+        const targetDir = resolve(rawTarget);
+        const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
+        const sessionDir = join2(targetDir, `voicevox-${timestamp}`);
+        await mkdir(sessionDir, { recursive: true });
+        const files = [];
+        for (let i = 0; i < segments.length; i++) {
+          const seg = segments[i];
+          const indexPart = String(i + 1).padStart(2, "0");
+          const speakerPart = sanitizeFilePart(seg.speakerName || `speaker-${seg.speaker}`, `speaker-${seg.speaker}`);
+          const textPart = sanitizeFilePart(seg.text, `segment-${i + 1}`);
+          const fileName = `${indexPart}-${speakerPart}-${textPart}.wav`;
+          const filePath = join2(sessionDir, fileName);
+          await writeFile(filePath, Buffer.from(seg.audioBase64, "base64"));
+          files.push(filePath);
+        }
+        let warning;
+        let openedDirectory = false;
+        if (canOpenExplorer()) {
+          if (process.platform === "win32") {
+            try {
+              const child = spawn("explorer.exe", [sessionDir], { detached: true, stdio: "ignore" });
+              child.unref();
+              openedDirectory = true;
+            } catch (e) {
+              console.error("Failed to open explorer:", e);
+              warning = `WAV\u30D5\u30A1\u30A4\u30EB\u306F\u4FDD\u5B58\u3055\u308C\u307E\u3057\u305F\u304C\u3001\u30D5\u30A9\u30EB\u30C0\u3092\u958B\u3051\u307E\u305B\u3093\u3067\u3057\u305F: ${sessionDir}`;
+            }
+          } else if (openDirectoryInExplorer(sessionDir)) {
+            openedDirectory = true;
+          } else {
+            warning = `WAV\u30D5\u30A1\u30A4\u30EB\u306F\u4FDD\u5B58\u3055\u308C\u307E\u3057\u305F\u304C\u3001\u30D5\u30A9\u30EB\u30C0\u3092\u958B\u3051\u307E\u305B\u3093\u3067\u3057\u305F: ${sessionDir}`;
+          }
+        } else {
+          warning = `WAV\u30D5\u30A1\u30A4\u30EB\u306F\u4FDD\u5B58\u3055\u308C\u307E\u3057\u305F\u3002\u73FE\u5728\u306E\u74B0\u5883\u3067\u306F\u30D5\u30A9\u30EB\u30C0\u81EA\u52D5\u30AA\u30FC\u30D7\u30F3\u306B\u5BFE\u5FDC\u3057\u3066\u3044\u307E\u305B\u3093: ${sessionDir}`;
+        }
+        return {
+          content: [
+            {
+              type: "text",
+              text: JSON.stringify({
+                ok: true,
+                outputDir: sessionDir,
+                count: files.length,
+                files,
+                openedDirectory,
+                warning
+              })
+            }
+          ]
+        };
+      } catch (error) {
+        return createErrorResponse(error);
+      }
+    }
+  );
+}
+// src/tools/player.ts
+var __dirname = typeof import.meta.dirname === "string" ? import.meta.dirname : dirname2(fileURLToPath(import.meta.url));
+var playerHtml;
+try {
+  const htmlPath = join3(__dirname, "mcp-app.html");
+  playerHtml = readFileSync(htmlPath, "utf-8");
+} catch {
+  try {
+    const htmlPath = join3(__dirname, "..", "..", "node_modules", "@kajidog", "player-ui", "dist", "mcp-app.html");
+    playerHtml = readFileSync(htmlPath, "utf-8");
+  } catch {
+    console.error("Warning: player-ui HTML not found. Please build @kajidog/player-ui first.");
+    playerHtml = "<html><body><p>Player UI not available. Please build @kajidog/player-ui.</p></body></html>";
+  }
+}
+var playerResourceUri = "ui://speak-player/player.html"; // URI under which the player UI resource is registered
+var speakerCache = null; // flattened speaker/style list; filled by the first successful speaker fetch
+var playerStorageInitialized = false; // guard so initializePlayerStorage only runs once
+var audioCacheDir = join3(process.cwd(), ".voicevox-player-cache"); // default cache directory; initializePlayerStorage may replace it
+var audioCacheMem = /* @__PURE__ */ new Map(); // cache key -> base64 audio held in memory
+var AUDIO_CACHE_FILE_PATTERN = /^[a-f0-9]{64}\.txt$/; // cache file names: 64 lowercase hex chars + ".txt"
+var DEFAULT_AUDIO_CACHE_TTL_DAYS = 30; // used when the configured TTL is not a finite number
+var DEFAULT_AUDIO_CACHE_MAX_MB = 512; // used when the configured size limit is not a finite number
+var AUDIO_CACHE_CLEANUP_EVERY_WRITES = 20; // a non-forced cleanup runs once this many cache writes accumulate
+var audioCacheEnabledFlag = true; // the next values are defaults; initializePlayerStorage overwrites them from config
+var audioCacheTtlDays = DEFAULT_AUDIO_CACHE_TTL_DAYS;
+var audioCacheMaxMb = DEFAULT_AUDIO_CACHE_MAX_MB;
+var isAudioDiskCacheEnabled = audioCacheEnabledFlag && audioCacheTtlDays !== 0 && audioCacheMaxMb !== 0; // a TTL or size limit of 0 turns the disk cache off
+var audioCacheTtlMs = audioCacheTtlDays < 0 ? null : audioCacheTtlDays * 24 * 60 * 60 * 1e3; // null for a negative TTL (presumably "never expires" - confirm in planAudioCacheCleanup)
+var audioCacheMaxBytes = audioCacheMaxMb < 0 ? null : audioCacheMaxMb * 1024 * 1024; // null for a negative limit (presumably "no size cap" - confirm in planAudioCacheCleanup)
+var isAudioCacheCleanupRunning = false; // true while a cleanup pass is in flight
+var pendingAudioCacheCleanup = false; // set when a cleanup is requested during a running pass
+var writesSinceLastAudioCleanup = 0; // cache writes counted since the last scheduled cleanup
+async function cleanupAudioCacheFiles() {
+  if (!isAudioDiskCacheEnabled) return;
+  try {
+    const entries = await readdir(audioCacheDir, { withFileTypes: true });
+    const now = Date.now();
+    const files = [];
+    for (const entry of entries) {
+      if (!entry.isFile() || !AUDIO_CACHE_FILE_PATTERN.test(entry.name)) continue;
+      const filePath = join3(audioCacheDir, entry.name);
+      let fileStat;
+      try {
+        fileStat = await stat(filePath);
+      } catch {
+        continue;
+      }
+      files.push({ name: entry.name, path: filePath, size: fileStat.size, mtimeMs: fileStat.mtimeMs });
+    }
+    const toDelete = planAudioCacheCleanup({
+      entries: files,
+      now,
+      ttlMs: audioCacheTtlMs,
+      maxBytes: audioCacheMaxBytes
+    });
+    if (toDelete.size === 0) return;
+    for (const path of toDelete) {
+      try {
+        await unlink(path);
+      } catch {
+      }
+      const fileName = basename(path);
+      if (fileName.endsWith(".txt")) {
+        audioCacheMem.delete(fileName.slice(0, -4));
+      }
+    }
+  } catch (error) {
+    console.warn("Warning: failed to cleanup VOICEVOX player audio cache:", error);
+  }
+}
+function scheduleAudioCacheCleanup(force = false) {
+  if (!isAudioDiskCacheEnabled) return;
+  if (!force) {
+    writesSinceLastAudioCleanup += 1;
+    if (writesSinceLastAudioCleanup < AUDIO_CACHE_CLEANUP_EVERY_WRITES) return;
+  }
+  writesSinceLastAudioCleanup = 0;
+  if (isAudioCacheCleanupRunning) {
+    pendingAudioCacheCleanup = true;
+    return;
+  }
+  isAudioCacheCleanupRunning = true;
+  void cleanupAudioCacheFiles().catch((error) => console.warn("Warning: failed to cleanup VOICEVOX player audio cache:", error)).finally(() => {
+    isAudioCacheCleanupRunning = false;
+    if (pendingAudioCacheCleanup) {
+      pendingAudioCacheCleanup = false;
+      scheduleAudioCacheCleanup(true);
+    }
+  });
+}
+var playerSessionState = /* @__PURE__ */ new Map(); // state key (viewUUID or session id) -> { segments, updatedAt }
+var MAX_TOOL_CONTENT_BYTES = 1024 * 1024; // 1 MiB; NOTE(review): its use is outside this excerpt
+var DEFAULT_STATE_PAGE_LIMIT = 100; // NOTE(review): its use is outside this excerpt
+var MAX_STATE_PAGE_LIMIT = 1e3; // NOTE(review): its use is outside this excerpt
+var MAX_PERSISTED_STATES = 500; // saveSessionStateToDisk keeps at most this many most recently updated entries
+var MAX_STATE_AGE_MS = 30 * 24 * 60 * 60 * 1e3; // 30 days in ms; older entries are dropped on save and on load
+var stateFilePath = join3(audioCacheDir, "player-state.json"); // default state file; initializePlayerStorage may replace it
+function createAudioCacheKey(input) {
+  const keyInput = input.audioQuery ? JSON.stringify({
+    speaker: input.speaker,
+    text: input.text,
+    audioQuery: input.audioQuery
+  }) : JSON.stringify({
+    speaker: input.speaker,
+    text: input.text,
+    speedScale: Number(input.speedScale.toFixed(4)),
+    intonationScale: input.intonationScale === void 0 ? null : Number(input.intonationScale.toFixed(4)),
+    volumeScale: input.volumeScale === void 0 ? null : Number(input.volumeScale.toFixed(4)),
+    prePhonemeLength: input.prePhonemeLength === void 0 ? null : Number(input.prePhonemeLength.toFixed(4)),
+    postPhonemeLength: input.postPhonemeLength === void 0 ? null : Number(input.postPhonemeLength.toFixed(4)),
+    pauseLengthScale: input.pauseLengthScale === void 0 ? null : Number(input.pauseLengthScale.toFixed(4)),
+    accentPhrases: input.accentPhrases ?? null
+  });
+  return createHash("sha256").update(keyInput).digest("hex");
+}
+function readCachedAudioBase64(cacheKey2) {
+  const inMemory = audioCacheMem.get(cacheKey2);
+  if (inMemory) return inMemory;
+  if (!isAudioDiskCacheEnabled) return null;
+  const filePath = join3(audioCacheDir, `${cacheKey2}.txt`);
+  try {
+    const base64 = readFileSync(filePath, "utf-8").trim();
+    if (base64.length > 0) {
+      audioCacheMem.set(cacheKey2, base64);
+      return base64;
+    }
+  } catch {
+  }
+  return null;
+}
+async function writeCachedAudioBase64(cacheKey2, base64) {
+  audioCacheMem.set(cacheKey2, base64);
+  if (!isAudioDiskCacheEnabled) return;
+  const filePath = join3(audioCacheDir, `${cacheKey2}.txt`);
+  try {
+    await writeFile2(filePath, base64, "utf-8");
+    scheduleAudioCacheCleanup();
+  } catch (error) {
+    console.warn("Warning: failed to write VOICEVOX player cache:", error);
+  }
+}
+async function saveSessionStateToDisk() {
+  try {
+    const now = Date.now();
+    const validEntries = [...playerSessionState.entries()].filter(([, state]) => now - state.updatedAt <= MAX_STATE_AGE_MS).sort((a, b) => b[1].updatedAt - a[1].updatedAt).slice(0, MAX_PERSISTED_STATES);
+    playerSessionState.clear();
+    for (const [key, state] of validEntries) {
+      playerSessionState.set(key, state);
+    }
+    const payload = JSON.stringify({
+      version: 1,
+      savedAt: now,
+      entries: validEntries
+    });
+    const tempPath = `${stateFilePath}.tmp`;
+    await writeFile2(tempPath, payload, "utf-8");
+    await rename(tempPath, stateFilePath);
+  } catch (error) {
+    console.warn("Warning: failed to persist player state:", error);
+  }
+}
+var saveDebounceTimer = null;
+function scheduleStateSave() {
+  if (saveDebounceTimer !== null) clearTimeout(saveDebounceTimer);
+  saveDebounceTimer = setTimeout(() => {
+    saveDebounceTimer = null;
+    saveSessionStateToDisk().catch((e) => console.warn("Warning: failed to persist player state:", e));
+  }, 300);
+}
+function loadSessionStateFromDisk() {
+  try {
+    const raw2 = readFileSync(stateFilePath, "utf-8");
+    const parsed = JSON.parse(raw2);
+    if (!Array.isArray(parsed.entries)) return;
+    const now = Date.now();
+    for (const entry of parsed.entries) {
+      if (!Array.isArray(entry) || entry.length !== 2) continue;
+      const [key, state] = entry;
+      if (!key || typeof key !== "string") continue;
+      if (!state || typeof state.updatedAt !== "number" || !Array.isArray(state.segments)) continue;
+      if (now - state.updatedAt > MAX_STATE_AGE_MS) continue;
+      playerSessionState.set(key, state);
+    }
+  } catch {
+  }
+}
+function setSessionState(key, state) { // records `state` under `key` and queues a debounced save of the session map
+  playerSessionState.set(key, state); // the in-memory map is updated immediately
+  scheduleStateSave(); // the disk write is deferred and debounced by scheduleStateSave
+}
+function getSessionState(viewUUID, sessionId) {
+  if (viewUUID) {
+    const s2 = playerSessionState.get(viewUUID);
+    if (s2) return s2;
+  }
+  const key = sessionId ?? "global";
+  const s = playerSessionState.get(key);
+  if (s) return s;
+  return void 0;
+}
+function initializePlayerStorage(config2) {
+  if (playerStorageInitialized) return;
+  playerStorageInitialized = true;
+  audioCacheDir = config2.playerCacheDir || audioCacheDir;
+  stateFilePath = config2.playerStateFile || join3(audioCacheDir, "player-state.json");
+  audioCacheEnabledFlag = config2.playerAudioCacheEnabled !== false;
+  audioCacheTtlDays = Number.isFinite(config2.playerAudioCacheTtlDays) ? config2.playerAudioCacheTtlDays : DEFAULT_AUDIO_CACHE_TTL_DAYS;
+  audioCacheMaxMb = Number.isFinite(config2.playerAudioCacheMaxMb) ? config2.playerAudioCacheMaxMb : DEFAULT_AUDIO_CACHE_MAX_MB;
+  const cachePolicy = resolveAudioCachePolicy({
+    enabledFlag: audioCacheEnabledFlag,
+    ttlDays: audioCacheTtlDays,
+    maxMb: audioCacheMaxMb
+  });
+  isAudioDiskCacheEnabled = cachePolicy.isDiskCacheEnabled;
+  audioCacheTtlMs = cachePolicy.ttlMs;
+  audioCacheMaxBytes = cachePolicy.maxBytes;
+  try {
+    mkdirSync2(audioCacheDir, { recursive: true });
+    if (isAudioDiskCacheEnabled) {
+      scheduleAudioCacheCleanup(true);
+    }
+  } catch (error) {
+    console.warn("Warning: failed to create VOICEVOX player cache directory:", error);
+  }
+  try {
+    mkdirSync2(dirname2(stateFilePath), { recursive: true });
+  } catch (error) {
+    console.warn("Warning: failed to prepare player state directory:", error);
+  }
+  loadSessionStateFromDisk();
+}
+function registerPlayerTools(deps) {
+  const { server: server2, config: config2, disabledTools } = deps;
+  initializePlayerStorage(config2);
+  const playerVoicevoxApi = new VoicevoxApi(config2.voicevoxUrl);
+  const getSpeakerList = async () => {
+    if (speakerCache) return speakerCache;
+    try {
+      const speakers = await playerVoicevoxApi.getSpeakers();
+      speakerCache = speakers.flatMap(
+        (speaker) => speaker.styles.map((style) => ({
+          id: style.id,
+          name: style.name,
+          characterName: speaker.name,
+          uuid: speaker.speaker_uuid
+        }))
+      );
+      return speakerCache;
+    } catch {
+      return [];
+    }
+  };
+  const getSpeakerName = async (speakerId) => {
+    const list = await getSpeakerList();
+    const found = list?.find((s) => s.id === speakerId);
+    return found ? `${found.characterName}\uFF08${found.name}\uFF09` : `Speaker ${speakerId}`;
+  };
+  const resolveSpeakerNames = async (speakerIds) => {
+    const uniqueSpeakerIds = [...new Set(speakerIds)];
+    const entries = await Promise.all(uniqueSpeakerIds.map(async (id) => [id, await getSpeakerName(id)]));
+    return new Map(entries);
+  };
+  const getUserDictionaryWords = async () => {
+    const dictionary = await playerVoicevoxApi.getUserDictionary();
+    return Object.entries(dictionary).map(([wordUuid, word]) => ({
+      wordUuid,
+      surface: word.surface,
+      pronunciation: word.pronunciation,
+      accentType: word.accent_type,
+      priority: word.priority
+    }));
+  };
+  const synthesizeWithCache = async ({
+    text,
+    speaker,
+    audioQuery,
+    speedScale,
+    intonationScale,
+    volumeScale,
+    prePhonemeLength,
+    postPhonemeLength,
+    pauseLengthScale,
+    accentPhrases
+  }) => {
+    const speakerName = await getSpeakerName(speaker);
+    let effectiveAudioQuery = audioQuery;
+    if (audioQuery && accentPhrases && accentPhrases.length > 0 && audioQuery.accent_phrases?.length > 0) {
+      try {
+        const updated = await playerVoicevoxApi.updateMoraData(audioQuery.accent_phrases, speaker);
+        effectiveAudioQuery = { ...audioQuery, accent_phrases: updated };
+      } catch (e) {
+        console.warn("[synthesizeWithCache] /mora_data \u518D\u8A08\u7B97\u5931\u6557\u3001\u5143\u306E\u30D4\u30C3\u30C1\u5024\u3092\u4F7F\u7528:", e);
+      }
+    }
+    const cacheKey2 = createAudioCacheKey({
+      text,
+      speaker,
+      audioQuery: effectiveAudioQuery,
+      speedScale,
+      intonationScale,
+      volumeScale,
+      prePhonemeLength,
+      postPhonemeLength,
+      pauseLengthScale,
+      accentPhrases
+    });
+    const cachedBase64 = readCachedAudioBase64(cacheKey2);
+    if (cachedBase64) {
+      let cachedQuery = effectiveAudioQuery;
+      if (!cachedQuery) {
+        const generated = await playerVoicevoxApi.generateQuery(text, speaker);
+        if (accentPhrases) generated.accent_phrases = accentPhrases;
+        generated.speedScale = speedScale;
+        if (intonationScale !== void 0) generated.intonationScale = intonationScale;
+        if (volumeScale !== void 0) generated.volumeScale = volumeScale;
+        if (prePhonemeLength !== void 0) generated.prePhonemeLength = prePhonemeLength;
+        if (postPhonemeLength !== void 0) generated.postPhonemeLength = postPhonemeLength;
+        if (pauseLengthScale !== void 0) generated.pauseLengthScale = pauseLengthScale;
+        cachedQuery = generated;
+      }
+      return {
+        audioBase64: cachedBase64,
+        text,
+        speaker,
+        speakerName,
+        kana: cachedQuery?.kana,
+        audioQuery: cachedQuery,
+        speedScale: cachedQuery?.speedScale ?? speedScale,
+        intonationScale: cachedQuery?.intonationScale ?? intonationScale,
+        volumeScale: cachedQuery?.volumeScale ?? volumeScale,
+        prePhonemeLength: cachedQuery?.prePhonemeLength ?? prePhonemeLength,
+        postPhonemeLength: cachedQuery?.postPhonemeLength ?? postPhonemeLength,
+        pauseLengthScale: cachedQuery?.pauseLengthScale ?? pauseLengthScale,
+        accentPhrases: cachedQuery?.accent_phrases ?? accentPhrases
+      };
+    }
+    const resolvedQuery = effectiveAudioQuery ? { ...effectiveAudioQuery } : await playerVoicevoxApi.generateQuery(text, speaker);
+    if (!effectiveAudioQuery && accentPhrases) resolvedQuery.accent_phrases = accentPhrases;
+    if (!effectiveAudioQuery) {
+      resolvedQuery.speedScale = speedScale;
+      if (intonationScale !== void 0) resolvedQuery.intonationScale = intonationScale;
+      if (volumeScale !== void 0) resolvedQuery.volumeScale = volumeScale;
+      if (prePhonemeLength !== void 0) resolvedQuery.prePhonemeLength = prePhonemeLength;
+      if (postPhonemeLength !== void 0) resolvedQuery.postPhonemeLength = postPhonemeLength;
+      if (pauseLengthScale !== void 0) resolvedQuery.pauseLengthScale = pauseLengthScale;
+    }
+    const audioData = await playerVoicevoxApi.synthesize(resolvedQuery, speaker);
+    const base64Audio = Buffer.from(audioData).toString("base64");
+    await writeCachedAudioBase64(cacheKey2, base64Audio);
+    return {
+      audioBase64: base64Audio,
+      text,
+      speaker,
+      speakerName,
+      kana: resolvedQuery.kana,
+      audioQuery: resolvedQuery,
+      accentPhrases: resolvedQuery.accent_phrases,
+      speedScale: resolvedQuery.speedScale,
+      intonationScale: resolvedQuery.intonationScale,
+      volumeScale: resolvedQuery.volumeScale,
+      prePhonemeLength: resolvedQuery.prePhonemeLength,
+      postPhonemeLength: resolvedQuery.postPhonemeLength,
+      pauseLengthScale: resolvedQuery.pauseLengthScale
+    };
+  };
+  registerAppResource(
+    server2,
+    "VOICEVOX Player",
+    playerResourceUri,
+    {
+      description: "Audio player UI for VOICEVOX TTS",
+      mimeType: RESOURCE_MIME_TYPE
+    },
+    async () => ({
+      contents: [
+        {
+          uri: playerResourceUri,
+          mimeType: RESOURCE_MIME_TYPE,
+          text: playerHtml,
+          _meta: {
+            ui: {
+              csp: {},
+              ...config2.playerDomain ? { domain: config2.playerDomain } : {}
+            }
+          }
+        }
+      ]
+    })
+  );
+  registerAppToolIfEnabled(
+    server2,
+    disabledTools,
+    "open_dictionary_ui",
+    {
+      title: "Open Dictionary UI",
+      description: "Open the user dictionary manager UI for VOICEVOX.",
+      annotations: {
+        readOnlyHint: false,
+        destructiveHint: false,
+        idempotentHint: true,
+        openWorldHint: true
+      },
+      _meta: { ui: { resourceUri: playerResourceUri } }
+    },
+    async () => {
+      try {
+        const words = await getUserDictionaryWords();
+        const notice = "\u8F9E\u66F8\u5909\u66F4\u306F\u65E2\u5B58\u30C8\u30E9\u30C3\u30AF\u306B\u81EA\u52D5\u53CD\u6620\u3055\u308C\u307E\u305B\u3093\u3002Player\u3067\u518D\u751F\u6210\u3059\u308B\u3068\u53CD\u6620\u3055\u308C\u307E\u3059\u3002";
+        return {
+          content: [{ type: "text", text: `Dictionary manager opened. ${words.length} word(s).` }],
+          structuredContent: {
+            mode: "dictionary",
+            dictionaryWords: words,
+            dictionaryNotice: notice
+          },
+          _meta: {
+            mode: "dictionary",
+            dictionaryWords: words,
+            dictionaryNotice: notice
+          }
+        };
+      } catch (error) {
+        return createErrorResponse(error);
+      }
+    }
+  );
+  registerAppToolIfEnabled(
+    server2,
+    disabledTools,
+    "speak_player",
+    {
+      title: "Speak Player",
+      description: 'Create a VOICEVOX player session and display the UI. Returns viewUUID \u2014 save it and pass to resynthesize_player / get_player_state for subsequent operations. Multi-speaker format: "1:Hello\\n2:World". Audio synthesis is performed by the player UI when needed.',
+      inputSchema: {
+        text: z2.string().describe('Text to synthesize. Multi-speaker format: "1:Hello\\n2:World" (speaker ID prefix per line).'),
+        speaker: z2.number().optional().describe("Default speaker ID (optional)"),
+        speedScale: z2.number().optional().describe("Playback speed (optional, default from environment)")
+      },
+      annotations: {
+        readOnlyHint: false,
+        destructiveHint: false,
+        idempotentHint: false,
+        openWorldHint: true
+      },
+      _meta: { ui: { resourceUri: playerResourceUri } }
+    },
+    async ({
+      text,
+      speaker,
+      speedScale
+    }, extra) => {
+      try {
+        if (!text?.trim()) {
+          throw new Error("text is required");
+        }
+        const parsedSegments = parseStringInput(text);
+        if (parsedSegments.length === 0) {
+          throw new Error("Text is empty");
+        }
+        const effectiveSpeaker = getEffectiveSpeaker(speaker, extra.sessionId) ?? config2.defaultSpeaker;
+        const effectiveSpeed = speedScale ?? config2.defaultSpeedScale;
+        const baseSegments = parsedSegments.map((s) => ({
+          text: s.text,
+          speaker: s.speaker ?? effectiveSpeaker,
+          speedScale: effectiveSpeed
+        }));
+        const speakerNameMap = await resolveSpeakerNames(baseSegments.map((s) => s.speaker));
+        const viewUUID = randomUUID2();
+        setSessionState(viewUUID, {
+          segments: baseSegments.map((s) => ({
+            text: s.text,
+            speaker: s.speaker,
+            speakerName: speakerNameMap.get(s.speaker),
+            speedScale: s.speedScale
+          })),
+          updatedAt: Date.now()
+        });
+        const fullText = parsedSegments.map((s) => s.text).join(" ");
+        const textPreview = fullText.slice(0, 60) + (fullText.length > 60 ? "..." : "");
+        const uiSegments = baseSegments.map((s) => ({
+          text: s.text,
+          speaker: s.speaker,
+          speakerName: speakerNameMap.get(s.speaker),
+          speedScale: s.speedScale
+        }));
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Voicevox Player started. viewUUID: ${viewUUID} \u300C${textPreview}\u300D`
+            }
+          ],
+          structuredContent: {
+            viewUUID,
+            autoPlay: config2.autoPlay,
+            segments: uiSegments
+          },
+          _meta: {
+            viewUUID,
+            autoPlay: config2.autoPlay,
+            segments: uiSegments
+          }
+        };
+      } catch (error) {
+        return createErrorResponse(error);
+      }
+    }
+  );
+  registerAppToolIfEnabled(
+    server2,
+    disabledTools,
+    "resynthesize_player",
+    {
+      title: "Resynthesize Player",
+      description: "Update player segments for a new player instance (new viewUUID every call). Typical loop: get_player_state (fetch additional pages if hasMore) -> edit segment parameters -> resynthesize_player -> use returned viewUUID for the next loop. Audio synthesis is performed by the player UI when needed.",
+      inputSchema: {
+        segments: z2.array(
+          z2.object({
+            text: z2.string().describe("Segment text"),
+            speaker: z2.number().optional().describe("Speaker ID"),
+            speedScale: z2.number().optional().describe("Playback speed"),
+            intonationScale: z2.number().optional().describe("Intonation scale (\u6291\u63DA)"),
+            volumeScale: z2.number().optional().describe("Volume scale (\u97F3\u91CF)"),
+            prePhonemeLength: z2.number().optional().describe("Pre-phoneme silence in seconds"),
+            postPhonemeLength: z2.number().optional().describe("Post-phoneme silence in seconds"),
+            pauseLengthScale: z2.number().optional().describe("Pause length scale between phrases (\u9593\u306E\u9577\u3055)"),
+            accentPhrases: z2.array(
+              z2.object({
+                moras: z2.array(
+                  z2.object({
+                    text: z2.string(),
+                    consonant: z2.string().nullable().optional(),
+                    consonant_length: z2.number().nullable().optional(),
+                    vowel: z2.string(),
+                    vowel_length: z2.number(),
+                    pitch: z2.number()
+                  })
+                ),
+                accent: z2.number().int(),
+                pause_mora: z2.object({
+                  text: z2.string(),
+                  consonant: z2.string().nullable().optional(),
+                  consonant_length: z2.number().nullable().optional(),
+                  vowel: z2.string(),
+                  vowel_length: z2.number(),
+                  pitch: z2.number()
+                }).nullable().optional(),
+                is_interrogative: z2.boolean().nullable().optional()
+              })
+            ).optional().describe("Accent phrases")
+          })
+        ).describe(
+          "Full segment list to update. Start from get_player_state.segments, edit needed fields, and send the complete array."
+        ),
+        autoPlay: z2.boolean().optional().describe("Auto-play when loaded (default: true)")
+      },
+      annotations: {
+        readOnlyHint: false,
+        destructiveHint: false,
+        idempotentHint: false,
+        openWorldHint: true
+      },
+      _meta: { ui: { resourceUri: playerResourceUri } }
+    },
+    async ({
+      segments,
+      autoPlay
+    }, extra) => {
+      try {
+        if (!segments || segments.length === 0) {
+          throw new Error("segments is required");
+        }
+        const effectiveDefaultSpeaker = getEffectiveSpeaker(void 0, extra.sessionId) ?? config2.defaultSpeaker;
+        const effectiveSpeed = config2.defaultSpeedScale;
+        const effectiveAutoPlay = autoPlay ?? config2.autoPlay;
+        const viewUUID = randomUUID2();
+        const normalizedSegments = segments.map((seg) => ({
+          text: seg.text,
+          speaker: seg.speaker ?? effectiveDefaultSpeaker,
+          speedScale: seg.speedScale ?? effectiveSpeed,
+          intonationScale: seg.intonationScale,
+          volumeScale: seg.volumeScale,
+          prePhonemeLength: seg.prePhonemeLength,
+          postPhonemeLength: seg.postPhonemeLength,
+          pauseLengthScale: seg.pauseLengthScale,
+          accentPhrases: seg.accentPhrases
+        }));
+        const speakerNameMap = await resolveSpeakerNames(normalizedSegments.map((seg) => seg.speaker));
+        setSessionState(viewUUID, {
+          segments: normalizedSegments.map((seg) => ({
+            text: seg.text,
+            speaker: seg.speaker,
+            speakerName: speakerNameMap.get(seg.speaker),
+            speedScale: seg.speedScale,
+            intonationScale: seg.intonationScale,
+            volumeScale: seg.volumeScale,
+            prePhonemeLength: seg.prePhonemeLength,
+            postPhonemeLength: seg.postPhonemeLength,
+            pauseLengthScale: seg.pauseLengthScale,
+            accentPhrases: seg.accentPhrases
+          })),
+          updatedAt: Date.now()
+        });
+        const uiSegments = normalizedSegments.map((seg) => ({
+          text: seg.text,
+          speaker: seg.speaker,
+          speakerName: speakerNameMap.get(seg.speaker),
+          speedScale: seg.speedScale,
+          intonationScale: seg.intonationScale,
+          volumeScale: seg.volumeScale,
+          prePhonemeLength: seg.prePhonemeLength,
+          postPhonemeLength: seg.postPhonemeLength,
+          pauseLengthScale: seg.pauseLengthScale,
+          accentPhrases: seg.accentPhrases
+        }));
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Voicevox Player updated. viewUUID: ${viewUUID} (${segments.length} segment(s))`
+            }
+          ],
+          structuredContent: {
+            viewUUID,
+            autoPlay: effectiveAutoPlay,
+            segments: uiSegments
+          },
+          _meta: {
+            viewUUID,
+            autoPlay: effectiveAutoPlay,
+            segments: uiSegments
+          }
+        };
+      } catch (error) {
+        return createErrorResponse(error);
+      }
+    }
+  );
+  // Hand the shared player helpers to registerPlayerUITools (defined outside this
+  // chunk). The getSessionState passed here is a direct lookup in
+  // playerSessionState by key.
+  registerPlayerUITools(deps, {
+    playerVoicevoxApi,
+    playerResourceUri,
+    synthesizeWithCache,
+    setSessionState,
+    getSessionState: (key) => playerSessionState.get(key),
+    getSpeakerList
+  });
+  // Tool "get_player_state": returns one page of a stored player state's segments
+  // as a JSON string inside a single text content item. The page is shrunk from the
+  // end until the serialized JSON fits within MAX_TOOL_CONTENT_BYTES.
+  registerToolIfEnabled(
+    server2,
+    disabledTools,
+    "get_player_state",
+    {
+      title: "Get VOICEVOX Player State",
+      description: "Returns paged editable player state for AI tuning. Use the latest viewUUID from speak_player/resynthesize_player. If hasMore is true, call again with nextCursor to continue.",
+      inputSchema: {
+        viewUUID: z2.string().optional().describe("Player instance ID from speak_player/resynthesize_player. Always pass the latest viewUUID."),
+        cursor: z2.number().int().min(0).optional().describe("Start index in segments array (default: 0)"),
+        limit: z2.number().int().min(1).max(MAX_STATE_PAGE_LIMIT).optional().describe(
+          `Max segments per page (default: ${DEFAULT_STATE_PAGE_LIMIT}, max: ${MAX_STATE_PAGE_LIMIT}). Server may return fewer segments when needed.`
+        )
+      },
+      annotations: {
+        readOnlyHint: true,
+        destructiveHint: false,
+        idempotentHint: true,
+        openWorldHint: false
+      }
+    },
+    async ({ viewUUID, cursor, limit }, extra) => {
+      try {
+        // getSessionState is defined outside this chunk; presumably it resolves the
+        // state by viewUUID and/or session id — TODO confirm.
+        const state = getSessionState(viewUUID, extra?.sessionId);
+        if (!state) {
+          // No stored state: answer with an empty page and a hint, not an error.
+          return {
+            content: [
+              {
+                type: "text",
+                text: JSON.stringify({
+                  segments: [],
+                  updatedAt: 0,
+                  total: 0,
+                  cursor: 0,
+                  limit: limit ?? DEFAULT_STATE_PAGE_LIMIT,
+                  hasMore: false,
+                  nextCursor: null,
+                  message: "No player state available. Play something first."
+                })
+              }
+            ]
+          };
+        }
+        const total = state.segments.length;
+        // Clamp the cursor to the segment count so an out-of-range cursor yields an
+        // empty page.
+        const effectiveCursor = Math.min(cursor ?? 0, total);
+        const requestedLimit = limit ?? DEFAULT_STATE_PAGE_LIMIT;
+        const effectiveLimit = Math.min(requestedLimit, MAX_STATE_PAGE_LIMIT);
+        // pageEnd / pageSegments are reassigned by the trimming loop below.
+        let pageEnd = Math.min(total, effectiveCursor + effectiveLimit);
+        let pageSegments = state.segments.slice(effectiveCursor, pageEnd);
+        // Reads the current pageEnd / pageSegments each time it is called, so it
+        // always reflects the latest trimmed page.
+        const buildPayload = () => {
+          const hasMore = pageEnd < total;
+          return {
+            segments: pageSegments,
+            updatedAt: state.updatedAt,
+            total,
+            cursor: effectiveCursor,
+            limit: effectiveLimit,
+            hasMore,
+            nextCursor: hasMore ? pageEnd : null
+          };
+        };
+        let payload = buildPayload();
+        let payloadText = JSON.stringify(payload);
+        // Drop one trailing segment at a time until the UTF-8 size of the JSON fits
+        // the byte budget, or the page becomes empty.
+        while (Buffer.byteLength(payloadText, "utf8") > MAX_TOOL_CONTENT_BYTES && pageSegments.length > 0) {
+          pageEnd -= 1;
+          pageSegments = state.segments.slice(effectiveCursor, pageEnd);
+          payload = buildPayload();
+          payloadText = JSON.stringify(payload);
+        }
+        // Still over budget with an empty page: return a minimal placeholder.
+        // NOTE(review): nextCursor equals the current cursor here, so a caller that
+        // just follows nextCursor would repeat the same request — confirm intent.
+        if (Buffer.byteLength(payloadText, "utf8") > MAX_TOOL_CONTENT_BYTES) {
+          return {
+            content: [
+              {
+                type: "text",
+                text: JSON.stringify({
+                  segments: [],
+                  updatedAt: state.updatedAt,
+                  total,
+                  cursor: effectiveCursor,
+                  limit: effectiveLimit,
+                  hasMore: effectiveCursor < total,
+                  nextCursor: effectiveCursor < total ? effectiveCursor : null,
+                  message: "Player state is too large for this request. Request a later cursor or reduce source text size."
+                })
+              }
+            ]
+          };
+        }
+        // The page was trimmed to nothing although segments remain at the cursor,
+        // i.e. the segment at effectiveCursor alone did not fit the budget.
+        // NOTE(review): nextCursor is again the unchanged cursor — confirm intent.
+        if (pageSegments.length === 0 && effectiveCursor < total) {
+          return {
+            content: [
+              {
+                type: "text",
+                text: JSON.stringify({
+                  segments: [],
+                  updatedAt: state.updatedAt,
+                  total,
+                  cursor: effectiveCursor,
+                  limit: effectiveLimit,
+                  hasMore: true,
+                  nextCursor: effectiveCursor,
+                  message: "Current segment is too large to include. Advance cursor or reduce segment text size."
+                })
+              }
+            ]
+          };
+        }
+        return {
+          content: [{ type: "text", text: payloadText }]
+        };
+      } catch (error) {
+        return createErrorResponse(error);
+      }
+    }
+  );
+}
+// src/tools/speak.ts
+import * as z3 from "zod/v4";
+function buildSpeakInputSchema(restrictions) {
+  const schema = {
+    text: z3.string().describe(
+      'Text split by line breaks (\\n). IMPORTANT: Each line = one speech unit (processed and played separately). Keep the FIRST LINE SHORT for quick playback start - audio begins as soon as the first line is synthesized. Example: "Hi!\\nThis is a longer explanation that follows." Optional speaker prefix per line: "1:Hello\\n2:World"'
+    ),
+    query: z3.string().optional().describe("Voice synthesis query"),
+    speaker: z3.number().optional().describe("Default speaker ID (optional)"),
+    speedScale: z3.number().optional().describe("Playback speed (optional, default from environment)")
+  };
+  if (!restrictions.immediate) {
+    schema.immediate = z3.boolean().optional().describe(
+      "If true, stops current playback and plays new audio immediately. If false, waits for current playback to finish. Default depends on environment variable."
+    );
+  }
+  if (!restrictions.waitForStart) {
+    schema.waitForStart = z3.boolean().optional().describe("Wait for playback to start (optional, default: false)");
+  }
+  if (!restrictions.waitForEnd) {
+    schema.waitForEnd = z3.boolean().optional().describe("Wait for playback to end (optional, default: false)");
+  }
+  return schema;
+}
+function registerSpeakTool(deps) {
+  const { server: server2, voicevoxClient, config: config2, disabledTools, restrictions } = deps;
+  registerToolIfEnabled(
+    server2,
+    disabledTools,
+    "speak",
+    {
+      title: "Speak",
+      description: "Convert text to speech and play it. Text is split by line breaks (\\n) into separate speech units. Each line is processed as an independent audio segment.",
+      inputSchema: buildSpeakInputSchema(restrictions),
+      annotations: {
+        readOnlyHint: false,
+        destructiveHint: false,
+        idempotentHint: false,
+        openWorldHint: true
+      }
+    },
+    async ({
+      text,
+      speaker,
+      query,
       speedScale,
       immediate,
       waitForStart,
@@ -3580,7 +5057,7 @@ function registerSpeakerTools(deps) {
   registerToolIfEnabled(
     server2,
     disabledTools,
-    "ping_voicevox",
+    "ping",
     {
       title: "Ping VOICEVOX",
       description: "Check if VOICEVOX Engine is running and reachable",
@@ -3622,7 +5099,7 @@ function registerSpeakerTools(deps) {
     async () => {
       try {
         await voicevoxClient.clearQueue();
-        return createSuccessResponse("\u30B9\u30D4\u30FC\u30AB\u30FC\u3092\u505C\u6B62\u3057\u307E\u3057\u305F");
+        return createSuccessResponse("Speaker stopped successfully");
       } catch (error) {
         return createErrorResponse(error);
       }
@@ -3661,7 +5138,7 @@ function registerSpeakerTools(deps) {
 }
 // src/tools/synthesize.ts
-import * as z3 from "zod/v4";
+import * as z4 from "zod/v4";
 function registerSynthesizeTool(deps) {
   const { server: server2, voicevoxClient, disabledTools } = deps;
   registerToolIfEnabled(
@@ -3678,11 +5155,11 @@ function registerSynthesizeTool(deps) {
         openWorldHint: true
       },
       inputSchema: {
-        text: z3.string().optional().describe("Text for voice synthesis (if both query and text provided, query takes precedence)"),
-        query: z3.string().optional().describe("Voice synthesis query"),
-        output: z3.string().describe("Output path for the audio file"),
-        speaker: z3.number().optional().describe("Default speaker ID (optional)"),
-        speedScale: z3.number().optional().describe("Playback speed (optional, default from environment)")
+        text: z4.string().optional().describe("Text for voice synthesis (if both query and text provided, query takes precedence)"),
+        query: z4.string().optional().describe("Voice synthesis query"),
+        output: z4.string().describe("Output path for the audio file"),
+        speaker: z4.number().optional().describe("Default speaker ID (optional)"),
+        speedScale: z4.number().optional().describe("Playback speed (optional, default from environment)")
       }
     },
     async ({
@@ -3703,7 +5180,7 @@ function registerSynthesizeTool(deps) {
           const filePath = await voicevoxClient.generateAudioFile(text, output, effectiveSpeaker, speedScale);
           return createSuccessResponse(filePath);
         }
-        throw new Error("query\u30D1\u30E9\u30E1\u30FC\u30BF\u3068text\u30D1\u30E9\u30E1\u30FC\u30BF\u306E\u3069\u3061\u3089\u304B\u3092\u6307\u5B9A\u3057\u3066\u304F\u3060\u3055\u3044");
+        throw new Error('Either "query" or "text" parameter must be specified');
       } catch (error) {
         return createErrorResponse(error);
       }
@@ -3715,8 +5192,8 @@ function registerSynthesizeTool(deps) {
 var config = getConfig();
 function createServer() {
   const server2 = new McpServer({
-    name: "MCP TTS Voicevox",
-    version: "0.6.1",
+    name: "mcp-tts-voicevox",
+    version: "0.7.1",
     description: "A Voicevox server that converts text to speech for playback and saving."
   });
   const voicevoxClient = new VoicevoxClient({
@@ -3745,7 +5222,7 @@ function createServer() {
 var server = createServer();
 // src/index.ts
-var __dirname2 = dirname2(fileURLToPath2(import.meta.url));
+var __dirname2 = dirname3(fileURLToPath2(import.meta.url));
 function isCLI() {
   if (!isNodejs() || !process.argv) return false;
   const isNpmStart = process.env?.npm_lifecycle_event === "start";
@@ -3799,12 +5276,22 @@ Options:
   Tool Options:
   --disable-tools <tools>     Comma-separated list of tools to disable
-                              (Allowed: speak, speak_player, ping_voicevox,
-                               synthesize_file, stop_speaker, get_speakers)
+                              (e.g.: speak, speak_player, ping, synthesize_file,
+                               stop_speaker, get_speakers)
+                              The "voicevox_" prefix is added automatically.
   UI Player Options:
   --auto-play                 Auto-play audio in UI player (default)
   --no-auto-play              Require manual play in UI player
+  --player-export             Enable track export(download) in UI player (default)
+  --no-player-export          Disable track export(download) in UI player
+  --player-export-dir <dir>   Default output directory for exported tracks
+  --player-cache-dir <dir>    Player cache directory
+  --player-state-file <path>  Persisted player state file path
+  --player-audio-cache        Enable disk audio cache for player (default)
+  --no-player-audio-cache     Disable disk audio cache for player
+  --player-audio-cache-ttl-days <days>  Audio cache retention days (0 disables, -1 unlimited)
+  --player-audio-cache-max-mb <mb>      Audio cache size cap in MB (0 disables, -1 unlimited)
   Server Options:
   --http                      Enable HTTP server mode (remote MCP)
@@ -3812,6 +5299,7 @@ Options:
   --host <host>               HTTP server host (default: 0.0.0.0)
   --allowed-hosts <hosts>     Comma-separated list of allowed hosts (default: localhost,127.0.0.1,[::1])
   --allowed-origins <origins> Comma-separated list of allowed origins
+  --api-key <key>             Require matching API key via X-API-Key or Authorization: Bearer
 Examples:
   npx @kajidog/mcp-tts-voicevox --url http://192.168.1.50:50021 --speaker 3
@@ -3828,7 +5316,7 @@ async function startMCPServer() {
     process.exit(0);
   }
   if (process.argv.includes("--version") || process.argv.includes("-v")) {
-    const pkg = JSON.parse(readFileSync2(join2(__dirname2, "../package.json"), "utf-8"));
+    const pkg = JSON.parse(readFileSync2(join4(__dirname2, "../package.json"), "utf-8"));
     console.log(`@kajidog/mcp-tts-voicevox v${pkg.version}`);
     process.exit(0);
   }