@elizaos/plugin-elevenlabs 1.5.13 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -99814,7 +99814,7 @@ module.exports = __toCommonJS(exports_index_node);
99814
99814
 
99815
99815
  // src/index.ts
99816
99816
  var import_core = require("@elizaos/core");
99817
- var import_elevenlabs_js = __toESM(require_elevenlabs_js());
99817
+ var import_elevenlabs_js = __toESM(require_elevenlabs_js(), 1);
99818
99818
  function isBrowser() {
99819
99819
  return typeof globalThis !== "undefined" && typeof globalThis.document !== "undefined";
99820
99820
  }
@@ -99842,7 +99842,20 @@ function getVoiceSettings(runtime) {
99842
99842
  outputFormat: getSetting(runtime, "ELEVENLABS_OUTPUT_FORMAT", "mp3_44100_128"),
99843
99843
  similarity: getSetting(runtime, "ELEVENLABS_VOICE_SIMILARITY_BOOST", "0.75"),
99844
99844
  style: getSetting(runtime, "ELEVENLABS_VOICE_STYLE", "0"),
99845
- speakerBoost: import_core.parseBooleanFromText(getSetting(runtime, "ELEVENLABS_VOICE_USE_SPEAKER_BOOST", "true") + "")
99845
+ speakerBoost: import_core.parseBooleanFromText(`${getSetting(runtime, "ELEVENLABS_VOICE_USE_SPEAKER_BOOST", "true")}`)
99846
+ };
99847
+ }
99848
+ function getTranscriptionSettings(runtime) {
99849
+ const languageCode = getSetting(runtime, "ELEVENLABS_STT_LANGUAGE_CODE");
99850
+ const numSpeakersStr = getSetting(runtime, "ELEVENLABS_STT_NUM_SPEAKERS");
99851
+ return {
99852
+ apiKey: getApiKey(runtime) || "",
99853
+ modelId: getSetting(runtime, "ELEVENLABS_STT_MODEL_ID", "scribe_v1"),
99854
+ languageCode: languageCode || undefined,
99855
+ timestampsGranularity: getSetting(runtime, "ELEVENLABS_STT_TIMESTAMPS_GRANULARITY", "word"),
99856
+ diarize: import_core.parseBooleanFromText(`${getSetting(runtime, "ELEVENLABS_STT_DIARIZE", "false")}`),
99857
+ numSpeakers: numSpeakersStr ? Number(numSpeakersStr) : undefined,
99858
+ tagAudioEvents: import_core.parseBooleanFromText(`${getSetting(runtime, "ELEVENLABS_STT_TAG_AUDIO_EVENTS", "false")}`)
99846
99859
  };
99847
99860
  }
99848
99861
  async function fetchSpeech(runtime, params) {
@@ -99875,9 +99888,56 @@ async function fetchSpeech(runtime, params) {
99875
99888
  throw error instanceof Error ? error : new Error(msg);
99876
99889
  }
99877
99890
  }
99891
+ async function fetchTranscription(runtime, params) {
99892
+ try {
99893
+ const baseUrl = getBaseURL(runtime);
99894
+ const apiKey = getApiKey(runtime) ?? (isBrowser() ? "sk-proxy" : undefined);
99895
+ const client = new import_elevenlabs_js.ElevenLabsClient({
99896
+ apiKey,
99897
+ baseUrl
99898
+ });
99899
+ const requestParams = {
99900
+ modelId: params.modelId,
99901
+ audio: params.audioFile
99902
+ };
99903
+ if (params.languageCode) {
99904
+ requestParams.languageCode = params.languageCode;
99905
+ }
99906
+ if (params.timestampsGranularity !== "none") {
99907
+ requestParams.timestampsGranularity = params.timestampsGranularity;
99908
+ }
99909
+ if (params.diarize) {
99910
+ requestParams.diarize = true;
99911
+ if (params.numSpeakers) {
99912
+ requestParams.numSpeakers = params.numSpeakers;
99913
+ }
99914
+ }
99915
+ if (params.tagAudioEvents) {
99916
+ requestParams.tagAudioEvents = true;
99917
+ }
99918
+ const response = await client.speechToText.convert(requestParams);
99919
+ if (!response) {
99920
+ throw new Error("Empty response from ElevenLabs STT API");
99921
+ }
99922
+ let transcript = "";
99923
+ if ("transcript" in response && response.transcript) {
99924
+ const transcriptObj = response.transcript;
99925
+ transcript = transcriptObj.text || "";
99926
+ } else if ("transcripts" in response && response.transcripts) {
99927
+ const transcriptsArray = response.transcripts;
99928
+ transcript = transcriptsArray.map((t) => t.text || "").join(`
99929
+ `);
99930
+ }
99931
+ return transcript;
99932
+ } catch (error) {
99933
+ const msg = error instanceof Error ? error.message : String(error);
99934
+ import_core.logger.error(`ElevenLabs fetchTranscription error: ${msg}`);
99935
+ throw error instanceof Error ? error : new Error(msg);
99936
+ }
99937
+ }
99878
99938
  var elevenLabsPlugin = {
99879
99939
  name: "elevenLabs",
99880
- description: "High-quality text-to-speech synthesis using ElevenLabs API with support for multiple voices and languages",
99940
+ description: "High-quality text-to-speech synthesis and speech-to-text transcription using ElevenLabs API with support for multiple voices, languages, and speaker diarization",
99881
99941
  models: {
99882
99942
  [import_core.ModelType.TEXT_TO_SPEECH]: async (runtime, input) => {
99883
99943
  const options = typeof input === "string" ? { text: input } : input;
@@ -99904,6 +99964,46 @@ var elevenLabsPlugin = {
99904
99964
  import_core.logger.error(`ElevenLabs model error: ${msg}`);
99905
99965
  throw error instanceof Error ? error : new Error(msg);
99906
99966
  }
99967
+ },
99968
+ [import_core.ModelType.TRANSCRIPTION]: async (runtime, input) => {
99969
+ const settings = getTranscriptionSettings(runtime);
99970
+ import_core.logger.log(`[ElevenLabs] Using TRANSCRIPTION model: ${settings.modelId}`);
99971
+ try {
99972
+ let audioFile;
99973
+ if (typeof input === "string") {
99974
+ const response = await fetch(input);
99975
+ if (!response.ok) {
99976
+ throw new Error(`Failed to fetch audio from URL: ${input}`);
99977
+ }
99978
+ const arrayBuffer = await response.arrayBuffer();
99979
+ audioFile = Buffer.from(arrayBuffer);
99980
+ } else if (Buffer.isBuffer(input)) {
99981
+ audioFile = input;
99982
+ } else if (typeof input === "object" && "audioUrl" in input) {
99983
+ const response = await fetch(input.audioUrl);
99984
+ if (!response.ok) {
99985
+ throw new Error(`Failed to fetch audio from URL: ${input.audioUrl}`);
99986
+ }
99987
+ const arrayBuffer = await response.arrayBuffer();
99988
+ audioFile = Buffer.from(arrayBuffer);
99989
+ } else {
99990
+ throw new Error("Invalid input type for TRANSCRIPTION model");
99991
+ }
99992
+ const transcript = await fetchTranscription(runtime, {
99993
+ audioFile,
99994
+ modelId: settings.modelId,
99995
+ languageCode: settings.languageCode,
99996
+ timestampsGranularity: settings.timestampsGranularity,
99997
+ diarize: settings.diarize,
99998
+ numSpeakers: settings.numSpeakers,
99999
+ tagAudioEvents: settings.tagAudioEvents
100000
+ });
100001
+ return transcript;
100002
+ } catch (error) {
100003
+ const msg = error instanceof Error ? error.message : String(error);
100004
+ import_core.logger.error(`ElevenLabs transcription error: ${msg}`);
100005
+ throw error instanceof Error ? error : new Error(msg);
100006
+ }
99907
100007
  }
99908
100008
  },
99909
100009
  tests: [
@@ -99926,12 +100026,12 @@ var elevenLabsPlugin = {
99926
100026
  if (!settings.voiceId) {
99927
100027
  throw new Error("Missing voice ID configuration");
99928
100028
  }
99929
- const stability = parseFloat(settings.stability);
99930
- if (isNaN(stability) || stability < 0 || stability > 1) {
100029
+ const stability = Number.parseFloat(settings.stability);
100030
+ if (Number.isNaN(stability) || stability < 0 || stability > 1) {
99931
100031
  throw new Error("Voice stability must be between 0 and 1");
99932
100032
  }
99933
- const similarity = parseFloat(settings.similarity);
99934
- if (isNaN(similarity) || similarity < 0 || similarity > 1) {
100033
+ const similarity = Number.parseFloat(settings.similarity);
100034
+ if (Number.isNaN(similarity) || similarity < 0 || similarity > 1) {
99935
100035
  throw new Error("Voice similarity boost must be between 0 and 1");
99936
100036
  }
99937
100037
  import_core.logger.success("Voice settings validated successfully");
@@ -100015,7 +100115,7 @@ var elevenLabsPlugin = {
100015
100115
  for (const format of pcmFormats) {
100016
100116
  if (format.startsWith("pcm_")) {
100017
100117
  const sampleRate = Number.parseInt(format.slice(4));
100018
- if (isNaN(sampleRate) || sampleRate <= 0) {
100118
+ if (Number.isNaN(sampleRate) || sampleRate <= 0) {
100019
100119
  throw new Error(`Invalid PCM format: ${format}`);
100020
100120
  }
100021
100121
  }
@@ -100024,9 +100124,60 @@ var elevenLabsPlugin = {
100024
100124
  }
100025
100125
  }
100026
100126
  ]
100127
+ },
100128
+ {
100129
+ name: "test eleven labs STT",
100130
+ tests: [
100131
+ {
100132
+ name: "STT settings validation",
100133
+ fn: async (runtime) => {
100134
+ const settings = getTranscriptionSettings(runtime);
100135
+ if (!settings.modelId) {
100136
+ throw new Error("Missing STT model ID configuration");
100137
+ }
100138
+ const validGranularities = ["none", "word", "character"];
100139
+ if (!validGranularities.includes(settings.timestampsGranularity)) {
100140
+ throw new Error(`Invalid timestamps granularity: ${settings.timestampsGranularity}`);
100141
+ }
100142
+ if (settings.numSpeakers !== undefined && (settings.numSpeakers < 1 || settings.numSpeakers > 32)) {
100143
+ throw new Error("Number of speakers must be between 1 and 32");
100144
+ }
100145
+ import_core.logger.success("STT settings validated successfully");
100146
+ }
100147
+ },
100148
+ {
100149
+ name: "STT configuration defaults",
100150
+ fn: async (runtime) => {
100151
+ const settings = getTranscriptionSettings(runtime);
100152
+ if (settings.modelId !== "scribe_v1") {
100153
+ import_core.logger.warn(`Using non-default STT model: ${settings.modelId}`);
100154
+ }
100155
+ if (settings.timestampsGranularity !== "word") {
100156
+ import_core.logger.warn(`Using non-default timestamps granularity: ${settings.timestampsGranularity}`);
100157
+ }
100158
+ import_core.logger.success("STT configuration defaults checked");
100159
+ }
100160
+ },
100161
+ {
100162
+ name: "STT input handling validation",
100163
+ fn: async (runtime) => {
100164
+ const testCases = [
100165
+ { type: "string URL", valid: true },
100166
+ { type: "Buffer", valid: true },
100167
+ { type: "object with audioUrl", valid: true }
100168
+ ];
100169
+ for (const testCase of testCases) {
100170
+ if (!testCase.valid) {
100171
+ throw new Error(`Invalid test case should not be valid: ${testCase.type}`);
100172
+ }
100173
+ }
100174
+ import_core.logger.success("STT input handling validation passed");
100175
+ }
100176
+ }
100177
+ ]
100027
100178
  }
100028
100179
  ]
100029
100180
  };
100030
100181
  var src_default = elevenLabsPlugin;
100031
100182
 
100032
- //# debugId=AD5D1D13D464100B64756E2164756E21
100183
+ //# debugId=F8EB9AB5C99014D864756E2164756E21