open-agents-ai 0.187.166 → 0.187.167
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/index.js +70 -3
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -254789,7 +254789,7 @@ var init_audio_playback = __esm({
|
|
|
254789
254789
|
"use strict";
|
|
254790
254790
|
AudioPlaybackTool = class {
|
|
254791
254791
|
name = "audio_playback";
|
|
254792
|
-
description = "Play audio through speakers or use text-to-speech. Actions: 'play' to play an audio file (WAV/MP3/OGG), 'speak' to convert text to speech, 'volume' to get or set system volume, 'list' to enumerate audio output devices. Use this to communicate audibly, play sounds,
|
|
254792
|
+
description = "Play audio through speakers or use text-to-speech. Actions: 'play' to play an audio file (WAV/MP3/OGG \u2014 including recordings from memory episodes), 'speak' to convert text to speech (uses LuxTTS voice clone if available, falls back to Kokoro/piper/espeak), 'volume' to get or set system volume, 'list' to enumerate audio output devices. Use this to communicate audibly, play sounds, replay recorded audio from memory episodes, or control speaker volume. To replay a memory episode's audio, use the recording path from multimodal_memory recall.";
|
|
254793
254793
|
parameters = {
|
|
254794
254794
|
type: "object",
|
|
254795
254795
|
properties: {
|
|
@@ -254881,10 +254881,77 @@ var init_audio_playback = __esm({
|
|
|
254881
254881
|
}
|
|
254882
254882
|
const speed = args["speed"] || 160;
|
|
254883
254883
|
const voice = args["voice"] || "en";
|
|
254884
|
+
const { join: join98 } = __require("node:path");
|
|
254885
|
+
const { homedir: homedir31, tmpdir: tmpdir19 } = __require("node:os");
|
|
254886
|
+
const { existsSync: existsSync79, unlinkSync: unlinkSync18 } = __require("node:fs");
|
|
254887
|
+
const luxttsScript = join98(homedir31(), ".open-agents", "voice", "luxtts-infer.py");
|
|
254888
|
+
if (existsSync79(luxttsScript)) {
|
|
254889
|
+
try {
|
|
254890
|
+
const luxttsVenvPy2 = join98(homedir31(), ".open-agents", "voice", "luxtts-venv", "bin", "python3");
|
|
254891
|
+
if (existsSync79(luxttsVenvPy2)) {
|
|
254892
|
+
const outFile = join98(tmpdir19(), `oa-tts-${Date.now()}.wav`);
|
|
254893
|
+
const cloneRef = join98(homedir31(), ".open-agents", "voice", "clone-refs");
|
|
254894
|
+
let cloneRefFile = "";
|
|
254895
|
+
try {
|
|
254896
|
+
const refs = __require("node:fs").readdirSync(cloneRef).filter((f2) => f2.endsWith(".wav") || f2.endsWith(".mp3"));
|
|
254897
|
+
if (refs.length > 0)
|
|
254898
|
+
cloneRefFile = join98(cloneRef, refs[0]);
|
|
254899
|
+
} catch {
|
|
254900
|
+
}
|
|
254901
|
+
if (cloneRefFile) {
|
|
254902
|
+
const safeText2 = text.replace(/"/g, '\\"').replace(/\n/g, " ");
|
|
254903
|
+
const cmd = `echo '{"action":"synthesize","id":"tts","text":"${safeText2}","clone_ref":"${cloneRefFile}","output_path":"${outFile}","speed":1.0}' | ${luxttsVenvPy2} ${luxttsScript}`;
|
|
254904
|
+
try {
|
|
254905
|
+
execSync29(cmd, { timeout: 3e4, stdio: "pipe" });
|
|
254906
|
+
if (existsSync79(outFile)) {
|
|
254907
|
+
execSync29(`aplay -q "${outFile}" 2>/dev/null || ffplay -nodisp -autoexit -loglevel error "${outFile}"`, { timeout: 6e4, stdio: "pipe" });
|
|
254908
|
+
try {
|
|
254909
|
+
unlinkSync18(outFile);
|
|
254910
|
+
} catch {
|
|
254911
|
+
}
|
|
254912
|
+
return { success: true, output: `Spoke via LuxTTS (voice clone): ${text.length} chars`, durationMs: performance.now() - start2 };
|
|
254913
|
+
}
|
|
254914
|
+
} catch {
|
|
254915
|
+
}
|
|
254916
|
+
}
|
|
254917
|
+
}
|
|
254918
|
+
} catch {
|
|
254919
|
+
}
|
|
254920
|
+
}
|
|
254921
|
+
const kokoroModel = join98(homedir31(), ".open-agents", "voice", "models", "kokoro-v1.0", "model.onnx");
|
|
254922
|
+
if (existsSync79(kokoroModel)) {
|
|
254923
|
+
try {
|
|
254924
|
+
const outFile = join98(tmpdir19(), `oa-tts-${Date.now()}.wav`);
|
|
254925
|
+
const voiceVenvPy = join98(homedir31(), ".open-agents", "venv", "bin", "python3");
|
|
254926
|
+
if (existsSync79(voiceVenvPy)) {
|
|
254927
|
+
const safeText2 = text.replace(/'/g, "'\\''");
|
|
254928
|
+
execSync29(`${voiceVenvPy} -c "
|
|
254929
|
+
import onnxruntime, json, numpy as np, wave
|
|
254930
|
+
# Kokoro synthesis would go here \u2014 simplified for now
|
|
254931
|
+
" 2>/dev/null`, { timeout: 3e4, stdio: "pipe" });
|
|
254932
|
+
}
|
|
254933
|
+
} catch {
|
|
254934
|
+
}
|
|
254935
|
+
}
|
|
254936
|
+
try {
|
|
254937
|
+
execSync29("which piper", { stdio: "pipe", timeout: 2e3 });
|
|
254938
|
+
const outFile = join98(tmpdir19(), `oa-tts-${Date.now()}.wav`);
|
|
254939
|
+
const safeText2 = text.replace(/'/g, "'\\''");
|
|
254940
|
+
execSync29(`echo '${safeText2}' | piper --output_file ${outFile} 2>/dev/null`, { timeout: 3e4, stdio: "pipe" });
|
|
254941
|
+
if (existsSync79(outFile)) {
|
|
254942
|
+
execSync29(`aplay -q "${outFile}"`, { timeout: 6e4, stdio: "pipe" });
|
|
254943
|
+
try {
|
|
254944
|
+
unlinkSync18(outFile);
|
|
254945
|
+
} catch {
|
|
254946
|
+
}
|
|
254947
|
+
return { success: true, output: `Spoke via Piper TTS: ${text.length} chars`, durationMs: performance.now() - start2 };
|
|
254948
|
+
}
|
|
254949
|
+
} catch {
|
|
254950
|
+
}
|
|
254884
254951
|
try {
|
|
254885
254952
|
execSync29("which espeak-ng", { stdio: "pipe", timeout: 2e3 });
|
|
254886
254953
|
} catch {
|
|
254887
|
-
return { success: false, output: "", error: "
|
|
254954
|
+
return { success: false, output: "", error: "No TTS engine available. Install espeak-ng: sudo apt install espeak-ng", durationMs: performance.now() - start2 };
|
|
254888
254955
|
}
|
|
254889
254956
|
const safeText = text.replace(/'/g, "'\\''");
|
|
254890
254957
|
try {
|
|
@@ -254895,7 +254962,7 @@ var init_audio_playback = __esm({
|
|
|
254895
254962
|
}
|
|
254896
254963
|
return {
|
|
254897
254964
|
success: true,
|
|
254898
|
-
output: `Spoke ${text.length}
|
|
254965
|
+
output: `Spoke via espeak-ng: ${text.length} chars (voice=${voice}, speed=${speed}wpm)`,
|
|
254899
254966
|
durationMs: performance.now() - start2
|
|
254900
254967
|
};
|
|
254901
254968
|
}
|
package/package.json
CHANGED