npm - open-agents-ai - Versions diffs - 0.187.169 → 0.187.172 - Mend

open-agents-ai 0.187.169 → 0.187.172

This diff shows the changes between two publicly released versions of this package, as they appear in the public registry to which they were published. It is provided for informational purposes only.

Files changed (2)

package/dist/index.js +100 -57
package/package.json +1 -1

package/dist/index.js (changed)

@@ -258533,10 +258533,28 @@ var init_asr_listen = __esm({
           mkdirSync17(captureDir, { recursive: true });
         const audioFile = join51(captureDir, `listen-${Date.now()}.wav`);
         try {
-          execSync39(`arecord -D ${device} -f S16_LE -r 16000 -c 1 -d ${duration} -q ${audioFile}`, {
-            timeout: (duration + 5) * 1e3,
-            stdio: "pipe"
-          });
+          try {
+            execSync39(`which pw-record`, { stdio: "pipe", timeout: 2e3 });
+            const child = __require("child_process").spawnSync("pw-record", [
+              "--channels",
+              "1",
+              "--rate",
+              "16000",
+              "--format",
+              "s16",
+              audioFile
+            ], { timeout: (duration + 3) * 1e3, stdio: "pipe", killSignal: "SIGINT" });
+            execSync39(`pw-record --channels 1 --rate 16000 --format s16 ${audioFile} & PID=$!; sleep ${duration}; kill $PID 2>/dev/null; wait $PID 2>/dev/null`, {
+              timeout: (duration + 5) * 1e3,
+              stdio: "pipe",
+              shell: "/bin/bash"
+            });
+          } catch {
+            execSync39(`arecord -D ${device} -f S16_LE -r 16000 -c 1 -d ${duration} -q ${audioFile}`, {
+              timeout: (duration + 5) * 1e3,
+              stdio: "pipe"
+            });
+          }
         } catch (err) {
           return { success: false, output: "", error: `Recording failed: ${err instanceof Error ? err.message : String(err)}`, durationMs: performance.now() - start2 };
         }
@@ -258554,73 +258572,98 @@ var init_asr_listen = __esm({
         return this.doTranscribe(file, language, 0, start2);
       }
       doTranscribe(audioFile, language, recordDuration, start2) {
-        try {
-          const langArg = language !== "auto" ? `--language ${language}` : "";
-          const transcript = execSync39(`transcribe-cli ${langArg} "${audioFile}" 2>/dev/null`, { encoding: "utf8", timeout: 12e4 }).trim();
-          if (transcript && transcript.length > 0 && !transcript.includes("transcribe-cli")) {
-            return {
-              success: true,
-              output: `Transcription (${recordDuration > 0 ? recordDuration + "s recording, " : ""}Whisper):
+        const os8 = __require("node:os");
+        const fs4 = __require("node:fs");
+        const path5 = __require("node:path");
+        const homeDir = os8.homedir();
+        const tmpDir = os8.tmpdir();
+        const langArg = language !== "auto" ? `"${language}"` : "None";
+        const whisperScript = `
+import sys, json, os, warnings
+warnings.filterwarnings("ignore")
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
-"${transcript}"
+audio_file = "${audioFile.replace(/\\/g, "/")}"
+lang = ${langArg}
-Audio file: ${audioFile}`,
-              durationMs: performance.now() - start2
-            };
-          }
-        } catch {
-        }
-        try {
-          const venvPy = join51(__require("os").homedir(), ".open-agents", "venv", "bin", "python3");
-          if (existsSync37(venvPy)) {
-            const script = `
-import sys
+# Try faster-whisper first (faster, lower memory)
 try:
     from faster_whisper import WhisperModel
-    model = WhisperModel("base", device="cpu", compute_type="int8")
-    segments, info = model.transcribe("${audioFile}", language="${language === "auto" ? "" : language}" or None)
-    text = " ".join(seg.text.strip() for seg in segments)
-    print(text)
-except Exception as e:
-    print(f"ERROR: {e}", file=sys.stderr)
-    sys.exit(1)
+    model = WhisperModel("base", device="auto", compute_type="int8")
+    segments, info = model.transcribe(audio_file, language=lang if lang else None)
+    text = " ".join(seg.text.strip() for seg in segments).strip()
+    if text:
+        print(json.dumps({"ok": True, "text": text, "backend": "faster-whisper"}))
+        sys.exit(0)
+except Exception:
+    pass
+# Try openai-whisper
+try:
+    import whisper
+    model = whisper.load_model("base")
+    result = model.transcribe(audio_file, language=lang if lang else None)
+    text = result.get("text", "").strip()
+    if text:
+        print(json.dumps({"ok": True, "text": text, "backend": "openai-whisper"}))
+        sys.exit(0)
+except Exception:
+    pass
+print(json.dumps({"ok": False, "error": "No whisper backend available"}))
 `;
-            const scriptFile = join51(__require("os").tmpdir(), `oa-whisper-${Date.now()}.py`);
-            __require("fs").writeFileSync(scriptFile, script);
-            const transcript = execSync39(`${venvPy} ${scriptFile}`, { encoding: "utf8", timeout: 12e4 }).trim();
-            if (transcript && !transcript.startsWith("ERROR")) {
-              return {
-                success: true,
-                output: `Transcription (faster-whisper):
-"${transcript}"
-Audio file: ${audioFile}`,
-                durationMs: performance.now() - start2
-              };
+        const pyPaths = [
+          path5.join(homeDir, ".open-agents", "venv", "bin", "python3"),
+          "python3",
+          "python"
+        ];
+        for (const pyPath of pyPaths) {
+          if (pyPath.includes("/") && !fs4.existsSync(pyPath))
+            continue;
+          const scriptFile = path5.join(tmpDir, `oa-asr-${Date.now()}.py`);
+          fs4.writeFileSync(scriptFile, whisperScript);
+          try {
+            const output = execSync39(`"${pyPath}" "${scriptFile}"`, {
+              encoding: "utf8",
+              timeout: 12e4,
+              env: { ...process.env, PYTHONUNBUFFERED: "1" }
+            }).trim();
+            try {
+              fs4.unlinkSync(scriptFile);
+            } catch {
             }
-          }
-        } catch {
-        }
-        try {
-          const transcript = execSync39(`whisper "${audioFile}" --language ${language} --model base --output_format txt 2>/dev/null`, { encoding: "utf8", timeout: 12e4 }).trim();
-          if (transcript) {
-            return {
-              success: true,
-              output: `Transcription (whisper):
+            const lines = output.split("\n");
+            for (let i2 = lines.length - 1; i2 >= 0; i2--) {
+              try {
+                const result = JSON.parse(lines[i2]);
+                if (result.ok && result.text) {
+                  return {
+                    success: true,
+                    output: `Transcription (${result.backend}, ${recordDuration > 0 ? recordDuration + "s recording" : "file"}):
-"${transcript}"
+"${result.text}"
 Audio file: ${audioFile}`,
-              durationMs: performance.now() - start2
-            };
+                    durationMs: performance.now() - start2
+                  };
+                }
+                if (!result.ok)
+                  continue;
+              } catch {
+                continue;
+              }
+            }
+          } catch {
+            try {
+              fs4.unlinkSync(scriptFile);
+            } catch {
+            }
           }
-        } catch {
         }
         return {
           success: false,
           output: `Audio recorded to: ${audioFile}`,
-          error: "Transcription failed \u2014 no Whisper backend available. Install: pip install faster-whisper",
+          error: "Transcription failed. Install whisper: pip install openai-whisper OR pip install faster-whisper",
           durationMs: performance.now() - start2
         };
       }

package/package.json (changed)

@@ -1,6 +1,6 @@
 {
   "name": "open-agents-ai",
-  "version": "0.187.169",
+  "version": "0.187.172",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",