open-agents-ai 0.187.547 → 0.187.548

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -539822,6 +539822,91 @@ function findMicCaptureCommand() {
539822
539822
  }
539823
539823
  return null;
539824
539824
  }
539825
/**
 * Locate the bundled `transcribe-file.py` helper script.
 *
 * Probes a fixed list of paths relative to this module first, then two
 * locations under the directory printed by `npm root -g`. The first path
 * that exists wins.
 *
 * @returns {string | null} Path of the script, or null when no candidate
 *   exists (including when the `npm root -g` lookup throws).
 */
function findTranscribeFileScript() {
  const moduleDir = dirname24(fileURLToPath10(import.meta.url));
  const localHit = [
    "../../../../packages/execution/scripts/transcribe-file.py",
    "../../../packages/execution/scripts/transcribe-file.py",
    "../../execution/scripts/transcribe-file.py",
    "../scripts/transcribe-file.py",
    "../../scripts/transcribe-file.py"
  ]
    .map((relative) => join87(moduleDir, relative))
    .find((candidate) => existsSync71(candidate));
  if (localHit !== undefined) return localHit;

  try {
    // Ask npm where global packages live; bounded to 5 seconds.
    const npmGlobalRoot = execSync46("npm root -g", {
      encoding: "utf-8",
      timeout: 5e3,
      stdio: ["pipe", "pipe", "pipe"]
    }).trim();
    const globalHit = [
      ["open-agents-ai", "dist", "scripts", "transcribe-file.py"],
      ["open-agents-ai", "scripts", "transcribe-file.py"]
    ]
      .map((segments) => join87(npmGlobalRoot, ...segments))
      .find((candidate) => existsSync71(candidate));
    if (globalHit !== undefined) return globalHit;
  } catch {
    // Best effort: any failure in the global lookup means "not found".
  }
  return null;
}
539854
/**
 * Fallback transcription path: runs `transcribe-file.py` with the Python
 * interpreter from `~/.open-agents/venv`, sends it a one-line JSON request on
 * stdin and parses the JSON reply from stdout.
 *
 * Never rejects. Resolves to null when the script or the venv interpreter is
 * missing, the child fails to spawn, the reply is not valid JSON, the reply
 * carries an `error` field or a non-string `text`, or the 120 second
 * watchdog fires.
 *
 * @param {string} filePath - Audio file path, forwarded as `path`.
 * @param {string} [model] - Model name, forwarded as `model`.
 * @returns {Promise<{text: string, duration: null, speakers: Array, segments: Array} | null>}
 */
async function transcribeFileViaWhisper(filePath, model) {
  const script = findTranscribeFileScript();
  if (!script) return null;
  const bin = process.platform === "win32" ? "Scripts" : "bin";
  const exe = process.platform === "win32" ? "python.exe" : "python3";
  const venvPython2 = join87(homedir25(), ".open-agents", "venv", bin, exe);
  if (!existsSync71(venvPython2)) return null;
  return new Promise((resolve43) => {
    const child = spawn18(venvPython2, [script], {
      stdio: ["pipe", "pipe", "pipe"],
      env: process.env
    });
    let out = "";
    let timer = null;
    let settled = false;

    const parse3 = (raw) => {
      try {
        const j = JSON.parse(raw);
        if (j.error) return null;
        if (typeof j.text !== "string") return null;
        return { text: j.text.trim(), duration: null, speakers: [], segments: [] };
      } catch {
        return null;
      }
    };

    // Single exit point: every path clears the watchdog so a finished
    // transcription does not leave a 120 s timer holding the event loop open.
    const settle = (result, killChild) => {
      if (settled) return;
      settled = true;
      if (timer) clearTimeout(timer);
      if (killChild) {
        try {
          child.kill("SIGTERM");
        } catch {
          // The child may already be gone; nothing more to do.
        }
      }
      resolve43(result);
    };

    // Decode at the stream level so a multi-byte character split across two
    // chunks is not corrupted by per-chunk decoding.
    child.stdout?.setEncoding?.("utf-8");
    child.stdout?.on("data", (d2) => {
      out += typeof d2 === "string" ? d2 : d2.toString("utf-8");
    });
    // stderr content is not used, but the pipe must still be drained so the
    // child cannot block on a full pipe.
    child.stderr?.resume?.();

    child.on?.("error", () => settle(null, true));
    child.on?.("close", () => settle(out.trim() ? parse3(out) : null, false));
    timer = setTimeout(() => settle(null, true), 12e4);

    // A child that exits before reading stdin surfaces EPIPE asynchronously
    // as a stream 'error' event, which the try/catch below cannot intercept.
    // Swallow it here; the 'close'/'error' handlers decide the result.
    child.stdin?.on?.("error", () => {});
    try {
      child.stdin?.write(JSON.stringify({ path: filePath, model }) + "\n");
      child.stdin?.end();
    } catch {
      settle(null, true);
    }
  });
}
539825
539910
  function findLiveWhisperScript() {
539826
539911
  const thisDir = dirname24(fileURLToPath10(import.meta.url));
539827
539912
  const candidates = [
@@ -540523,32 +540608,48 @@ transcribe-cli error: ${transcribeCliError}` : "";
540523
540608
  } catch {
540524
540609
  }
540525
540610
  }
540526
- if (!tc) return null;
540527
540611
  ensureVenvForTranscribeCli();
540528
- try {
540529
- const result = await tc.transcribe(filePath, {
540530
- model: this.config.model,
540531
- format: "json",
540532
- diarize: false,
540533
- wordTimestamps: false
540534
- });
540612
+ let lastErr = null;
540613
+ if (tc) {
540614
+ try {
540615
+ const result = await tc.transcribe(filePath, {
540616
+ model: this.config.model,
540617
+ format: "json",
540618
+ diarize: false,
540619
+ wordTimestamps: false
540620
+ });
540621
+ if (outputDir) {
540622
+ const { basename: basename21 } = await import("node:path");
540623
+ const transcriptDir = join87(outputDir, ".oa", "transcripts");
540624
+ mkdirSync38(transcriptDir, { recursive: true });
540625
+ const outFile = join87(transcriptDir, `${basename21(filePath)}.txt`);
540626
+ writeFileSync34(outFile, result.text, "utf-8");
540627
+ }
540628
+ return {
540629
+ text: result.text,
540630
+ duration: result.duration,
540631
+ speakers: result.speakers,
540632
+ segments: result.segments
540633
+ };
540634
+ } catch (err) {
540635
+ lastErr = err;
540636
+ }
540637
+ }
540638
+ const fb = await transcribeFileViaWhisper(filePath, this.config.model);
540639
+ if (fb) {
540535
540640
  if (outputDir) {
540536
540641
  const { basename: basename21 } = await import("node:path");
540537
540642
  const transcriptDir = join87(outputDir, ".oa", "transcripts");
540538
540643
  mkdirSync38(transcriptDir, { recursive: true });
540539
540644
  const outFile = join87(transcriptDir, `${basename21(filePath)}.txt`);
540540
- writeFileSync34(outFile, result.text, "utf-8");
540645
+ writeFileSync34(outFile, fb.text, "utf-8");
540541
540646
  }
540542
- return {
540543
- text: result.text,
540544
- duration: result.duration,
540545
- speakers: result.speakers,
540546
- segments: result.segments
540547
- };
540548
- } catch (err) {
540549
- this.emit("error", err instanceof Error ? err : new Error(String(err)));
540550
- return null;
540647
+ return fb;
540648
+ }
540649
+ if (lastErr) {
540650
+ this.emit("error", lastErr instanceof Error ? lastErr : new Error(String(lastErr)));
540551
540651
  }
540652
+ return null;
540552
540653
  }
540553
540654
  // -------------------------------------------------------------------------
540554
540655
  // Auto-mode silence detection
@@ -598677,6 +598778,26 @@ function formatMetrics() {
598677
598778
  lines.push(`oa_errors_total ${metrics.totalErrors}`);
598678
598779
  return lines.join("\n") + "\n";
598679
598780
  }
598781
/**
 * Wrap raw PCM audio in a canonical 44-byte RIFF/WAVE header (format tag 1,
 * i.e. uncompressed PCM).
 *
 * @param {Buffer | ArrayBufferView | ArrayBuffer} pcm - Raw PCM bytes. Typed
 *   arrays such as Int16Array are measured by their byte length, not their
 *   element count.
 * @param {number} [sampleRate=16000] - Samples per second.
 * @param {number} [channels=1] - Number of interleaved channels.
 * @param {number} [bitsPerSample=16] - Bits per sample.
 * @returns {Buffer} The header followed by the PCM bytes.
 */
function pcmToWav(pcm, sampleRate = 16000, channels = 1, bitsPerSample = 16) {
  // Normalise to a byte-addressed Buffer. For typed arrays this is a view
  // over the same memory, so sizes below are in bytes regardless of the
  // element width of the input.
  let data;
  if (Buffer.isBuffer(pcm)) {
    data = pcm;
  } else if (ArrayBuffer.isView(pcm)) {
    data = Buffer.from(pcm.buffer, pcm.byteOffset, pcm.byteLength);
  } else {
    data = Buffer.from(pcm);
  }

  const bytesPerSample = bitsPerSample / 8;
  const blockAlign = channels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;
  const dataSize = data.length;

  const header = Buffer.alloc(44);
  header.write("RIFF", 0, "ascii");
  header.writeUInt32LE(36 + dataSize, 4); // RIFF chunk size: everything after this field
  header.write("WAVE", 8, "ascii");
  header.write("fmt ", 12, "ascii");
  header.writeUInt32LE(16, 16); // size of the fmt chunk body
  header.writeUInt16LE(1, 20); // format tag 1
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(bitsPerSample, 34);
  header.write("data", 36, "ascii");
  header.writeUInt32LE(dataSize, 40);
  return Buffer.concat([header, data]);
}
598680
598801
  function jsonResponse(res, status, body) {
598681
598802
  const payload = JSON.stringify(body);
598682
598803
  res.writeHead(status, { "Content-Type": "application/json" });
@@ -602566,9 +602687,11 @@ data: ${JSON.stringify(data)}
602566
602687
  } catch {
602567
602688
  }
602568
602689
  };
602690
+ let receivedAnyTranscript = false;
602569
602691
  const onTranscript = (payload) => {
602570
602692
  const p2 = payload;
602571
602693
  if (!p2 || typeof p2.text !== "string") return;
602694
+ receivedAnyTranscript = true;
602572
602695
  sse(p2.final ? "final" : "partial", { text: p2.text });
602573
602696
  };
602574
602697
  listen.on("transcript", onTranscript);
@@ -602584,19 +602707,51 @@ data: ${JSON.stringify(data)}
602584
602707
  cleanup();
602585
602708
  res.end();
602586
602709
  });
602710
+ const pcmChunks = [];
602587
602711
  req2.on("data", (chunk) => {
602712
+ pcmChunks.push(chunk);
602588
602713
  try {
602589
602714
  const t2 = listen.liveTranscriber;
602590
602715
  if (t2?.write) t2.write(chunk);
602591
602716
  } catch {
602592
602717
  }
602593
602718
  });
602594
- req2.on("end", () => {
602595
- setTimeout(() => {
602719
+ req2.on("end", async () => {
602720
+ await new Promise((r2) => setTimeout(r2, 1500));
602721
+ if (receivedAnyTranscript) {
602596
602722
  sse("done", { ok: true });
602597
602723
  cleanup();
602598
602724
  res.end();
602599
- }, 1500);
602725
+ return;
602726
+ }
602727
+ try {
602728
+ const pcm = Buffer.concat(pcmChunks);
602729
+ if (pcm.length < 320) {
602730
+ sse("done", { ok: true, empty: true });
602731
+ cleanup();
602732
+ res.end();
602733
+ return;
602734
+ }
602735
+ const wav = pcmToWav(pcm, 16e3, 1, 16);
602736
+ const fs7 = await import("node:fs");
602737
+ const os8 = await import("node:os");
602738
+ const path8 = await import("node:path");
602739
+ const tmpPath = path8.join(os8.tmpdir(), `oa-stream-${Date.now()}-${Math.random().toString(36).slice(2, 8)}.wav`);
602740
+ fs7.writeFileSync(tmpPath, wav);
602741
+ const result = await listen.transcribeFile(tmpPath).catch(() => null);
602742
+ try {
602743
+ fs7.unlinkSync(tmpPath);
602744
+ } catch {
602745
+ }
602746
+ if (result && result.text) {
602747
+ sse("final", { text: result.text });
602748
+ }
602749
+ sse("done", { ok: true, mode: "one-shot" });
602750
+ } catch (err) {
602751
+ sse("error", { message: err instanceof Error ? err.message : String(err) });
602752
+ }
602753
+ cleanup();
602754
+ res.end();
602600
602755
  });
602601
602756
  } catch (err) {
602602
602757
  jsonResponse(res, 500, { error: "transcribe_stream_failed", message: err instanceof Error ? err.message : String(err) });
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Transcribe an audio file using openai-whisper (CPU-only).
4
+
5
+ stdin: JSON {"path": "...", "model": "tiny|base|small|medium|large-v3"}
6
+ stdout: JSON {"text": "...", "duration": <seconds or null>, "segments": [{"start": <s>, "end": <s>, "text": "..."}]}
7
+ or {"error": "<kind>", "message": "..."}
8
+
9
+ Force-loads on CPU because the venv's torch may not match the local GPU
10
+ (e.g. a GT 1030 with sm_61 against a sm_75+ torch build) — those mismatches
11
+ print noisy CUDA warnings and then crash. CPU is slower but always works.
12
+ """
13
+ import sys, os, json, warnings
14
+
15
+ # Quiet CUDA warnings from torch even though we won't use the GPU.
16
+ warnings.filterwarnings("ignore")
17
+ os.environ.setdefault("CUDA_VISIBLE_DEVICES", "") # hide all GPUs
18
+ os.environ.setdefault("PYTHONWARNINGS", "ignore")
19
+
20
+ try:
21
+ import whisper
22
+ except Exception as e:
23
+ print(json.dumps({"error": "deps_missing", "message": str(e)}))
24
+ sys.exit(0)
25
+
26
+
27
def main() -> None:
    """Read one JSON request from stdin, transcribe it on CPU, print one JSON reply.

    The request is a JSON object with a ``path`` key and an optional ``model``
    key (defaults to ``"tiny"``). Exactly one JSON object is printed to stdout:
    either ``{"text", "duration", "segments"}`` on success, or an object with
    an ``error`` key (``bad_input``, ``file_not_found`` or ``whisper_failed``).
    Failures are reported through that reply rather than raised.
    """
    raw = sys.stdin.read()
    try:
        data = json.loads(raw) if raw.strip() else {}
    except Exception as e:
        print(json.dumps({"error": "bad_input", "message": str(e)}))
        return

    # Valid JSON that is not an object (list, string, number) has no .get();
    # report it as bad input instead of crashing with AttributeError.
    if not isinstance(data, dict):
        print(json.dumps({"error": "bad_input", "message": "expected a JSON object"}))
        return

    path = data.get("path")
    # Require a string: os.path.exists() would interpret an integer as an
    # open file descriptor and report True for values such as 1.
    if not isinstance(path, str) or not path or not os.path.exists(path):
        reported = path if isinstance(path, str) else ""
        print(json.dumps({"error": "file_not_found", "path": reported}))
        return

    model_name = data.get("model") or "tiny"
    try:
        model = whisper.load_model(model_name, device="cpu")
        result = model.transcribe(path, fp16=False)
    except Exception as e:
        print(json.dumps({"error": "whisper_failed", "message": str(e)}))
        return

    text = (result.get("text") or "").strip()
    raw_segments = result.get("segments") or []
    segments = [
        {
            "start": float(s.get("start", 0.0)),
            "end": float(s.get("end", 0.0)),
            "text": (s.get("text") or "").strip(),
        }
        for s in raw_segments
    ]
    # Duration is taken from the end time of the last segment; None when the
    # result has no segments.
    duration = float(segments[-1]["end"]) if segments else None
    print(json.dumps({"text": text, "duration": duration, "segments": segments}))
60
+
61
+
62
+ if __name__ == "__main__":
63
+ main()
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.547",
3
+ "version": "0.187.548",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "open-agents-ai",
9
- "version": "0.187.547",
9
+ "version": "0.187.548",
10
10
  "hasInstallScript": true,
11
11
  "license": "CC-BY-NC-4.0",
12
12
  "dependencies": {
@@ -3132,12 +3132,12 @@
3132
3132
  }
3133
3133
  },
3134
3134
  "node_modules/express-rate-limit": {
3135
- "version": "8.5.0",
3136
- "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.5.0.tgz",
3137
- "integrity": "sha512-XKhFohWaSBdVJNTi5TaHziqnPkv04I9UQV6q1Wy7Ui6GGQZVW12ojDFwqer14EvCXxjvPG0CyWXx7cAXpALB4Q==",
3135
+ "version": "8.5.1",
3136
+ "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.5.1.tgz",
3137
+ "integrity": "sha512-5O6KYmyJEpuPJV5hNTXKbAHWRqrzyu+OI3vUnSd2kXFubIVpG7ezpgxQy76Zo5GQZtrQBg86hF+CM/NX+cioiQ==",
3138
3138
  "license": "MIT",
3139
3139
  "dependencies": {
3140
- "ip-address": "10.1.0"
3140
+ "ip-address": "^10.2.0"
3141
3141
  },
3142
3142
  "engines": {
3143
3143
  "node": ">= 16"
@@ -3750,9 +3750,9 @@
3750
3750
  "license": "Apache-2.0 OR MIT"
3751
3751
  },
3752
3752
  "node_modules/ip-address": {
3753
- "version": "10.1.0",
3754
- "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz",
3755
- "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==",
3753
+ "version": "10.2.0",
3754
+ "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.2.0.tgz",
3755
+ "integrity": "sha512-/+S6j4E9AHvW9SWMSEY9Xfy66O5PWvVEJ08O0y5JGyEKQpojb0K0GKpz/v5HJ/G0vi3D2sjGK78119oXZeE0qA==",
3756
3756
  "license": "MIT",
3757
3757
  "engines": {
3758
3758
  "node": ">= 12"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.547",
3
+ "version": "0.187.548",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",