npm - @vortex-os/computer-use - Versions diffs - 0.7.0 → 0.7.1 - Mend

@vortex-os/computer-use 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/README.md +179 -177
package/computer-use.config.example.json +29 -28
package/package.json +74 -73
package/scripts/activity.mjs +92 -92
package/scripts/audio-duck.ps1 +180 -180
package/scripts/classify.ps1 +8 -8
package/scripts/fetch-supertonic.mjs +82 -65
package/scripts/lib.ps1 +679 -679
package/scripts/mcp-stdio.mjs +1337 -1324
package/scripts/noise-filter.mjs +135 -135
package/scripts/ocr.ps1 +92 -92
package/scripts/speak-supertonic.mjs +296 -296
package/scripts/speak.ps1 +58 -58
package/scripts/speech-safety.mjs +104 -104
package/scripts/vlm.mjs +106 -106

package/scripts/audio-duck.ps1 CHANGED Viewed

@@ -1,180 +1,180 @@
-# computer-use — audio ducking helper (pwsh; WASAPI Core Audio via inline C#, NO install).
-#
-# When the companion speaks, briefly lower OTHER apps' audio sessions (game / video / music) so the voice
-# stands out, then restore EXACTLY on completion. Per-app, not master volume — our own voice is excluded.
-#
-# SAFETY: restore is the whole point. Volumes are snapshotted and always restored in a finally block, even if
-# playback throws or the process is asked to stop — a failed restore would leave the user's other apps quiet.
-#
-# Two uses:
-#   1. Dot-source and call [CU.AudioDuck]::Duck(factor, excludePids) -> handle ; [CU.AudioDuck]::Restore(handle).
-#      (speak.ps1 / Heami wraps System.Speech this way, excluding its own $PID.)
-#   2. -WavPath <wav>: duck (excluding THIS process), play the WAV via SoundPlayer, restore in finally.
-#      (speak-supertonic.mjs spawns this to play its synthesized WAV.)
-#
-# factor is the multiplier applied to other sessions (0.45 = reduce to 45%). 1.0 disables ducking.
-param(
-  [string]$WavPath = '',
-  [double]$Factor = 0.45,
-  [int[]]$ExcludePid = @()
-)
-$ErrorActionPreference = 'Stop'
-try { [Console]::OutputEncoding = [System.Text.UTF8Encoding]::new($false) } catch {}
-if (-not ('CU.AudioDuck' -as [type])) {
-Add-Type -TypeDefinition @'
-using System;
-using System.Collections.Generic;
-using System.Runtime.InteropServices;
-namespace CU {
-  [ComImport, Guid("BCDE0395-E52F-467C-8E3D-C4579291692E")] class MMDeviceEnumeratorComObject { }
-  enum EDataFlow { eRender = 0, eCapture = 1, eAll = 2 }
-  enum ERole { eConsole = 0, eMultimedia = 1, eCommunications = 2 }
-  [ComImport, Guid("A95664D2-9614-4F35-A746-DE8DB63617E6"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
-  interface IMMDeviceEnumerator {
-    [PreserveSig] int EnumAudioEndpoints(EDataFlow dataFlow, int dwStateMask, out IMMDeviceCollection ppDevices);
-    [PreserveSig] int GetDefaultAudioEndpoint(EDataFlow dataFlow, ERole role, out IMMDevice ppEndpoint);
-  }
-  [ComImport, Guid("0BD7A1BE-7A1A-44DB-8397-CC5392387B5E"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
-  interface IMMDeviceCollection {
-    [PreserveSig] int GetCount(out uint pcDevices);
-    [PreserveSig] int Item(uint nDevice, out IMMDevice ppDevice);
-  }
-  [ComImport, Guid("D666063F-1587-4E43-81F1-B948E807363F"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
-  interface IMMDevice {
-    [PreserveSig] int Activate(ref Guid iid, int dwClsCtx, IntPtr pActivationParams,
-      [MarshalAs(UnmanagedType.IUnknown)] out object ppInterface);
-  }
-  [ComImport, Guid("77AA99A0-1BD6-484F-8BC7-2C654C9A9B6F"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
-  interface IAudioSessionManager2 {
-    [PreserveSig] int NotImpl1();
-    [PreserveSig] int NotImpl2();
-    [PreserveSig] int GetSessionEnumerator(out IAudioSessionEnumerator SessionEnum);
-  }
-  [ComImport, Guid("E2F5BB11-0570-40CA-ACDD-3AA01277DEE8"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
-  interface IAudioSessionEnumerator {
-    [PreserveSig] int GetCount(out int SessionCount);
-    [PreserveSig] int GetSession(int SessionCount, out IAudioSessionControl Session);
-  }
-  [ComImport, Guid("F4B1A599-7266-4319-A8CA-E70ACB11E8CD"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
-  interface IAudioSessionControl { }
-  [ComImport, Guid("bfb7ff88-7239-4fc9-8fa2-07c950be9c6d"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
-  interface IAudioSessionControl2 {
-    // 9 inherited IAudioSessionControl slots (unused) ...
-    [PreserveSig] int N1();  [PreserveSig] int N2();  [PreserveSig] int N3();
-    [PreserveSig] int N4();  [PreserveSig] int N5();  [PreserveSig] int N6();
-    [PreserveSig] int N7();  [PreserveSig] int N8();  [PreserveSig] int N9();
-    // IAudioSessionControl2 own slots:
-    [PreserveSig] int GetSessionIdentifier([MarshalAs(UnmanagedType.LPWStr)] out string s);
-    [PreserveSig] int GetSessionInstanceIdentifier([MarshalAs(UnmanagedType.LPWStr)] out string s);
-    [PreserveSig] int GetProcessId(out uint pid);
-    [PreserveSig] int IsSystemSoundsSession();
-  }
-  [ComImport, Guid("87CE5498-68D6-44E5-9215-6DA47EF883D8"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
-  interface ISimpleAudioVolume {
-    [PreserveSig] int SetMasterVolume(float level, ref Guid ctx);
-    [PreserveSig] int GetMasterVolume(out float level);
-    [PreserveSig] int SetMute(bool mute, ref Guid ctx);
-    [PreserveSig] int GetMute(out bool mute);
-  }
-  public class Handle { internal ISimpleAudioVolume Vol; public float Original; public uint Pid; }
-  public static class AudioDuck {
-    static Guid IID_IAudioSessionManager2 = new Guid("77AA99A0-1BD6-484F-8BC7-2C654C9A9B6F");
-    static Guid CTX = Guid.Empty;
-    static IAudioSessionEnumerator SessionsForDevice(IMMDevice dev) {
-      object o; if (dev.Activate(ref IID_IAudioSessionManager2, 23 /*CLSCTX_ALL*/, IntPtr.Zero, out o) != 0) return null;
-      var mgr = (IAudioSessionManager2)o;
-      IAudioSessionEnumerator en; if (mgr.GetSessionEnumerator(out en) != 0) return null;
-      return en;
-    }
-    // Lower every session (except excludePids and system-sounds) to original*factor, across ALL active render
-    // devices (not just the default endpoint) so virtual-audio routing (e.g. VoiceMeeter) is still caught.
-    // Returns restore handles. DEVICE_STATE_ACTIVE = 0x1.
-    public static List<Handle> Duck(double factor, int[] excludePids) {
-      var handles = new List<Handle>();
-      var deo = (IMMDeviceEnumerator)(new MMDeviceEnumeratorComObject());
-      IMMDeviceCollection coll;
-      if (deo.EnumAudioEndpoints(EDataFlow.eRender, 0x1, out coll) != 0 || coll == null) return handles;
-      uint dcount; if (coll.GetCount(out dcount) != 0) return handles;
-      var excl = new HashSet<uint>(); foreach (var p in excludePids) excl.Add((uint)p);
-      for (uint di = 0; di < dcount; di++) {
-        IMMDevice dev; if (coll.Item(di, out dev) != 0 || dev == null) continue;
-        var en = SessionsForDevice(dev); if (en == null) continue;
-        int count; if (en.GetCount(out count) != 0) continue;
-        for (int i = 0; i < count; i++) {
-          IAudioSessionControl ctl; if (en.GetSession(i, out ctl) != 0 || ctl == null) continue;
-          try {
-            var c2 = (IAudioSessionControl2)ctl;
-            if (c2.IsSystemSoundsSession() == 0) continue; // S_OK(0) means IS system sounds -> skip it
-            uint pid; if (c2.GetProcessId(out pid) != 0) continue;
-            if (excl.Contains(pid)) continue;
-            var vol = (ISimpleAudioVolume)ctl;
-            float cur; if (vol.GetMasterVolume(out cur) != 0) continue;
-            var h = new Handle { Vol = vol, Original = cur, Pid = pid };
-            vol.SetMasterVolume((float)(cur * factor), ref CTX);
-            handles.Add(h);
-          } catch { }
-        }
-      }
-      return handles;
-    }
-    public static void Restore(List<Handle> handles) {
-      if (handles == null) return;
-      foreach (var h in handles) { try { h.Vol.SetMasterVolume(h.Original, ref CTX); } catch { } }
-    }
-    // Inspection helpers (for the isolation test): current live level / snapshotted original per handle.
-    public static float[] Levels(List<Handle> handles) {
-      var a = new float[handles.Count];
-      for (int i = 0; i < handles.Count; i++) { float c = 0; try { handles[i].Vol.GetMasterVolume(out c); } catch { } a[i] = c; }
-      return a;
-    }
-    public static float[] Originals(List<Handle> handles) {
-      var a = new float[handles.Count];
-      for (int i = 0; i < handles.Count; i++) a[i] = handles[i].Original;
-      return a;
-    }
-    public static uint[] Pids(List<Handle> handles) {
-      var a = new uint[handles.Count];
-      for (int i = 0; i < handles.Count; i++) a[i] = handles[i].Pid;
-      return a;
-    }
-  }
-}
-'@
-}
-function Invoke-Duck([double]$factor, [int[]]$exclude) {
-  if ($factor -ge 1.0 -or $factor -lt 0) { return $null }
-  try { return [CU.AudioDuck]::Duck($factor, $exclude) } catch { return $null }
-}
-function Restore-Duck($handles) {
-  if ($null -eq $handles) { return }
-  try { [CU.AudioDuck]::Restore($handles) } catch { }
-}
-# -WavPath mode: duck others (excluding THIS process, which owns the playback), play, restore in finally.
-if ($WavPath) {
-  $h = Invoke-Duck $Factor @($PID)
-  try {
-    $p = New-Object System.Media.SoundPlayer $WavPath
-    $p.PlaySync()
-  } finally {
-    Restore-Duck $h
-  }
-}
+# computer-use — audio ducking helper (pwsh; WASAPI Core Audio via inline C#, NO install).
+#
+# When the companion speaks, briefly lower OTHER apps' audio sessions (game / video / music) so the voice
+# stands out, then restore EXACTLY on completion. Per-app, not master volume — our own voice is excluded.
+#
+# SAFETY: restore is the whole point. Volumes are snapshotted and always restored in a finally block, even if
+# playback throws or the process is asked to stop — a failed restore would leave the user's other apps quiet.
+#
+# Two uses:
+#   1. Dot-source and call [CU.AudioDuck]::Duck(factor, excludePids) -> handle ; [CU.AudioDuck]::Restore(handle).
+#      (speak.ps1 / Heami wraps System.Speech this way, excluding its own $PID.)
+#   2. -WavPath <wav>: duck (excluding THIS process), play the WAV via SoundPlayer, restore in finally.
+#      (speak-supertonic.mjs spawns this to play its synthesized WAV.)
+#
+# factor is the multiplier applied to other sessions (0.45 = reduce to 45%). 1.0 disables ducking.
+param(
+  [string]$WavPath = '',
+  [double]$Factor = 0.45,
+  [int[]]$ExcludePid = @()
+)
+$ErrorActionPreference = 'Stop'
+try { [Console]::OutputEncoding = [System.Text.UTF8Encoding]::new($false) } catch {}
+if (-not ('CU.AudioDuck' -as [type])) {
+Add-Type -TypeDefinition @'
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+namespace CU {
+  // Minimal COM interop surface for the Core Audio session API. Only the members this script calls are
+  // declared by name; every interface is IUnknown-based and its methods must stay in native vtable order,
+  // so unused leading slots are kept as placeholder methods rather than removed.
+  // Coclass used to create the device enumerator (cast to IMMDeviceEnumerator by the caller).
+  [ComImport, Guid("BCDE0395-E52F-467C-8E3D-C4579291692E")] class MMDeviceEnumeratorComObject { }
+  // Direction of an audio endpoint; Duck() only enumerates eRender (playback) devices.
+  enum EDataFlow { eRender = 0, eCapture = 1, eAll = 2 }
+  // Endpoint role, needed only for the GetDefaultAudioEndpoint signature.
+  enum ERole { eConsole = 0, eMultimedia = 1, eCommunications = 2 }
+  // Enumerates audio endpoints. All methods return the raw HRESULT ([PreserveSig]); 0 means success.
+  [ComImport, Guid("A95664D2-9614-4F35-A746-DE8DB63617E6"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
+  interface IMMDeviceEnumerator {
+    [PreserveSig] int EnumAudioEndpoints(EDataFlow dataFlow, int dwStateMask, out IMMDeviceCollection ppDevices);
+    [PreserveSig] int GetDefaultAudioEndpoint(EDataFlow dataFlow, ERole role, out IMMDevice ppEndpoint);
+  }
+  // Indexed collection of endpoints returned by EnumAudioEndpoints.
+  [ComImport, Guid("0BD7A1BE-7A1A-44DB-8397-CC5392387B5E"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
+  interface IMMDeviceCollection {
+    [PreserveSig] int GetCount(out uint pcDevices);
+    [PreserveSig] int Item(uint nDevice, out IMMDevice ppDevice);
+  }
+  // One audio endpoint; Activate hands back the interface identified by iid as a boxed IUnknown.
+  [ComImport, Guid("D666063F-1587-4E43-81F1-B948E807363F"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
+  interface IMMDevice {
+    [PreserveSig] int Activate(ref Guid iid, int dwClsCtx, IntPtr pActivationParams,
+      [MarshalAs(UnmanagedType.IUnknown)] out object ppInterface);
+  }
+  // Session manager for one endpoint. NotImpl1/NotImpl2 only hold the two leading vtable slots that this
+  // script never calls, so that GetSessionEnumerator lands on the third slot.
+  [ComImport, Guid("77AA99A0-1BD6-484F-8BC7-2C654C9A9B6F"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
+  interface IAudioSessionManager2 {
+    [PreserveSig] int NotImpl1();
+    [PreserveSig] int NotImpl2();
+    [PreserveSig] int GetSessionEnumerator(out IAudioSessionEnumerator SessionEnum);
+  }
+  // Indexed list of the audio sessions on one endpoint.
+  [ComImport, Guid("E2F5BB11-0570-40CA-ACDD-3AA01277DEE8"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
+  interface IAudioSessionEnumerator {
+    [PreserveSig] int GetCount(out int SessionCount);
+    [PreserveSig] int GetSession(int SessionCount, out IAudioSessionControl Session);
+  }
+  // Declared without members: it is only used as the out type of GetSession and as the source object that
+  // Duck() casts to IAudioSessionControl2 and ISimpleAudioVolume.
+  [ComImport, Guid("F4B1A599-7266-4319-A8CA-E70ACB11E8CD"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
+  interface IAudioSessionControl { }
+  // Extended session control: gives the owning process id and the system-sounds flag used to decide
+  // which sessions are left alone.
+  [ComImport, Guid("bfb7ff88-7239-4fc9-8fa2-07c950be9c6d"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
+  interface IAudioSessionControl2 {
+    // 9 inherited IAudioSessionControl slots (unused) ...
+    [PreserveSig] int N1();  [PreserveSig] int N2();  [PreserveSig] int N3();
+    [PreserveSig] int N4();  [PreserveSig] int N5();  [PreserveSig] int N6();
+    [PreserveSig] int N7();  [PreserveSig] int N8();  [PreserveSig] int N9();
+    // IAudioSessionControl2 own slots:
+    [PreserveSig] int GetSessionIdentifier([MarshalAs(UnmanagedType.LPWStr)] out string s);
+    [PreserveSig] int GetSessionInstanceIdentifier([MarshalAs(UnmanagedType.LPWStr)] out string s);
+    [PreserveSig] int GetProcessId(out uint pid);
+    // Returns the HRESULT directly; Duck() treats 0 (S_OK) as "this IS the system-sounds session".
+    [PreserveSig] int IsSystemSoundsSession();
+  }
+  // Per-session volume control; every method returns the raw HRESULT (0 = success).
+  // SetMute/GetMute take a Win32 BOOL (4 bytes) natively. COM interop marshals a bare `bool` as a 2-byte
+  // VARIANT_BOOL by default, so the BOOL layout is requested explicitly to keep GetMute from receiving a
+  // 4-byte write into a 2-byte slot. Neither mute method is called by this script today.
+  [ComImport, Guid("87CE5498-68D6-44E5-9215-6DA47EF883D8"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
+  interface ISimpleAudioVolume {
+    [PreserveSig] int SetMasterVolume(float level, ref Guid ctx);
+    [PreserveSig] int GetMasterVolume(out float level);
+    [PreserveSig] int SetMute([MarshalAs(UnmanagedType.Bool)] bool mute, ref Guid ctx);
+    [PreserveSig] int GetMute([MarshalAs(UnmanagedType.Bool)] out bool mute);
+  }
+  public class Handle { internal ISimpleAudioVolume Vol; public float Original; public uint Pid; }
+  // Static entry points for per-application audio ducking.
+  // Duck() snapshots and lowers other processes' session volumes and returns one Handle per touched
+  // session; Restore() writes the snapshots back. Everything after the first volume change is best-effort
+  // and never throws, because losing the handle list would leave other apps permanently quiet.
+  public static class AudioDuck {
+    static Guid IID_IAudioSessionManager2 = new Guid("77AA99A0-1BD6-484F-8BC7-2C654C9A9B6F");
+    // Event-context GUID passed to SetMasterVolume; this script does not distinguish contexts.
+    static Guid CTX = Guid.Empty;
+    // Activates the session manager on one endpoint and returns its session enumerator.
+    // Returns null on any failure (non-zero HRESULT or an unexpected object) instead of throwing.
+    static IAudioSessionEnumerator SessionsForDevice(IMMDevice dev) {
+      object o; if (dev.Activate(ref IID_IAudioSessionManager2, 23 /*CLSCTX_ALL*/, IntPtr.Zero, out o) != 0) return null;
+      var mgr = o as IAudioSessionManager2; if (mgr == null) return null;
+      IAudioSessionEnumerator en; if (mgr.GetSessionEnumerator(out en) != 0) return null;
+      return en;
+    }
+    // Snapshots and lowers a single session, appending its restore handle to `handles`.
+    // Skips the system-sounds session, sessions whose process id cannot be read, and excluded pids.
+    static void DuckSession(IAudioSessionEnumerator en, int index, double factor, HashSet<uint> excl, List<Handle> handles) {
+      try {
+        IAudioSessionControl ctl; if (en.GetSession(index, out ctl) != 0 || ctl == null) return;
+        var c2 = (IAudioSessionControl2)ctl;
+        if (c2.IsSystemSoundsSession() == 0) return; // S_OK(0) means IS system sounds -> skip it
+        uint pid; if (c2.GetProcessId(out pid) != 0) return;
+        if (excl.Contains(pid)) return;
+        var vol = (ISimpleAudioVolume)ctl;
+        float cur; if (vol.GetMasterVolume(out cur) != 0) return;
+        // Record the snapshot BEFORE touching the volume, so the session is restorable even if the
+        // write below fails part-way.
+        handles.Add(new Handle { Vol = vol, Original = cur, Pid = pid });
+        vol.SetMasterVolume((float)(cur * factor), ref CTX);
+      } catch { /* best-effort: one bad session must not abort the rest */ }
+    }
+    // Lower every session (except excludePids and system-sounds) to original*factor, across ALL active render
+    // devices (not just the default endpoint) so virtual-audio routing (e.g. VoiceMeeter) is still caught.
+    // Returns restore handles. DEVICE_STATE_ACTIVE = 0x1.
+    //   factor      - multiplier applied to each session's current volume; not validated here.
+    //   excludePids - process ids to leave untouched; null is treated as empty.
+    // Failure to create or query the device enumerator still propagates (nothing has been changed yet);
+    // once enumeration starts, per-device and per-session failures are swallowed so the handles collected
+    // so far are always returned.
+    public static List<Handle> Duck(double factor, int[] excludePids) {
+      var handles = new List<Handle>();
+      var excl = new HashSet<uint>();
+      if (excludePids != null) { foreach (var p in excludePids) { if (p >= 0) excl.Add((uint)p); } }
+      var deo = (IMMDeviceEnumerator)(new MMDeviceEnumeratorComObject());
+      IMMDeviceCollection coll;
+      if (deo.EnumAudioEndpoints(EDataFlow.eRender, 0x1, out coll) != 0 || coll == null) return handles;
+      uint dcount; if (coll.GetCount(out dcount) != 0) return handles;
+      for (uint di = 0; di < dcount; di++) {
+        try {
+          IMMDevice dev; if (coll.Item(di, out dev) != 0 || dev == null) continue;
+          var en = SessionsForDevice(dev); if (en == null) continue;
+          int count; if (en.GetCount(out count) != 0) continue;
+          for (int i = 0; i < count; i++) DuckSession(en, i, factor, excl, handles);
+        } catch { /* a broken endpoint must not discard handles from earlier endpoints */ }
+      }
+      return handles;
+    }
+    // Writes every snapshotted volume back. Null list is a no-op; per-handle failures are ignored so one
+    // vanished session cannot block restoring the others.
+    public static void Restore(List<Handle> handles) {
+      if (handles == null) return;
+      foreach (var h in handles) { try { h.Vol.SetMasterVolume(h.Original, ref CTX); } catch { } }
+    }
+    // Inspection helpers (for the isolation test): current live level / snapshotted original per handle.
+    // Each returns an empty array for a null list, matching Restore's tolerance of null.
+    public static float[] Levels(List<Handle> handles) {
+      if (handles == null) return new float[0];
+      var a = new float[handles.Count];
+      for (int i = 0; i < handles.Count; i++) { float c = 0; try { handles[i].Vol.GetMasterVolume(out c); } catch { } a[i] = c; }
+      return a;
+    }
+    public static float[] Originals(List<Handle> handles) {
+      if (handles == null) return new float[0];
+      var a = new float[handles.Count];
+      for (int i = 0; i < handles.Count; i++) a[i] = handles[i].Original;
+      return a;
+    }
+    public static uint[] Pids(List<Handle> handles) {
+      if (handles == null) return new uint[0];
+      var a = new uint[handles.Count];
+      for (int i = 0; i < handles.Count; i++) a[i] = handles[i].Pid;
+      return a;
+    }
+  }
+}
+'@
+}
+# Ducks every other session via [CU.AudioDuck]::Duck and returns whatever it yields.
+# Returns $null (meaning "nothing to restore") when the factor is outside [0, 1) or when Duck throws,
+# so a ducking failure never prevents the caller from speaking.
+function Invoke-Duck([double]$factor, [int[]]$exclude) {
+  if (($factor -lt 0) -or ($factor -ge 1.0)) { return $null }
+  try {
+    return [CU.AudioDuck]::Duck($factor, $exclude)
+  } catch {
+    return $null
+  }
+}
+# Puts back the volumes captured by Invoke-Duck. A $null argument is a no-op, and any error from
+# [CU.AudioDuck]::Restore is swallowed so this is safe to call from a finally block.
+function Restore-Duck($handles) {
+  if ($null -ne $handles) {
+    try {
+      [CU.AudioDuck]::Restore($handles)
+    } catch { }
+  }
+}
+# -WavPath mode: duck others (excluding THIS process, which owns the playback, plus any pids the caller
+# passed via -ExcludePid), play the WAV synchronously, then restore in finally.
+if ($WavPath) {
+  $h = $null
+  $p = $null
+  try {
+    # Duck inside the try so that a stop request arriving right after the volumes were lowered still
+    # reaches the restore in finally. Invoke-Duck itself never throws.
+    $h = Invoke-Duck $Factor (@($PID) + @($ExcludePid))
+    $p = New-Object System.Media.SoundPlayer $WavPath
+    $p.PlaySync()
+  } finally {
+    # Restore first: it is the safety-critical step; releasing the player is only housekeeping.
+    Restore-Duck $h
+    if ($null -ne $p) { try { $p.Dispose() } catch { } }
+  }
+}

package/scripts/classify.ps1 CHANGED Viewed

@@ -1,8 +1,8 @@
-# computer-use — classify_activity raw-signal adapter over lib.ps1::Get-AxClassifyActivity.
-# Output = a single JSON blob of raw signals (foreground process/title, notification state, UIA count, fullscreen).
-# The JS side (activity.mjs) derives the activity class from these. Read-only, no images.
-param([int]$UiaCap = 60)
-$ErrorActionPreference = 'Stop'
-. (Join-Path $PSScriptRoot 'lib.ps1')
-Initialize-AxEnv
-Get-AxClassifyActivity -UiaCap $UiaCap | ConvertTo-Json -Depth 4
+# computer-use — classify_activity raw-signal adapter over lib.ps1::Get-AxClassifyActivity.
+# Output = a single JSON blob of raw signals (foreground process/title, notification state, UIA count, fullscreen).
+# The JS side (activity.mjs) derives the activity class from these. Read-only, no images.
+# -UiaCap (default 60) is forwarded unchanged to Get-AxClassifyActivity.
+param([int]$UiaCap = 60)
+# Treat every error as terminating.
+$ErrorActionPreference = 'Stop'
+# Dot-source the shared library that sits next to this script so its functions are defined in this scope.
+. (Join-Path $PSScriptRoot 'lib.ps1')
+Initialize-AxEnv
+# Serialize the result as JSON, expanding nested objects up to 4 levels deep.
+Get-AxClassifyActivity -UiaCap $UiaCap | ConvertTo-Json -Depth 4

package/scripts/fetch-supertonic.mjs CHANGED Viewed

@@ -1,65 +1,82 @@
-#!/usr/bin/env node
-// computer-use — Supertonic 3 model downloader (one-time, ~380 MB) for the optional neural TTS engine.
-//
-// Downloads Supertone/supertonic-3 (OpenRAIL-M weights — commercial use permitted) into the model cache the
-// speak path expects. Idempotent: existing non-empty files are skipped, so re-running resumes a partial fetch.
-// Usage: node fetch-supertonic.mjs [targetDir]   (default: VORTEX_CU_TTS_MODEL_DIR or ~/.vortex/computer-use/supertonic-3)
-//
-// The engine code (speak-supertonic.mjs) is adapted from Supertone's MIT Node example; only the weights are
-// downloaded here, never bundled, keeping the npm package small and the license boundary clean.
-import { createWriteStream, existsSync, statSync, mkdirSync, renameSync, unlinkSync } from 'node:fs';
-import { Readable } from 'node:stream';
-import { pipeline } from 'node:stream/promises';
-import { join, dirname } from 'node:path';
-import { homedir } from 'node:os';
-const HF = 'https://huggingface.co/Supertone/supertonic-3/resolve/main';
-const FILES = [
-  'onnx/duration_predictor.onnx',
-  'onnx/text_encoder.onnx',
-  'onnx/vector_estimator.onnx',
-  'onnx/vocoder.onnx',
-  'onnx/tts.json',
-  'onnx/unicode_indexer.json',
-  'config.json',
-  ...['F1', 'F2', 'F3', 'F4', 'F5', 'M1', 'M2', 'M3', 'M4', 'M5'].map((v) => `voice_styles/${v}.json`),
-];
-const targetDir = process.argv[2] || process.env.VORTEX_CU_TTS_MODEL_DIR || join(homedir(), '.vortex', 'computer-use', 'supertonic-3');
-async function fetchToFile(url, dest) {
-  const res = await fetch(url, { redirect: 'follow' });
-  if (!res.ok || !res.body) throw new Error(`HTTP ${res.status}`);
-  const tmp = dest + '.part';
-  mkdirSync(dirname(dest), { recursive: true });
-  await pipeline(Readable.fromWeb(res.body), createWriteStream(tmp));
-  renameSync(tmp, dest);
-  return statSync(dest).size;
-}
-async function main() {
-  console.log(`Supertonic 3 model cache: ${targetDir}`);
-  let downloaded = 0, skipped = 0, bytes = 0;
-  for (const rel of FILES) {
-    const dest = join(targetDir, rel);
-    if (existsSync(dest) && statSync(dest).size > 0) { skipped++; continue; }
-    process.stdout.write(`  ↓ ${rel} ... `);
-    try {
-      const t = Date.now();
-      const sz = await fetchToFile(`${HF}/${rel}`, dest);
-      bytes += sz;
-      downloaded++;
-      console.log(`${(sz / 1048576).toFixed(1)} MB (${((Date.now() - t) / 1000).toFixed(1)}s)`);
-    } catch (e) {
-      console.log(`FAILED: ${e.message}`);
-      try { unlinkSync(dest + '.part'); } catch {}
-      console.error(`\nDownload failed for ${rel}. Re-run to resume.`);
-      process.exit(1);
-    }
-  }
-  console.log(`\nDone — ${downloaded} downloaded (${(bytes / 1048576).toFixed(0)} MB), ${skipped} already present.`);
-  console.log('Neural TTS is ready. Set VORTEX_CU_TTS_ENGINE=auto (default) to use it.');
-}
-main().catch((e) => { console.error(e); process.exit(1); });
+#!/usr/bin/env node
+// computer-use — Supertonic 3 model downloader (one-time, ~380 MB) for the optional neural TTS engine.
+//
+// Downloads Supertone/supertonic-3 (OpenRAIL-M weights — commercial use permitted) into the model cache the
+// speak path expects. Idempotent: existing non-empty files are skipped, so re-running resumes a partial fetch.
+// Usage: node fetch-supertonic.mjs [targetDir]   (default: VORTEX_CU_TTS_MODEL_DIR or ~/.vortex/computer-use/supertonic-3)
+//
+// The engine code (speak-supertonic.mjs) is adapted from Supertone's MIT Node example; only the weights are
+// downloaded here, never bundled, keeping the npm package small and the license boundary clean.
+import { createWriteStream, existsSync, statSync, mkdirSync, renameSync, unlinkSync, writeFileSync } from 'node:fs';
+import { Readable } from 'node:stream';
+import { pipeline } from 'node:stream/promises';
+import { join, dirname } from 'node:path';
+import { homedir } from 'node:os';
+const HF = 'https://huggingface.co/Supertone/supertonic-3/resolve/main';
+const FILES = [
+  'onnx/duration_predictor.onnx',
+  'onnx/text_encoder.onnx',
+  'onnx/vector_estimator.onnx',
+  'onnx/vocoder.onnx',
+  'onnx/tts.json',
+  'onnx/unicode_indexer.json',
+  'config.json',
+  ...['F1', 'F2', 'F3', 'F4', 'F5', 'M1', 'M2', 'M3', 'M4', 'M5'].map((v) => `voice_styles/${v}.json`),
+];
+const targetDir = process.argv[2] || process.env.VORTEX_CU_TTS_MODEL_DIR || join(homedir(), '.vortex', 'computer-use', 'supertonic-3');
+// Per-process temp suffix so two concurrent fetchers (e.g. two instances on one
+// machine racing the same file) never write the SAME `.part` stream — each
+// renames its own complete download into place, so a present `dest` is always a
+// whole file, never a half-written one.
+const PART_SUFFIX = `.${process.pid}.part`;
+// Clear the session-start auto-download lock (base writes it before spawning us)
+// on exit — success OR failure — so a fast/transient failure never wedges the
+// retry window for hours. Harmless when run manually (no such lock, or already
+// gone). `exit` fires on normal completion and on process.exit(); the unlink is
+// synchronous as required there.
+const DOWNLOAD_LOCK = join(homedir(), '.vortex', 'computer-use', '.supertonic-download.lock');
+process.on('exit', () => { try { unlinkSync(DOWNLOAD_LOCK); } catch {} });
+async function fetchToFile(url, dest) {
+  const res = await fetch(url, { redirect: 'follow' });
+  if (!res.ok || !res.body) throw new Error(`HTTP ${res.status}`);
+  const tmp = dest + PART_SUFFIX;
+  mkdirSync(dirname(dest), { recursive: true });
+  await pipeline(Readable.fromWeb(res.body), createWriteStream(tmp));
+  renameSync(tmp, dest);
+  return statSync(dest).size;
+}
+async function main() {
+  console.log(`Supertonic 3 model cache: ${targetDir}`);
+  let downloaded = 0, skipped = 0, bytes = 0;
+  for (const rel of FILES) {
+    const dest = join(targetDir, rel);
+    if (existsSync(dest) && statSync(dest).size > 0) { skipped++; continue; }
+    process.stdout.write(`  ↓ ${rel} ... `);
+    try {
+      const t = Date.now();
+      const sz = await fetchToFile(`${HF}/${rel}`, dest);
+      bytes += sz;
+      downloaded++;
+      console.log(`${(sz / 1048576).toFixed(1)} MB (${((Date.now() - t) / 1000).toFixed(1)}s)`);
+    } catch (e) {
+      console.log(`FAILED: ${e.message}`);
+      try { unlinkSync(dest + PART_SUFFIX); } catch {}
+      console.error(`\nDownload failed for ${rel}. Re-run to resume.`);
+      process.exit(1);
+    }
+  }
+  // Completion marker — written only after every file has landed, so a consumer
+  // (e.g. base session-start's auto-download gate) can check ONE path to know the
+  // model is fully present instead of stat'ing the whole file list. A partial or
+  // interrupted run exits before this, leaving no marker, so the next run resumes.
+  try { writeFileSync(join(targetDir, '.ready'), new Date().toISOString() + '\n'); } catch {}
+  console.log(`\nDone — ${downloaded} downloaded (${(bytes / 1048576).toFixed(0)} MB), ${skipped} already present.`);
+  console.log('Neural TTS is ready. Set VORTEX_CU_TTS_ENGINE=auto (default) to use it.');
+}
+main().catch((e) => { console.error(e); process.exit(1); });