@aj-archipelago/cortex 1.3.5 → 1.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/helper-apps/cortex-autogen/agents.py +31 -2
- package/helper-apps/cortex-realtime-voice-server/.env.sample +6 -0
- package/helper-apps/cortex-realtime-voice-server/README.md +22 -0
- package/helper-apps/cortex-realtime-voice-server/bun.lockb +0 -0
- package/helper-apps/cortex-realtime-voice-server/client/bun.lockb +0 -0
- package/helper-apps/cortex-realtime-voice-server/client/index.html +12 -0
- package/helper-apps/cortex-realtime-voice-server/client/package.json +65 -0
- package/helper-apps/cortex-realtime-voice-server/client/postcss.config.js +6 -0
- package/helper-apps/cortex-realtime-voice-server/client/public/favicon.ico +0 -0
- package/helper-apps/cortex-realtime-voice-server/client/public/index.html +43 -0
- package/helper-apps/cortex-realtime-voice-server/client/public/logo192.png +0 -0
- package/helper-apps/cortex-realtime-voice-server/client/public/logo512.png +0 -0
- package/helper-apps/cortex-realtime-voice-server/client/public/manifest.json +25 -0
- package/helper-apps/cortex-realtime-voice-server/client/public/robots.txt +3 -0
- package/helper-apps/cortex-realtime-voice-server/client/public/sounds/connect.mp3 +0 -0
- package/helper-apps/cortex-realtime-voice-server/client/public/sounds/disconnect.mp3 +0 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/App.test.tsx +9 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/App.tsx +126 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/SettingsModal.tsx +207 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/Chat.tsx +553 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/ChatBubble.tsx +22 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/ChatBubbleLeft.tsx +22 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/ChatBubbleRight.tsx +21 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/ChatMessage.tsx +27 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/ChatMessageInput.tsx +74 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/ChatTile.tsx +211 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/audio/SoundEffects.ts +56 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/audio/WavPacker.ts +112 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/audio/WavRecorder.ts +571 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/audio/WavStreamPlayer.ts +290 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/audio/analysis/AudioAnalysis.ts +186 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/audio/analysis/constants.ts +59 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/audio/worklets/AudioProcessor.ts +214 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/audio/worklets/StreamProcessor.ts +183 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/components/AudioVisualizer.tsx +151 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/components/CopyButton.tsx +32 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/components/ImageOverlay.tsx +166 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/components/MicrophoneVisualizer.tsx +95 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/components/ScreenshotCapture.tsx +116 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/hooks/useWindowResize.ts +27 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/chat/utils/audio.ts +33 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/index.css +20 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/index.tsx +19 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/logo.svg +1 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/react-app-env.d.ts +1 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/reportWebVitals.ts +15 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/setupTests.ts +5 -0
- package/helper-apps/cortex-realtime-voice-server/client/src/utils/logger.ts +45 -0
- package/helper-apps/cortex-realtime-voice-server/client/tailwind.config.js +14 -0
- package/helper-apps/cortex-realtime-voice-server/client/tsconfig.json +30 -0
- package/helper-apps/cortex-realtime-voice-server/client/vite.config.ts +22 -0
- package/helper-apps/cortex-realtime-voice-server/index.ts +19 -0
- package/helper-apps/cortex-realtime-voice-server/package.json +28 -0
- package/helper-apps/cortex-realtime-voice-server/src/ApiServer.ts +35 -0
- package/helper-apps/cortex-realtime-voice-server/src/SocketServer.ts +737 -0
- package/helper-apps/cortex-realtime-voice-server/src/Tools.ts +520 -0
- package/helper-apps/cortex-realtime-voice-server/src/cortex/expert.ts +29 -0
- package/helper-apps/cortex-realtime-voice-server/src/cortex/image.ts +29 -0
- package/helper-apps/cortex-realtime-voice-server/src/cortex/memory.ts +91 -0
- package/helper-apps/cortex-realtime-voice-server/src/cortex/reason.ts +29 -0
- package/helper-apps/cortex-realtime-voice-server/src/cortex/search.ts +30 -0
- package/helper-apps/cortex-realtime-voice-server/src/cortex/style.ts +31 -0
- package/helper-apps/cortex-realtime-voice-server/src/cortex/utils.ts +95 -0
- package/helper-apps/cortex-realtime-voice-server/src/cortex/vision.ts +34 -0
- package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts +499 -0
- package/helper-apps/cortex-realtime-voice-server/src/realtime/realtimeTypes.ts +279 -0
- package/helper-apps/cortex-realtime-voice-server/src/realtime/socket.ts +27 -0
- package/helper-apps/cortex-realtime-voice-server/src/realtime/transcription.ts +75 -0
- package/helper-apps/cortex-realtime-voice-server/src/realtime/utils.ts +33 -0
- package/helper-apps/cortex-realtime-voice-server/src/utils/logger.ts +45 -0
- package/helper-apps/cortex-realtime-voice-server/src/utils/prompt.ts +81 -0
- package/helper-apps/cortex-realtime-voice-server/tsconfig.json +28 -0
- package/package.json +1 -1
- package/pathways/basePathway.js +3 -1
- package/pathways/system/entity/memory/sys_memory_manager.js +3 -0
- package/pathways/system/entity/memory/sys_memory_update.js +44 -45
- package/pathways/system/entity/memory/sys_read_memory.js +86 -6
- package/pathways/system/entity/memory/sys_search_memory.js +66 -0
- package/pathways/system/entity/shared/sys_entity_constants.js +2 -2
- package/pathways/system/entity/sys_entity_continue.js +2 -1
- package/pathways/system/entity/sys_entity_start.js +10 -0
- package/pathways/system/entity/sys_generator_expert.js +0 -2
- package/pathways/system/entity/sys_generator_memory.js +31 -0
- package/pathways/system/entity/sys_generator_voice_sample.js +36 -0
- package/pathways/system/entity/sys_router_tool.js +13 -10
- package/pathways/system/sys_parse_numbered_object_list.js +1 -1
- package/server/pathwayResolver.js +41 -31
- package/server/plugins/azureVideoTranslatePlugin.js +28 -16
- package/server/plugins/claude3VertexPlugin.js +0 -9
- package/server/plugins/gemini15ChatPlugin.js +18 -5
- package/server/plugins/modelPlugin.js +27 -6
- package/server/plugins/openAiChatPlugin.js +10 -8
- package/server/plugins/openAiVisionPlugin.js +56 -0
- package/tests/memoryfunction.test.js +73 -1
package/helper-apps/cortex-realtime-voice-server/client/src/chat/audio/WavStreamPlayer.ts
ADDED
@@ -0,0 +1,290 @@
+import { StreamProcessorSrc } from './worklets/StreamProcessor';
+import { AudioAnalysis, AudioAnalysisOutputType } from './analysis/AudioAnalysis';
+
+interface WavStreamPlayerOptions {
+  sampleRate?: number;
+  minBufferSize?: number;
+}
+
+interface TrackSampleOffset {
+  trackId: string | null;
+  offset: number;
+  currentTime: number;
+}
+
+/**
+ * Plays audio streams received in raw PCM16 chunks from the browser
+ */
+export class WavStreamPlayer {
+  private readonly scriptSrc: string;
+  private readonly sampleRate: number;
+  private readonly minBufferSize: number;
+  private context: AudioContext | null;
+  private stream: AudioWorkletNode | null;
+  private analyser: AnalyserNode | null;
+  private trackSampleOffsets: Record<string, TrackSampleOffset>;
+  private interruptedTrackIds: Record<string, boolean>;
+  private isRestarting: boolean;
+  public onTrackComplete?: (trackId: string) => void;
+  public currentTrackId: string | null;
+
+  /**
+   * Creates a new WavStreamPlayer instance
+   * @param options
+   */
+  constructor({ sampleRate = 44100, minBufferSize = 10 }: WavStreamPlayerOptions = {}) {
+    this.scriptSrc = StreamProcessorSrc;
+    this.sampleRate = sampleRate;
+    this.minBufferSize = minBufferSize;
+    this.context = null;
+    this.stream = null;
+    this.analyser = null;
+    this.trackSampleOffsets = {};
+    this.interruptedTrackIds = {};
+    this.isRestarting = false;
+    this.currentTrackId = null;
+  }
+
+  /**
+   * Connects the audio context and enables output to speakers
+   */
+  async connect(): Promise<boolean> {
+    this.context = new AudioContext({ sampleRate: this.sampleRate });
+    if (this.context.state === 'suspended') {
+      await this.context.resume();
+    }
+    try {
+      await this.context.audioWorklet.addModule(this.scriptSrc);
+    } catch (e) {
+      console.error(e);
+      throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
+    }
+    const analyser = this.context.createAnalyser();
+    analyser.fftSize = 1024;
+    analyser.smoothingTimeConstant = 0.8;
+    this.analyser = analyser;
+    return true;
+  }
+
+  /**
+   * Gets the current frequency domain data from the playing track
+   * @param analysisType
+   * @param minDecibels default -100
+   * @param maxDecibels default -30
+   */
+  getFrequencies(
+    analysisType: 'frequency' | 'music' | 'voice' = 'frequency',
+    minDecibels = -100,
+    maxDecibels = -30,
+  ): AudioAnalysisOutputType {
+    if (!this.analyser) {
+      throw new Error('Not connected, please call .connect() first');
+    }
+    return AudioAnalysis.getFrequencies(
+      this.analyser,
+      this.sampleRate,
+      null,
+      analysisType,
+      minDecibels,
+      maxDecibels,
+    );
+  }
+
+  /**
+   * Starts audio streaming
+   * @private
+   */
+  private _start(): boolean {
+    if (!this.context) {
+      throw new Error('AudioContext not initialized');
+    }
+    if (this.isRestarting) {
+      return false;
+    }
+    try {
+      const streamNode = new AudioWorkletNode(this.context, 'stream_processor');
+      streamNode.connect(this.context.destination);
+      streamNode.port.onmessage = (e: MessageEvent) => {
+        const { event } = e.data;
+        if (event === 'stop') {
+          streamNode.disconnect();
+          this.stream = null;
+          this.isRestarting = false;
+          if (e.data.reason === 'max_underruns_reached') {
+            console.warn(`Audio stream stopped due to ${e.data.finalCount} consecutive underruns`);
+          }
+        } else if (event === 'offset') {
+          const { requestId, trackId, offset } = e.data;
+          const currentTime = offset / this.sampleRate;
+          this.trackSampleOffsets[requestId] = { trackId, offset, currentTime };
+        } else if (event === 'track_complete') {
+          const { trackId } = e.data;
+          this.onTrackComplete?.(trackId);
+        } else if (event === 'error') {
+          console.error('Stream processor error:', e.data.error);
+          this._handleStreamError();
+        } else if (event === 'underrun') {
+          console.warn(
+            `Audio buffer underrun: ${e.data.count} frames without data. ` +
+            `Buffer size: ${e.data.bufferSize}/${e.data.maxBuffers}`
+          );
+        }
+      };
+      if (this.analyser) {
+        this.analyser.disconnect();
+        streamNode.connect(this.analyser);
+      }
+      this.stream = streamNode;
+      // Send minBufferSize to the worklet
+      streamNode.port.postMessage({ event: 'config', minBufferSize: this.minBufferSize });
+      return true;
+    } catch (error) {
+      console.error('Error starting stream:', error);
+      this.isRestarting = false;
+      return false;
+    }
+  }
+
+  /**
+   * Handles stream errors by attempting to restart
+   * @private
+   */
+  private async _handleStreamError() {
+    if (this.isRestarting) return;
+
+    this.isRestarting = true;
+    try {
+      if (this.stream) {
+        this.stream.disconnect();
+        this.stream = null;
+      }
+      await new Promise(resolve => setTimeout(resolve, 100));
+      this._start();
+    } finally {
+      this.isRestarting = false;
+    }
+  }
+
+  /**
+   * Adds 16BitPCM data to the currently playing audio stream
+   * You can add chunks beyond the current play point and they will be queued for play
+   * @param arrayBuffer
+   * @param trackId
+   */
+  public add16BitPCM(pcmData: ArrayBuffer, trackId: string) {
+    if (!this.context || !this.analyser) {
+      return new Int16Array();
+    }
+
+    this.currentTrackId = trackId;
+    try {
+      if (this.interruptedTrackIds[trackId]) {
+        return new Int16Array();
+      }
+
+      if (!this.stream && !this._start()) {
+        throw new Error('Failed to start audio stream');
+      }
+
+      let buffer: Int16Array;
+      try {
+        if (pcmData instanceof Int16Array) {
+          buffer = pcmData;
+        } else {
+          buffer = new Int16Array(pcmData);
+        }
+      } catch (error) {
+        console.error('Error creating Int16Array:', error);
+        return new Int16Array();
+      }
+
+      if (!buffer.length) {
+        console.warn('Received empty buffer for track:', trackId);
+        return buffer;
+      }
+
+      this.stream?.port.postMessage({ event: 'write', buffer, trackId });
+      return buffer;
+    } catch (error) {
+      console.error('Error processing audio chunk:', error);
+      this._handleStreamError();
+      return new Int16Array();
+    }
+  }
+
+  /**
+   * Clears the interrupted state for a track
+   * @param trackId
+   */
+  clearInterruptedState(trackId: string): void {
+    delete this.interruptedTrackIds[trackId];
+  }
+
+  /**
+   * Clears all interrupted states
+   */
+  clearAllInterruptedStates(): void {
+    this.interruptedTrackIds = {};
+  }
+
+  /**
+   * Gets the offset (sample count) of the currently playing stream
+   * @param interrupt
+   */
+  async getTrackSampleOffset(interrupt = false): Promise<TrackSampleOffset | null> {
+    if (!this.stream) {
+      return null;
+    }
+    const requestId = crypto.randomUUID();
+    this.stream.port.postMessage({
+      event: interrupt ? 'interrupt' : 'offset',
+      requestId,
+    });
+    let trackSampleOffset: TrackSampleOffset | undefined;
+    while (!trackSampleOffset) {
+      trackSampleOffset = this.trackSampleOffsets[requestId];
+      await new Promise((r) => setTimeout(() => r(null), 1));
+    }
+    const { trackId } = trackSampleOffset;
+    if (interrupt && trackId) {
+      this.interruptedTrackIds[trackId] = true;
+    }
+    return trackSampleOffset;
+  }
+
+  /**
+   * Strips the current stream and returns the sample offset of the audio
+   */
+  async interrupt(): Promise<TrackSampleOffset | null> {
+    return this.getTrackSampleOffset(true);
+  }
+
+  /**
+   * Gets the analyser node
+   */
+  getAnalyser(): AnalyserNode | null {
+    return this.analyser;
+  }
+
+  /**
+   * Sets a callback to be called when a track completes playback
+   * @param callback The callback function that receives the trackId
+   */
+  setTrackCompleteCallback(callback: (trackId: string) => void) {
+    this.onTrackComplete = callback;
+  }
+
+  async fadeOut(durationMs: number) {
+    if (!this.context) return;
+    const gainNode = this.context.createGain();
+    gainNode.gain.setValueAtTime(1, this.context.currentTime);
+    gainNode.gain.linearRampToValueAtTime(0, this.context.currentTime + durationMs / 1000);
+
+    // Insert gain node before destination
+    this.stream?.disconnect();
+    this.stream?.connect(gainNode);
+    gainNode.connect(this.context.destination);
+
+    return new Promise(resolve => setTimeout(resolve, durationMs));
+  }
+}
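For orientation, a minimal usage sketch of the WavStreamPlayer API added above. This snippet is not part of the package diff; the import path, the 24000 Hz sample rate, and the exported handler names are illustrative assumptions for a browser client that receives PCM16 chunks from the realtime voice server.

import { WavStreamPlayer } from './chat/audio/WavStreamPlayer';

// Assumed sample rate; match it to whatever the voice server actually streams.
const player = new WavStreamPlayer({ sampleRate: 24000, minBufferSize: 10 });

export async function startPlayback() {
  // Call from a user gesture so the AudioContext is allowed to start.
  await player.connect();
  player.setTrackCompleteCallback((trackId) => console.log('track finished:', trackId));
}

// Call for each PCM16 chunk received; chunks beyond the current play point
// are queued by the stream_processor worklet.
export function onAudioDelta(chunk: ArrayBuffer, trackId: string) {
  player.add16BitPCM(chunk, trackId);
}

// Call on barge-in: halts output and marks the current track as interrupted.
export async function onUserInterrupt() {
  const offset = await player.interrupt(); // { trackId, offset, currentTime } | null
  console.log('interrupted at sample offset', offset?.offset);
}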
package/helper-apps/cortex-realtime-voice-server/client/src/chat/audio/analysis/AudioAnalysis.ts
ADDED
@@ -0,0 +1,186 @@
+import {
+  noteFrequencies,
+  noteFrequencyLabels,
+  voiceFrequencies,
+  voiceFrequencyLabels,
+} from './constants';
+
+/**
+ * Output of AudioAnalysis for the frequency domain of the audio
+ */
+export interface AudioAnalysisOutputType {
+  values: Float32Array;
+  frequencies: number[];
+  labels: string[];
+}
+
+type AnalysisType = 'frequency' | 'music' | 'voice';
+
+/**
+ * Analyzes audio for visual output
+ */
+export class AudioAnalysis {
+  private audio: HTMLAudioElement;
+  private context: AudioContext | OfflineAudioContext;
+  private analyser: AnalyserNode;
+  private sampleRate: number;
+  private audioBuffer: AudioBuffer | null;
+  private fftResults: Float32Array[] = [];
+
+  /**
+   * Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range
+   * returns human-readable formatting and labels
+   */
+  static getFrequencies(
+    analyser: AnalyserNode,
+    sampleRate: number,
+    fftResult: Float32Array | null,
+    analysisType: AnalysisType = 'frequency',
+    minDecibels: number = -100,
+    maxDecibels: number = -30
+  ): AudioAnalysisOutputType {
+    if (!fftResult) {
+      fftResult = new Float32Array(analyser.frequencyBinCount);
+      analyser.getFloatFrequencyData(fftResult);
+    }
+    const nyquistFrequency = sampleRate / 2;
+    const frequencyStep = (1 / fftResult.length) * nyquistFrequency;
+    let outputValues: number[];
+    let frequencies: number[];
+    let labels: string[];
+
+    if (analysisType === 'music' || analysisType === 'voice') {
+      const useFrequencies = analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
+      const aggregateOutput = Array(useFrequencies.length).fill(minDecibels);
+      for (let i = 0; i < fftResult.length; i++) {
+        const frequency = i * frequencyStep;
+        const amplitude = fftResult[i] || 0;
+        for (let n = useFrequencies.length - 1; n >= 0; n--) {
+          const useFrequency = useFrequencies[n] || 0;
+          if (frequency > useFrequency) {
+            aggregateOutput[n] = Math.max(aggregateOutput[n], amplitude);
+            break;
+          }
+        }
+      }
+      outputValues = aggregateOutput;
+      frequencies = analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
+      labels = analysisType === 'voice' ? voiceFrequencyLabels : noteFrequencyLabels;
+    } else {
+      outputValues = Array.from(fftResult);
+      frequencies = outputValues.map((_, i) => frequencyStep * i);
+      labels = frequencies.map((f) => `${f.toFixed(2)} Hz`);
+    }
+
+    // We normalize to {0, 1}
+    const normalizedOutput = outputValues.map((v) =>
+      Math.max(0, Math.min((v - minDecibels) / (maxDecibels - minDecibels), 1))
+    );
+    const values = new Float32Array(normalizedOutput);
+
+    return {
+      values,
+      frequencies,
+      labels,
+    };
+  }
+
+  /**
+   * Creates a new AudioAnalysis instance for an HTMLAudioElement
+   */
+  constructor(audioElement: HTMLAudioElement, audioBuffer: AudioBuffer | null = null) {
+    this.audio = audioElement;
+    this.audioBuffer = audioBuffer;
+
+    if (audioBuffer) {
+      const { length, sampleRate } = audioBuffer;
+      const offlineAudioContext = new OfflineAudioContext({
+        length,
+        sampleRate,
+      });
+      const source = offlineAudioContext.createBufferSource();
+      source.buffer = audioBuffer;
+      const analyser = offlineAudioContext.createAnalyser();
+      analyser.fftSize = 8192;
+      analyser.smoothingTimeConstant = 0.1;
+      source.connect(analyser);
+
+      const renderQuantumInSeconds = 1 / 60;
+      const durationInSeconds = length / sampleRate;
+
+      const analyze = (index: number) => {
+        const suspendTime = renderQuantumInSeconds * index;
+        if (suspendTime < durationInSeconds) {
+          offlineAudioContext.suspend(suspendTime).then(() => {
+            const fftResult = new Float32Array(analyser.frequencyBinCount);
+            analyser.getFloatFrequencyData(fftResult);
+            this.fftResults.push(fftResult);
+            analyze(index + 1);
+          });
+        }
+        if (index === 1) {
+          offlineAudioContext.startRendering();
+        } else {
+          offlineAudioContext.resume();
+        }
+      };
+
+      source.start(0);
+      analyze(1);
+
+      this.context = offlineAudioContext;
+      this.analyser = analyser;
+      this.sampleRate = sampleRate;
+    } else {
+      const audioContext = new AudioContext();
+      const track = audioContext.createMediaElementSource(audioElement);
+      const analyser = audioContext.createAnalyser();
+      analyser.fftSize = 8192;
+      analyser.smoothingTimeConstant = 0.1;
+      track.connect(analyser);
+      analyser.connect(audioContext.destination);
+
+      this.context = audioContext;
+      this.analyser = analyser;
+      this.sampleRate = this.context.sampleRate;
+    }
+  }
+
+  /**
+   * Gets the current frequency domain data from the playing audio track
+   */
+  getFrequencies(
+    analysisType: AnalysisType = 'frequency',
+    minDecibels: number = -100,
+    maxDecibels: number = -30
+  ): AudioAnalysisOutputType {
+    let fftResult: Float32Array | null = null;
+    if (this.audioBuffer && this.fftResults.length) {
+      const pct = this.audio.currentTime / this.audio.duration;
+      const index = Math.min(
+        Math.floor(pct * this.fftResults.length),
+        this.fftResults.length - 1
+      );
+      fftResult = this.fftResults[index] ?? null;
+    }
+    return AudioAnalysis.getFrequencies(
+      this.analyser,
+      this.sampleRate,
+      fftResult ?? null,
+      analysisType,
+      minDecibels,
+      maxDecibels
+    );
+  }
+
+  /**
+   * Resume the internal AudioContext if it was suspended due to the lack of
+   * user interaction when the AudioAnalysis was instantiated.
+   */
+  async resumeIfSuspended(): Promise<true> {
+    if (this.context.state === 'suspended') {
+      await this.context.resume();
+    }
+    return true;
+  }
+}
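Similarly, a hedged sketch of how the new AudioAnalysis class could drive a visualizer. This is not code from the package; the element id and the console stand-in for drawing are assumptions.

import { AudioAnalysis } from './chat/audio/analysis/AudioAnalysis';

// Live-analysis path: with no AudioBuffer, the class attaches an AnalyserNode
// to the <audio> element via createMediaElementSource.
const audioEl = document.querySelector<HTMLAudioElement>('#voice-output');

if (audioEl) {
  const analysis = new AudioAnalysis(audioEl);

  const draw = () => {
    // values come back normalized to [0, 1]; labels are note names such as 'A4'.
    const { values, labels } = analysis.getFrequencies('voice', -100, -30);
    console.debug(labels[0], values[0]); // stand-in for real bar or waveform drawing
    requestAnimationFrame(draw);
  };

  // The underlying AudioContext may start suspended until a user gesture.
  analysis.resumeIfSuspended().then(() => requestAnimationFrame(draw));
}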
package/helper-apps/cortex-realtime-voice-server/client/src/chat/audio/analysis/constants.ts
ADDED
@@ -0,0 +1,59 @@
+/**
+ * Constants for help with visualization
+ * Helps map frequency ranges from Fast Fourier Transform
+ * to human-interpretable ranges, notably music ranges and
+ * human vocal ranges.
+ */
+
+// Eighth octave frequencies
+const octave8Frequencies: number[] = [
+  4186.01, 4434.92, 4698.63, 4978.03, 5274.04, 5587.65, 5919.91, 6271.93,
+  6644.88, 7040.0, 7458.62, 7902.13,
+];
+
+// Labels for each of the above frequencies
+const octave8FrequencyLabels: string[] = [
+  'C',
+  'C#',
+  'D',
+  'D#',
+  'E',
+  'F',
+  'F#',
+  'G',
+  'G#',
+  'A',
+  'A#',
+  'B',
+];
+
+/**
+ * All note frequencies from 1st to 8th octave
+ * in format "A#8" (A#, 8th octave)
+ */
+export const noteFrequencies: number[] = [];
+export const noteFrequencyLabels: string[] = [];
+for (let i = 1; i <= 8; i++) {
+  for (let f = 0; f < octave8Frequencies.length; f++) {
+    const freq = octave8Frequencies[f] || 0;
+    const baseNote = octave8FrequencyLabels[f] || 'C';
+    noteFrequencies.push(freq / Math.pow(2, 8 - i));
+    noteFrequencyLabels.push(baseNote + i);
+  }
+}
+
+/**
+ * Subset of the note frequencies between 32 and 2000 Hz
+ * 6 octave range: C1 to B6
+ */
+const voiceFrequencyRange: [number, number] = [32.0, 2000.0];
+export const voiceFrequencies: number[] = noteFrequencies.filter((freq) => {
+  return freq > voiceFrequencyRange[0] && freq < voiceFrequencyRange[1];
+});
+export const voiceFrequencyLabels: string[] = noteFrequencyLabels.filter((_, i) => {
+  return (
+    noteFrequencies[i] &&
+    noteFrequencies[i] > voiceFrequencyRange[0] &&
+    noteFrequencies[i] < voiceFrequencyRange[1]
+  );
+})
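Finally, a small illustrative check of what constants.ts exports (the import path and console output are assumptions for demonstration): the octave loop yields 8 x 12 = 96 note entries, and the voice arrays are the subset falling strictly between 32 Hz and 2000 Hz, roughly C1 to B6.

import { noteFrequencies, voiceFrequencies, voiceFrequencyLabels } from './constants';

console.log(noteFrequencies.length);                                   // 96
console.log(voiceFrequencies.length === voiceFrequencyLabels.length);  // true
console.log(voiceFrequencyLabels[0], voiceFrequencies[0]?.toFixed(2)); // lowest note in the voice range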