npm - @chat21/chat21-web-widget - Versions diffs - 5.1.30 → 5.1.32-rc13 - Mend

@chat21/chat21-web-widget 5.1.30 → 5.1.32-rc13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

package/src/app/providers/tts-audio-playback-coordinator.service.ts ADDED Viewed

@@ -0,0 +1,93 @@
+import { Injectable } from '@angular/core';
+import { BehaviorSubject, Observable, Subject } from 'rxjs';
+/**
+ * Garantisce un solo messaggio TTS in riproduzione alla volta.
+ * Se arrivano più messaggi TTS, vengono riprodotti in coda (FIFO) senza interrompere quello corrente.
+ */
+@Injectable({ providedIn: 'root' })
+export class TtsAudioPlaybackCoordinator {
+  private currentOwnerId: string | null = null;
+  private readonly queue: Array<{ ownerId: string; start: () => void }> = [];
+  /** Emits true while any TTS is playing or queued; false when the queue is fully drained. */
+  private readonly _isTTSPlaying$ = new BehaviorSubject<boolean>(false);
+  readonly isTTSPlaying$ = this._isTTSPlaying$.asObservable();
+  /** Emits once when stopAll() is called — signals every AudioSyncComponent to abort immediately. */
+  private readonly _stopAll$ = new Subject<void>();
+  readonly stopAllPlayback$: Observable<void> = this._stopAll$.asObservable();
+  /**
+   * Richiede l'avvio della riproduzione TTS per `ownerId`.
+   * Se non c'è nessun TTS attivo, parte subito; altrimenti viene messo in coda.
+   */
+  requestStart(ownerId: string, start: () => void): void {
+    const id = (ownerId || '').trim();
+    if (!id) {
+      return;
+    }
+    if (this.currentOwnerId === id) {
+      return;
+    }
+    if (this.queue.some((j) => j.ownerId === id)) {
+      return;
+    }
+    if (this.currentOwnerId) {
+      this.queue.push({ ownerId: id, start });
+      return;
+    }
+    this.currentOwnerId = id;
+    this._isTTSPlaying$.next(true);
+    try {
+      start();
+    } catch {
+      this.releaseIfCurrent(id);
+    }
+  }
+  /** Chiamare a fine riproduzione naturale (`ended`) se questo messaggio era ancora “attivo”. */
+  releaseIfCurrent(ownerId: string): void {
+    const id = (ownerId || '').trim();
+    if (!id) {
+      return;
+    }
+    if (this.currentOwnerId !== id) {
+      // Se era in coda, rimuovilo.
+      const idx = this.queue.findIndex((j) => j.ownerId === id);
+      if (idx !== -1) {
+        this.queue.splice(idx, 1);
+      }
+      return;
+    }
+    this.currentOwnerId = null;
+    const next = this.queue.shift();
+    if (!next) {
+      this._isTTSPlaying$.next(false);
+      return;
+    }
+    this.currentOwnerId = next.ownerId;
+    try {
+      next.start();
+    } catch {
+      this.releaseIfCurrent(next.ownerId);
+    }
+  }
+  /** Distruzione componente o stop esplicito. */
+  release(ownerId: string): void {
+    this.releaseIfCurrent(ownerId);
+  }
+  /**
+   * Stops all TTS playback immediately and clears the queue.
+   * Broadcasts on stopAllPlayback$ so every AudioSyncComponent can abort its stream and reveal all text.
+   */
+  stopAll(): void {
+    this.queue.length = 0;
+    this.currentOwnerId = null;
+    this._isTTSPlaying$.next(false);
+    this._stopAll$.next();
+  }
+}

package/src/app/providers/voice/STT&TTS/openai-voice.config.ts ADDED Viewed

@@ -0,0 +1,12 @@
+/**
+ * Configurazione opzionale per i servizi voce OpenAI (da `environment` o runtime).
+ */
+export interface OpenAiVoiceEnvironmentConfig {
+  /** Obbligatoria per chiamate API reali; se assente, STT/TTS non inviano richieste. */
+  apiKey?: string;
+  baseUrl?: string;
+  transcriptionModel?: string;
+  ttsModel?: string;
+  /** Voce predefinita TTS (es. `alloy`). */
+  ttsVoice?: string;
+}

package/src/app/providers/voice/STT&TTS/openai-voice.provider.ts ADDED Viewed

@@ -0,0 +1,171 @@
+import { HttpClient, HttpErrorResponse, HttpHeaders } from '@angular/common/http';
+import { Injectable } from '@angular/core';
+import { firstValueFrom } from 'rxjs';
+import { environment } from 'src/environments/environment';
+import type { OpenAiVoiceEnvironmentConfig } from './openai-voice.config';
+import {
+  SpeechToTextProvider,
+  TextToSpeechProvider,
+  type SpeechToTextRequest,
+  type SpeechToTextResult,
+  type TextToSpeechRequest,
+  type TextToSpeechResult,
+} from './speech-provider.abstract';
+import { AppConfigService } from '../../app-config.service';
+const DEFAULT_BASE = 'https://api.openai.com/v1';
+const DEFAULT_TRANSCRIPTION_MODEL = 'whisper-1';
+const DEFAULT_TTS_MODEL = 'tts-1';
+const DEFAULT_VOICE = 'alloy';
+const DEFAULT_FORMAT = 'mp3';
+/**
+ * Provider OpenAI unico: STT (Whisper) + TTS, entrambi via {@link HttpClient}.
+ */
+@Injectable({ providedIn: 'root' })
+export class OpenAiVoiceProviderService extends SpeechToTextProvider implements TextToSpeechProvider {
+  constructor(
+    private readonly httpClient: HttpClient,
+    private readonly appConfig: AppConfigService
+  ) {
+    super();
+  }
+  async transcribe(request: SpeechToTextRequest): Promise<SpeechToTextResult> {
+    const cfg = this.getConfig();
+    const apiKey = cfg.apiKey?.trim();
+    if (!apiKey) {
+      return { text: '' };
+    }
+    const base = (cfg.baseUrl ?? DEFAULT_BASE).replace(/\/$/, '');
+    const model = cfg.transcriptionModel ?? DEFAULT_TRANSCRIPTION_MODEL;
+    const url = `${base}/audio/transcriptions`;
+    const ext = this.extensionForMime(request.mimeType);
+    const file = new File([request.audio], `segment.${ext}`, { type: request.mimeType });
+    const form = new FormData();
+    form.append('file', file);
+    form.append('model', model);
+    if (request.language) {
+      form.append('language', request.language);
+    }
+    const headers = new HttpHeaders({
+      Authorization: `Bearer ${apiKey}`,
+    });
+    try {
+      const data = await firstValueFrom(
+        this.httpClient.post<{ text?: string }>(url, form, { headers }),
+      );
+      return { text: (data.text ?? '').trim() };
+    } catch (e) {
+      if (e instanceof HttpErrorResponse && e.error instanceof Blob) {
+        const errText = await e.error.text();
+        throw new Error(`OpenAI transcription ${e.status}: ${errText || e.statusText}`);
+      }
+      throw this.mapOpenAiHttpError(e);
+    }
+  }
+  async synthesize(request: TextToSpeechRequest): Promise<TextToSpeechResult> {
+    const cfg = this.getConfig();
+    const apiKey = cfg.apiKey?.trim();
+    if (!apiKey) {
+      throw new Error('OpenAI API key not configured (environment.openAiVoice.apiKey)');
+    }
+    const base = (cfg.baseUrl ?? DEFAULT_BASE).replace(/\/$/, '');
+    const model = cfg.ttsModel ?? DEFAULT_TTS_MODEL;
+    const voice = request.voice ?? cfg.ttsVoice ?? DEFAULT_VOICE;
+    const responseFormat =
+      (request.responseFormat as 'mp3' | 'opus' | 'aac' | 'flac' | undefined) ?? DEFAULT_FORMAT;
+    const url = `${base}/audio/speech`;
+    const body = {
+      model,
+      voice,
+      input: request.text,
+      response_format: responseFormat,
+    };
+    const headers = new HttpHeaders({
+      Authorization: `Bearer ${apiKey}`,
+      'Content-Type': 'application/json',
+    });
+    try {
+      const blob = await firstValueFrom(
+        this.httpClient.post(url, body, {
+          headers,
+          responseType: 'blob',
+        }),
+      );
+      return { audio: blob, mimeType: this.mimeForFormat(responseFormat) };
+    } catch (e) {
+      if (e instanceof HttpErrorResponse && e.error instanceof Blob) {
+        const errText = await e.error.text();
+        throw new Error(`OpenAI TTS ${e.status}: ${errText || e.statusText}`);
+      }
+      if (e instanceof HttpErrorResponse) {
+        throw new Error(`OpenAI TTS ${e.status}: ${e.message || e.statusText}`);
+      }
+      throw e;
+    }
+  }
+  private getConfig(): OpenAiVoiceEnvironmentConfig {
+    return this.appConfig.getConfig().openAiKey ?? {};
+  }
+  private mapOpenAiHttpError(e: unknown): Error {
+    if (!(e instanceof HttpErrorResponse)) {
+      return e instanceof Error ? e : new Error(String(e));
+    }
+    const label = 'OpenAI transcription';
+    if (e.error instanceof Blob) {
+      return new Error(`${label} ${e.status}: ${e.statusText}`);
+    }
+    if (typeof e.error === 'object' && e.error !== null && 'error' in e.error) {
+      const err = (e.error as { error?: { message?: string } }).error;
+      return new Error(`${label} ${e.status}: ${err?.message ?? JSON.stringify(e.error)}`);
+    }
+    if (typeof e.error === 'string') {
+      return new Error(`${label} ${e.status}: ${e.error}`);
+    }
+    return new Error(`${label} ${e.status}: ${e.message || e.statusText}`);
+  }
+  private extensionForMime(mime: string): string {
+    if (mime.includes('webm')) {
+      return 'webm';
+    }
+    if (mime.includes('mp4') || mime.includes('m4a')) {
+      return 'm4a';
+    }
+    if (mime.includes('wav')) {
+      return 'wav';
+    }
+    if (mime.includes('mpeg') || mime.includes('mp3')) {
+      return 'mp3';
+    }
+    return 'webm';
+  }
+  private mimeForFormat(fmt: string): string {
+    switch (fmt) {
+      case 'opus':
+        return 'audio/opus';
+      case 'aac':
+        return 'audio/aac';
+      case 'flac':
+        return 'audio/flac';
+      case 'mp3':
+      default:
+        return 'audio/mpeg';
+    }
+  }
+}

package/src/app/providers/voice/STT&TTS/speech-provider.abstract.ts ADDED Viewed

@@ -0,0 +1,39 @@
+/**
+ * Contratti astratti per Speech-to-Text e Text-to-Speech.
+ * Implementazione OpenAI unificata: `OpenAiVoiceProviderService` (`openai-voice.provider.ts`).
+ */
+/** Input per la trascrizione di un segmento audio. */
+export interface SpeechToTextRequest {
+  audio: Blob;
+  mimeType: string;
+  /** ISO 639-1 opzionale (es. `it`, `en`). */
+  language?: string;
+}
+export interface SpeechToTextResult {
+  text: string;
+}
+/** Input per la sintesi vocale. */
+export interface TextToSpeechRequest {
+  text: string;
+  /** Voce provider-specific (es. OpenAI: `alloy`, `echo`, …). */
+  voice?: string;
+  language?: string;
+  /** Formato audio desiderato (dipende dal provider). */
+  responseFormat?: string;
+}
+export interface TextToSpeechResult {
+  audio: Blob;
+  mimeType: string;
+}
+export abstract class SpeechToTextProvider {
+  abstract transcribe(request: SpeechToTextRequest): Promise<SpeechToTextResult>;
+}
+export abstract class TextToSpeechProvider {
+  abstract synthesize(request: TextToSpeechRequest): Promise<TextToSpeechResult>;
+}

package/src/app/providers/voice/audio.types.ts ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * Tipi condivisi per cattura microfono, VAD e registrazione (WebM).
+ */
+export const DEFAULT_VOICE_AUDIO_CONSTRAINTS: MediaTrackConstraints = {
+  echoCancellation: true,
+  noiseSuppression: true,
+  autoGainControl: true,
+};
+export const DEFAULT_VOICE_MEDIA_STREAM_CONSTRAINTS: MediaStreamConstraints = {
+  audio: DEFAULT_VOICE_AUDIO_CONSTRAINTS,
+};
+export interface VoiceRecordedBlob {
+  blob: Blob;
+  mimeType: string;
+}
+/**
+ * Segmento audio dopo VAD; può includere `transcript` se STT è configurato e abilitato.
+ */
+export interface VoiceSegmentPayload extends VoiceRecordedBlob {
+  transcript?: string;
+  transcriptionError?: string;
+}
+export interface VoiceSessionStartOptions {
+  /** Opzionale se usi solo {@link VoiceService.audioSegment$}. */
+  onRecordingComplete?: (result: VoiceSegmentPayload) => void;
+  constraints?: MediaStreamConstraints;
+  /** Default `true`. Se `false`, non viene chiamato lo STT sul segmento. */
+  enableTranscription?: boolean;
+}

package/src/app/providers/voice/vad.service.spec.ts ADDED Viewed

@@ -0,0 +1,28 @@
+import { Location } from '@angular/common';
+import { TestBed } from '@angular/core/testing';
+import { VadService } from './vad.service';
+describe('VadService', () => {
+  let service: VadService;
+  beforeEach(() => {
+    TestBed.configureTestingModule({
+      providers: [
+        VadService,
+        {
+          provide: Location,
+          useValue: {
+            prepareExternalUrl: (url: string) => `/${url}`,
+          },
+        },
+      ],
+    });
+    service = TestBed.inject(VadService);
+  });
+  it('should expose VAD and ONNX WASM base URLs with trailing slash', () => {
+    expect(service.getVadAssetBaseUrl()).toBe('/assets/vad/');
+    expect(service.getOnnxWasmBaseUrl()).toBe('/assets/onnx/');
+  });
+});

package/src/app/providers/voice/vad.service.ts ADDED Viewed

@@ -0,0 +1,70 @@
+import { Location } from '@angular/common';
+import { Injectable } from '@angular/core';
+import { MicVAD, getDefaultRealTimeVADOptions } from '@ricky0123/vad-web';
+import type { RealTimeVADOptions } from '@ricky0123/vad-web';
+/**
+ * MicVAD (@ricky0123/vad-web): modelli in assets/vad/, WASM ONNX in assets/onnx/
+ * (allineato a ort.env.wasm.wasmPaths = "/assets/onnx/").
+ */
+@Injectable({ providedIn: 'root' })
+export class VadService {
+  private onnxRuntimeEnvPromise: Promise<void> | null = null;
+  constructor(private readonly location: Location) {}
+  /**
+   * Base URL per silero_vad_legacy.onnx / vad.worklet.bundle.min.js
+   * (MicVAD usa baseAssetPath + nome file interno, non modelURL singolo).
+   */
+  getVadAssetBaseUrl(): string {
+    return this.ensureTrailingSlash(this.location.prepareExternalUrl('assets/vad/'));
+  }
+  /** Base URL per ort-wasm-*.mjs / .wasm (es. /assets/onnx/). */
+  getOnnxWasmBaseUrl(): string {
+    return this.ensureTrailingSlash(this.location.prepareExternalUrl('assets/onnx/'));
+  }
+  /**
+   * Pre-configura il modulo onnxruntime-web/wasm (stesso usato da MicVAD):
+   * wasmPaths + numThreads prima del primo MicVAD.new.
+   */
+  ensureOnnxRuntimeEnv(): Promise<void> {
+    if (!this.onnxRuntimeEnvPromise) {
+      this.onnxRuntimeEnvPromise = (async () => {
+        const ort = await import('onnxruntime-web/wasm');
+        const wasmBase = this.getOnnxWasmBaseUrl();
+        ort.env.wasm.wasmPaths = wasmBase;
+        ort.env.wasm.numThreads = 1;
+        ort.env.logLevel = 'error';
+      })();
+    }
+    return this.onnxRuntimeEnvPromise;
+  }
+  async createMicVad(overrides: Partial<RealTimeVADOptions>): Promise<MicVAD> {
+    await this.ensureOnnxRuntimeEnv();
+    const base = getDefaultRealTimeVADOptions('legacy');
+    const vadBase = this.getVadAssetBaseUrl();
+    const ortWasmBase = this.getOnnxWasmBaseUrl();
+    return MicVAD.new({
+      ...base,
+      startOnLoad: false,
+      baseAssetPath: vadBase,
+      onnxWASMBasePath: ortWasmBase,
+      ortConfig: (ort) => {
+        base.ortConfig?.(ort);
+        ort.env.wasm.wasmPaths = ortWasmBase;
+        ort.env.wasm.numThreads = 1;
+        ort.env.logLevel = 'error';
+      },
+      ...overrides,
+    });
+  }
+  private ensureTrailingSlash(path: string): string {
+    return path.endsWith('/') ? path : `${path}/`;
+  }
+}

package/src/app/providers/voice/voice.service.spec.ts ADDED Viewed

@@ -0,0 +1,60 @@
+import { TestBed } from '@angular/core/testing';
+import { VoiceService } from './voice.service';
+import { VadService } from './vad.service';
+describe('VoiceService', () => {
+  let service: VoiceService;
+  let vadService: jasmine.SpyObj<VadService>;
+  let mockVad: { start: jasmine.Spy; pause: jasmine.Spy; destroy: jasmine.Spy };
+  beforeEach(() => {
+    mockVad = {
+      start: jasmine.createSpy('start').and.returnValue(Promise.resolve()),
+      pause: jasmine.createSpy('pause').and.returnValue(Promise.resolve()),
+      destroy: jasmine.createSpy('destroy').and.returnValue(Promise.resolve()),
+    };
+    vadService = jasmine.createSpyObj('VadService', ['ensureOnnxRuntimeEnv', 'createMicVad']);
+    vadService.ensureOnnxRuntimeEnv.and.returnValue(Promise.resolve());
+    vadService.createMicVad.and.returnValue(Promise.resolve(mockVad as any));
+    TestBed.configureTestingModule({
+      providers: [VoiceService, { provide: VadService, useValue: vadService }],
+    });
+    service = TestBed.inject(VoiceService);
+  });
+  it('startSession should call ensureOnnxRuntimeEnv', async () => {
+    const stream = new MediaStream();
+    spyOn(navigator.mediaDevices, 'getUserMedia').and.returnValue(Promise.resolve(stream));
+    await service.startSession({});
+    expect(vadService.ensureOnnxRuntimeEnv).toHaveBeenCalled();
+  });
+  it('startSession should request mic, create MicVAD, and start', async () => {
+    const stream = new MediaStream();
+    spyOn(navigator.mediaDevices, 'getUserMedia').and.returnValue(Promise.resolve(stream));
+    await service.startSession({
+      onRecordingComplete: () => {},
+    });
+    expect(navigator.mediaDevices.getUserMedia).toHaveBeenCalled();
+    expect(vadService.createMicVad).toHaveBeenCalled();
+    expect(mockVad.start).toHaveBeenCalled();
+  });
+  it('stopSession should destroy VAD and stop tracks', async () => {
+    const track = jasmine.createSpyObj<MediaStreamTrack>('MediaStreamTrack', ['stop']);
+    const stream = new MediaStream([track]);
+    spyOn(navigator.mediaDevices, 'getUserMedia').and.returnValue(Promise.resolve(stream));
+    await service.startSession({ onRecordingComplete: () => {} });
+    await service.stopSession();
+    expect(track.stop).toHaveBeenCalled();
+  });
+});