@chat21/chat21-web-widget 5.1.30-rc1 → 5.1.32-rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/CHANGELOG.md +44 -71
  2. package/angular.json +3 -1
  3. package/deploy_amazon_beta.sh +7 -17
  4. package/deploy_amazon_prod.sh +41 -0
  5. package/docs/changelog/this-branch.md +47 -0
  6. package/package.json +4 -1
  7. package/src/app/app.component.ts +1 -2
  8. package/src/app/app.module.ts +9 -0
  9. package/src/app/component/conversation-detail/conversation/conversation.component.html +4 -0
  10. package/src/app/component/conversation-detail/conversation/conversation.component.ts +8 -0
  11. package/src/app/component/conversation-detail/conversation-content/conversation-content.component.html +2 -2
  12. package/src/app/component/conversation-detail/conversation-content/conversation-content.component.ts +2 -0
  13. package/src/app/component/conversation-detail/conversation-footer/conversation-footer.component.html +42 -0
  14. package/src/app/component/conversation-detail/conversation-footer/conversation-footer.component.scss +91 -0
  15. package/src/app/component/conversation-detail/conversation-footer/conversation-footer.component.ts +101 -7
  16. package/src/app/component/message/audio/audio.component.ts +0 -5
  17. package/src/app/component/message/audio-sync/audio-sync.component.html +19 -0
  18. package/src/app/component/message/audio-sync/audio-sync.component.scss +65 -0
  19. package/src/app/component/message/audio-sync/audio-sync.component.spec.ts +23 -0
  20. package/src/app/component/message/audio-sync/audio-sync.component.ts +197 -0
  21. package/src/app/component/message/bubble-message/bubble-message.component.html +6 -1
  22. package/src/app/component/message/bubble-message/bubble-message.component.ts +2 -1
  23. package/src/app/providers/global-settings.service.ts +10 -0
  24. package/src/app/providers/voice/STT&TTS/openai-voice.config.ts +12 -0
  25. package/src/app/providers/voice/STT&TTS/openai-voice.provider.ts +171 -0
  26. package/src/app/providers/voice/STT&TTS/speech-provider.abstract.ts +39 -0
  27. package/src/app/providers/voice/audio.types.ts +34 -0
  28. package/src/app/providers/voice/vad.service.spec.ts +28 -0
  29. package/src/app/providers/voice/vad.service.ts +70 -0
  30. package/src/app/providers/voice/voice.service.spec.ts +60 -0
  31. package/src/app/providers/voice/voice.service.ts +264 -0
  32. package/src/app/shims/onnxruntime-web-wasm.ts +4 -0
  33. package/src/app/utils/conversation-sender-classifier.ts +21 -0
  34. package/src/app/utils/globals.ts +3 -0
  35. package/src/assets/onnx/ort-wasm-simd-threaded.mjs +59 -0
  36. package/src/assets/onnx/ort-wasm-simd-threaded.wasm +0 -0
  37. package/src/assets/vad/silero_vad_legacy.onnx +0 -0
  38. package/src/assets/vad/vad.worklet.bundle.min.js +1 -0
  39. package/src/chat21-core/models/message.ts +2 -1
  40. package/src/chat21-core/providers/firebase/firebase-conversation-handler.ts +3 -2
  41. package/src/chat21-core/providers/mqtt/mqtt-conversation-handler.ts +12 -0
  42. package/src/chat21-core/utils/utils-message.ts +7 -0
  43. package/tsconfig.json +5 -0
@@ -0,0 +1,171 @@
1
+ import { HttpClient, HttpErrorResponse, HttpHeaders } from '@angular/common/http';
2
+ import { Injectable } from '@angular/core';
3
+ import { firstValueFrom } from 'rxjs';
4
+ import { environment } from 'src/environments/environment';
5
+
6
+ import type { OpenAiVoiceEnvironmentConfig } from './openai-voice.config';
7
+ import {
8
+ SpeechToTextProvider,
9
+ TextToSpeechProvider,
10
+ type SpeechToTextRequest,
11
+ type SpeechToTextResult,
12
+ type TextToSpeechRequest,
13
+ type TextToSpeechResult,
14
+ } from './speech-provider.abstract';
15
+ import { AppConfigService } from '../../app-config.service';
16
+
17
const DEFAULT_BASE = 'https://api.openai.com/v1';
const DEFAULT_TRANSCRIPTION_MODEL = 'whisper-1';
const DEFAULT_TTS_MODEL = 'tts-1';
const DEFAULT_VOICE = 'alloy';
const DEFAULT_FORMAT = 'mp3';

/**
 * Single OpenAI provider covering both STT (Whisper) and TTS, both issued through {@link HttpClient}.
 *
 * Configuration (API key, base URL, models, default voice) is read from
 * `AppConfigService.getConfig().openAiKey` on every call.
 */
@Injectable({ providedIn: 'root' })
export class OpenAiVoiceProviderService extends SpeechToTextProvider implements TextToSpeechProvider {
  constructor(
    private readonly httpClient: HttpClient,
    private readonly appConfig: AppConfigService
  ) {
    super();
  }

  /**
   * Sends one audio segment to `<base>/audio/transcriptions` and returns the trimmed text.
   *
   * @param request Audio blob, its MIME type and an optional language hint.
   * @returns The transcript; an empty string when no API key is configured (the call is skipped).
   * @throws Error with an `OpenAI transcription <status>: <detail>` message when the HTTP call fails.
   */
  async transcribe(request: SpeechToTextRequest): Promise<SpeechToTextResult> {
    const cfg = this.getConfig();
    const apiKey = cfg.apiKey?.trim();
    if (!apiKey) {
      // Best-effort: without a key the transcription is skipped instead of failing the segment.
      return { text: '' };
    }

    const url = `${this.resolveBaseUrl(cfg)}/audio/transcriptions`;
    const model = cfg.transcriptionModel ?? DEFAULT_TRANSCRIPTION_MODEL;

    // The upload needs a file name whose extension matches the container.
    const ext = this.extensionForMime(request.mimeType);
    const file = new File([request.audio], `segment.${ext}`, { type: request.mimeType });

    const form = new FormData();
    form.append('file', file);
    form.append('model', model);
    if (request.language) {
      form.append('language', request.language);
    }

    // No explicit Content-Type: the browser must set the multipart boundary itself.
    const headers = new HttpHeaders({
      Authorization: `Bearer ${apiKey}`,
    });

    try {
      const data = await firstValueFrom(
        this.httpClient.post<{ text?: string }>(url, form, { headers }),
      );
      return { text: (data.text ?? '').trim() };
    } catch (e: unknown) {
      throw await this.mapOpenAiHttpError(e, 'OpenAI transcription');
    }
  }

  /**
   * Sends text to `<base>/audio/speech` and returns the synthesized audio.
   *
   * @param request Text plus optional voice and response format overrides.
   * @returns The audio blob and the MIME type matching the requested format.
   * @throws Error when no API key is configured, or with an `OpenAI TTS <status>: <detail>`
   *   message when the HTTP call fails.
   */
  async synthesize(request: TextToSpeechRequest): Promise<TextToSpeechResult> {
    const cfg = this.getConfig();
    const apiKey = cfg.apiKey?.trim();
    if (!apiKey) {
      // NOTE(review): the message names `environment.openAiVoice.apiKey`, but the key is read
      // from `appConfig.getConfig().openAiKey` (see getConfig) — confirm which name is intended.
      throw new Error('OpenAI API key not configured (environment.openAiVoice.apiKey)');
    }

    const url = `${this.resolveBaseUrl(cfg)}/audio/speech`;
    const model = cfg.ttsModel ?? DEFAULT_TTS_MODEL;
    const voice = request.voice ?? cfg.ttsVoice ?? DEFAULT_VOICE;
    // Kept as a plain string: the request type allows any provider format, so no narrowing cast.
    const responseFormat = request.responseFormat ?? DEFAULT_FORMAT;

    const body = {
      model,
      voice,
      input: request.text,
      response_format: responseFormat,
    };

    const headers = new HttpHeaders({
      Authorization: `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    });

    try {
      const blob = await firstValueFrom(
        this.httpClient.post(url, body, {
          headers,
          responseType: 'blob',
        }),
      );
      return { audio: blob, mimeType: this.mimeForFormat(responseFormat) };
    } catch (e: unknown) {
      throw await this.mapOpenAiHttpError(e, 'OpenAI TTS');
    }
  }

  /** Reads the voice configuration from the app config, defaulting to an empty object. */
  private getConfig(): OpenAiVoiceEnvironmentConfig {
    return this.appConfig.getConfig().openAiKey ?? {};
  }

  /** Returns the configured base URL (or the default) without trailing slashes. */
  private resolveBaseUrl(cfg: OpenAiVoiceEnvironmentConfig): string {
    const configured = cfg.baseUrl?.trim();
    // An empty/blank baseUrl would otherwise yield a relative URL against the host page.
    const base = configured ? configured : DEFAULT_BASE;
    return base.replace(/\/+$/, '');
  }

  /**
   * Converts any failure of an OpenAI call into an `Error` whose message is
   * `<label> <status>: <detail>`.
   *
   * Detail resolution order: `error.message` of the API JSON body (also when the body arrives
   * as a Blob, as it does for `responseType: 'blob'`), the raw body text, then the
   * HttpErrorResponse message / status text. Non-HTTP errors are passed through.
   */
  private async mapOpenAiHttpError(e: unknown, label: string): Promise<Error> {
    if (!(e instanceof HttpErrorResponse)) {
      return e instanceof Error ? e : new Error(String(e));
    }

    const prefix = `${label} ${e.status}`;
    const payload: unknown = e.error;

    if (payload instanceof Blob) {
      const raw = await payload.text();
      const detail = this.extractApiMessage(raw) ?? (raw || e.statusText);
      return new Error(`${prefix}: ${detail}`);
    }
    if (typeof payload === 'object' && payload !== null && 'error' in payload) {
      const inner = (payload as { error?: { message?: string } }).error;
      return new Error(`${prefix}: ${inner?.message ?? JSON.stringify(payload)}`);
    }
    if (typeof payload === 'string') {
      return new Error(`${prefix}: ${payload}`);
    }
    return new Error(`${prefix}: ${e.message || e.statusText}`);
  }

  /** Extracts `error.message` from a JSON error body; `undefined` when absent or not JSON. */
  private extractApiMessage(raw: string): string | undefined {
    try {
      const parsed: unknown = JSON.parse(raw);
      if (typeof parsed === 'object' && parsed !== null && 'error' in parsed) {
        const message = (parsed as { error?: { message?: unknown } }).error?.message;
        return typeof message === 'string' && message.length > 0 ? message : undefined;
      }
    } catch {
      // Body is not JSON: the caller falls back to the raw text.
    }
    return undefined;
  }

  /** Picks the upload file extension for a recording MIME type (defaults to `webm`). */
  private extensionForMime(mime: string): string {
    if (mime.includes('webm')) {
      return 'webm';
    }
    if (mime.includes('mp4') || mime.includes('m4a')) {
      return 'm4a';
    }
    if (mime.includes('wav')) {
      return 'wav';
    }
    if (mime.includes('mpeg') || mime.includes('mp3')) {
      return 'mp3';
    }
    return 'webm';
  }

  /** Maps a TTS `response_format` to the MIME type reported to callers (defaults to `audio/mpeg`). */
  private mimeForFormat(fmt: string): string {
    switch (fmt) {
      case 'opus':
        return 'audio/opus';
      case 'aac':
        return 'audio/aac';
      case 'flac':
        return 'audio/flac';
      case 'wav':
        return 'audio/wav';
      case 'mp3':
      default:
        return 'audio/mpeg';
    }
  }
}
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Abstract contracts for Speech-to-Text and Text-to-Speech.
3
+ * Unified OpenAI implementation: `OpenAiVoiceProviderService` (`openai-voice.provider.ts`).
4
+ */
5
+
6
+ /** Input for transcribing one audio segment. */
7
+ export interface SpeechToTextRequest {
8
+ audio: Blob;
9
+ mimeType: string;
10
+ /** Optional ISO 639-1 code (e.g. `it`, `en`). */
11
+ language?: string;
12
+ }
13
+
14
+ export interface SpeechToTextResult {
15
+ text: string;
16
+ }
17
+
18
+ /** Input for speech synthesis. */
19
+ export interface TextToSpeechRequest {
20
+ text: string;
21
+ /** Provider-specific voice (e.g. OpenAI: `alloy`, `echo`, …). */
22
+ voice?: string;
23
+ language?: string;
24
+ /** Desired audio format (depends on the provider). */
25
+ responseFormat?: string;
26
+ }
27
+
28
+ export interface TextToSpeechResult {
29
+ audio: Blob;
30
+ mimeType: string;
31
+ }
32
+
33
+ export abstract class SpeechToTextProvider {
34
+ abstract transcribe(request: SpeechToTextRequest): Promise<SpeechToTextResult>;
35
+ }
36
+
37
+ export abstract class TextToSpeechProvider {
38
+ abstract synthesize(request: TextToSpeechRequest): Promise<TextToSpeechResult>;
39
+ }
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Shared types for microphone capture, VAD and recording (WebM).
3
+ */
4
+
5
+ export const DEFAULT_VOICE_AUDIO_CONSTRAINTS: MediaTrackConstraints = {
6
+ echoCancellation: true,
7
+ noiseSuppression: true,
8
+ autoGainControl: true,
9
+ };
10
+
11
+ export const DEFAULT_VOICE_MEDIA_STREAM_CONSTRAINTS: MediaStreamConstraints = {
12
+ audio: DEFAULT_VOICE_AUDIO_CONSTRAINTS,
13
+ };
14
+
15
+ export interface VoiceRecordedBlob {
16
+ blob: Blob;
17
+ mimeType: string;
18
+ }
19
+
20
+ /**
21
+ * Audio segment produced after VAD; may include `transcript` when STT is configured and enabled.
22
+ */
23
+ export interface VoiceSegmentPayload extends VoiceRecordedBlob {
24
+ transcript?: string;
25
+ transcriptionError?: string;
26
+ }
27
+
28
+ export interface VoiceSessionStartOptions {
29
+ /** Optional if you only consume {@link VoiceService.audioSegment$}. */
30
+ onRecordingComplete?: (result: VoiceSegmentPayload) => void;
31
+ constraints?: MediaStreamConstraints;
32
+ /** Defaults to `true`. When `false`, STT is not invoked on the segment. */
33
+ enableTranscription?: boolean;
34
+ }
@@ -0,0 +1,28 @@
1
+ import { Location } from '@angular/common';
2
+ import { TestBed } from '@angular/core/testing';
3
+
4
+ import { VadService } from './vad.service';
5
+
6
+ describe('VadService', () => { // unit tests for the asset base-URL helpers
7
+ let service: VadService;
8
+
9
+ beforeEach(() => {
10
+ TestBed.configureTestingModule({
11
+ providers: [
12
+ VadService,
13
+ {
14
+ provide: Location, // replace Angular's Location with a minimal stub
15
+ useValue: {
16
+ prepareExternalUrl: (url: string) => `/${url}`, // stub: only prefixes the given path with "/"
17
+ },
18
+ },
19
+ ],
20
+ });
21
+ service = TestBed.inject(VadService);
22
+ });
23
+
24
+ it('should expose VAD and ONNX WASM base URLs with trailing slash', () => {
25
+ expect(service.getVadAssetBaseUrl()).toBe('/assets/vad/'); // stubbed "/" prefix + 'assets/vad/'
26
+ expect(service.getOnnxWasmBaseUrl()).toBe('/assets/onnx/'); // stubbed "/" prefix + 'assets/onnx/'
27
+ });
28
+ });
@@ -0,0 +1,70 @@
1
+ import { Location } from '@angular/common';
2
+ import { Injectable } from '@angular/core';
3
+ import { MicVAD, getDefaultRealTimeVADOptions } from '@ricky0123/vad-web';
4
+ import type { RealTimeVADOptions } from '@ricky0123/vad-web';
5
+
6
/**
 * MicVAD (@ricky0123/vad-web) factory: models are served from `assets/vad/`, the ONNX WASM
 * runtime from `assets/onnx/` (kept in line with `ort.env.wasm.wasmPaths = "/assets/onnx/"`).
 */
@Injectable({ providedIn: 'root' })
export class VadService {
  /** In-flight or completed ONNX runtime configuration; `null` until requested or after a failure. */
  private onnxRuntimeEnvPromise: Promise<void> | null = null;

  constructor(private readonly location: Location) {}

  /**
   * Base URL for `silero_vad_legacy.onnx` / `vad.worklet.bundle.min.js`
   * (MicVAD builds the final URL as `baseAssetPath` + internal file name, not from a single model URL).
   */
  getVadAssetBaseUrl(): string {
    return this.ensureTrailingSlash(this.location.prepareExternalUrl('assets/vad/'));
  }

  /** Base URL for `ort-wasm-*.mjs` / `.wasm` (e.g. `/assets/onnx/`). */
  getOnnxWasmBaseUrl(): string {
    return this.ensureTrailingSlash(this.location.prepareExternalUrl('assets/onnx/'));
  }

  /**
   * Pre-configures the `onnxruntime-web/wasm` module (the same one MicVAD uses):
   * `wasmPaths`, `numThreads` and `logLevel` are set before the first `MicVAD.new`.
   *
   * The configuration runs once and the promise is shared by concurrent callers. If it fails
   * (for example the dynamic import rejects), the cached promise is dropped so that a later
   * call retries instead of returning the same rejection forever.
   *
   * @returns A promise that settles when the runtime environment has been configured.
   */
  ensureOnnxRuntimeEnv(): Promise<void> {
    if (!this.onnxRuntimeEnvPromise) {
      const pending = this.configureOnnxRuntimeEnv();
      this.onnxRuntimeEnvPromise = pending;
      // Side branch only resets the cache; the caller still receives the original rejection.
      pending.catch(() => {
        if (this.onnxRuntimeEnvPromise === pending) {
          this.onnxRuntimeEnvPromise = null;
        }
      });
    }
    return this.onnxRuntimeEnvPromise;
  }

  /**
   * Creates a MicVAD instance using the `legacy` defaults, the local asset paths and a
   * single-threaded WASM runtime.
   *
   * @param overrides Options spread last, so they win over every default set here
   *   (including `ortConfig`).
   * @returns The MicVAD instance, not started (`startOnLoad: false`).
   */
  async createMicVad(overrides: Partial<RealTimeVADOptions>): Promise<MicVAD> {
    await this.ensureOnnxRuntimeEnv();
    const base = getDefaultRealTimeVADOptions('legacy');
    const vadBase = this.getVadAssetBaseUrl();
    const ortWasmBase = this.getOnnxWasmBaseUrl();

    return MicVAD.new({
      ...base,
      startOnLoad: false,
      baseAssetPath: vadBase,
      onnxWASMBasePath: ortWasmBase,
      ortConfig: (ort) => {
        // Run the library default first, then force the same settings as ensureOnnxRuntimeEnv.
        base.ortConfig?.(ort);
        ort.env.wasm.wasmPaths = ortWasmBase;
        ort.env.wasm.numThreads = 1;
        ort.env.logLevel = 'error';
      },
      ...overrides,
    });
  }

  /** Loads `onnxruntime-web/wasm` and applies the WASM path, thread count and log level. */
  private async configureOnnxRuntimeEnv(): Promise<void> {
    const ort = await import('onnxruntime-web/wasm');
    ort.env.wasm.wasmPaths = this.getOnnxWasmBaseUrl();
    ort.env.wasm.numThreads = 1;
    ort.env.logLevel = 'error';
  }

  /** Appends `/` to `path` unless it already ends with one. */
  private ensureTrailingSlash(path: string): string {
    return path.endsWith('/') ? path : `${path}/`;
  }
}
@@ -0,0 +1,60 @@
1
+ import { TestBed } from '@angular/core/testing';
2
+
3
+ import { VoiceService } from './voice.service';
4
+ import { VadService } from './vad.service';
5
+
6
describe('VoiceService', () => {
  let service: VoiceService;
  let vadService: jasmine.SpyObj<VadService>;
  let track: jasmine.SpyObj<MediaStreamTrack>;

  let mockVad: { start: jasmine.Spy; pause: jasmine.Spy; destroy: jasmine.Spy };

  beforeEach(() => {
    mockVad = {
      start: jasmine.createSpy('start').and.returnValue(Promise.resolve()),
      pause: jasmine.createSpy('pause').and.returnValue(Promise.resolve()),
      destroy: jasmine.createSpy('destroy').and.returnValue(Promise.resolve()),
    };
    vadService = jasmine.createSpyObj('VadService', ['ensureOnnxRuntimeEnv', 'createMicVad']);
    vadService.ensureOnnxRuntimeEnv.and.returnValue(Promise.resolve());
    vadService.createMicVad.and.returnValue(Promise.resolve(mockVad as any));

    // A real MediaStream cannot be used here: its constructor rejects spy objects that are not
    // MediaStreamTrack instances, and an empty stream makes createMediaStreamSource throw.
    // The service only calls getTracks() on the stream, so a plain fake is enough.
    track = jasmine.createSpyObj<MediaStreamTrack>('MediaStreamTrack', ['stop']);
    const fakeStream = { getTracks: () => [track] } as unknown as MediaStream;
    spyOn(navigator.mediaDevices, 'getUserMedia').and.returnValue(Promise.resolve(fakeStream));

    // Stub the Web Audio graph created by the service's analyser setup.
    const fakeAnalyser = {
      fftSize: 0,
      frequencyBinCount: 128,
      getByteFrequencyData: jasmine.createSpy('getByteFrequencyData'),
    };
    const fakeAudioContext = {
      state: 'running',
      createMediaStreamSource: () => ({ connect: jasmine.createSpy('connect') }),
      createAnalyser: () => fakeAnalyser,
      close: jasmine.createSpy('close').and.returnValue(Promise.resolve()),
    };
    // Returning an object from the spy makes `new AudioContext()` evaluate to the fake.
    spyOn(window as any, 'AudioContext').and.returnValue(fakeAudioContext);

    TestBed.configureTestingModule({
      providers: [VoiceService, { provide: VadService, useValue: vadService }],
    });
    service = TestBed.inject(VoiceService);
  });

  afterEach(async () => {
    // Release whatever a test left running so specs do not leak state into each other.
    await service.stopSession();
  });

  it('startSession should call ensureOnnxRuntimeEnv', async () => {
    await service.startSession({});

    expect(vadService.ensureOnnxRuntimeEnv).toHaveBeenCalled();
  });

  it('startSession should request mic, create MicVAD, and start', async () => {
    await service.startSession({
      onRecordingComplete: () => {},
    });

    expect(navigator.mediaDevices.getUserMedia).toHaveBeenCalled();
    expect(vadService.createMicVad).toHaveBeenCalled();
    expect(mockVad.start).toHaveBeenCalled();
  });

  it('stopSession should destroy VAD and stop tracks', async () => {
    await service.startSession({ onRecordingComplete: () => {} });
    await service.stopSession();

    expect(mockVad.destroy).toHaveBeenCalled();
    expect(track.stop).toHaveBeenCalled();
    expect(service.isSessionActive).toBeFalse();
  });
});
@@ -0,0 +1,264 @@
1
+ import { Inject, Injectable, Optional } from '@angular/core';
2
+ import type { MicVAD } from '@ricky0123/vad-web';
3
+ import { getDefaultRealTimeVADOptions } from '@ricky0123/vad-web';
4
+ import { BehaviorSubject, Observable, Subject } from 'rxjs';
5
+ import { LoggerInstance } from 'src/chat21-core/providers/logger/loggerInstance';
6
+ import { LoggerService } from 'src/chat21-core/providers/abstract/logger.service';
7
+
8
+ import {
9
+ DEFAULT_VOICE_MEDIA_STREAM_CONSTRAINTS,
10
+ VoiceSegmentPayload,
11
+ VoiceSessionStartOptions,
12
+ } from './audio.types';
13
+ import { SpeechToTextProvider } from './STT&TTS/speech-provider.abstract';
14
+ import { VadService } from './vad.service';
15
+
16
const VOICE_RECORDING_MIME = 'audio/webm';

/**
 * Voice capture pipeline: VadService (ONNX WASM) → MicVAD → one MediaRecorder run per spoken segment.
 * Optionally an STT provider (`SpeechToTextProvider`) enriches the payload with `transcript`.
 */
@Injectable({ providedIn: 'root' })
export class VoiceService {
  private vad?: MicVAD;
  private stream?: MediaStream;
  private mediaRecorder?: MediaRecorder;
  private sessionConstraints: MediaStreamConstraints = DEFAULT_VOICE_MEDIA_STREAM_CONSTRAINTS;
  private onRecordingComplete?: (result: VoiceSegmentPayload) => void;
  private enableTranscription = true;

  private readonly audioSegmentSubject = new Subject<VoiceSegmentPayload>();
  /** Emits at the end of every spoken segment: recorded audio plus optional `transcript` / `transcriptionError`. */
  readonly audioSegment$: Observable<VoiceSegmentPayload> = this.audioSegmentSubject.asObservable();

  // Real-time volume stream (average of the analyser's byte frequency bins, 0 when idle).
  private readonly volumeSubject = new BehaviorSubject<number>(0);
  readonly volume$: Observable<number> = this.volumeSubject.asObservable();

  // Audio analyser state.
  private audioContext?: AudioContext;
  private analyser?: AnalyserNode;
  /** Dedicated buffer (backed by an `ArrayBuffer`) for compatibility with `getByteFrequencyData`. */
  private dataArray?: Uint8Array;
  /** Handle of the pending animation frame of the volume loop; `undefined` when the loop is stopped. */
  private volumeFrameId?: number;

  private readonly logger: LoggerService = LoggerInstance.getInstance();

  constructor(
    private readonly vadService: VadService,
    @Optional() @Inject(SpeechToTextProvider) private readonly speechToText: SpeechToTextProvider | null,
  ) {}

  /** `true` while a VAD instance or a microphone stream is held. */
  get isSessionActive(): boolean {
    return !!this.vad || !!this.stream;
  }

  /**
   * Requests the microphone, starts the VAD (speech start/end detection) and records one
   * audio blob per spoken segment. Any previous session is stopped first.
   *
   * @param options Optional completion callback, media constraints and STT toggle.
   * @throws Re-throws any failure of the microphone request or VAD setup, after releasing
   *   everything acquired so far (stream, audio context, VAD).
   */
  async startSession(options: VoiceSessionStartOptions = {}): Promise<void> {
    await this.stopSession();

    this.sessionConstraints = options.constraints ?? DEFAULT_VOICE_MEDIA_STREAM_CONSTRAINTS;
    this.onRecordingComplete = options.onRecordingComplete;
    this.enableTranscription = options.enableTranscription !== false;

    try {
      await this.vadService.ensureOnnxRuntimeEnv();

      this.stream = await navigator.mediaDevices.getUserMedia(this.sessionConstraints);
      this.initAudioAnalyser(this.stream);

      const vadDefaults = getDefaultRealTimeVADOptions('legacy');

      this.vad = await this.vadService.createMicVad({
        getStream: async () => this.stream as MediaStream,
        pauseStream: vadDefaults.pauseStream,
        resumeStream: async () => {
          this.stream = await navigator.mediaDevices.getUserMedia(this.sessionConstraints);
          this.initAudioAnalyser(this.stream);
          return this.stream;
        },
        onSpeechStart: () => {
          this.logger.log('[VoiceService] speech start');
          this.startMediaRecorderSegment();
        },
        onSpeechEnd: () => {
          this.logger.log('[VoiceService] speech end');
          this.stopMediaRecorderSegment();
        },
        // A misfire is not followed by onSpeechEnd; without this the recorder would keep
        // running and merge the silence into the next segment.
        onVADMisfire: () => {
          this.logger.log('[VoiceService] speech misfire');
          this.discardMediaRecorderSegment();
        },
        minSpeechMs: 480,
        redemptionMs: 1920,
        preSpeechPadMs: 960,
      });

      await this.vad.start();

      this.startVolumeLoop();
    } catch (e: unknown) {
      // Do not leave the microphone open or the audio context alive on a half-started session.
      await this.stopSession();
      throw e;
    }
  }

  /**
   * Stops the session: cancels the volume loop, drops any segment still being recorded,
   * destroys the VAD, stops the microphone tracks and closes the audio context.
   * Safe to call when no session is active.
   */
  async stopSession(): Promise<void> {
    this.stopVolumeLoop();
    this.discardMediaRecorderSegment();

    if (this.vad) {
      try {
        await this.vad.pause();
        await this.vad.destroy();
      } catch (e) {
        this.logger.log('[VoiceService] stopSession VAD cleanup', e);
      }
      this.vad = undefined;
    }

    if (this.stream) {
      this.stream.getTracks().forEach((t) => t.stop());
      this.stream = undefined;
    }

    this.disposeAudioAnalyser();

    this.volumeSubject.next(0);

    this.onRecordingComplete = undefined;
  }

  /**
   * Builds the analyser graph (AudioContext → MediaStreamSource → AnalyserNode) for `stream`.
   * Any previous graph is released first, so a stream resume does not leak an AudioContext.
   */
  private initAudioAnalyser(stream: MediaStream): void {
    this.disposeAudioAnalyser();

    this.audioContext = new AudioContext();

    const source = this.audioContext.createMediaStreamSource(stream);

    this.analyser = this.audioContext.createAnalyser();
    this.analyser.fftSize = 256;

    const bins = this.analyser.frequencyBinCount;
    this.dataArray = new Uint8Array(new ArrayBuffer(bins));

    source.connect(this.analyser);
  }

  /** Closes the current AudioContext (if any) and clears the analyser state. */
  private disposeAudioAnalyser(): void {
    const context = this.audioContext;
    this.audioContext = undefined;
    this.analyser = undefined;
    this.dataArray = undefined;

    if (context && context.state !== 'closed') {
      // close() is asynchronous; a failure here is only worth a log entry.
      void context.close().catch((e: unknown) => {
        this.logger.log('[VoiceService] AudioContext close failed', e);
      });
    }
  }

  /**
   * Starts the per-frame volume loop. Each frame publishes the average of the analyser's
   * byte frequency data on `volume$`; frames without an analyser publish nothing.
   * A loop that is already running is cancelled first, so only one loop exists at a time.
   */
  private startVolumeLoop(): void {
    this.stopVolumeLoop();

    const tick = (): void => {
      const analyser = this.analyser;
      const data = this.dataArray;

      if (analyser && data) {
        analyser.getByteFrequencyData(data);

        let sum = 0;
        for (let i = 0; i < data.length; i++) {
          sum += data[i];
        }

        this.volumeSubject.next(sum / data.length);
      }

      this.volumeFrameId = requestAnimationFrame(tick);
    };

    tick();
  }

  /** Cancels the pending animation frame of the volume loop, if any. */
  private stopVolumeLoop(): void {
    if (this.volumeFrameId !== undefined) {
      cancelAnimationFrame(this.volumeFrameId);
      this.volumeFrameId = undefined;
    }
  }

  /**
   * Starts recording a new segment on the current stream. No-op when a segment is already
   * being recorded or when there is no stream.
   *
   * Chunks are collected per recorder, and `onstop` is attached before the recorder starts,
   * so a segment that is still finalising cannot be affected by the next one.
   */
  private startMediaRecorderSegment(): void {
    if (this.mediaRecorder?.state === 'recording') return;
    if (!this.stream) return;

    // Prefer WebM; where it is unsupported let the browser choose its default container.
    const webmSupported =
      typeof MediaRecorder.isTypeSupported === 'function' &&
      MediaRecorder.isTypeSupported(VOICE_RECORDING_MIME);

    let recorder: MediaRecorder;
    try {
      recorder = webmSupported
        ? new MediaRecorder(this.stream, { mimeType: VOICE_RECORDING_MIME })
        : new MediaRecorder(this.stream);
    } catch (e) {
      // Throwing from inside the VAD callback would disturb the VAD itself; log and skip the segment.
      this.logger.log('[VoiceService] MediaRecorder init failed', e);
      return;
    }

    const chunks: Blob[] = [];

    recorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        chunks.push(event.data);
      }
    };

    recorder.onstop = () => {
      const mimeType = webmSupported
        ? VOICE_RECORDING_MIME
        : recorder.mimeType || VOICE_RECORDING_MIME;
      const blob = new Blob(chunks, { type: mimeType });

      void this.finalizeSegment(blob, mimeType);
    };

    this.mediaRecorder = recorder;
    recorder.start();
  }

  /** Ends the current segment; its `onstop` handler then finalises and emits it. */
  private stopMediaRecorderSegment(): void {
    const recorder = this.mediaRecorder;
    if (!recorder || recorder.state === 'inactive') return;

    recorder.stop();
  }

  /**
   * Drops the current recorder. A segment still being recorded is stopped without being
   * emitted; a segment whose stop was already requested is left to finalise normally.
   */
  private discardMediaRecorderSegment(): void {
    const recorder = this.mediaRecorder;
    this.mediaRecorder = undefined;
    if (!recorder || recorder.state === 'inactive') return;

    recorder.ondataavailable = null;
    recorder.onstop = null;
    recorder.stop();
  }

  /**
   * Emits the finished segment, first running STT when transcription is enabled, a provider
   * is available and the blob is not empty. An STT failure is reported through
   * `transcriptionError` on the payload instead of being thrown.
   */
  private async finalizeSegment(blob: Blob, mimeType: string): Promise<void> {
    const base: VoiceSegmentPayload = { blob, mimeType };

    const stt = this.speechToText;
    if (!this.enableTranscription || !stt || blob.size === 0) {
      this.emitSegmentPayload(base);
      return;
    }

    try {
      const { text } = await stt.transcribe({
        audio: blob,
        mimeType,
      });

      this.emitSegmentPayload({ ...base, transcript: text });
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      this.logger.log('[VoiceService] transcription failed', msg);

      this.emitSegmentPayload({
        ...base,
        transcriptionError: msg,
      });
    }
  }

  /** Publishes the payload on `audioSegment$` and then invokes the session callback, if set. */
  private emitSegmentPayload(payload: VoiceSegmentPayload): void {
    this.logger.log( '[VoiceService] segment ready', payload.transcript ?? payload.transcriptionError ?? payload.blob.size);

    this.audioSegmentSubject.next(payload);

    this.onRecordingComplete?.(payload);
  }
}
@@ -0,0 +1,4 @@
1
+ export * from "onnxruntime-web"; // re-export every named export of onnxruntime-web
2
+ import * as ort from "onnxruntime-web"; // namespace import, used below as the default export
3
+
4
+ export default ort; // NOTE(review): presumably this shim is aliased to 'onnxruntime-web/wasm' via tsconfig paths — confirm