@framers/agentos 0.1.74 → 0.1.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +139 -34
- package/dist/core/agency/AgentCommunicationBus.d.ts +1 -0
- package/dist/core/agency/AgentCommunicationBus.d.ts.map +1 -1
- package/dist/core/agency/AgentCommunicationBus.js +62 -8
- package/dist/core/agency/AgentCommunicationBus.js.map +1 -1
- package/dist/core/agency/IAgentCommunicationBus.d.ts +1 -1
- package/dist/core/agency/IAgentCommunicationBus.d.ts.map +1 -1
- package/dist/orchestration/builders/index.d.ts +1 -1
- package/dist/orchestration/builders/index.d.ts.map +1 -1
- package/dist/orchestration/builders/index.js +1 -1
- package/dist/orchestration/builders/index.js.map +1 -1
- package/dist/orchestration/builders/nodes.d.ts +15 -0
- package/dist/orchestration/builders/nodes.d.ts.map +1 -1
- package/dist/orchestration/builders/nodes.js +33 -0
- package/dist/orchestration/builders/nodes.js.map +1 -1
- package/dist/orchestration/runtime/LoopController.d.ts +10 -10
- package/dist/orchestration/runtime/LoopController.d.ts.map +1 -1
- package/dist/orchestration/runtime/LoopController.js +1 -1
- package/dist/orchestration/runtime/LoopController.js.map +1 -1
- package/dist/orchestration/runtime/index.d.ts +1 -1
- package/dist/orchestration/runtime/index.d.ts.map +1 -1
- package/dist/orchestration/runtime/index.js.map +1 -1
- package/dist/speech/FallbackProxy.d.ts +104 -0
- package/dist/speech/FallbackProxy.d.ts.map +1 -0
- package/dist/speech/FallbackProxy.js +151 -0
- package/dist/speech/FallbackProxy.js.map +1 -0
- package/dist/speech/SpeechProviderResolver.d.ts +103 -0
- package/dist/speech/SpeechProviderResolver.d.ts.map +1 -0
- package/dist/speech/SpeechProviderResolver.js +256 -0
- package/dist/speech/SpeechProviderResolver.js.map +1 -0
- package/dist/speech/SpeechRuntime.d.ts +23 -1
- package/dist/speech/SpeechRuntime.d.ts.map +1 -1
- package/dist/speech/SpeechRuntime.js +82 -8
- package/dist/speech/SpeechRuntime.js.map +1 -1
- package/dist/speech/index.d.ts +6 -0
- package/dist/speech/index.d.ts.map +1 -1
- package/dist/speech/index.js +6 -0
- package/dist/speech/index.js.map +1 -1
- package/dist/speech/providerCatalog.d.ts.map +1 -1
- package/dist/speech/providerCatalog.js +15 -1
- package/dist/speech/providerCatalog.js.map +1 -1
- package/dist/speech/providers/AssemblyAISTTProvider.d.ts +49 -0
- package/dist/speech/providers/AssemblyAISTTProvider.d.ts.map +1 -0
- package/dist/speech/providers/AssemblyAISTTProvider.js +151 -0
- package/dist/speech/providers/AssemblyAISTTProvider.js.map +1 -0
- package/dist/speech/providers/AzureSpeechSTTProvider.d.ts +48 -0
- package/dist/speech/providers/AzureSpeechSTTProvider.d.ts.map +1 -0
- package/dist/speech/providers/AzureSpeechSTTProvider.js +90 -0
- package/dist/speech/providers/AzureSpeechSTTProvider.js.map +1 -0
- package/dist/speech/providers/AzureSpeechTTSProvider.d.ts +60 -0
- package/dist/speech/providers/AzureSpeechTTSProvider.d.ts.map +1 -0
- package/dist/speech/providers/AzureSpeechTTSProvider.js +127 -0
- package/dist/speech/providers/AzureSpeechTTSProvider.js.map +1 -0
- package/dist/speech/providers/DeepgramBatchSTTProvider.d.ts +55 -0
- package/dist/speech/providers/DeepgramBatchSTTProvider.d.ts.map +1 -0
- package/dist/speech/providers/DeepgramBatchSTTProvider.js +102 -0
- package/dist/speech/providers/DeepgramBatchSTTProvider.js.map +1 -0
- package/dist/speech/types.d.ts +35 -0
- package/dist/speech/types.d.ts.map +1 -1
- package/dist/voice/CallManager.d.ts +1 -1
- package/dist/voice/CallManager.d.ts.map +1 -1
- package/dist/voice/CallManager.js +9 -0
- package/dist/voice/CallManager.js.map +1 -1
- package/dist/voice/MediaStreamParser.d.ts +83 -0
- package/dist/voice/MediaStreamParser.d.ts.map +1 -0
- package/dist/voice/MediaStreamParser.js +2 -0
- package/dist/voice/MediaStreamParser.js.map +1 -0
- package/dist/voice/TelephonyStreamTransport.d.ts +112 -0
- package/dist/voice/TelephonyStreamTransport.d.ts.map +1 -0
- package/dist/voice/TelephonyStreamTransport.js +208 -0
- package/dist/voice/TelephonyStreamTransport.js.map +1 -0
- package/dist/voice/index.d.ts +10 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +11 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/parsers/PlivoMediaStreamParser.d.ts +43 -0
- package/dist/voice/parsers/PlivoMediaStreamParser.d.ts.map +1 -0
- package/dist/voice/parsers/PlivoMediaStreamParser.js +92 -0
- package/dist/voice/parsers/PlivoMediaStreamParser.js.map +1 -0
- package/dist/voice/parsers/TelnyxMediaStreamParser.d.ts +51 -0
- package/dist/voice/parsers/TelnyxMediaStreamParser.d.ts.map +1 -0
- package/dist/voice/parsers/TelnyxMediaStreamParser.js +103 -0
- package/dist/voice/parsers/TelnyxMediaStreamParser.js.map +1 -0
- package/dist/voice/parsers/TwilioMediaStreamParser.d.ts +50 -0
- package/dist/voice/parsers/TwilioMediaStreamParser.d.ts.map +1 -0
- package/dist/voice/parsers/TwilioMediaStreamParser.js +144 -0
- package/dist/voice/parsers/TwilioMediaStreamParser.js.map +1 -0
- package/dist/voice/providers/plivo.d.ts +77 -0
- package/dist/voice/providers/plivo.d.ts.map +1 -0
- package/dist/voice/providers/plivo.js +180 -0
- package/dist/voice/providers/plivo.js.map +1 -0
- package/dist/voice/providers/telnyx.d.ts +93 -0
- package/dist/voice/providers/telnyx.d.ts.map +1 -0
- package/dist/voice/providers/telnyx.js +193 -0
- package/dist/voice/providers/telnyx.js.map +1 -0
- package/dist/voice/providers/twilio.d.ts +79 -0
- package/dist/voice/providers/twilio.d.ts.map +1 -0
- package/dist/voice/providers/twilio.js +191 -0
- package/dist/voice/providers/twilio.js.map +1 -0
- package/dist/voice/twiml.d.ts +69 -0
- package/dist/voice/twiml.d.ts.map +1 -0
- package/dist/voice/twiml.js +92 -0
- package/dist/voice/twiml.js.map +1 -0
- package/dist/voice/types.d.ts +9 -1
- package/dist/voice/types.d.ts.map +1 -1
- package/dist/voice-pipeline/AcousticEndpointDetector.d.ts +90 -0
- package/dist/voice-pipeline/AcousticEndpointDetector.d.ts.map +1 -0
- package/dist/voice-pipeline/AcousticEndpointDetector.js +123 -0
- package/dist/voice-pipeline/AcousticEndpointDetector.js.map +1 -0
- package/dist/voice-pipeline/HardCutBargeinHandler.d.ts +67 -0
- package/dist/voice-pipeline/HardCutBargeinHandler.d.ts.map +1 -0
- package/dist/voice-pipeline/HardCutBargeinHandler.js +55 -0
- package/dist/voice-pipeline/HardCutBargeinHandler.js.map +1 -0
- package/dist/voice-pipeline/HeuristicEndpointDetector.d.ts +128 -0
- package/dist/voice-pipeline/HeuristicEndpointDetector.d.ts.map +1 -0
- package/dist/voice-pipeline/HeuristicEndpointDetector.js +240 -0
- package/dist/voice-pipeline/HeuristicEndpointDetector.js.map +1 -0
- package/dist/voice-pipeline/SoftFadeBargeinHandler.d.ts +96 -0
- package/dist/voice-pipeline/SoftFadeBargeinHandler.d.ts.map +1 -0
- package/dist/voice-pipeline/SoftFadeBargeinHandler.js +69 -0
- package/dist/voice-pipeline/SoftFadeBargeinHandler.js.map +1 -0
- package/dist/voice-pipeline/VoicePipelineOrchestrator.d.ts +122 -0
- package/dist/voice-pipeline/VoicePipelineOrchestrator.d.ts.map +1 -0
- package/dist/voice-pipeline/VoicePipelineOrchestrator.js +317 -0
- package/dist/voice-pipeline/VoicePipelineOrchestrator.js.map +1 -0
- package/dist/voice-pipeline/WebSocketStreamTransport.d.ts +148 -0
- package/dist/voice-pipeline/WebSocketStreamTransport.d.ts.map +1 -0
- package/dist/voice-pipeline/WebSocketStreamTransport.js +207 -0
- package/dist/voice-pipeline/WebSocketStreamTransport.js.map +1 -0
- package/dist/voice-pipeline/index.d.ts +13 -0
- package/dist/voice-pipeline/index.d.ts.map +1 -0
- package/dist/voice-pipeline/index.js +13 -0
- package/dist/voice-pipeline/index.js.map +1 -0
- package/dist/voice-pipeline/types.d.ts +905 -0
- package/dist/voice-pipeline/types.d.ts.map +1 -0
- package/dist/voice-pipeline/types.js +23 -0
- package/dist/voice-pipeline/types.js.map +1 -0
- package/package.json +6 -1
package/dist/voice/types.d.ts
CHANGED
|
@@ -99,7 +99,7 @@ export interface CallRecord {
|
|
|
99
99
|
* Normalized webhook event from any telephony provider.
|
|
100
100
|
* Uses a discriminated union on the `kind` field.
|
|
101
101
|
*/
|
|
102
|
-
export type NormalizedCallEvent = NormalizedCallRinging | NormalizedCallAnswered | NormalizedCallCompleted | NormalizedCallFailed | NormalizedCallBusy | NormalizedCallNoAnswer | NormalizedCallVoicemail | NormalizedCallHangupUser | NormalizedCallError | NormalizedTranscript | NormalizedSpeechStart | NormalizedMediaStreamConnected;
|
|
102
|
+
export type NormalizedCallEvent = NormalizedCallRinging | NormalizedCallAnswered | NormalizedCallCompleted | NormalizedCallFailed | NormalizedCallBusy | NormalizedCallNoAnswer | NormalizedCallVoicemail | NormalizedCallHangupUser | NormalizedCallError | NormalizedTranscript | NormalizedSpeechStart | NormalizedMediaStreamConnected | NormalizedDtmfReceived;
|
|
103
103
|
interface NormalizedEventBase {
|
|
104
104
|
/** Provider-assigned event ID for idempotency. */
|
|
105
105
|
eventId: string;
|
|
@@ -150,6 +150,14 @@ export interface NormalizedMediaStreamConnected extends NormalizedEventBase {
|
|
|
150
150
|
kind: 'media-stream-connected';
|
|
151
151
|
streamSid: string;
|
|
152
152
|
}
|
|
153
|
+
/** DTMF digit received during a call. */
|
|
154
|
+
export interface NormalizedDtmfReceived extends NormalizedEventBase {
|
|
155
|
+
kind: 'call-dtmf';
|
|
156
|
+
/** The digit pressed: '0'-'9', '*', '#' */
|
|
157
|
+
digit: string;
|
|
158
|
+
/** How long the key was pressed (ms), if available from provider */
|
|
159
|
+
durationMs?: number;
|
|
160
|
+
}
|
|
153
161
|
/** Raw webhook context passed to provider verification. */
|
|
154
162
|
export interface WebhookContext {
|
|
155
163
|
/** HTTP method (usually POST). */
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/voice/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAMH;;GAEG;AACH,MAAM,MAAM,iBAAiB,GACzB,QAAQ,GACR,QAAQ,GACR,OAAO,GACP,MAAM,GACN,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAMlB;;;;;GAKG;AACH,MAAM,MAAM,SAAS,GAEjB,WAAW,GACX,SAAS,GACT,UAAU,GACV,QAAQ,GAER,UAAU,GACV,WAAW,GAEX,WAAW,GACX,aAAa,GACb,YAAY,GACZ,SAAS,GACT,OAAO,GACP,QAAQ,GACR,WAAW,GACX,MAAM,GACN,WAAW,CAAC;AAEhB,0EAA0E;AAC1E,eAAO,MAAM,oBAAoB,gBAU/B,CAAC;AAEH,6DAA6D;AAC7D,eAAO,MAAM,mBAAmB,gBAAgD,CAAC;AAEjF,qEAAqE;AACrE,eAAO,MAAM,WAAW,EAAE,SAAS,SAAS,EAO3C,CAAC;AAMF;;;;GAIG;AACH,MAAM,MAAM,QAAQ,GAAG,QAAQ,GAAG,cAAc,CAAC;AAEjD;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,UAAU,GAAG,SAAS,CAAC;AAEnD;;;;;;GAMG;AACH,MAAM,MAAM,aAAa,GAAG,UAAU,GAAG,WAAW,GAAG,SAAS,GAAG,MAAM,CAAC;AAM1E,2CAA2C;AAC3C,MAAM,WAAW,eAAe;IAC9B,kDAAkD;IAClD,SAAS,EAAE,MAAM,CAAC;IAClB,iBAAiB;IACjB,OAAO,EAAE,KAAK,GAAG,MAAM,CAAC;IACxB,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,sEAAsE;IACtE,OAAO,EAAE,OAAO,CAAC;CAClB;AAMD,8BAA8B;AAC9B,MAAM,MAAM,MAAM,GAAG,MAAM,CAAC;AAE5B;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,qCAAqC;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,wDAAwD;IACxD,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,4CAA4C;IAC5C,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,2CAA2C;IAC3C,KAAK,EAAE,SAAS,CAAC;IACjB,sBAAsB;IACtB,SAAS,EAAE,aAAa,CAAC;IACzB,6BAA6B;IAC7B,IAAI,EAAE,QAAQ,CAAC;IACf,wCAAwC;IACxC,UAAU,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,oDAAoD;IACpD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,+BAA+B;IAC/B,UAAU,EAAE,eAAe,EAAE,CAAC;IAC9B,6DAA6D;IAC7D,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,SAAS,EAAE,MAAM,CAAC;IAClB,kEAAkE;IAClE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,qDAAqD;IACrD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,kCAAkC;IAClC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAMD;;;GAGG;AACH,MAAM,MAAM,mBAAmB,GAC3B,qBAAqB,GACrB,sBAAsB,GACtB,uBAAuB,GACvB,oBAAoB,GACpB,kBAAkB,GAClB,sBAAsB,GACtB,uBAAuB,GACvB,wBAAwB,GACxB,mBAAmB,GACnB,oBAAoB,GACpB,qBAAqB,GACrB,8BAA8B,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/voice/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAMH;;GAEG;AACH,MAAM,MAAM,iBAAiB,GACzB,QAAQ,GACR,QAAQ,GACR,OAAO,GACP,MAAM,GACN,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAMlB;;;;;GAKG;AACH,MAAM,MAAM,SAAS,GAEjB,WAAW,GACX,SAAS,GACT,UAAU,GACV,QAAQ,GAER,UAAU,GACV,WAAW,GAEX,WAAW,GACX,aAAa,GACb,YAAY,GACZ,SAAS,GACT,OAAO,GACP,QAAQ,GACR,WAAW,GACX,MAAM,GACN,WAAW,CAAC;AAEhB,0EAA0E;AAC1E,eAAO,MAAM,oBAAoB,gBAU/B,CAAC;AAEH,6DAA6D;AAC7D,eAAO,MAAM,mBAAmB,gBAAgD,CAAC;AAEjF,qEAAqE;AACrE,eAAO,MAAM,WAAW,EAAE,SAAS,SAAS,EAO3C,CAAC;AAMF;;;;GAIG;AACH,MAAM,MAAM,QAAQ,GAAG,QAAQ,GAAG,cAAc,CAAC;AAEjD;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,UAAU,GAAG,SAAS,CAAC;AAEnD;;;;;;GAMG;AACH,MAAM,MAAM,aAAa,GAAG,UAAU,GAAG,WAAW,GAAG,SAAS,GAAG,MAAM,CAAC;AAM1E,2CAA2C;AAC3C,MAAM,WAAW,eAAe;IAC9B,kDAAkD;IAClD,SAAS,EAAE,MAAM,CAAC;IAClB,iBAAiB;IACjB,OAAO,EAAE,KAAK,GAAG,MAAM,CAAC;IACxB,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,sEAAsE;IACtE,OAAO,EAAE,OAAO,CAAC;CAClB;AAMD,8BAA8B;AAC9B,MAAM,MAAM,MAAM,GAAG,MAAM,CAAC;AAE5B;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,qCAAqC;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,wDAAwD;IACxD,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,4CAA4C;IAC5C,QAAQ,EAAE,iBAAiB,CAAC;IAC5B,2CAA2C;IAC3C,KAAK,EAAE,SAAS,CAAC;IACjB,sBAAsB;IACtB,SAAS,EAAE,aAAa,CAAC;IACzB,6BAA6B;IAC7B,IAAI,EAAE,QAAQ,CAAC;IACf,wCAAwC;IACxC,UAAU,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,oDAAoD;IACpD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,+BAA+B;IAC/B,UAAU,EAAE,eAAe,EAAE,CAAC;IAC9B,6DAA6D;IAC7D,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,SAAS,EAAE,MAAM,CAAC;IAClB,kEAAkE;IAClE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,qDAAqD;IACrD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,kCAAkC;IAClC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAMD;;;GAGG;AACH,MAAM,MAAM,mBAAmB,GAC3B,qBAAqB,GACrB,sBAAsB,GACtB,uBAAuB,GACvB,oBAAoB,GACpB,kBAAkB,GAClB,sBAAsB,GACtB,uBAAuB,GACvB,wBAAwB,GACxB,mBAAmB,GACnB,oBAAoB,GACpB,qBAAqB,GACrB,8BAA8B,GAC9B,sBAAsB,CAAC;AAE3B,UAAU,mBAAmB;IAC3B,kDAA
kD;IAClD,OAAO,EAAE,MAAM,CAAC;IAChB,iCAAiC;IACjC,cAAc,EAAE,MAAM,CAAC;IACvB,2BAA2B;IAC3B,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,qBAAsB,SAAQ,mBAAmB;IAChE,IAAI,EAAE,cAAc,CAAC;CACtB;AACD,MAAM,WAAW,sBAAuB,SAAQ,mBAAmB;IACjE,IAAI,EAAE,eAAe,CAAC;CACvB;AACD,MAAM,WAAW,uBAAwB,SAAQ,mBAAmB;IAClE,IAAI,EAAE,gBAAgB,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AACD,MAAM,WAAW,oBAAqB,SAAQ,mBAAmB;IAC/D,IAAI,EAAE,aAAa,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AACD,MAAM,WAAW,kBAAmB,SAAQ,mBAAmB;IAC7D,IAAI,EAAE,WAAW,CAAC;CACnB;AACD,MAAM,WAAW,sBAAuB,SAAQ,mBAAmB;IACjE,IAAI,EAAE,gBAAgB,CAAC;CACxB;AACD,MAAM,WAAW,uBAAwB,SAAQ,mBAAmB;IAClE,IAAI,EAAE,gBAAgB,CAAC;CACxB;AACD,MAAM,WAAW,wBAAyB,SAAQ,mBAAmB;IACnE,IAAI,EAAE,kBAAkB,CAAC;CAC1B;AACD,MAAM,WAAW,mBAAoB,SAAQ,mBAAmB;IAC9D,IAAI,EAAE,YAAY,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;CACf;AACD,MAAM,WAAW,oBAAqB,SAAQ,mBAAmB;IAC/D,IAAI,EAAE,YAAY,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,OAAO,CAAC;CAClB;AACD,MAAM,WAAW,qBAAsB,SAAQ,mBAAmB;IAChE,IAAI,EAAE,cAAc,CAAC;CACtB;AACD,MAAM,WAAW,8BAA+B,SAAQ,mBAAmB;IACzE,IAAI,EAAE,wBAAwB,CAAC;IAC/B,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,yCAAyC;AACzC,MAAM,WAAW,sBAAuB,SAAQ,mBAAmB;IACjE,IAAI,EAAE,WAAW,CAAC;IAClB,2CAA2C;IAC3C,KAAK,EAAE,MAAM,CAAC;IACd,oEAAoE;IACpE,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAMD,2DAA2D;AAC3D,MAAM,WAAW,cAAc;IAC7B,kCAAkC;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,0DAA0D;IAC1D,GAAG,EAAE,MAAM,CAAC;IACZ,oBAAoB;IACpB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,SAAS,CAAC,CAAC;IACvD,2CAA2C;IAC3C,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC;IACtB,+DAA+D;IAC/D,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACrC;AAED,gDAAgD;AAChD,MAAM,WAAW,yBAAyB;IACxC,8CAA8C;IAC9C,KAAK,EAAE,OAAO,CAAC;IACf,4CAA4C;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,mEAAmE;AACnE,MAAM,WAAW,kBAAkB;IACjC,oDAAoD;IACpD,MAAM,EAAE,mBAAmB,EAAE,CAAC;IAC9B,gDAAgD;IAChD,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAMD,oCAAoC;AACpC,MAAM,MAAM,oBAAoB,GAAG,QAAQ,GAAG,YAAY,GAAG,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAE3E,mDAAmD;AACnD,MAAM,WAAW,kBAAkB;IACjC,2BAA2B;IAC3B,QAAQ,CAAC,EAAE,oBAAoB,CAAC;IAChC,uBAAuB
;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,wBAAwB;IACxB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iCAAiC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAED,yCAAyC;AACzC,MAAM,WAAW,kBAAkB;IACjC,iEAAiE;IACjE,QAAQ,CAAC,EAAE,iBAAiB,GAAG,SAAS,GAAG,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;IACzD,6BAA6B;IAC7B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iCAAiC;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAMD,uCAAuC;AACvC,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,oBAAoB;IACnC,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,mBAAmB;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,qCAAqC;AACrC,MAAM,MAAM,cAAc,GACtB;IAAE,QAAQ,EAAE,QAAQ,CAAC;IAAC,MAAM,EAAE,oBAAoB,CAAA;CAAE,GACpD;IAAE,QAAQ,EAAE,QAAQ,CAAC;IAAC,MAAM,EAAE,oBAAoB,CAAA;CAAE,GACpD;IAAE,QAAQ,EAAE,OAAO,CAAC;IAAC,MAAM,EAAE,mBAAmB,CAAA;CAAE,GAClD;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAAE,CAAC;AAE3D,4CAA4C;AAC5C,MAAM,WAAW,eAAe;IAC9B,iCAAiC;IACjC,QAAQ,EAAE,cAAc,CAAC;IACzB,oCAAoC;IACpC,GAAG,CAAC,EAAE,kBAAkB,CAAC;IACzB,oCAAoC;IACpC,GAAG,CAAC,EAAE,kBAAkB,CAAC;IACzB,2BAA2B;IAC3B,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,2DAA2D;IAC3D,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,4CAA4C;IAC5C,WAAW,CAAC,EAAE,QAAQ,CAAC;IACvB,+DAA+D;IAC/D,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,yDAAyD;IACzD,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,kCAAkC;IAClC,SAAS,CAAC,EAAE;QACV,kDAAkD;QAClD,OAAO,EAAE,OAAO,CAAC;QACjB,uEAAuE;QACvE,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;CACH"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module voice-pipeline/AcousticEndpointDetector
|
|
3
|
+
*
|
|
4
|
+
* Acoustic-only endpoint detector that wraps {@link SilenceDetector} to convert
|
|
5
|
+
* VAD events into turn-completion decisions. It ignores transcript content entirely
|
|
6
|
+
* and relies solely on the duration of post-speech silence to decide when the user
|
|
7
|
+
* has finished speaking.
|
|
8
|
+
*
|
|
9
|
+
* Emits:
|
|
10
|
+
* - `'turn_complete'` ({@link TurnCompleteEvent}) — silence exceeded the configured
|
|
11
|
+
* `utteranceEndThresholdMs` after the most recent `speech_end` VAD event.
|
|
12
|
+
* - `'speech_start'` (no payload) — re-emitted when a `speech_start` VAD event is received.
|
|
13
|
+
*/
|
|
14
|
+
import { EventEmitter } from 'node:events';
|
|
15
|
+
import type { IEndpointDetector, VadEvent, TranscriptEvent } from './types.js';
|
|
16
|
+
/**
|
|
17
|
+
* Constructor options for {@link AcousticEndpointDetector}.
|
|
18
|
+
*/
|
|
19
|
+
export interface AcousticEndpointDetectorConfig {
|
|
20
|
+
/**
|
|
21
|
+
* Silence duration after speech (ms) that triggers a "significant pause"
|
|
22
|
+
* notification on the underlying {@link SilenceDetector}. Does not directly
|
|
23
|
+
* cause `turn_complete` to fire, but is forwarded to the SilenceDetector.
|
|
24
|
+
* @defaultValue 1500
|
|
25
|
+
*/
|
|
26
|
+
significantPauseThresholdMs?: number;
|
|
27
|
+
/**
|
|
28
|
+
* Silence duration after speech (ms) that triggers `turn_complete` with
|
|
29
|
+
* `reason: 'silence_timeout'`.
|
|
30
|
+
* @defaultValue 3000
|
|
31
|
+
*/
|
|
32
|
+
utteranceEndThresholdMs?: number;
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Purely acoustic endpoint detector.
|
|
36
|
+
*
|
|
37
|
+
* Delegates silence timing to a {@link SilenceDetector} instance. VAD
|
|
38
|
+
* `speech_end` events start the silence clock; `speech_start` events cancel
|
|
39
|
+
* any pending turn-complete emission. Transcript content is completely ignored.
|
|
40
|
+
*
|
|
41
|
+
* @example
|
|
42
|
+
* ```ts
|
|
43
|
+
* const detector = new AcousticEndpointDetector({ utteranceEndThresholdMs: 2000 });
|
|
44
|
+
* detector.on('turn_complete', (event) => console.log('Turn done:', event));
|
|
45
|
+
* detector.pushVadEvent({ type: 'speech_end', timestamp: Date.now() });
|
|
46
|
+
* ```
|
|
47
|
+
*/
|
|
48
|
+
export declare class AcousticEndpointDetector extends EventEmitter implements IEndpointDetector {
|
|
49
|
+
/** @inheritdoc */
|
|
50
|
+
readonly mode: "acoustic";
|
|
51
|
+
/** Underlying silence-duration tracker. */
|
|
52
|
+
private readonly silenceDetector;
|
|
53
|
+
/**
|
|
54
|
+
* Timestamp (ms) when the current speech segment began. Tracked so that
|
|
55
|
+
* `durationMs` in the emitted {@link TurnCompleteEvent} can be computed.
|
|
56
|
+
*/
|
|
57
|
+
private speechStartTimeMs;
|
|
58
|
+
/**
|
|
59
|
+
* Timestamp (ms) when the most recent `speech_end` VAD event was received.
|
|
60
|
+
* Used to calculate `durationMs` for the turn-complete event.
|
|
61
|
+
*/
|
|
62
|
+
private speechEndTimeMs;
|
|
63
|
+
/**
|
|
64
|
+
* Creates a new AcousticEndpointDetector.
|
|
65
|
+
*
|
|
66
|
+
* @param config - Optional silence-threshold overrides.
|
|
67
|
+
*/
|
|
68
|
+
constructor(config?: AcousticEndpointDetectorConfig);
|
|
69
|
+
/**
|
|
70
|
+
* Converts a {@link VadEvent} into the SilenceDetector's expected API calls.
|
|
71
|
+
*
|
|
72
|
+
* - `speech_start` — resets silence state and re-emits `'speech_start'` on self.
|
|
73
|
+
* - `speech_end` — starts the silence clock.
|
|
74
|
+
* - `silence` — treated as ongoing non-speech frames.
|
|
75
|
+
*
|
|
76
|
+
* @param event - Incoming VAD event.
|
|
77
|
+
*/
|
|
78
|
+
pushVadEvent(event: VadEvent): void;
|
|
79
|
+
/**
|
|
80
|
+
* No-op — this detector is purely acoustic and does not use transcript content.
|
|
81
|
+
*
|
|
82
|
+
* @param _event - Ignored transcript event.
|
|
83
|
+
*/
|
|
84
|
+
pushTranscript(_event: TranscriptEvent): void;
|
|
85
|
+
/**
|
|
86
|
+
* Resets all internal state and timers. Call at the start of each new turn.
|
|
87
|
+
*/
|
|
88
|
+
reset(): void;
|
|
89
|
+
}
|
|
90
|
+
//# sourceMappingURL=AcousticEndpointDetector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AcousticEndpointDetector.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/AcousticEndpointDetector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,KAAK,EACV,iBAAiB,EACjB,QAAQ,EACR,eAAe,EAEhB,MAAM,YAAY,CAAC;AAMpB;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;;;;OAKG;IACH,2BAA2B,CAAC,EAAE,MAAM,CAAC;IAErC;;;;OAIG;IACH,uBAAuB,CAAC,EAAE,MAAM,CAAC;CAClC;AAMD;;;;;;;;;;;;;GAaG;AACH,qBAAa,wBAAyB,SAAQ,YAAa,YAAW,iBAAiB;IACrF,kBAAkB;IAClB,SAAgB,IAAI,EAAG,UAAU,CAAU;IAE3C,2CAA2C;IAC3C,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAElD;;;OAGG;IACH,OAAO,CAAC,iBAAiB,CAAuB;IAEhD;;;OAGG;IACH,OAAO,CAAC,eAAe,CAAuB;IAI9C;;;;OAIG;gBACS,MAAM,GAAE,8BAAmC;IAgCvD;;;;;;;;OAQG;IACI,YAAY,CAAC,KAAK,EAAE,QAAQ,GAAG,IAAI;IAyB1C;;;;OAIG;IACI,cAAc,CAAC,MAAM,EAAE,eAAe,GAAG,IAAI;IAIpD;;OAEG;IACI,KAAK,IAAI,IAAI;CAKrB"}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module voice-pipeline/AcousticEndpointDetector
|
|
3
|
+
*
|
|
4
|
+
* Acoustic-only endpoint detector that wraps {@link SilenceDetector} to convert
|
|
5
|
+
* VAD events into turn-completion decisions. It ignores transcript content entirely
|
|
6
|
+
* and relies solely on the duration of post-speech silence to decide when the user
|
|
7
|
+
* has finished speaking.
|
|
8
|
+
*
|
|
9
|
+
* Emits:
|
|
10
|
+
* - `'turn_complete'` ({@link TurnCompleteEvent}) — silence exceeded the configured
|
|
11
|
+
* `utteranceEndThresholdMs` after the most recent `speech_end` VAD event.
|
|
12
|
+
* - `'speech_start'` (no payload) — re-emitted when a `speech_start` VAD event is received.
|
|
13
|
+
*/
|
|
14
|
+
import { EventEmitter } from 'node:events';
|
|
15
|
+
import { SilenceDetector } from '../core/audio/SilenceDetector.js';
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Implementation
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
/**
|
|
20
|
+
* Purely acoustic endpoint detector.
|
|
21
|
+
*
|
|
22
|
+
* Delegates silence timing to a {@link SilenceDetector} instance. VAD
|
|
23
|
+
* `speech_end` events start the silence clock; `speech_start` events cancel
|
|
24
|
+
* any pending turn-complete emission. Transcript content is completely ignored.
|
|
25
|
+
*
|
|
26
|
+
* @example
|
|
27
|
+
* ```ts
|
|
28
|
+
* const detector = new AcousticEndpointDetector({ utteranceEndThresholdMs: 2000 });
|
|
29
|
+
* detector.on('turn_complete', (event) => console.log('Turn done:', event));
|
|
30
|
+
* detector.pushVadEvent({ type: 'speech_end', timestamp: Date.now() });
|
|
31
|
+
* ```
|
|
32
|
+
*/
|
|
33
|
+
export class AcousticEndpointDetector extends EventEmitter {
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
/**
|
|
36
|
+
* Creates a new AcousticEndpointDetector.
|
|
37
|
+
*
|
|
38
|
+
* @param config - Optional silence-threshold overrides.
|
|
39
|
+
*/
|
|
40
|
+
constructor(config = {}) {
|
|
41
|
+
super();
|
|
42
|
+
/** @inheritdoc */
|
|
43
|
+
this.mode = 'acoustic';
|
|
44
|
+
/**
|
|
45
|
+
* Timestamp (ms) when the current speech segment began. Tracked so that
|
|
46
|
+
* `durationMs` in the emitted {@link TurnCompleteEvent} can be computed.
|
|
47
|
+
*/
|
|
48
|
+
this.speechStartTimeMs = null;
|
|
49
|
+
/**
|
|
50
|
+
* Timestamp (ms) when the most recent `speech_end` VAD event was received.
|
|
51
|
+
* Used to calculate `durationMs` for the turn-complete event.
|
|
52
|
+
*/
|
|
53
|
+
this.speechEndTimeMs = null;
|
|
54
|
+
const sdConfig = {
|
|
55
|
+
significantPauseThresholdMs: config.significantPauseThresholdMs ?? 1500,
|
|
56
|
+
utteranceEndThresholdMs: config.utteranceEndThresholdMs ?? 3000,
|
|
57
|
+
};
|
|
58
|
+
this.silenceDetector = new SilenceDetector(sdConfig);
|
|
59
|
+
// When SilenceDetector decides the utterance has ended, fire turn_complete.
|
|
60
|
+
this.silenceDetector.on('utterance_end_detected', (_silenceDurationMs) => {
|
|
61
|
+
const durationMs = this.speechStartTimeMs !== null && this.speechEndTimeMs !== null
|
|
62
|
+
? this.speechEndTimeMs - this.speechStartTimeMs
|
|
63
|
+
: 0;
|
|
64
|
+
const event = {
|
|
65
|
+
transcript: '', // Acoustic mode has no transcript access
|
|
66
|
+
confidence: 0,
|
|
67
|
+
durationMs,
|
|
68
|
+
reason: 'silence_timeout',
|
|
69
|
+
};
|
|
70
|
+
this.emit('turn_complete', event);
|
|
71
|
+
});
|
|
72
|
+
}
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
// IEndpointDetector
|
|
75
|
+
// ---------------------------------------------------------------------------
|
|
76
|
+
/**
|
|
77
|
+
* Converts a {@link VadEvent} into the SilenceDetector's expected API calls.
|
|
78
|
+
*
|
|
79
|
+
* - `speech_start` — resets silence state and re-emits `'speech_start'` on self.
|
|
80
|
+
* - `speech_end` — starts the silence clock.
|
|
81
|
+
* - `silence` — treated as ongoing non-speech frames.
|
|
82
|
+
*
|
|
83
|
+
* @param event - Incoming VAD event.
|
|
84
|
+
*/
|
|
85
|
+
pushVadEvent(event) {
|
|
86
|
+
// Minimal VADResult stub — SilenceDetector's public methods only use it as
|
|
87
|
+
// a pass-through parameter and don't inspect its contents.
|
|
88
|
+
const vadResultStub = { timestamp: event.timestamp };
|
|
89
|
+
switch (event.type) {
|
|
90
|
+
case 'speech_start':
|
|
91
|
+
this.speechStartTimeMs = event.timestamp;
|
|
92
|
+
this.speechEndTimeMs = null;
|
|
93
|
+
this.silenceDetector.handleSpeechStart(vadResultStub);
|
|
94
|
+
this.emit('speech_start');
|
|
95
|
+
break;
|
|
96
|
+
case 'speech_end':
|
|
97
|
+
this.speechEndTimeMs = event.timestamp;
|
|
98
|
+
this.silenceDetector.handleSpeechEnd(vadResultStub, 0);
|
|
99
|
+
break;
|
|
100
|
+
case 'silence':
|
|
101
|
+
// Periodic silence heartbeat — pass as a non-speech frame.
|
|
102
|
+
this.silenceDetector.handleNoVoiceActivity(vadResultStub);
|
|
103
|
+
break;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
/**
|
|
107
|
+
* No-op — this detector is purely acoustic and does not use transcript content.
|
|
108
|
+
*
|
|
109
|
+
* @param _event - Ignored transcript event.
|
|
110
|
+
*/
|
|
111
|
+
pushTranscript(_event) {
|
|
112
|
+
// Intentional no-op: acoustic mode ignores linguistic content.
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Resets all internal state and timers. Call at the start of each new turn.
|
|
116
|
+
*/
|
|
117
|
+
reset() {
|
|
118
|
+
this.speechStartTimeMs = null;
|
|
119
|
+
this.speechEndTimeMs = null;
|
|
120
|
+
this.silenceDetector.reset();
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
//# sourceMappingURL=AcousticEndpointDetector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AcousticEndpointDetector.js","sourceRoot":"","sources":["../../src/voice-pipeline/AcousticEndpointDetector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,eAAe,EAA8B,MAAM,kCAAkC,CAAC;AAgC/F,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E;;;;;;;;;;;;;GAaG;AACH,MAAM,OAAO,wBAAyB,SAAQ,YAAY;IAmBxD,8EAA8E;IAE9E;;;;OAIG;IACH,YAAY,SAAyC,EAAE;QACrD,KAAK,EAAE,CAAC;QA1BV,kBAAkB;QACF,SAAI,GAAG,UAAmB,CAAC;QAK3C;;;WAGG;QACK,sBAAiB,GAAkB,IAAI,CAAC;QAEhD;;;WAGG;QACK,oBAAe,GAAkB,IAAI,CAAC;QAY5C,MAAM,QAAQ,GAA0B;YACtC,2BAA2B,EAAE,MAAM,CAAC,2BAA2B,IAAI,IAAI;YACvE,uBAAuB,EAAE,MAAM,CAAC,uBAAuB,IAAI,IAAI;SAChE,CAAC;QAEF,IAAI,CAAC,eAAe,GAAG,IAAI,eAAe,CAAC,QAAQ,CAAC,CAAC;QAErD,4EAA4E;QAC5E,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC,wBAAwB,EAAE,CAAC,kBAA0B,EAAE,EAAE;YAC/E,MAAM,UAAU,GACd,IAAI,CAAC,iBAAiB,KAAK,IAAI,IAAI,IAAI,CAAC,eAAe,KAAK,IAAI;gBAC9D,CAAC,CAAC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,iBAAiB;gBAC/C,CAAC,CAAC,CAAC,CAAC;YAER,MAAM,KAAK,GAAsB;gBAC/B,UAAU,EAAE,EAAE,EAAI,yCAAyC;gBAC3D,UAAU,EAAE,CAAC;gBACb,UAAU;gBACV,MAAM,EAAE,iBAAiB;aAC1B,CAAC;YAEF,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,KAAK,CAAC,CAAC;QACpC,CAAC,CAAC,CAAC;IACL,CAAC;IAED,8EAA8E;IAC9E,oBAAoB;IACpB,8EAA8E;IAE9E;;;;;;;;OAQG;IACI,YAAY,CAAC,KAAe;QACjC,2EAA2E;QAC3E,2DAA2D;QAC3D,MAAM,aAAa,GAAG,EAAE,SAAS,EAAE,KAAK,CAAC,SAAS,EAAW,CAAC;QAE9D,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,cAAc;gBACjB,IAAI,CAAC,iBAAiB,GAAG,KAAK,CAAC,SAAS,CAAC;gBACzC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;gBAC5B,IAAI,CAAC,eAAe,CAAC,iBAAiB,CAAC,aAAa,CAAC,CAAC;gBACtD,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAC1B,MAAM;YAER,KAAK,YAAY;gBACf,IAAI,CAAC,eAAe,GAAG,KAAK,CAAC,SAAS,CAAC;gBACvC,IAAI,CAAC,eAAe,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;gBACvD,MAAM;YAER,KAAK,SAAS;gBACZ,2DAA2D;gBAC3D,IAAI,CAAC,eAAe,CAAC,qBAAqB,CAAC,aAAa,CAAC,CAAC;gBAC1D,MAAM;QACV,CAAC;IACH,CAAC;IAED;;;;OAIG;IACI,cAAc,CAAC,MAAuB;QAC3C,+DAA+D;IACjE,CAAC;IAED;;OAEG;IACI,KAAK;QACV,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC;QAC9B,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC5B,IAAI,CAAC,eAAe,
CAAC,KAAK,EAAE,CAAC;IAC/B,CAAC;CACF"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module voice-pipeline/HardCutBargeinHandler
|
|
3
|
+
*
|
|
4
|
+
* Implements a hard-cut barge-in policy: when the user speaks over TTS output
|
|
5
|
+
* for at least `minSpeechMs` milliseconds, playback is stopped immediately with
|
|
6
|
+
* no fade-out. Short detections below the threshold are treated as accidental
|
|
7
|
+
* noise and ignored.
|
|
8
|
+
*/
|
|
9
|
+
import type { BargeinAction, BargeinContext, IBargeinHandler } from './types.js';
|
|
10
|
+
/**
|
|
11
|
+
* Construction options for {@link HardCutBargeinHandler}.
|
|
12
|
+
*/
|
|
13
|
+
export interface HardCutBargeinHandlerOptions {
|
|
14
|
+
/**
|
|
15
|
+
* Minimum confirmed speech duration (in milliseconds) required before a
|
|
16
|
+
* barge-in is treated as intentional. Detections shorter than this value are
|
|
17
|
+
* returned as `{ type: 'ignore' }` to avoid reacting to background noise.
|
|
18
|
+
*
|
|
19
|
+
* @defaultValue 300
|
|
20
|
+
*/
|
|
21
|
+
minSpeechMs?: number;
|
|
22
|
+
}
|
|
23
|
+
/**
 * Hard-cut barge-in policy.
 *
 * While a TTS stream is playing, user speech that lasts at least
 * `minSpeechMs` cancels playback immediately. Shorter detections are treated
 * as noise and playback continues uninterrupted.
 *
 * @example
 * ```ts
 * const handler = new HardCutBargeinHandler({ minSpeechMs: 250 });
 * const action = handler.handleBargein({ speechDurationMs: 400, ...restOfContext });
 * // action.type === 'cancel'
 * ```
 */
export declare class HardCutBargeinHandler implements IBargeinHandler {
    /**
     * Interruption strategy tag for this handler; always `'hard-cut'`.
     */
    readonly mode: "hard-cut";
    /**
     * Speech duration, in milliseconds, at or above which an interruption is
     * considered intentional.
     */
    private readonly minSpeechMs;
    /**
     * Create a {@link HardCutBargeinHandler}.
     *
     * @param options - Optional configuration. Defaults to `{ minSpeechMs: 300 }`.
     */
    constructor(options?: HardCutBargeinHandlerOptions);
    /**
     * Inspect the barge-in context and pick the action the pipeline should take.
     *
     * - `context.speechDurationMs >= minSpeechMs`: returns
     *   `{ type: 'cancel', injectMarker: '[interrupted]' }` so TTS halts at once.
     * - Anything shorter: returns `{ type: 'ignore' }` and playback continues.
     *
     * @param context - Snapshot of the barge-in state at the moment of detection.
     * @returns The pipeline action to execute.
     */
    handleBargein(context: BargeinContext): BargeinAction;
}
|
|
67
|
+
//# sourceMappingURL=HardCutBargeinHandler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HardCutBargeinHandler.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/HardCutBargeinHandler.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAEjF;;GAEG;AACH,MAAM,WAAW,4BAA4B;IAC3C;;;;;;OAMG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;;;;;;;GAcG;AACH,qBAAa,qBAAsB,YAAW,eAAe;IAC3D;;;OAGG;IACH,QAAQ,CAAC,IAAI,EAAG,UAAU,CAAU;IAEpC;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IAErC;;;;OAIG;gBACS,OAAO,GAAE,4BAAiC;IAItD;;;;;;;;;OASG;IACH,aAAa,CAAC,OAAO,EAAE,cAAc,GAAG,aAAa;CAMtD"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module voice-pipeline/HardCutBargeinHandler
|
|
3
|
+
*
|
|
4
|
+
* Implements a hard-cut barge-in policy: when the user speaks over TTS output
|
|
5
|
+
* for at least `minSpeechMs` milliseconds, playback is stopped immediately with
|
|
6
|
+
* no fade-out. Short detections below the threshold are treated as accidental
|
|
7
|
+
* noise and ignored.
|
|
8
|
+
*/
|
|
9
|
+
/**
|
|
10
|
+
* Barge-in handler that applies a hard-cut strategy.
|
|
11
|
+
*
|
|
12
|
+
* When the user speaks over an active TTS stream, this handler immediately
|
|
13
|
+
* cancels playback if the detected speech exceeds `minSpeechMs`. Below that
|
|
14
|
+
* threshold the interruption is considered noise and playback continues
|
|
15
|
+
* uninterrupted.
|
|
16
|
+
*
|
|
17
|
+
* @example
|
|
18
|
+
* ```ts
|
|
19
|
+
* const handler = new HardCutBargeinHandler({ minSpeechMs: 250 });
|
|
20
|
+
* const action = handler.handleBargein({ speechDurationMs: 400, ... });
|
|
21
|
+
* // action.type === 'cancel'
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
export class HardCutBargeinHandler {
|
|
25
|
+
/**
|
|
26
|
+
* Constructs a new {@link HardCutBargeinHandler}.
|
|
27
|
+
*
|
|
28
|
+
* @param options - Optional configuration. Defaults to `{ minSpeechMs: 300 }`.
|
|
29
|
+
*/
|
|
30
|
+
constructor(options = {}) {
|
|
31
|
+
/**
|
|
32
|
+
* The interruption strategy implemented by this handler.
|
|
33
|
+
* Always `'hard-cut'`.
|
|
34
|
+
*/
|
|
35
|
+
this.mode = 'hard-cut';
|
|
36
|
+
this.minSpeechMs = options.minSpeechMs ?? 300;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Evaluate the barge-in context and return the action the pipeline should take.
|
|
40
|
+
*
|
|
41
|
+
* - If `context.speechDurationMs >= minSpeechMs`, returns
|
|
42
|
+
* `{ type: 'cancel', injectMarker: '[interrupted]' }` to immediately halt TTS.
|
|
43
|
+
* - Otherwise returns `{ type: 'ignore' }` to continue playback.
|
|
44
|
+
*
|
|
45
|
+
* @param context - Snapshot of the barge-in state at the moment of detection.
|
|
46
|
+
* @returns The pipeline action to execute.
|
|
47
|
+
*/
|
|
48
|
+
handleBargein(context) {
|
|
49
|
+
if (context.speechDurationMs >= this.minSpeechMs) {
|
|
50
|
+
return { type: 'cancel', injectMarker: '[interrupted]' };
|
|
51
|
+
}
|
|
52
|
+
return { type: 'ignore' };
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
//# sourceMappingURL=HardCutBargeinHandler.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HardCutBargeinHandler.js","sourceRoot":"","sources":["../../src/voice-pipeline/HardCutBargeinHandler.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAkBH;;;;;;;;;;;;;;GAcG;AACH,MAAM,OAAO,qBAAqB;IAahC;;;;OAIG;IACH,YAAY,UAAwC,EAAE;QAjBtD;;;WAGG;QACM,SAAI,GAAG,UAAmB,CAAC;QAclC,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,GAAG,CAAC;IAChD,CAAC;IAED;;;;;;;;;OASG;IACH,aAAa,CAAC,OAAuB;QACnC,IAAI,OAAO,CAAC,gBAAgB,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACjD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,eAAe,EAAE,CAAC;QAC3D,CAAC;QACD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;IAC5B,CAAC;CACF"}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module voice-pipeline/HeuristicEndpointDetector
|
|
3
|
+
*
|
|
4
|
+
* A lightweight, rule-based endpoint detector that combines terminal punctuation
|
|
5
|
+
* analysis with a configurable silence timeout to determine when the user has
|
|
6
|
+
* finished speaking. Suitable for low-latency deployments where an LLM-based
|
|
7
|
+
* semantic detector would add unacceptable round-trip overhead.
|
|
8
|
+
*
|
|
9
|
+
* Detection strategy:
|
|
10
|
+
* 1. On `speech_end`, if the accumulated final transcript ends with `.`, `?`, or `!`,
|
|
11
|
+
* fire `turn_complete` immediately with reason `'punctuation'`.
|
|
12
|
+
* 2. Otherwise, start a silence timer (default 1 500 ms). If speech does not
|
|
13
|
+
* resume before the timer fires, emit `turn_complete` with reason `'silence_timeout'`.
|
|
14
|
+
* 3. Backchannel phrases (e.g. "uh huh", "yeah") are recognised, suppressed from
|
|
15
|
+
* accumulation, and re-emitted as `'backchannel_detected'` events so the
|
|
16
|
+
* pipeline can decide whether to suppress an agent response.
|
|
17
|
+
*/
|
|
18
|
+
import { EventEmitter } from 'node:events';
|
|
19
|
+
import type { IEndpointDetector, TranscriptEvent, VadEvent } from './types.js';
|
|
20
|
+
/**
 * Options accepted by the {@link HeuristicEndpointDetector} constructor.
 */
export interface HeuristicEndpointDetectorOptions {
    /**
     * Delay, in milliseconds, between `speech_end` and the `turn_complete`
     * emission when the transcript does not end in terminal punctuation.
     * @defaultValue 1500
     */
    silenceTimeoutMs?: number;
}
|
|
31
|
+
/**
 * Rule-based endpoint detector: decides that the user's turn is over from
 * terminal punctuation in the transcript, or failing that, from a silence
 * timeout.
 *
 * Events emitted:
 * - `'turn_complete'` (`TurnCompleteEvent` payload) — the user turn has ended.
 * - `'backchannel_detected'` (`{ text: string }`) — a backchannel phrase was
 *   recognised; its text is not accumulated for this utterance.
 *
 * @example
 * ```typescript
 * const detector = new HeuristicEndpointDetector({ silenceTimeoutMs: 1000 });
 * detector.on('turn_complete', (event) => console.log('Turn done:', event));
 * detector.pushTranscript({ text: 'Hello there.', isFinal: true, confidence: 0.95, words: [] });
 * detector.pushVadEvent({ type: 'speech_end', timestamp: Date.now(), source: 'vad' });
 * // → 'turn_complete' fires immediately with reason 'punctuation'
 * ```
 */
export declare class HeuristicEndpointDetector extends EventEmitter implements IEndpointDetector {
    /**
     * Detection strategy label, declared with the type of
     * {@link IEndpointDetector.mode}. The upstream notes describe it as
     * `'hybrid'`; the concrete value is not visible in this declaration.
     * Callers that must single out heuristic detectors can use `instanceof`.
     */
    readonly mode: IEndpointDetector['mode'];
    /** Silence timeout in milliseconds, after defaults are applied. */
    private readonly silenceTimeoutMs;
    /** Most recent final transcript text gathered for the current turn. */
    private accumulatedText;
    /** `true` while the VAD reports that speech is in progress. */
    private speechActive;
    /** Pending silence-timeout handle; `null` when no timer is running. */
    private silenceTimer;
    /** Wall-clock time (ms) at which the current turn's speech began. */
    private turnStartMs;
    /** Confidence reported with the latest final transcript. */
    private lastConfidence;
    /**
     * Build a {@link HeuristicEndpointDetector}.
     *
     * @param options — Optional configuration overrides.
     */
    constructor(options?: HeuristicEndpointDetectorOptions);
    /**
     * Feed a transcript event from the upstream STT session.
     *
     * Interim results are ignored because they arrive frequently and their
     * text is unstable; only events with `isFinal: true` change internal state.
     *
     * When the final text is a recognised backchannel phrase, the detector
     * emits `'backchannel_detected'` and returns without accumulating it, so a
     * following `speech_end` does not produce `turn_complete`.
     *
     * @param transcript — Transcript event from the STT session.
     */
    pushTranscript(transcript: TranscriptEvent): void;
    /**
     * Feed a VAD (voice activity detection) event.
     *
     * - `speech_start`: the turn becomes active and any pending silence timer
     *   is cancelled, since the user resumed before the timeout elapsed.
     * - `speech_end`: when accumulated text exists, either `turn_complete`
     *   fires immediately (punctuation) or the silence timer is started.
     * - `silence`: heartbeat events are ignored; the timeout logic is driven
     *   by explicit `speech_end` events only.
     *
     * @param event — VAD transition event.
     */
    pushVadEvent(event: VadEvent): void;
    /**
     * Clear all internal state and cancel pending timers so the detector is
     * ready for the next user turn. The pipeline should call this after each
     * `turn_complete` event, before audio for the next turn starts arriving.
     */
    reset(): void;
    /**
     * Emit `turn_complete` carrying the accumulated transcript, then reset
     * internal state for the next turn.
     *
     * @param reason — The semantic reason driving this completion.
     * @param speechEndTimestamp — Unix epoch ms timestamp of the `speech_end`
     *   event, used to compute `durationMs`.
     */
    private _emitTurnComplete;
    /**
     * Arm the silence-timeout timer. `turn_complete` fires if the user does
     * not resume speaking within {@link silenceTimeoutMs} ms.
     *
     * @param speechEndTimestamp — Timestamp passed through to `_emitTurnComplete`.
     */
    private _startSilenceTimer;
    /**
     * Cancel a pending silence timer; has no other side effects.
     */
    private _clearSilenceTimer;
}
|
|
128
|
+
//# sourceMappingURL=HeuristicEndpointDetector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HeuristicEndpointDetector.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/HeuristicEndpointDetector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,KAAK,EACV,iBAAiB,EACjB,eAAe,EACf,QAAQ,EAET,MAAM,YAAY,CAAC;AAwCpB;;GAEG;AACH,MAAM,WAAW,gCAAgC;IAC/C;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAMD;;;;;;;;;;;;;;;;;GAiBG;AACH,qBAAa,yBACX,SAAQ,YACR,YAAW,iBAAiB;IAE5B;;;;OAIG;IACH,QAAQ,CAAC,IAAI,EAAE,iBAAiB,CAAC,MAAM,CAAC,CAAe;IAEvD,gDAAgD;IAChD,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAS;IAE1C,yEAAyE;IACzE,OAAO,CAAC,eAAe,CAAM;IAE7B,uDAAuD;IACvD,OAAO,CAAC,YAAY,CAAS;IAE7B,yEAAyE;IACzE,OAAO,CAAC,YAAY,CAA8C;IAElE,wEAAwE;IACxE,OAAO,CAAC,WAAW,CAAuB;IAE1C,sDAAsD;IACtD,OAAO,CAAC,cAAc,CAAK;IAM3B;;;;OAIG;gBACS,OAAO,GAAE,gCAAqC;IAS1D;;;;;;;;;;;;OAYG;IACH,cAAc,CAAC,UAAU,EAAE,eAAe,GAAG,IAAI;IAwBjD;;;;;;;;;;;OAWG;IACH,YAAY,CAAC,KAAK,EAAE,QAAQ,GAAG,IAAI;IAyCnC;;;;OAIG;IACH,KAAK,IAAI,IAAI;IAYb;;;;;;;OAOG;IACH,OAAO,CAAC,iBAAiB;IAoBzB;;;;;OAKG;IACH,OAAO,CAAC,kBAAkB;IAQ1B;;OAEG;IACH,OAAO,CAAC,kBAAkB;CAM3B"}
|