@absolutejs/voice 0.0.22-beta.612 → 0.0.22-beta.613
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/bargeInDetector.d.ts +8 -0
- package/dist/index.js +13 -6
- package/dist/testing/index.js +7 -1
- package/package.json +1 -1
|
@@ -15,6 +15,14 @@ export type VoiceBargeInVerdict = {
|
|
|
15
15
|
shouldCancel: boolean;
|
|
16
16
|
/** Diagnostic label, surfaced on the barge_in / barge_in_suppressed trace. */
|
|
17
17
|
reason?: string;
|
|
18
|
+
/**
|
|
19
|
+
* The acoustic measurements the decision used, surfaced on the trace for
|
|
20
|
+
* tuning the thresholds against real audio. Omitted when no audio was judged.
|
|
21
|
+
*/
|
|
22
|
+
metrics?: {
|
|
23
|
+
voicedMs: number;
|
|
24
|
+
rms: number;
|
|
25
|
+
};
|
|
18
26
|
};
|
|
19
27
|
export type VoiceBargeInDetector = {
|
|
20
28
|
evaluate: (input: VoiceBargeInInput) => Promise<VoiceBargeInVerdict> | VoiceBargeInVerdict;
|
package/dist/index.js
CHANGED
|
@@ -5287,12 +5287,17 @@ var createVoiceSession = (options) => {
|
|
|
5287
5287
|
partialText: triggeringText,
|
|
5288
5288
|
wordCount,
|
|
5289
5289
|
...getTurnAudioForDetector()
|
|
5290
|
-
})) : {
|
|
5290
|
+
})) : {
|
|
5291
|
+
metrics: undefined,
|
|
5292
|
+
reason: undefined,
|
|
5293
|
+
shouldCancel: !isBackchannelByText
|
|
5294
|
+
};
|
|
5291
5295
|
const reason = verdict.reason ?? (verdict.shouldCancel ? "stt_partial" : "backchannel");
|
|
5292
5296
|
if (verdict.shouldCancel) {
|
|
5293
5297
|
backchannelSuppressedAt = null;
|
|
5294
5298
|
appendTurnLatencyStage({
|
|
5295
5299
|
metadata: {
|
|
5300
|
+
...verdict.metrics,
|
|
5296
5301
|
partial: triggeringText.slice(0, 200),
|
|
5297
5302
|
source: reason,
|
|
5298
5303
|
wordCount
|
|
@@ -5305,6 +5310,7 @@ var createVoiceSession = (options) => {
|
|
|
5305
5310
|
backchannelSuppressedAt = Date.now();
|
|
5306
5311
|
appendTurnLatencyStage({
|
|
5307
5312
|
metadata: {
|
|
5313
|
+
...verdict.metrics,
|
|
5308
5314
|
partial: triggeringText.slice(0, 200),
|
|
5309
5315
|
reason,
|
|
5310
5316
|
wordCount
|
|
@@ -42037,19 +42043,20 @@ var createAcousticBargeInDetector = (options = {}) => {
|
|
|
42037
42043
|
return input.isBackchannelByText ? { reason: "text_backchannel", shouldCancel: false } : { reason: "text_only", shouldCancel: true };
|
|
42038
42044
|
}
|
|
42039
42045
|
const { durationMs, rms } = measureTurnAudio(turnAudio, turnAudioFormat);
|
|
42046
|
+
const metrics = { rms, voicedMs: Math.round(durationMs) };
|
|
42040
42047
|
if (durationMs >= sustainedMs) {
|
|
42041
|
-
return { reason: "acoustic_sustained", shouldCancel: true };
|
|
42048
|
+
return { metrics, reason: "acoustic_sustained", shouldCancel: true };
|
|
42042
42049
|
}
|
|
42043
42050
|
if (input.isBackchannelByText) {
|
|
42044
|
-
return { reason: "acoustic_backchannel", shouldCancel: false };
|
|
42051
|
+
return { metrics, reason: "acoustic_backchannel", shouldCancel: false };
|
|
42045
42052
|
}
|
|
42046
42053
|
if (rms >= emphaticRms) {
|
|
42047
|
-
return { reason: "acoustic_emphatic", shouldCancel: true };
|
|
42054
|
+
return { metrics, reason: "acoustic_emphatic", shouldCancel: true };
|
|
42048
42055
|
}
|
|
42049
42056
|
if (rms <= noiseFloorRms) {
|
|
42050
|
-
return { reason: "acoustic_noise_floor", shouldCancel: false };
|
|
42057
|
+
return { metrics, reason: "acoustic_noise_floor", shouldCancel: false };
|
|
42051
42058
|
}
|
|
42052
|
-
return { reason: "acoustic_ambiguous", shouldCancel: true };
|
|
42059
|
+
return { metrics, reason: "acoustic_ambiguous", shouldCancel: true };
|
|
42053
42060
|
}
|
|
42054
42061
|
};
|
|
42055
42062
|
};
|
package/dist/testing/index.js
CHANGED
|
@@ -7514,12 +7514,17 @@ var createVoiceSession = (options) => {
|
|
|
7514
7514
|
partialText: triggeringText,
|
|
7515
7515
|
wordCount,
|
|
7516
7516
|
...getTurnAudioForDetector()
|
|
7517
|
-
})) : {
|
|
7517
|
+
})) : {
|
|
7518
|
+
metrics: undefined,
|
|
7519
|
+
reason: undefined,
|
|
7520
|
+
shouldCancel: !isBackchannelByText
|
|
7521
|
+
};
|
|
7518
7522
|
const reason = verdict.reason ?? (verdict.shouldCancel ? "stt_partial" : "backchannel");
|
|
7519
7523
|
if (verdict.shouldCancel) {
|
|
7520
7524
|
backchannelSuppressedAt = null;
|
|
7521
7525
|
appendTurnLatencyStage({
|
|
7522
7526
|
metadata: {
|
|
7527
|
+
...verdict.metrics,
|
|
7523
7528
|
partial: triggeringText.slice(0, 200),
|
|
7524
7529
|
source: reason,
|
|
7525
7530
|
wordCount
|
|
@@ -7532,6 +7537,7 @@ var createVoiceSession = (options) => {
|
|
|
7532
7537
|
backchannelSuppressedAt = Date.now();
|
|
7533
7538
|
appendTurnLatencyStage({
|
|
7534
7539
|
metadata: {
|
|
7540
|
+
...verdict.metrics,
|
|
7535
7541
|
partial: triggeringText.slice(0, 200),
|
|
7536
7542
|
reason,
|
|
7537
7543
|
wordCount
|