@absolutejs/voice 0.0.18 → 0.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/testing/fixtures.d.ts +1 -0
- package/dist/testing/index.d.ts +2 -0
- package/dist/testing/index.js +759 -0
- package/dist/testing/resilience.d.ts +20 -0
- package/dist/testing/sessionBenchmark.d.ts +61 -0
- package/fixtures/README.md +48 -0
- package/fixtures/manifest.json +109 -0
- package/fixtures/pcm/multiturn-three-mixed.pcm +0 -0
- package/fixtures/pcm/multiturn-two-clean.pcm +0 -0
- package/fixtures/pcm/stella-bulgaria-bulgarian20.pcm +0 -0
- package/fixtures/pcm/stella-ghana-english507.pcm +0 -0
- package/fixtures/pcm/stella-india-english37.pcm +0 -0
- package/fixtures/pcm/stella-jamaica-jamaican-creole-english1.pcm +0 -0
- package/fixtures/pcm/stella-liberia-liberian-pidgin-english2.pcm +0 -0
- package/fixtures/pcm/stella-pakistan-english519.pcm +0 -0
- package/fixtures/pcm/stella-sierra-leone-krio5.pcm +0 -0
- package/fixtures/pcm/stella-singapore-english655.pcm +0 -0
- package/package.json +8 -1
package/dist/testing/index.d.ts
CHANGED
package/dist/testing/index.js
CHANGED
|
@@ -434,9 +434,768 @@ var loadVoiceTestFixtures = async (fixtureDirectory) => {
|
|
|
434
434
|
};
|
|
435
435
|
}));
|
|
436
436
|
};
|
|
437
|
+
// src/store.ts
|
|
438
|
+
// Produces a new random UUID; used for turn ids and synthetic transcript ids.
var createId = () => {
  return crypto.randomUUID();
};
|
|
439
|
+
/**
 * Builds the initial record for a voice session: status "active", no committed
 * turns or transcripts, an empty in-progress turn and zero reconnect attempts.
 *
 * @param id - Identifier stored on the record.
 * @returns A fresh record whose `createdAt` is the current time.
 */
var createVoiceSessionRecord = (id) => {
  const emptyTurn = { finalText: "", partialText: "", transcripts: [] };
  const record = {
    committedTurnIds: [],
    createdAt: Date.now(),
    currentTurn: emptyTurn,
    id,
    reconnect: { attempts: 0 },
    status: "active",
    transcripts: [],
    turns: []
  };
  return record;
};
|
|
453
|
+
/**
 * Produces a brand-new session record for `id` that keeps only the `metadata`
 * of a previous record (if one is given).
 *
 * @param id - Identifier for the new record.
 * @param existing - Optional previous record whose `metadata` is carried over.
 * @returns The fresh record with a `metadata` property (possibly undefined).
 */
var resetVoiceSessionRecord = (id, existing) => {
  const fresh = createVoiceSessionRecord(id);
  fresh.metadata = existing?.metadata;
  return fresh;
};
|
|
457
|
+
/**
 * Projects a full session record onto the compact summary shape returned by
 * store listings.
 *
 * @param session - Session record to summarize.
 * @returns `{ createdAt, id, lastActivityAt, status, turnCount }`, where
 *   `turnCount` is the number of committed turns.
 */
var toVoiceSessionSummary = (session) => {
  const { createdAt, id, lastActivityAt, status, turns } = session;
  return { createdAt, id, lastActivityAt, status, turnCount: turns.length };
};
|
|
464
|
+
|
|
465
|
+
// src/memoryStore.ts
|
|
466
|
+
/**
 * Creates an in-memory session store backed by a Map. Every method is async so
 * the store can be used wherever a promise-returning store is expected.
 *
 * @returns An object with `get`, `getOrCreate`, `list`, `remove` and `set`.
 */
var createVoiceMemoryStore = () => {
  const records = new Map();
  // Sort key for listings: last activity, falling back to creation time.
  const activityOf = (summary) => summary.lastActivityAt ?? summary.createdAt;
  return {
    get: async (id) => records.get(id),
    getOrCreate: async (id) => {
      const known = records.get(id);
      if (known) {
        return known;
      }
      const created = createVoiceSessionRecord(id);
      records.set(id, created);
      return created;
    },
    // Summaries of every stored session, most recently active first.
    list: async () => {
      const summaries = [];
      for (const record of records.values()) {
        summaries.push(toVoiceSessionSummary(record));
      }
      return summaries.sort((first, second) => activityOf(second) - activityOf(first));
    },
    remove: async (id) => {
      records.delete(id);
    },
    set: async (id, value) => {
      records.set(id, value);
    }
  };
};
|
|
486
|
+
|
|
487
|
+
// src/logger.ts
|
|
488
|
+
// Shared do-nothing function used for every default logger method.
var noop = () => {};

/**
 * Creates a logger whose `debug`, `error`, `info` and `warn` methods do nothing.
 */
var createNoopLogger = () => ({
  debug: noop,
  error: noop,
  info: noop,
  warn: noop
});

/**
 * Fills in no-op methods for any level missing from a partial logger.
 *
 * Members that are explicitly `undefined` or `null` are ignored instead of
 * overwriting the no-op default, so the returned logger can always be called
 * for every level (e.g. `{ warn: cond ? fn : undefined }` is safe).
 *
 * @param logger - Optional partial logger; own enumerable members are copied.
 * @returns A logger object with callable `debug`, `error`, `info` and `warn`.
 */
var resolveLogger = (logger) => {
  const resolved = createNoopLogger();
  for (const [level, handler] of Object.entries(logger ?? {})) {
    if (handler !== undefined && handler !== null) {
      resolved[level] = handler;
    }
  }
  return resolved;
};
|
|
499
|
+
|
|
500
|
+
// src/session.ts
|
|
501
|
+
// Default reconnect window in milliseconds (30 seconds), used when
// options.reconnect.timeout is not provided.
var DEFAULT_RECONNECT_TIMEOUT = 30 * 1000;
|
|
502
|
+
// Default cap on reconnect attempts, used when options.reconnect.maxAttempts is not provided.
var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
|
|
503
|
+
// Normalizes any thrown value to an Error: Error instances pass through
// unchanged, everything else is stringified into a new Error's message.
var toError = (value) => {
  if (value instanceof Error) {
    return value;
  }
  return new Error(String(value));
};
|
|
504
|
+
// Returns a fresh in-progress turn: no final text, no partial text, no transcripts.
var createEmptyCurrentTurn = () => {
  const blank = { finalText: "", partialText: "", transcripts: [] };
  return blank;
};
|
|
509
|
+
// Shallow-copies a transcript so stored copies do not alias the caller's object.
var cloneTranscript = (transcript) => {
  const copy = { ...transcript };
  return copy;
};
|
|
510
|
+
/**
 * Replaces `session.turns` with a new array in which the turn matching
 * `turnId` carries the given assistant text and/or result. Fields missing
 * from `input` (null or undefined) keep the turn's existing value; all other
 * turns are kept as the same object references.
 *
 * @param session - Session record whose `turns` array is reassigned.
 * @param turnId - Id of the turn to update.
 * @param input - `{ assistantText?, result? }` values to apply.
 */
var setTurnResult = (session, turnId, input) => {
  const applyTo = (turn) => {
    if (turn.id !== turnId) {
      return turn;
    }
    return {
      ...turn,
      assistantText: input.assistantText ?? turn.assistantText,
      result: input.result ?? turn.result
    };
  };
  session.turns = session.turns.map(applyTo);
};
|
|
517
|
+
/**
 * Creates the controller for a single voice session. It wires together a
 * client socket, a speech-to-text (STT) adapter, a session store and the
 * route callbacks, and returns the session API (`connect`, `receiveAudio`,
 * `commitTurn`, `complete`, `fail`, `disconnect`, `close`, `snapshot`).
 *
 * Changes relative to the previous implementation:
 *  - Work started from STT events and from the silence timer is no longer left
 *    as an unobserved promise; a failure is reported through `logger.error`
 *    instead of surfacing as an unhandled rejection.
 *  - Concurrent `ensureAdapter` calls share one in-flight `stt.open`, so only a
 *    single STT session is opened.
 *
 * @param options - `{ context, id, logger, reconnect, route, socket, store,
 *   stt, turnDetection }`. Missing reconnect / turn-detection values fall back
 *   to the module defaults.
 * @returns The session API object.
 */
var createVoiceSession = (options) => {
  const logger = resolveLogger(options.logger);
  const reconnect = {
    maxAttempts: options.reconnect.maxAttempts ?? DEFAULT_MAX_RECONNECT_ATTEMPTS,
    strategy: options.reconnect.strategy ?? "resume-last-turn",
    timeout: options.reconnect.timeout ?? DEFAULT_RECONNECT_TIMEOUT
  };
  const turnDetection = {
    silenceMs: options.turnDetection.silenceMs ?? DEFAULT_SILENCE_MS,
    speechThreshold: options.turnDetection.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD
  };
  let socket = options.socket;
  let sttSession = null;
  // In-flight stt.open() shared by concurrent ensureAdapter() callers.
  let pendingAdapterOpen = null;
  let silenceTimer = null;
  let speechDetected = false;

  // Observes a promise nobody awaits (event handlers, timers) so that a
  // rejection is logged rather than becoming an unhandled rejection.
  const detach = (source, task) => {
    task.catch((error) => {
      logger.error("voice background task failed", {
        error: toError(error).message,
        sessionId: options.id,
        source
      });
    });
  };

  const clearSilenceTimer = () => {
    if (!silenceTimer) {
      return;
    }
    clearTimeout(silenceTimer);
    silenceTimer = null;
  };

  // Best-effort send: serialization and socket failures are logged, never thrown.
  const send = async (message) => {
    try {
      await Promise.resolve(socket.send(JSON.stringify(message)));
    } catch (error) {
      logger.warn("voice socket send failed", {
        error: toError(error).message,
        sessionId: options.id,
        type: message.type
      });
    }
  };

  const readSession = async () => options.store.getOrCreate(options.id);

  // Loads the session, applies `mutate` in place, persists it and returns it.
  const writeSession = async (mutate) => {
    const session = await options.store.getOrCreate(options.id);
    mutate(session);
    await options.store.set(options.id, session);
    return session;
  };

  // Closes the current STT session (if any); close failures are only logged.
  const closeAdapter = async (reason) => {
    if (!sttSession) {
      return;
    }
    const activeSession = sttSession;
    sttSession = null;
    try {
      await activeSession.close(reason);
    } catch (error) {
      logger.warn("voice stt close failed", {
        error: toError(error).message,
        sessionId: options.id
      });
    }
  };

  // Arms a single timer that commits the turn after `silenceMs`.
  const scheduleSilenceCommit = () => {
    if (silenceTimer) {
      return;
    }
    silenceTimer = setTimeout(() => {
      detach("silence-commit", api.commitTurn("silence"));
    }, turnDetection.silenceMs);
  };

  const handleError = async (event) => {
    await send({
      message: event.error.message,
      recoverable: event.recoverable,
      type: "error"
    });
    if (!event.recoverable) {
      await api.fail(event.error);
    }
  };

  const handleClose = async (event) => {
    if (event.recoverable === false) {
      await api.fail(new Error(event.reason ?? "Speech-to-text session closed"));
    }
  };

  const handlePartial = async (transcript) => {
    await writeSession((session) => {
      session.currentTurn.lastAudioAt = Date.now();
      session.currentTurn.partialText = buildTurnText(session.currentTurn.transcripts, transcript.text);
      session.lastActivityAt = Date.now();
      session.status = "active";
    });
    await send({
      transcript,
      type: "partial"
    });
  };

  const handleFinal = async (transcript) => {
    await writeSession((session) => {
      // A final transcript id already recorded for this turn is not stored again.
      const alreadyPresent = session.currentTurn.transcripts.some((existing) => existing.id === transcript.id);
      if (!alreadyPresent) {
        session.currentTurn.transcripts = [
          ...session.currentTurn.transcripts,
          cloneTranscript(transcript)
        ];
        session.transcripts = [
          ...session.transcripts,
          cloneTranscript(transcript)
        ];
      }
      session.currentTurn.finalText = buildTurnText(session.currentTurn.transcripts, session.currentTurn.partialText);
      session.currentTurn.lastAudioAt = Date.now();
      session.lastActivityAt = Date.now();
      session.status = "active";
    });
    await send({
      transcript,
      type: "final"
    });
  };

  // Opens the STT session and subscribes to its events.
  const openAdapter = async () => {
    const opened = await options.stt.open({
      format: {
        channels: 1,
        container: "raw",
        encoding: "pcm_s16le",
        sampleRateHz: 16000
      },
      sessionId: options.id
    });
    sttSession = opened;
    opened.on("partial", ({ transcript }) => {
      detach("partial", handlePartial(transcript));
    });
    opened.on("final", ({ transcript }) => {
      detach("final", handleFinal(transcript));
    });
    opened.on("endOfTurn", ({ reason }) => {
      clearSilenceTimer();
      detach("endOfTurn", api.commitTurn(reason));
    });
    opened.on("error", (event) => {
      detach("error", handleError(event));
    });
    opened.on("close", (event) => {
      detach("close", handleClose(event));
    });
    return opened;
  };

  // Returns the open STT session, opening one if needed. Callers that arrive
  // while an open is in flight wait on that same open instead of starting another.
  const ensureAdapter = async () => {
    if (sttSession) {
      return sttSession;
    }
    if (!pendingAdapterOpen) {
      pendingAdapterOpen = openAdapter().finally(() => {
        pendingAdapterOpen = null;
      });
    }
    return pendingAdapterOpen;
  };

  // Hands a committed turn to the route and applies whatever it returns.
  const completeTurn = async (session, turn) => {
    const output = await options.route.onTurn({
      api,
      context: options.context,
      session,
      turn
    });
    if (output?.assistantText) {
      await writeSession((currentSession) => {
        setTurnResult(currentSession, turn.id, {
          assistantText: output.assistantText
        });
      });
      await send({
        text: output.assistantText,
        turnId: turn.id,
        type: "assistant"
      });
    }
    if (output?.result !== undefined) {
      await writeSession((currentSession) => {
        setTurnResult(currentSession, turn.id, {
          result: output.result
        });
      });
    }
    if (output?.complete) {
      await api.complete(output.result);
    }
  };

  const api = {
    id: options.id,
    // Stops timers, closes the STT session and closes the socket with code 1000.
    close: async (reason) => {
      clearSilenceTimer();
      await closeAdapter(reason);
      await Promise.resolve(socket.close(1000, reason));
    },
    // Turns the accumulated transcript text into a committed turn and runs the
    // route's onTurn. Does nothing for finished sessions or when there is no text.
    commitTurn: async (reason = "manual") => {
      clearSilenceTimer();
      const session = await readSession();
      if (session.status === "completed" || session.status === "failed") {
        return;
      }
      const text = buildTurnText(session.currentTurn.transcripts, session.currentTurn.partialText);
      if (!text) {
        return;
      }
      const turn = {
        committedAt: Date.now(),
        id: createId(),
        text,
        // With no final transcripts, the turn carries one synthetic non-final transcript.
        transcripts: session.currentTurn.transcripts.length > 0 ? session.currentTurn.transcripts.map(cloneTranscript) : [
          {
            id: createId(),
            isFinal: false,
            text
          }
        ]
      };
      const updatedSession = await writeSession((currentSession) => {
        currentSession.committedTurnIds = [
          ...currentSession.committedTurnIds,
          turn.id
        ];
        currentSession.currentTurn = createEmptyCurrentTurn();
        currentSession.lastActivityAt = Date.now();
        currentSession.status = "active";
        currentSession.turns = [...currentSession.turns, turn];
      });
      speechDetected = false;
      logger.info("voice turn committed", {
        reason,
        sessionId: options.id,
        turnId: turn.id
      });
      await send({
        turn,
        type: "turn"
      });
      await completeTurn(updatedSession, turn);
    },
    // Marks the session completed (optionally storing `result` on the last
    // turn), notifies the client, closes the STT session and calls onComplete.
    complete: async (result) => {
      clearSilenceTimer();
      const session = await writeSession((currentSession) => {
        if (currentSession.status === "completed") {
          return;
        }
        currentSession.lastActivityAt = Date.now();
        currentSession.status = "completed";
        if (result !== undefined && currentSession.turns.length > 0) {
          const lastTurn = currentSession.turns.at(-1);
          if (lastTurn) {
            setTurnResult(currentSession, lastTurn.id, {
              result
            });
          }
        }
      });
      await send({
        sessionId: options.id,
        type: "complete"
      });
      await closeAdapter("complete");
      speechDetected = false;
      await options.route.onComplete({
        api,
        context: options.context,
        session
      });
    },
    // Attaches a (new) socket. A session in "reconnecting" state is resumed,
    // restarted or failed according to the reconnect strategy and limits.
    connect: async (nextSocket) => {
      socket = nextSocket;
      const existingSession = await options.store.get(options.id);
      let session = existingSession ?? createVoiceSessionRecord(options.id);
      let shouldFireOnSession = !existingSession;
      if (existingSession?.status === "reconnecting") {
        const nextAttempts = existingSession.reconnect.attempts + 1;
        const reconnectExpired = existingSession.reconnect.lastDisconnectAt !== undefined && Date.now() - existingSession.reconnect.lastDisconnectAt > reconnect.timeout;
        const tooManyAttempts = nextAttempts > reconnect.maxAttempts;
        if (reconnect.strategy === "fail" && (reconnectExpired || tooManyAttempts)) {
          await api.fail(new Error("Voice session reconnect policy exhausted"));
          return;
        }
        if (reconnect.strategy === "restart" && (reconnectExpired || tooManyAttempts)) {
          session = resetVoiceSessionRecord(options.id, existingSession);
          shouldFireOnSession = true;
        } else {
          session = {
            ...existingSession,
            reconnect: {
              ...existingSession.reconnect,
              attempts: nextAttempts
            },
            status: "active"
          };
        }
      }
      await options.store.set(options.id, session);
      await send({
        sessionId: options.id,
        status: session.status,
        type: "session"
      });
      if (shouldFireOnSession) {
        await options.route.onSession?.({
          api,
          context: options.context,
          session
        });
      }
      if (session.status === "completed") {
        await send({
          sessionId: options.id,
          type: "complete"
        });
        return;
      }
      await ensureAdapter();
    },
    // Handles a socket disconnect: fails the session under the "fail"
    // strategy, otherwise marks an unfinished session as "reconnecting".
    disconnect: async (event) => {
      clearSilenceTimer();
      await closeAdapter(event?.reason);
      if (reconnect.strategy === "fail") {
        await api.fail(new Error(event?.reason ?? "Voice socket disconnected"));
        return;
      }
      await writeSession((session) => {
        if (session.status === "completed" || session.status === "failed") {
          return;
        }
        session.lastActivityAt = Date.now();
        session.reconnect.lastDisconnectAt = Date.now();
        session.status = "reconnecting";
      });
      speechDetected = false;
    },
    // Marks the session failed, reports a non-recoverable error to the client,
    // closes the STT session and calls the optional onError route handler.
    fail: async (error) => {
      clearSilenceTimer();
      const session = await writeSession((currentSession) => {
        currentSession.lastActivityAt = Date.now();
        currentSession.status = "failed";
      });
      const resolvedError = toError(error);
      await send({
        message: resolvedError.message,
        recoverable: false,
        type: "error"
      });
      await closeAdapter("failed");
      speechDetected = false;
      await options.route.onError?.({
        api,
        context: options.context,
        error: resolvedError,
        session,
        sessionId: options.id
      });
    },
    // Forwards an audio chunk to the STT adapter and drives silence detection:
    // a chunk at or above the speech threshold cancels the silence timer; a
    // quieter chunk after speech arms it once the turn has some text.
    receiveAudio: async (audio) => {
      const session = await readSession();
      if (session.status === "completed" || session.status === "failed") {
        return;
      }
      const adapter = await ensureAdapter();
      const audioLevel = measureAudioLevel(audio);
      await writeSession((currentSession) => {
        currentSession.currentTurn.lastAudioAt = Date.now();
        currentSession.lastActivityAt = Date.now();
        currentSession.status = "active";
      });
      if (audioLevel >= turnDetection.speechThreshold) {
        speechDetected = true;
        clearSilenceTimer();
      } else if (speechDetected) {
        const currentSession = await readSession();
        const hasTurnText = Boolean(buildTurnText(currentSession.currentTurn.transcripts, currentSession.currentTurn.partialText));
        if (hasTurnText) {
          scheduleSilenceCommit();
        }
      }
      await adapter.send(audio);
    },
    snapshot: async () => readSession()
  };
  return api;
};
|
|
887
|
+
|
|
888
|
+
// src/testing/resilience.ts
|
|
889
|
+
// Rounds `value` to `digits` decimal places (4 by default).
var roundMetric2 = (value, digits = 4) => {
  const scale = Math.pow(10, digits);
  const scaled = Math.round(value * scale);
  return scaled / scale;
};
|
|
893
|
+
// Stand-in socket for the resilience scenarios: `close` and `send` are async no-ops.
var createMockSocket = () => {
  const socket = {
    async close() {},
    async send() {}
  };
  return socket;
};
|
|
897
|
+
// Builds a 160-sample Int16Array in which every sample equals `sample`.
var createSpeechChunk = (sample) => {
  const chunk = new Int16Array(160);
  chunk.fill(sample);
  return chunk;
};
|
|
898
|
+
/**
 * Creates a scriptable fake STT adapter for tests.
 *
 * @returns `{ adapter, session }`. `adapter.open()` always returns the same
 *   `session`. `session.on(event, handler)` subscribes and returns an
 *   unsubscribe function; `session.emit(event, payload)` invokes the handlers
 *   for that event one after another, awaiting each; `close` and `send` are
 *   async no-ops.
 */
var createFakeAdapter = () => {
  const listeners = {
    close: [],
    endOfTurn: [],
    error: [],
    final: [],
    partial: []
  };
  const session = {
    close: async () => {},
    emit: async (event, payload) => {
      // Iterate over a snapshot: a handler that unsubscribes during dispatch
      // would otherwise shift the live array and cause the next handler to be
      // skipped.
      for (const listener of [...listeners[event]]) {
        await listener(payload);
      }
    },
    on: (event, handler) => {
      listeners[event].push(handler);
      return () => {
        const index = listeners[event].indexOf(handler);
        if (index >= 0) {
          listeners[event].splice(index, 1);
        }
      };
    },
    send: async (_audio) => {}
  };
  return {
    adapter: {
      kind: "stt",
      open: () => session
    },
    session
  };
};
|
|
932
|
+
/**
 * Runs one resilience scenario against a fresh in-memory store, fake STT
 * adapter and voice session, then reports whether any turn text was delivered
 * more than once.
 *
 * The wait after each commit uses a `setTimeout`-based pause instead of the
 * Bun-only `Bun.sleep`, so the helper also runs on runtimes without a `Bun`
 * global.
 *
 * @param id - Scenario id; also used as the session id and transcript id prefix.
 * @param title - Human-readable scenario title copied into the result.
 * @param run - Scenario body; receives `{ adapter, commit, connectNewSocket,
 *   disconnect, turns }`.
 * @returns `{ actualTurns, id, passes, replayedTurns, title }`, where
 *   `replayedTurns` counts turn texts that repeat (case-insensitively) and
 *   `passes` is true when that count is zero.
 */
var runScenario = async (id, title, run) => {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const store = createVoiceMemoryStore();
  const adapter = createFakeAdapter();
  const turns = [];
  const voice = createVoiceSession({
    context: {},
    id,
    logger: {},
    reconnect: {
      maxAttempts: 2,
      strategy: "resume-last-turn",
      timeout: 5000
    },
    route: {
      onComplete: async () => {},
      onTurn: async ({ turn }) => {
        turns.push(turn.text);
      }
    },
    socket: createMockSocket(),
    store,
    stt: adapter.adapter,
    turnDetection: {
      silenceMs: 20,
      speechThreshold: 0.01
    }
  });
  await voice.connect(createMockSocket());
  try {
    await run({
      adapter,
      // Emits a final transcript, then a loud chunk followed by a silent one,
      // and waits 60 ms (longer than the 20 ms silence window configured above).
      commit: async (text, transcriptId = `${id}-${turns.length}`) => {
        await adapter.session.emit("final", {
          receivedAt: Date.now(),
          transcript: {
            id: transcriptId,
            isFinal: true,
            text
          },
          type: "final"
        });
        await voice.receiveAudio(createSpeechChunk(16000));
        await voice.receiveAudio(createSpeechChunk(0));
        await pause(60);
      },
      connectNewSocket: async () => {
        await voice.connect(createMockSocket());
      },
      disconnect: async () => {
        await voice.disconnect({
          recoverable: true,
          type: "close"
        });
      },
      turns
    });
  } finally {
    await voice.close("resilience-complete");
  }
  const uniqueTurns = new Set(turns.map((turn) => turn.toLowerCase()));
  const replayedTurns = turns.length - uniqueTurns.size;
  return {
    actualTurns: turns,
    id,
    passes: replayedTurns === 0,
    replayedTurns,
    title
  };
};
|
|
1001
|
+
/**
 * Runs the built-in reconnect/replay scenarios concurrently and aggregates
 * their results.
 *
 * @returns `{ generatedAt, scenarios, summary }`. In the summary,
 *   `duplicateTurnRate` and `replayFailureRate` are both the share of scenarios
 *   with at least one replayed turn, and `passRate` is the share that passed.
 */
var runVoiceResilienceBenchmark = async () => {
  const resumeRun = runScenario("resume-no-replay", "Reconnect after first turn does not replay committed text", async ({ commit, connectNewSocket, disconnect }) => {
    await commit("Reconnect should not duplicate prior turns");
    await disconnect();
    await connectNewSocket();
    await commit("A second turn should still commit after resume");
  });
  const duplicateIdRun = runScenario("duplicate-final-id", "Duplicate transcript ids do not create replayed turns", async ({ adapter, connectNewSocket, disconnect, turns, commit }) => {
    await commit("Duplicate final ids should still produce one turn", "same-id");
    await disconnect();
    await connectNewSocket();
    // Re-deliver the same final transcript id after the reconnect.
    const duplicateFinal = {
      receivedAt: Date.now(),
      transcript: {
        id: "same-id",
        isFinal: true,
        text: "Duplicate final ids should still produce one turn"
      },
      type: "final"
    };
    await adapter.session.emit("final", duplicateFinal);
    if (turns.length === 1) {
      await commit("Fresh transcripts should still commit later");
    }
  });
  const scenarios = await Promise.all([resumeRun, duplicateIdRun]);

  const total = scenarios.length;
  const shareOf = (count) => roundMetric2(total > 0 ? count / total : 0);
  const passCount = scenarios.filter((scenario) => scenario.passes).length;
  const replayFailures = scenarios.filter((scenario) => scenario.replayedTurns > 0).length;
  return {
    generatedAt: Date.now(),
    scenarios,
    summary: {
      duplicateTurnRate: shareOf(replayFailures),
      passCount,
      passRate: shareOf(passCount),
      replayFailureRate: shareOf(replayFailures),
      scenarioCount: total
    }
  };
};
|
|
1041
|
+
// src/testing/sessionBenchmark.ts
|
|
1042
|
+
// Arithmetic mean of `values`; an empty array averages to 0.
var average2 = (values) => {
  if (values.length === 0) {
    return 0;
  }
  const total = values.reduce((sum, value) => sum + value, 0);
  return total / values.length;
};
|
|
1043
|
+
// Rounds `value` to `digits` decimal places (4 by default).
var roundMetric3 = (value, digits = 4) => {
  const scale = Math.pow(10, digits);
  const scaled = Math.round(value * scale);
  return scaled / scale;
};
|
|
1047
|
+
/**
 * Splits an audio buffer into consecutive slices of at most `bytesPerChunk`
 * bytes; the last slice holds whatever remains.
 *
 * @param audio - Value exposing `byteLength` and `slice(start, end)`.
 * @param bytesPerChunk - Maximum size of each chunk in bytes; must be > 0.
 * @returns The chunks in order (empty array for empty audio).
 * @throws {RangeError} When `bytesPerChunk` is zero or negative, which would
 *   otherwise never advance the offset and loop without end.
 */
var chunkAudio2 = (audio, bytesPerChunk) => {
  if (bytesPerChunk <= 0) {
    throw new RangeError(`bytesPerChunk must be greater than 0, received ${bytesPerChunk}`);
  }
  const chunks = [];
  for (let offset = 0; offset < audio.byteLength; offset += bytesPerChunk) {
    chunks.push(audio.slice(offset, offset + bytesPerChunk));
  }
  return chunks;
};
|
|
1054
|
+
// Allocates a zero-filled Uint8Array of `byteLength` bytes.
var createSilence2 = (byteLength) => {
  const silence = new Uint8Array(byteLength);
  return silence;
};
|
|
1055
|
+
// Stand-in socket for the session benchmark: `close` and `send` are async no-ops.
var createMockSocket2 = () => {
  const socket = {
    async close() {},
    async send() {}
  };
  return socket;
};
|
|
1059
|
+
/**
 * Streams one fixture through a fresh voice session, paced chunk by chunk,
 * optionally forcing a disconnect/reconnect, and scores the committed turns
 * against the fixture's expected turn texts.
 *
 * All pacing uses a `setTimeout`-based pause instead of the Bun-only
 * `Bun.sleep`, so the benchmark also runs on runtimes without a `Bun` global.
 *
 * @param adapter - STT adapter under test; passed to the session as `stt`.
 * @param fixture - Scenario fixture: audio, format, expected turn texts and
 *   optional pacing / reconnect / threshold overrides.
 * @returns The scenario result; `passes` requires no duplicate turns, the
 *   expected number of turns, and every turn passing its accuracy threshold.
 */
var runVoiceSessionBenchmarkScenario = async (adapter, fixture) => {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const store = createVoiceMemoryStore();
  const turns = [];
  const session = createVoiceSession({
    context: {},
    id: `session-bench-${fixture.id}`,
    logger: {},
    reconnect: {
      maxAttempts: 2,
      strategy: "resume-last-turn",
      timeout: 5000
    },
    route: {
      onComplete: async () => {},
      onTurn: async ({ turn }) => {
        turns.push(turn.text);
      }
    },
    socket: createMockSocket2(),
    store,
    stt: adapter,
    turnDetection: {
      silenceMs: fixture.silenceMs ?? DEFAULT_SILENCE_MS,
      speechThreshold: fixture.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD
    }
  });
  const startedAt = Date.now();
  let reconnectTriggered = false;
  await session.connect(createMockSocket2());
  try {
    const chunkDurationMs = fixture.chunkDurationMs ?? 100;
    // Two bytes per sample per channel.
    const bytesPerMillisecond = fixture.format.sampleRateHz * fixture.format.channels * 2 / 1000;
    const bytesPerChunk = Math.max(2, Math.floor(bytesPerMillisecond * chunkDurationMs));
    const chunks = chunkAudio2(fixture.audio, bytesPerChunk);
    for (const [index, chunk] of chunks.entries()) {
      await session.receiveAudio(chunk);
      await pause(chunkDurationMs);
      if (fixture.reconnectAtChunkIndex !== undefined && index === fixture.reconnectAtChunkIndex && !reconnectTriggered) {
        reconnectTriggered = true;
        await session.disconnect({
          reason: "benchmark-reconnect",
          recoverable: true,
          type: "close"
        });
        await pause(fixture.reconnectPauseMs ?? 150);
        await session.connect(createMockSocket2());
      }
    }
    // Trailing silence after the fixture audio, sent at the same pace.
    const tailPaddingMs = fixture.tailPaddingMs ?? 1200;
    if (tailPaddingMs > 0) {
      const tailBytes = Math.max(2, Math.floor(bytesPerMillisecond * tailPaddingMs));
      for (const chunk of chunkAudio2(createSilence2(tailBytes), bytesPerChunk)) {
        await session.receiveAudio(chunk);
        await pause(chunkDurationMs);
      }
    }
    // Final wait of at least 1200 ms (or the silence window, if longer) before closing.
    await pause(Math.max(1200, fixture.silenceMs ?? DEFAULT_SILENCE_MS));
  } finally {
    await session.close("session-benchmark-complete");
  }
  const duplicateTurnCount = Math.max(0, turns.length - new Set(turns.map((turn) => turn.toLowerCase())).size);
  const turnResults = fixture.expectedTurnTexts.map((expectedText, index) => {
    const actualText = turns[index];
    if (!actualText) {
      // Expected turn that was never committed.
      return {
        actualText: "",
        expectedText,
        index,
        passes: false
      };
    }
    const accuracy = scoreTranscriptAccuracy(actualText, expectedText, fixture.transcriptThreshold ?? 0.35);
    return {
      actualText,
      accuracy,
      expectedText,
      index,
      passes: accuracy.passesThreshold
    };
  });
  // Turns committed beyond the expected count are recorded as failures.
  for (let index = fixture.expectedTurnTexts.length; index < turns.length; index += 1) {
    turnResults.push({
      actualText: turns[index] ?? "",
      expectedText: undefined,
      index,
      passes: false
    });
  }
  const turnCountDelta = turns.length - fixture.expectedTurnTexts.length;
  return {
    actualTurns: turns,
    duplicateTurnCount,
    elapsedMs: Date.now() - startedAt,
    expectedTurns: fixture.expectedTurnTexts,
    fixtureId: fixture.id,
    passes: duplicateTurnCount === 0 && turnCountDelta === 0 && turnResults.every((result) => result.passes),
    reconnectTriggered,
    tags: fixture.tags ?? [],
    title: fixture.title,
    turnCountDelta,
    turnResults
  };
};
|
|
1162
|
+
/**
 * Aggregates per-scenario session benchmark results into one summary.
 *
 * @param adapterId - Identifier of the adapter the scenarios were run against.
 * @param scenarios - Scenario results to aggregate.
 * @returns Summary with pass counts and rates, average elapsed time (rounded
 *   to 2 decimals), average word error rate over turns that have a numeric
 *   one, and duplicate / turn-count-mismatch counts. `reconnectSuccessRate`
 *   is 1 when no scenario triggered a reconnect.
 */
var summarizeVoiceSessionBenchmark = (adapterId, scenarios) => {
  const scenarioCount = scenarios.length;
  const countWhere = (items, predicate) => items.filter(predicate).length;

  const passCount = countWhere(scenarios, (scenario) => scenario.passes);
  const reconnected = scenarios.filter((scenario) => scenario.reconnectTriggered);
  const reconnectPasses = countWhere(reconnected, (scenario) => scenario.passes);
  const withDuplicates = countWhere(scenarios, (scenario) => scenario.duplicateTurnCount > 0);
  const withCountMismatch = countWhere(scenarios, (scenario) => scenario.turnCountDelta !== 0);

  // Collect word error rates only from turns that were actually scored.
  const wordErrorRates = [];
  for (const scenario of scenarios) {
    for (const turn of scenario.turnResults) {
      const rate = turn.accuracy?.wordErrorRate;
      if (typeof rate === "number") {
        wordErrorRates.push(rate);
      }
    }
  }
  const elapsedTimes = scenarios.map((scenario) => scenario.elapsedMs);

  return {
    adapterId,
    averageElapsedMs: roundMetric3(average2(elapsedTimes), 2),
    averageWordErrorRate: roundMetric3(average2(wordErrorRates)),
    duplicateTurnRate: roundMetric3(scenarioCount > 0 ? withDuplicates / scenarioCount : 0),
    passCount,
    passRate: roundMetric3(scenarioCount > 0 ? passCount / scenarioCount : 0),
    reconnectSuccessRate: roundMetric3(reconnected.length > 0 ? reconnectPasses / reconnected.length : 1),
    scenarioCount,
    scenariosWithDuplicateTurns: withDuplicates,
    scenariosWithTurnCountMismatch: withCountMismatch
  };
};
|
|
1180
|
+
/**
 * Runs every scenario in `input.scenarios` against `input.adapter`, one after
 * another, and returns the results together with their summary.
 *
 * @param input - `{ adapter, adapterId, scenarios }`.
 * @returns `{ adapterId, generatedAt, scenarios, summary }`.
 */
var runVoiceSessionBenchmark = async (input) => {
  const results = [];
  for (const fixture of input.scenarios) {
    // Sequential on purpose: each scenario is awaited before the next starts.
    const outcome = await runVoiceSessionBenchmarkScenario(input.adapter, fixture);
    results.push(outcome);
  }
  const report = {
    adapterId: input.adapterId,
    generatedAt: Date.now(),
    scenarios: results,
    summary: summarizeVoiceSessionBenchmark(input.adapterId, results)
  };
  return report;
};
|
|
437
1192
|
export {
|
|
1193
|
+
summarizeVoiceSessionBenchmark,
|
|
438
1194
|
summarizeSTTBenchmark,
|
|
439
1195
|
scoreTranscriptAccuracy,
|
|
1196
|
+
runVoiceSessionBenchmarkScenario,
|
|
1197
|
+
runVoiceSessionBenchmark,
|
|
1198
|
+
runVoiceResilienceBenchmark,
|
|
440
1199
|
runSTTAdapterFixture,
|
|
441
1200
|
runSTTAdapterBenchmark,
|
|
442
1201
|
mergeFinalTranscriptText,
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/** Outcome of a single resilience scenario. */
export type VoiceResilienceScenarioResult = {
    /** Text of every turn delivered to the route's `onTurn` handler, in order. */
    actualTurns: string[];
    /** Scenario identifier. */
    id: string;
    /** True when no turn text was delivered more than once. */
    passes: boolean;
    /** Number of delivered turns whose text repeats an earlier one (case-insensitive). */
    replayedTurns: number;
    /** Human-readable scenario title. */
    title: string;
};
|
|
8
|
+
/** Aggregate figures over all resilience scenarios; rates are rounded to 4 decimals. */
export type VoiceResilienceSummary = {
    /** Share of scenarios with at least one replayed turn. */
    duplicateTurnRate: number;
    /** Number of scenarios that passed. */
    passCount: number;
    /** Share of scenarios that passed. */
    passRate: number;
    /** Share of scenarios with at least one replayed turn (same value as `duplicateTurnRate`). */
    replayFailureRate: number;
    /** Total number of scenarios run. */
    scenarioCount: number;
};
|
|
15
|
+
/**
 * Value that the promise returned by `runVoiceResilienceBenchmark` resolves to:
 * per-scenario results plus an aggregate summary.
 */
export type VoiceResilienceReport = {
|
|
16
|
+
/** Numeric timestamp — presumably `Date.now()` epoch milliseconds; TODO confirm. */
generatedAt: number;
|
|
17
|
+
/** One entry per scenario. */
scenarios: VoiceResilienceScenarioResult[];
|
|
18
|
+
/** Aggregate figures for the report. */
summary: VoiceResilienceSummary;
|
|
19
|
+
};
|
|
20
|
+
/**
 * Runs the resilience benchmark. Takes no arguments and resolves to a
 * `VoiceResilienceReport`.
 */
export declare const runVoiceResilienceBenchmark: () => Promise<VoiceResilienceReport>;
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import type { STTAdapter } from '../types';
|
|
2
|
+
import { type VoiceTranscriptAccuracy } from './accuracy';
|
|
3
|
+
import type { VoiceTestFixture } from './fixtures';
|
|
4
|
+
/**
 * Input for one session benchmark run: a `VoiceTestFixture` extended with the
 * expected per-turn texts and a set of optional tuning fields.
 * NOTE(review): the optional fields are consumed by
 * `runVoiceSessionBenchmarkScenario`, whose body is not visible from this
 * declaration file; the notes on them are inferred from names — confirm.
 */
export type VoiceSessionBenchmarkScenario = VoiceTestFixture & {
|
|
5
|
+
/** Expected text of each turn, one string per turn (see `expectedTurnTexts` in the fixture manifest). */
expectedTurnTexts: string[];
|
|
6
|
+
/** Presumably the audio chunk index at which a reconnect is simulated; TODO confirm. */
reconnectAtChunkIndex?: number;
|
|
7
|
+
/** Presumably how long to pause, in milliseconds, around the simulated reconnect; TODO confirm. */
reconnectPauseMs?: number;
|
|
8
|
+
/** Presumably a silence duration in milliseconds used for turn detection; TODO confirm. */
silenceMs?: number;
|
|
9
|
+
/** Optional numeric threshold; units and range are not visible here — TODO confirm. */
speechThreshold?: number;
|
|
10
|
+
/** Optional numeric threshold; units and range are not visible here — TODO confirm. */
transcriptThreshold?: number;
|
|
11
|
+
};
|
|
12
|
+
/**
 * Per-turn comparison record; `VoiceSessionBenchmarkScenarioResult.turnResults`
 * holds an array of these.
 */
export type VoiceSessionBenchmarkTurnResult = {
|
|
13
|
+
/** Text recorded for this turn. */
actualText: string;
|
|
14
|
+
/** Optional accuracy score — presumably only present when there is an expected text to compare against; TODO confirm. */
accuracy?: VoiceTranscriptAccuracy;
|
|
15
|
+
/** Optional expected text — presumably absent for turns beyond the expected list; TODO confirm. */
expectedText?: string;
|
|
16
|
+
/** Position of the turn; NOTE(review): presumably zero-based — confirm. */
index: number;
|
|
17
|
+
/** Pass/fail flag for this turn. */
passes: boolean;
|
|
18
|
+
};
|
|
19
|
+
/**
 * Result of running one `VoiceSessionBenchmarkScenario`; this is what
 * `runVoiceSessionBenchmarkScenario` resolves to and what
 * `summarizeVoiceSessionBenchmark` aggregates.
 */
export type VoiceSessionBenchmarkScenarioResult = {
|
|
20
|
+
/** Turn texts — presumably those actually committed during the run; TODO confirm. */
actualTurns: string[];
|
|
21
|
+
/** Duplicate-turn counter; the summary treats any value above 0 as "scenario has duplicate turns". */
duplicateTurnCount: number;
|
|
22
|
+
/** Presumably wall-clock duration of the scenario in milliseconds; TODO confirm. */
elapsedMs: number;
|
|
23
|
+
/** Turn texts the scenario expected. */
expectedTurns: string[];
|
|
24
|
+
/** Presumably the `id` of the fixture the scenario was built from; TODO confirm. */
fixtureId: string;
|
|
25
|
+
/** Pass/fail flag for the whole scenario. */
passes: boolean;
|
|
26
|
+
/** Presumably true when a reconnect was simulated during the run; TODO confirm. */
reconnectTriggered: boolean;
|
|
27
|
+
tags: string[];
|
|
28
|
+
title: string;
|
|
29
|
+
/** Turn-count difference; the summary counts any non-zero value as a turn-count mismatch. Sign convention not visible here — TODO confirm. */
turnCountDelta: number;
|
|
30
|
+
/** Per-turn comparison records. */
turnResults: VoiceSessionBenchmarkTurnResult[];
|
|
31
|
+
};
|
|
32
|
+
/**
 * Aggregate figures over a list of `VoiceSessionBenchmarkScenarioResult`s, as
 * returned by `summarizeVoiceSessionBenchmark`. The rate fields are passed
 * through `roundMetric3` in the bundled implementation.
 */
export type VoiceSessionBenchmarkSummary = {
|
|
33
|
+
adapterId: string;
|
|
34
|
+
/** Presumably the mean of `elapsedMs` across scenarios; TODO confirm. */
averageElapsedMs: number;
|
|
35
|
+
/** Presumably the mean word error rate across scored turns; TODO confirm. */
averageWordErrorRate: number;
|
|
36
|
+
/** Share of scenarios with `duplicateTurnCount > 0`; 0 when there are no scenarios. */
duplicateTurnRate: number;
|
|
37
|
+
/** Presumably the number of scenarios whose `passes` flag is true; TODO confirm. */
passCount: number;
|
|
38
|
+
/** `passCount` divided by the number of scenarios; 0 when there are no scenarios. */
passRate: number;
|
|
39
|
+
/** Successful reconnect scenarios divided by all reconnect scenarios; reported as 1 when there are no reconnect scenarios. */
reconnectSuccessRate: number;
|
|
40
|
+
/** Number of scenario results summarized. */
scenarioCount: number;
|
|
41
|
+
/** Number of scenarios with `duplicateTurnCount > 0`. */
scenariosWithDuplicateTurns: number;
|
|
42
|
+
/** Number of scenarios with `turnCountDelta !== 0`. */
scenariosWithTurnCountMismatch: number;
|
|
43
|
+
};
|
|
44
|
+
/**
 * Full session benchmark report for one adapter. Has the same four fields as
 * the object built by `runVoiceSessionBenchmark`.
 */
export type VoiceSessionBenchmarkReport = {
|
|
45
|
+
/** Identifier of the adapter the report belongs to. */
adapterId: string;
|
|
46
|
+
/** `Date.now()` timestamp (epoch milliseconds) taken when the report object is built. */
generatedAt: number;
|
|
47
|
+
/** One result per scenario, in the order the scenarios were run. */
scenarios: VoiceSessionBenchmarkScenarioResult[];
|
|
48
|
+
/** Aggregate figures computed from `scenarios`. */
summary: VoiceSessionBenchmarkSummary;
|
|
49
|
+
};
|
|
50
|
+
/**
 * Runs a single session benchmark scenario against the given STT adapter and
 * resolves to that scenario's result record.
 */
export declare const runVoiceSessionBenchmarkScenario: (adapter: STTAdapter, fixture: VoiceSessionBenchmarkScenario) => Promise<VoiceSessionBenchmarkScenarioResult>;
|
|
51
|
+
/**
 * Aggregates per-scenario results into a `VoiceSessionBenchmarkSummary`.
 * Synchronous; `adapterId` is presumably copied into the summary — TODO confirm.
 */
export declare const summarizeVoiceSessionBenchmark: (adapterId: string, scenarios: VoiceSessionBenchmarkScenarioResult[]) => VoiceSessionBenchmarkSummary;
|
|
52
|
+
/**
 * Runs every scenario in `input.scenarios` against `input.adapter`, one after
 * another, and resolves to a report containing the per-scenario results and
 * their summary.
 *
 * The return type is the named `VoiceSessionBenchmarkReport` alias rather than
 * a repeated inline object shape; the two are structurally identical
 * (`adapterId`, `generatedAt`, `scenarios`, `summary`), so existing callers
 * type-check unchanged.
 *
 * @param input.adapter - STT adapter under test.
 * @param input.adapterId - Identifier recorded on the report and its summary input.
 * @param input.scenarios - Scenarios to run, in order.
 */
export declare const runVoiceSessionBenchmark: (input: {
    adapter: STTAdapter;
    adapterId: string;
    scenarios: VoiceSessionBenchmarkScenario[];
}) => Promise<VoiceSessionBenchmarkReport>;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
## Bundled Fixtures
|
|
2
|
+
|
|
3
|
+
This directory contains small public benchmark fixtures for `@absolutejs/voice`.
|
|
4
|
+
|
|
5
|
+
### Sources
|
|
6
|
+
|
|
7
|
+
- `quietly-alone-clean.pcm`
|
|
8
|
+
- `traveled-back-route-clean.pcm`
|
|
9
|
+
- `rainstorms-noisy.pcm`
|
|
10
|
+
|
|
11
|
+
These are derived from public-domain LibriSpeech material, with the noisy variant created by mixing synthetic noise into a clean base utterance for adapter comparison.
|
|
12
|
+
|
|
13
|
+
- `stella-india-english37.pcm`
|
|
14
|
+
- `stella-ghana-english507.pcm`
|
|
15
|
+
- `stella-singapore-english655.pcm`
|
|
16
|
+
- `stella-pakistan-english519.pcm`
|
|
17
|
+
- `stella-jamaica-jamaican-creole-english1.pcm`
|
|
18
|
+
- `stella-liberia-liberian-pidgin-english2.pcm`
|
|
19
|
+
- `stella-sierra-leone-krio5.pcm`
|
|
20
|
+
- `stella-bulgaria-bulgarian20.pcm`
|
|
21
|
+
|
|
22
|
+
These are derived from the Speech Accent Archive at George Mason University using the shared "Please call Stella..." elicitation paragraph.
|
|
23
|
+
|
|
24
|
+
Archive:
|
|
25
|
+
|
|
26
|
+
- https://accent.gmu.edu/
|
|
27
|
+
|
|
28
|
+
License:
|
|
29
|
+
|
|
30
|
+
- https://creativecommons.org/licenses/by-nc-sa/2.0/
|
|
31
|
+
|
|
32
|
+
Selected speaker pages:
|
|
33
|
+
|
|
34
|
+
- https://accent.gmu.edu/browse_language.php?function=detail&speakerid=96
|
|
35
|
+
- https://accent.gmu.edu/browse_language.php?function=detail&speakerid=1800
|
|
36
|
+
- https://accent.gmu.edu/browse_language.php?function=detail&speakerid=3033
|
|
37
|
+
- https://accent.gmu.edu/browse_language.php?function=detail&speakerid=1882
|
|
38
|
+
- https://accent.gmu.edu/browse_language.php?function=detail&speakerid=967
|
|
39
|
+
- https://accent.gmu.edu/browse_language.php?function=detail&speakerid=2141
|
|
40
|
+
- https://accent.gmu.edu/browse_language.php?function=detail&speakerid=1140
|
|
41
|
+
- https://accent.gmu.edu/browse_language.php?function=detail&speakerid=2691
|
|
42
|
+
|
|
43
|
+
### Synthetic Multi-Turn Fixtures
|
|
44
|
+
|
|
45
|
+
- `multiturn-two-clean.pcm`
|
|
46
|
+
- `multiturn-three-mixed.pcm`
|
|
47
|
+
|
|
48
|
+
These are synthetic conversation-style fixtures created by concatenating the bundled public-domain base clips with inserted silence to exercise turn commit behavior.
|
package/fixtures/manifest.json
CHANGED
|
@@ -28,5 +28,114 @@
|
|
|
28
28
|
"chunkDurationMs": 100,
|
|
29
29
|
"difficulty": "noisy",
|
|
30
30
|
"tags": ["noisy", "long", "synthetic-noise", "librispeech"]
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"id": "stella-india-english37",
|
|
34
|
+
"title": "International English accent from India",
|
|
35
|
+
"audioPath": "stella-india-english37.pcm",
|
|
36
|
+
"expectedText": "PLEASE CALL STELLA ASK HER TO BRING THESE THINGS WITH HER FROM THE STORE SIX SPOONS OF FRESH SNOW PEAS FIVE THICK SLABS OF BLUE CHEESE AND MAYBE A SNACK FOR HER BROTHER BOB WE ALSO NEED A SMALL PLASTIC SNAKE AND A BIG TOY FROG FOR THE KIDS SHE CAN SCOOP THESE THINGS INTO THREE RED BAGS AND WE WILL GO MEET HER WEDNESDAY AT THE TRAIN STATION",
|
|
37
|
+
"expectedTerms": ["stella", "snow peas", "blue cheese", "brother bob", "plastic snake", "toy frog", "three red bags", "train station"],
|
|
38
|
+
"chunkDurationMs": 100,
|
|
39
|
+
"difficulty": "challenging",
|
|
40
|
+
"tags": ["accent", "challenging", "india", "international", "speech-accent-archive"]
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
"id": "stella-ghana-english507",
|
|
44
|
+
"title": "International English accent from Ghana",
|
|
45
|
+
"audioPath": "stella-ghana-english507.pcm",
|
|
46
|
+
"expectedText": "PLEASE CALL STELLA ASK HER TO BRING THESE THINGS WITH HER FROM THE STORE SIX SPOONS OF FRESH SNOW PEAS FIVE THICK SLABS OF BLUE CHEESE AND MAYBE A SNACK FOR HER BROTHER BOB WE ALSO NEED A SMALL PLASTIC SNAKE AND A BIG TOY FROG FOR THE KIDS SHE CAN SCOOP THESE THINGS INTO THREE RED BAGS AND WE WILL GO MEET HER WEDNESDAY AT THE TRAIN STATION",
|
|
47
|
+
"expectedTerms": ["stella", "snow peas", "blue cheese", "brother bob", "plastic snake", "toy frog", "three red bags", "train station"],
|
|
48
|
+
"chunkDurationMs": 100,
|
|
49
|
+
"difficulty": "challenging",
|
|
50
|
+
"tags": ["accent", "challenging", "ghana", "international", "speech-accent-archive"]
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"id": "stella-singapore-english655",
|
|
54
|
+
"title": "International English accent from Singapore",
|
|
55
|
+
"audioPath": "stella-singapore-english655.pcm",
|
|
56
|
+
"expectedText": "PLEASE CALL STELLA ASK HER TO BRING THESE THINGS WITH HER FROM THE STORE SIX SPOONS OF FRESH SNOW PEAS FIVE THICK SLABS OF BLUE CHEESE AND MAYBE A SNACK FOR HER BROTHER BOB WE ALSO NEED A SMALL PLASTIC SNAKE AND A BIG TOY FROG FOR THE KIDS SHE CAN SCOOP THESE THINGS INTO THREE RED BAGS AND WE WILL GO MEET HER WEDNESDAY AT THE TRAIN STATION",
|
|
57
|
+
"expectedTerms": ["stella", "snow peas", "blue cheese", "brother bob", "plastic snake", "toy frog", "three red bags", "train station"],
|
|
58
|
+
"chunkDurationMs": 100,
|
|
59
|
+
"difficulty": "challenging",
|
|
60
|
+
"tags": ["accent", "challenging", "international", "singapore", "speech-accent-archive"]
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
"id": "stella-pakistan-english519",
|
|
64
|
+
"title": "International English accent from Pakistan",
|
|
65
|
+
"audioPath": "stella-pakistan-english519.pcm",
|
|
66
|
+
"expectedText": "PLEASE CALL STELLA ASK HER TO BRING THESE THINGS WITH HER FROM THE STORE SIX SPOONS OF FRESH SNOW PEAS FIVE THICK SLABS OF BLUE CHEESE AND MAYBE A SNACK FOR HER BROTHER BOB WE ALSO NEED A SMALL PLASTIC SNAKE AND A BIG TOY FROG FOR THE KIDS SHE CAN SCOOP THESE THINGS INTO THREE RED BAGS AND WE WILL GO MEET HER WEDNESDAY AT THE TRAIN STATION",
|
|
67
|
+
"expectedTerms": ["stella", "snow peas", "blue cheese", "brother bob", "plastic snake", "toy frog", "three red bags", "train station"],
|
|
68
|
+
"chunkDurationMs": 100,
|
|
69
|
+
"difficulty": "challenging",
|
|
70
|
+
"tags": ["accent", "challenging", "international", "pakistan", "speech-accent-archive"]
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
"id": "stella-jamaica-jamaican-creole-english1",
|
|
74
|
+
"title": "International English accent from Jamaica",
|
|
75
|
+
"audioPath": "stella-jamaica-jamaican-creole-english1.pcm",
|
|
76
|
+
"expectedText": "PLEASE CALL STELLA ASK HER TO BRING THESE THINGS WITH HER FROM THE STORE SIX SPOONS OF FRESH SNOW PEAS FIVE THICK SLABS OF BLUE CHEESE AND MAYBE A SNACK FOR HER BROTHER BOB WE ALSO NEED A SMALL PLASTIC SNAKE AND A BIG TOY FROG FOR THE KIDS SHE CAN SCOOP THESE THINGS INTO THREE RED BAGS AND WE WILL GO MEET HER WEDNESDAY AT THE TRAIN STATION",
|
|
77
|
+
"expectedTerms": ["stella", "snow peas", "blue cheese", "brother bob", "plastic snake", "toy frog", "three red bags", "train station"],
|
|
78
|
+
"chunkDurationMs": 100,
|
|
79
|
+
"difficulty": "challenging",
|
|
80
|
+
"tags": ["accent", "challenging", "international", "jamaica", "speech-accent-archive"]
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
"id": "stella-liberia-liberian-pidgin-english2",
|
|
84
|
+
"title": "International English accent from Liberia",
|
|
85
|
+
"audioPath": "stella-liberia-liberian-pidgin-english2.pcm",
|
|
86
|
+
"expectedText": "PLEASE CALL STELLA ASK HER TO BRING THESE THINGS WITH HER FROM THE STORE SIX SPOONS OF FRESH SNOW PEAS FIVE THICK SLABS OF BLUE CHEESE AND MAYBE A SNACK FOR HER BROTHER BOB WE ALSO NEED A SMALL PLASTIC SNAKE AND A BIG TOY FROG FOR THE KIDS SHE CAN SCOOP THESE THINGS INTO THREE RED BAGS AND WE WILL GO MEET HER WEDNESDAY AT THE TRAIN STATION",
|
|
87
|
+
"expectedTerms": ["stella", "snow peas", "blue cheese", "brother bob", "plastic snake", "toy frog", "three red bags", "train station"],
|
|
88
|
+
"chunkDurationMs": 100,
|
|
89
|
+
"difficulty": "challenging",
|
|
90
|
+
"tags": ["accent", "challenging", "international", "liberia", "speech-accent-archive"]
|
|
91
|
+
},
|
|
92
|
+
{
|
|
93
|
+
"id": "stella-sierra-leone-krio5",
|
|
94
|
+
"title": "International English accent from Sierra Leone",
|
|
95
|
+
"audioPath": "stella-sierra-leone-krio5.pcm",
|
|
96
|
+
"expectedText": "PLEASE CALL STELLA ASK HER TO BRING THESE THINGS WITH HER FROM THE STORE SIX SPOONS OF FRESH SNOW PEAS FIVE THICK SLABS OF BLUE CHEESE AND MAYBE A SNACK FOR HER BROTHER BOB WE ALSO NEED A SMALL PLASTIC SNAKE AND A BIG TOY FROG FOR THE KIDS SHE CAN SCOOP THESE THINGS INTO THREE RED BAGS AND WE WILL GO MEET HER WEDNESDAY AT THE TRAIN STATION",
|
|
97
|
+
"expectedTerms": ["stella", "snow peas", "blue cheese", "brother bob", "plastic snake", "toy frog", "three red bags", "train station"],
|
|
98
|
+
"chunkDurationMs": 100,
|
|
99
|
+
"difficulty": "challenging",
|
|
100
|
+
"tags": ["accent", "challenging", "international", "sierra-leone", "speech-accent-archive"]
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
"id": "stella-bulgaria-bulgarian20",
|
|
104
|
+
"title": "International English accent from Bulgaria",
|
|
105
|
+
"audioPath": "stella-bulgaria-bulgarian20.pcm",
|
|
106
|
+
"expectedText": "PLEASE CALL STELLA ASK HER TO BRING THESE THINGS WITH HER FROM THE STORE SIX SPOONS OF FRESH SNOW PEAS FIVE THICK SLABS OF BLUE CHEESE AND MAYBE A SNACK FOR HER BROTHER BOB WE ALSO NEED A SMALL PLASTIC SNAKE AND A BIG TOY FROG FOR THE KIDS SHE CAN SCOOP THESE THINGS INTO THREE RED BAGS AND WE WILL GO MEET HER WEDNESDAY AT THE TRAIN STATION",
|
|
107
|
+
"expectedTerms": ["stella", "snow peas", "blue cheese", "brother bob", "plastic snake", "toy frog", "three red bags", "train station"],
|
|
108
|
+
"chunkDurationMs": 100,
|
|
109
|
+
"difficulty": "challenging",
|
|
110
|
+
"tags": ["accent", "challenging", "bulgaria", "international", "speech-accent-archive"]
|
|
111
|
+
},
|
|
112
|
+
{
|
|
113
|
+
"id": "multiturn-two-clean",
|
|
114
|
+
"title": "Synthetic two-turn clean conversation",
|
|
115
|
+
"audioPath": "multiturn-two-clean.pcm",
|
|
116
|
+
"expectedText": "GO QUIETLY ALONE NO HARM WILL BEFALL YOU WE PASSED AROUND ATLANTA CROSSED THE CHATTAHOOCHEE AND TRAVELED BACK OVER THE SAME ROUTE ON WHICH WE HAD MADE THE ARDUOUS CAMPAIGN UNDER JOE JOHNSTON",
|
|
117
|
+
"expectedTerms": ["quietly alone", "atlanta", "chattahoochee", "joe johnston"],
|
|
118
|
+
"expectedTurnTexts": [
|
|
119
|
+
"GO QUIETLY ALONE NO HARM WILL BEFALL YOU",
|
|
120
|
+
"WE PASSED AROUND ATLANTA CROSSED THE CHATTAHOOCHEE AND TRAVELED BACK OVER THE SAME ROUTE ON WHICH WE HAD MADE THE ARDUOUS CAMPAIGN UNDER JOE JOHNSTON"
|
|
121
|
+
],
|
|
122
|
+
"chunkDurationMs": 100,
|
|
123
|
+
"difficulty": "clean",
|
|
124
|
+
"tags": ["clean", "conversation", "multi-turn", "synthetic"]
|
|
125
|
+
},
|
|
126
|
+
{
|
|
127
|
+
"id": "multiturn-three-mixed",
|
|
128
|
+
"title": "Synthetic three-turn mixed conversation",
|
|
129
|
+
"audioPath": "multiturn-three-mixed.pcm",
|
|
130
|
+
"expectedText": "GO QUIETLY ALONE NO HARM WILL BEFALL YOU SLIGHT RAINSTORMS ARE LIKELY TO BE ENCOUNTERED IN A TRIP ROUND THE MOUNTAIN BUT ONE MAY EASILY FIND SHELTER BENEATH WELL THATCHED TREES THAT SHED THE RAIN LIKE A ROOF GO QUIETLY ALONE NO HARM WILL BEFALL YOU",
|
|
131
|
+
"expectedTerms": ["quietly alone", "rainstorms", "thatched trees"],
|
|
132
|
+
"expectedTurnTexts": [
|
|
133
|
+
"GO QUIETLY ALONE NO HARM WILL BEFALL YOU",
|
|
134
|
+
"SLIGHT RAINSTORMS ARE LIKELY TO BE ENCOUNTERED IN A TRIP ROUND THE MOUNTAIN BUT ONE MAY EASILY FIND SHELTER BENEATH WELL THATCHED TREES THAT SHED THE RAIN LIKE A ROOF",
|
|
135
|
+
"GO QUIETLY ALONE NO HARM WILL BEFALL YOU"
|
|
136
|
+
],
|
|
137
|
+
"chunkDurationMs": 100,
|
|
138
|
+
"difficulty": "challenging",
|
|
139
|
+
"tags": ["conversation", "multi-turn", "noisy", "synthetic"]
|
|
31
140
|
}
|
|
32
141
|
]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@absolutejs/voice",
|
|
3
|
-
"version": "0.0.18",
|
|
3
|
+
"version": "0.0.20",
|
|
4
4
|
"description": "Voice primitives and Elysia plugin for AbsoluteJS",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -16,9 +16,16 @@
|
|
|
16
16
|
"license": "CC BY-NC 4.0",
|
|
17
17
|
"author": "Alex Kahn",
|
|
18
18
|
"scripts": {
|
|
19
|
+
"bench:accents": "bun run ./scripts/benchmark-stt.ts all accents",
|
|
19
20
|
"bench:assemblyai": "bun run ./scripts/benchmark-stt.ts assemblyai",
|
|
21
|
+
"bench:assemblyai:accents": "bun run ./scripts/benchmark-stt.ts assemblyai accents",
|
|
20
22
|
"bench:deepgram": "bun run ./scripts/benchmark-stt.ts deepgram",
|
|
23
|
+
"bench:deepgram:accents": "bun run ./scripts/benchmark-stt.ts deepgram accents",
|
|
24
|
+
"bench:deepgram:sessions": "bun run ./scripts/benchmark-session.ts deepgram",
|
|
25
|
+
"bench:resilience": "bun run ./scripts/benchmark-resilience.ts",
|
|
26
|
+
"bench:sessions": "bun run ./scripts/benchmark-session.ts all",
|
|
21
27
|
"bench:stt": "bun run ./scripts/benchmark-stt.ts all",
|
|
28
|
+
"bench:assemblyai:sessions": "bun run ./scripts/benchmark-session.ts assemblyai",
|
|
22
29
|
"build": "rm -rf dist && bun build ./src/index.ts ./src/client/index.ts ./src/react/index.ts ./src/vue/index.ts ./src/svelte/index.ts ./src/angular/index.ts ./src/testing/index.ts --outdir dist --target bun --external elysia --external react --external vue --external @angular/core --external @absolutejs/absolute && bun build ./src/client/htmxBootstrap.ts --outdir dist/client --target browser --format esm && tsc --emitDeclarationOnly --project tsconfig.json",
|
|
23
30
|
"format": "prettier --write \"./**/*.{js,jsx,ts,tsx,json,md}\"",
|
|
24
31
|
"lint": "eslint ./src",
|