@utterance/core 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +28 -3
- package/dist/index.js +28 -3
- package/models/utterance-v2.onnx +0 -0
- package/package.json +6 -2
- package/models/.gitkeep +0 -0
- package/models/utterance-v1.onnx +0 -0
package/dist/index.cjs
CHANGED
|
@@ -373,7 +373,7 @@ var FeatureExtractor = class {
|
|
|
373
373
|
};
|
|
374
374
|
|
|
375
375
|
// src/types.ts
|
|
376
|
-
var MODEL_CDN_URL = "https://pub-46a5feb0029246bcbc93fab6162cff94.r2.dev/
|
|
376
|
+
var MODEL_CDN_URL = "https://pub-46a5feb0029246bcbc93fab6162cff94.r2.dev/v2/utterance-v2.onnx";
|
|
377
377
|
var DEFAULT_OPTIONS = {
|
|
378
378
|
sensitivity: 0.5,
|
|
379
379
|
pauseTolerance: 1500,
|
|
@@ -484,7 +484,9 @@ var ONNXModel = class {
|
|
|
484
484
|
}
|
|
485
485
|
} else if (path === "bundled") {
|
|
486
486
|
try {
|
|
487
|
-
const
|
|
487
|
+
const getUrl = new Function("p", "b", "return new URL(p, b).href");
|
|
488
|
+
const href = getUrl("../../models/utterance-v2.onnx", import_meta.url);
|
|
489
|
+
const response = await fetch(href);
|
|
488
490
|
if (response.ok) {
|
|
489
491
|
modelSource = await response.arrayBuffer();
|
|
490
492
|
} else {
|
|
@@ -569,6 +571,25 @@ var ONNXModel = class {
|
|
|
569
571
|
const dstIdx = i * FEATURE_DIM;
|
|
570
572
|
input.set(this.frameBuffer.subarray(srcIdx, srcIdx + FEATURE_DIM), dstIdx);
|
|
571
573
|
}
|
|
574
|
+
for (let f = 0; f < 14; f++) {
|
|
575
|
+
let sum = 0;
|
|
576
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
577
|
+
sum += input[i * FEATURE_DIM + f];
|
|
578
|
+
}
|
|
579
|
+
const mean = sum / CONTEXT_FRAMES;
|
|
580
|
+
let varSum = 0;
|
|
581
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
582
|
+
const d = input[i * FEATURE_DIM + f] - mean;
|
|
583
|
+
varSum += d * d;
|
|
584
|
+
}
|
|
585
|
+
const std = Math.sqrt(varSum / CONTEXT_FRAMES) || 1;
|
|
586
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
587
|
+
input[i * FEATURE_DIM + f] = (input[i * FEATURE_DIM + f] - mean) / std;
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
591
|
+
input[i * FEATURE_DIM + 14] /= 500;
|
|
592
|
+
}
|
|
572
593
|
const tensor = new ort.Tensor("float32", input, [1, CONTEXT_FRAMES, FEATURE_DIM]);
|
|
573
594
|
const results = await session.run({ input: tensor });
|
|
574
595
|
const output = results.output;
|
|
@@ -616,6 +637,7 @@ var TurnDetector = class {
|
|
|
616
637
|
state = "idle";
|
|
617
638
|
pauseStart = 0;
|
|
618
639
|
speakStart = 0;
|
|
640
|
+
interruptFired = false;
|
|
619
641
|
sensitivity;
|
|
620
642
|
pauseTolerance;
|
|
621
643
|
constructor(sensitivity = 0.5, pauseTolerance = 1500) {
|
|
@@ -645,6 +667,7 @@ var TurnDetector = class {
|
|
|
645
667
|
const threshold = this.sensitivity;
|
|
646
668
|
switch (label) {
|
|
647
669
|
case "speaking":
|
|
670
|
+
this.interruptFired = false;
|
|
648
671
|
if (this.state !== "speaking") {
|
|
649
672
|
this.state = "speaking";
|
|
650
673
|
this.speakStart = timestamp;
|
|
@@ -680,7 +703,8 @@ var TurnDetector = class {
|
|
|
680
703
|
}
|
|
681
704
|
break;
|
|
682
705
|
case "interrupt_intent":
|
|
683
|
-
if (confidence >= threshold) {
|
|
706
|
+
if (confidence >= threshold && !this.interruptFired) {
|
|
707
|
+
this.interruptFired = true;
|
|
684
708
|
this.emit("interrupt", { timestamp });
|
|
685
709
|
}
|
|
686
710
|
break;
|
|
@@ -693,6 +717,7 @@ var TurnDetector = class {
|
|
|
693
717
|
this.state = "idle";
|
|
694
718
|
this.pauseStart = 0;
|
|
695
719
|
this.speakStart = 0;
|
|
720
|
+
this.interruptFired = false;
|
|
696
721
|
}
|
|
697
722
|
emit(event, payload) {
|
|
698
723
|
this.listeners.get(event)?.forEach((fn) => fn(payload));
|
package/dist/index.js
CHANGED
|
@@ -337,7 +337,7 @@ var FeatureExtractor = class {
|
|
|
337
337
|
};
|
|
338
338
|
|
|
339
339
|
// src/types.ts
|
|
340
|
-
var MODEL_CDN_URL = "https://pub-46a5feb0029246bcbc93fab6162cff94.r2.dev/
|
|
340
|
+
var MODEL_CDN_URL = "https://pub-46a5feb0029246bcbc93fab6162cff94.r2.dev/v2/utterance-v2.onnx";
|
|
341
341
|
var DEFAULT_OPTIONS = {
|
|
342
342
|
sensitivity: 0.5,
|
|
343
343
|
pauseTolerance: 1500,
|
|
@@ -447,7 +447,9 @@ var ONNXModel = class {
|
|
|
447
447
|
}
|
|
448
448
|
} else if (path === "bundled") {
|
|
449
449
|
try {
|
|
450
|
-
const
|
|
450
|
+
const getUrl = new Function("p", "b", "return new URL(p, b).href");
|
|
451
|
+
const href = getUrl("../../models/utterance-v2.onnx", import.meta.url);
|
|
452
|
+
const response = await fetch(href);
|
|
451
453
|
if (response.ok) {
|
|
452
454
|
modelSource = await response.arrayBuffer();
|
|
453
455
|
} else {
|
|
@@ -532,6 +534,25 @@ var ONNXModel = class {
|
|
|
532
534
|
const dstIdx = i * FEATURE_DIM;
|
|
533
535
|
input.set(this.frameBuffer.subarray(srcIdx, srcIdx + FEATURE_DIM), dstIdx);
|
|
534
536
|
}
|
|
537
|
+
for (let f = 0; f < 14; f++) {
|
|
538
|
+
let sum = 0;
|
|
539
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
540
|
+
sum += input[i * FEATURE_DIM + f];
|
|
541
|
+
}
|
|
542
|
+
const mean = sum / CONTEXT_FRAMES;
|
|
543
|
+
let varSum = 0;
|
|
544
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
545
|
+
const d = input[i * FEATURE_DIM + f] - mean;
|
|
546
|
+
varSum += d * d;
|
|
547
|
+
}
|
|
548
|
+
const std = Math.sqrt(varSum / CONTEXT_FRAMES) || 1;
|
|
549
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
550
|
+
input[i * FEATURE_DIM + f] = (input[i * FEATURE_DIM + f] - mean) / std;
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
554
|
+
input[i * FEATURE_DIM + 14] /= 500;
|
|
555
|
+
}
|
|
535
556
|
const tensor = new ort.Tensor("float32", input, [1, CONTEXT_FRAMES, FEATURE_DIM]);
|
|
536
557
|
const results = await session.run({ input: tensor });
|
|
537
558
|
const output = results.output;
|
|
@@ -579,6 +600,7 @@ var TurnDetector = class {
|
|
|
579
600
|
state = "idle";
|
|
580
601
|
pauseStart = 0;
|
|
581
602
|
speakStart = 0;
|
|
603
|
+
interruptFired = false;
|
|
582
604
|
sensitivity;
|
|
583
605
|
pauseTolerance;
|
|
584
606
|
constructor(sensitivity = 0.5, pauseTolerance = 1500) {
|
|
@@ -608,6 +630,7 @@ var TurnDetector = class {
|
|
|
608
630
|
const threshold = this.sensitivity;
|
|
609
631
|
switch (label) {
|
|
610
632
|
case "speaking":
|
|
633
|
+
this.interruptFired = false;
|
|
611
634
|
if (this.state !== "speaking") {
|
|
612
635
|
this.state = "speaking";
|
|
613
636
|
this.speakStart = timestamp;
|
|
@@ -643,7 +666,8 @@ var TurnDetector = class {
|
|
|
643
666
|
}
|
|
644
667
|
break;
|
|
645
668
|
case "interrupt_intent":
|
|
646
|
-
if (confidence >= threshold) {
|
|
669
|
+
if (confidence >= threshold && !this.interruptFired) {
|
|
670
|
+
this.interruptFired = true;
|
|
647
671
|
this.emit("interrupt", { timestamp });
|
|
648
672
|
}
|
|
649
673
|
break;
|
|
@@ -656,6 +680,7 @@ var TurnDetector = class {
|
|
|
656
680
|
this.state = "idle";
|
|
657
681
|
this.pauseStart = 0;
|
|
658
682
|
this.speakStart = 0;
|
|
683
|
+
this.interruptFired = false;
|
|
659
684
|
}
|
|
660
685
|
emit(event, payload) {
|
|
661
686
|
this.listeners.get(event)?.forEach((fn) => fn(payload));
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@utterance/core",
|
|
3
|
-
"version": "0.0.2",
|
|
3
|
+
"version": "0.0.4",
|
|
4
4
|
"description": "Client-side semantic endpointing. Know when they're done talking.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.cjs",
|
|
@@ -56,6 +56,8 @@
|
|
|
56
56
|
"devDependencies": {
|
|
57
57
|
"@eslint/js": "^9.0.0",
|
|
58
58
|
"@tailwindcss/postcss": "^4.2.0",
|
|
59
|
+
"@types/d3-scale": "^4.0.9",
|
|
60
|
+
"@types/d3-shape": "^3.1.8",
|
|
59
61
|
"@types/mdx": "^2.0.13",
|
|
60
62
|
"@types/node": "^22.0.0",
|
|
61
63
|
"@types/react": "^19.2.14",
|
|
@@ -76,9 +78,11 @@
|
|
|
76
78
|
"dependencies": {
|
|
77
79
|
"@next/third-parties": "^16.1.6",
|
|
78
80
|
"@react-three/fiber": "^9.5.0",
|
|
79
|
-
"@utterance/core": "^0.0.
|
|
81
|
+
"@utterance/core": "^0.0.2",
|
|
80
82
|
"class-variance-authority": "^0.7.1",
|
|
81
83
|
"clsx": "^2.1.1",
|
|
84
|
+
"d3-scale": "^4.0.2",
|
|
85
|
+
"d3-shape": "^3.2.0",
|
|
82
86
|
"fumadocs-core": "^16.6.3",
|
|
83
87
|
"fumadocs-mdx": "^14.2.7",
|
|
84
88
|
"fumadocs-ui": "^16.6.3",
|
package/models/.gitkeep
DELETED
|
File without changes
|
package/models/utterance-v1.onnx
DELETED
|
Binary file
|