@utterance/core 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +26 -3
- package/dist/index.js +26 -3
- package/models/utterance-v2.onnx +0 -0
- package/package.json +5 -1
- package/models/.gitkeep +0 -0
- package/models/utterance-v1.onnx +0 -0
package/dist/index.cjs
CHANGED
|
@@ -373,7 +373,7 @@ var FeatureExtractor = class {
|
|
|
373
373
|
};
|
|
374
374
|
|
|
375
375
|
// src/types.ts
|
|
376
|
-
var MODEL_CDN_URL = "https://pub-46a5feb0029246bcbc93fab6162cff94.r2.dev/
|
|
376
|
+
var MODEL_CDN_URL = "https://pub-46a5feb0029246bcbc93fab6162cff94.r2.dev/v2/utterance-v2.onnx";
|
|
377
377
|
var DEFAULT_OPTIONS = {
|
|
378
378
|
sensitivity: 0.5,
|
|
379
379
|
pauseTolerance: 1500,
|
|
@@ -485,7 +485,7 @@ var ONNXModel = class {
|
|
|
485
485
|
} else if (path === "bundled") {
|
|
486
486
|
try {
|
|
487
487
|
const getUrl = new Function("p", "b", "return new URL(p, b).href");
|
|
488
|
-
const href = getUrl("../../models/utterance-v1.onnx", import_meta.url);
|
|
488
|
+
const href = getUrl("../../models/utterance-v2.onnx", import_meta.url);
|
|
489
489
|
const response = await fetch(href);
|
|
490
490
|
if (response.ok) {
|
|
491
491
|
modelSource = await response.arrayBuffer();
|
|
@@ -571,6 +571,25 @@ var ONNXModel = class {
|
|
|
571
571
|
const dstIdx = i * FEATURE_DIM;
|
|
572
572
|
input.set(this.frameBuffer.subarray(srcIdx, srcIdx + FEATURE_DIM), dstIdx);
|
|
573
573
|
}
|
|
574
|
+
for (let f = 0; f < 14; f++) {
|
|
575
|
+
let sum = 0;
|
|
576
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
577
|
+
sum += input[i * FEATURE_DIM + f];
|
|
578
|
+
}
|
|
579
|
+
const mean = sum / CONTEXT_FRAMES;
|
|
580
|
+
let varSum = 0;
|
|
581
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
582
|
+
const d = input[i * FEATURE_DIM + f] - mean;
|
|
583
|
+
varSum += d * d;
|
|
584
|
+
}
|
|
585
|
+
const std = Math.sqrt(varSum / CONTEXT_FRAMES) || 1;
|
|
586
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
587
|
+
input[i * FEATURE_DIM + f] = (input[i * FEATURE_DIM + f] - mean) / std;
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
591
|
+
input[i * FEATURE_DIM + 14] /= 500;
|
|
592
|
+
}
|
|
574
593
|
const tensor = new ort.Tensor("float32", input, [1, CONTEXT_FRAMES, FEATURE_DIM]);
|
|
575
594
|
const results = await session.run({ input: tensor });
|
|
576
595
|
const output = results.output;
|
|
@@ -618,6 +637,7 @@ var TurnDetector = class {
|
|
|
618
637
|
state = "idle";
|
|
619
638
|
pauseStart = 0;
|
|
620
639
|
speakStart = 0;
|
|
640
|
+
interruptFired = false;
|
|
621
641
|
sensitivity;
|
|
622
642
|
pauseTolerance;
|
|
623
643
|
constructor(sensitivity = 0.5, pauseTolerance = 1500) {
|
|
@@ -647,6 +667,7 @@ var TurnDetector = class {
|
|
|
647
667
|
const threshold = this.sensitivity;
|
|
648
668
|
switch (label) {
|
|
649
669
|
case "speaking":
|
|
670
|
+
this.interruptFired = false;
|
|
650
671
|
if (this.state !== "speaking") {
|
|
651
672
|
this.state = "speaking";
|
|
652
673
|
this.speakStart = timestamp;
|
|
@@ -682,7 +703,8 @@ var TurnDetector = class {
|
|
|
682
703
|
}
|
|
683
704
|
break;
|
|
684
705
|
case "interrupt_intent":
|
|
685
|
-
if (confidence >= threshold) {
|
|
706
|
+
if (confidence >= threshold && !this.interruptFired) {
|
|
707
|
+
this.interruptFired = true;
|
|
686
708
|
this.emit("interrupt", { timestamp });
|
|
687
709
|
}
|
|
688
710
|
break;
|
|
@@ -695,6 +717,7 @@ var TurnDetector = class {
|
|
|
695
717
|
this.state = "idle";
|
|
696
718
|
this.pauseStart = 0;
|
|
697
719
|
this.speakStart = 0;
|
|
720
|
+
this.interruptFired = false;
|
|
698
721
|
}
|
|
699
722
|
emit(event, payload) {
|
|
700
723
|
this.listeners.get(event)?.forEach((fn) => fn(payload));
|
package/dist/index.js
CHANGED
|
@@ -337,7 +337,7 @@ var FeatureExtractor = class {
|
|
|
337
337
|
};
|
|
338
338
|
|
|
339
339
|
// src/types.ts
|
|
340
|
-
var MODEL_CDN_URL = "https://pub-46a5feb0029246bcbc93fab6162cff94.r2.dev/
|
|
340
|
+
var MODEL_CDN_URL = "https://pub-46a5feb0029246bcbc93fab6162cff94.r2.dev/v2/utterance-v2.onnx";
|
|
341
341
|
var DEFAULT_OPTIONS = {
|
|
342
342
|
sensitivity: 0.5,
|
|
343
343
|
pauseTolerance: 1500,
|
|
@@ -448,7 +448,7 @@ var ONNXModel = class {
|
|
|
448
448
|
} else if (path === "bundled") {
|
|
449
449
|
try {
|
|
450
450
|
const getUrl = new Function("p", "b", "return new URL(p, b).href");
|
|
451
|
-
const href = getUrl("../../models/utterance-v1.onnx", import.meta.url);
|
|
451
|
+
const href = getUrl("../../models/utterance-v2.onnx", import.meta.url);
|
|
452
452
|
const response = await fetch(href);
|
|
453
453
|
if (response.ok) {
|
|
454
454
|
modelSource = await response.arrayBuffer();
|
|
@@ -534,6 +534,25 @@ var ONNXModel = class {
|
|
|
534
534
|
const dstIdx = i * FEATURE_DIM;
|
|
535
535
|
input.set(this.frameBuffer.subarray(srcIdx, srcIdx + FEATURE_DIM), dstIdx);
|
|
536
536
|
}
|
|
537
|
+
for (let f = 0; f < 14; f++) {
|
|
538
|
+
let sum = 0;
|
|
539
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
540
|
+
sum += input[i * FEATURE_DIM + f];
|
|
541
|
+
}
|
|
542
|
+
const mean = sum / CONTEXT_FRAMES;
|
|
543
|
+
let varSum = 0;
|
|
544
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
545
|
+
const d = input[i * FEATURE_DIM + f] - mean;
|
|
546
|
+
varSum += d * d;
|
|
547
|
+
}
|
|
548
|
+
const std = Math.sqrt(varSum / CONTEXT_FRAMES) || 1;
|
|
549
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
550
|
+
input[i * FEATURE_DIM + f] = (input[i * FEATURE_DIM + f] - mean) / std;
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
for (let i = 0; i < CONTEXT_FRAMES; i++) {
|
|
554
|
+
input[i * FEATURE_DIM + 14] /= 500;
|
|
555
|
+
}
|
|
537
556
|
const tensor = new ort.Tensor("float32", input, [1, CONTEXT_FRAMES, FEATURE_DIM]);
|
|
538
557
|
const results = await session.run({ input: tensor });
|
|
539
558
|
const output = results.output;
|
|
@@ -581,6 +600,7 @@ var TurnDetector = class {
|
|
|
581
600
|
state = "idle";
|
|
582
601
|
pauseStart = 0;
|
|
583
602
|
speakStart = 0;
|
|
603
|
+
interruptFired = false;
|
|
584
604
|
sensitivity;
|
|
585
605
|
pauseTolerance;
|
|
586
606
|
constructor(sensitivity = 0.5, pauseTolerance = 1500) {
|
|
@@ -610,6 +630,7 @@ var TurnDetector = class {
|
|
|
610
630
|
const threshold = this.sensitivity;
|
|
611
631
|
switch (label) {
|
|
612
632
|
case "speaking":
|
|
633
|
+
this.interruptFired = false;
|
|
613
634
|
if (this.state !== "speaking") {
|
|
614
635
|
this.state = "speaking";
|
|
615
636
|
this.speakStart = timestamp;
|
|
@@ -645,7 +666,8 @@ var TurnDetector = class {
|
|
|
645
666
|
}
|
|
646
667
|
break;
|
|
647
668
|
case "interrupt_intent":
|
|
648
|
-
if (confidence >= threshold) {
|
|
669
|
+
if (confidence >= threshold && !this.interruptFired) {
|
|
670
|
+
this.interruptFired = true;
|
|
649
671
|
this.emit("interrupt", { timestamp });
|
|
650
672
|
}
|
|
651
673
|
break;
|
|
@@ -658,6 +680,7 @@ var TurnDetector = class {
|
|
|
658
680
|
this.state = "idle";
|
|
659
681
|
this.pauseStart = 0;
|
|
660
682
|
this.speakStart = 0;
|
|
683
|
+
this.interruptFired = false;
|
|
661
684
|
}
|
|
662
685
|
emit(event, payload) {
|
|
663
686
|
this.listeners.get(event)?.forEach((fn) => fn(payload));
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@utterance/core",
|
|
3
|
-
"version": "0.0.3",
|
|
3
|
+
"version": "0.0.4",
|
|
4
4
|
"description": "Client-side semantic endpointing. Know when they're done talking.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.cjs",
|
|
@@ -56,6 +56,8 @@
|
|
|
56
56
|
"devDependencies": {
|
|
57
57
|
"@eslint/js": "^9.0.0",
|
|
58
58
|
"@tailwindcss/postcss": "^4.2.0",
|
|
59
|
+
"@types/d3-scale": "^4.0.9",
|
|
60
|
+
"@types/d3-shape": "^3.1.8",
|
|
59
61
|
"@types/mdx": "^2.0.13",
|
|
60
62
|
"@types/node": "^22.0.0",
|
|
61
63
|
"@types/react": "^19.2.14",
|
|
@@ -79,6 +81,8 @@
|
|
|
79
81
|
"@utterance/core": "^0.0.2",
|
|
80
82
|
"class-variance-authority": "^0.7.1",
|
|
81
83
|
"clsx": "^2.1.1",
|
|
84
|
+
"d3-scale": "^4.0.2",
|
|
85
|
+
"d3-shape": "^3.2.0",
|
|
82
86
|
"fumadocs-core": "^16.6.3",
|
|
83
87
|
"fumadocs-mdx": "^14.2.7",
|
|
84
88
|
"fumadocs-ui": "^16.6.3",
|
package/models/.gitkeep
DELETED
|
File without changes
|
package/models/utterance-v1.onnx
DELETED
|
Binary file
|