@utterance/core 0.0.3 → 0.0.4

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -373,7 +373,7 @@ var FeatureExtractor = class {
373
373
  };
374
374
 
375
375
  // src/types.ts
376
- var MODEL_CDN_URL = "https://pub-46a5feb0029246bcbc93fab6162cff94.r2.dev/v0.0.2/utterance-v1.onnx";
376
+ var MODEL_CDN_URL = "https://pub-46a5feb0029246bcbc93fab6162cff94.r2.dev/v2/utterance-v2.onnx";
377
377
  var DEFAULT_OPTIONS = {
378
378
  sensitivity: 0.5,
379
379
  pauseTolerance: 1500,
@@ -485,7 +485,7 @@ var ONNXModel = class {
485
485
  } else if (path === "bundled") {
486
486
  try {
487
487
  const getUrl = new Function("p", "b", "return new URL(p, b).href");
488
- const href = getUrl("../../models/utterance-v1.onnx", import_meta.url);
488
+ const href = getUrl("../../models/utterance-v2.onnx", import_meta.url);
489
489
  const response = await fetch(href);
490
490
  if (response.ok) {
491
491
  modelSource = await response.arrayBuffer();
@@ -571,6 +571,25 @@ var ONNXModel = class {
571
571
  const dstIdx = i * FEATURE_DIM;
572
572
  input.set(this.frameBuffer.subarray(srcIdx, srcIdx + FEATURE_DIM), dstIdx);
573
573
  }
574
+ for (let f = 0; f < 14; f++) {
575
+ let sum = 0;
576
+ for (let i = 0; i < CONTEXT_FRAMES; i++) {
577
+ sum += input[i * FEATURE_DIM + f];
578
+ }
579
+ const mean = sum / CONTEXT_FRAMES;
580
+ let varSum = 0;
581
+ for (let i = 0; i < CONTEXT_FRAMES; i++) {
582
+ const d = input[i * FEATURE_DIM + f] - mean;
583
+ varSum += d * d;
584
+ }
585
+ const std = Math.sqrt(varSum / CONTEXT_FRAMES) || 1;
586
+ for (let i = 0; i < CONTEXT_FRAMES; i++) {
587
+ input[i * FEATURE_DIM + f] = (input[i * FEATURE_DIM + f] - mean) / std;
588
+ }
589
+ }
590
+ for (let i = 0; i < CONTEXT_FRAMES; i++) {
591
+ input[i * FEATURE_DIM + 14] /= 500;
592
+ }
574
593
  const tensor = new ort.Tensor("float32", input, [1, CONTEXT_FRAMES, FEATURE_DIM]);
575
594
  const results = await session.run({ input: tensor });
576
595
  const output = results.output;
@@ -618,6 +637,7 @@ var TurnDetector = class {
618
637
  state = "idle";
619
638
  pauseStart = 0;
620
639
  speakStart = 0;
640
+ interruptFired = false;
621
641
  sensitivity;
622
642
  pauseTolerance;
623
643
  constructor(sensitivity = 0.5, pauseTolerance = 1500) {
@@ -647,6 +667,7 @@ var TurnDetector = class {
647
667
  const threshold = this.sensitivity;
648
668
  switch (label) {
649
669
  case "speaking":
670
+ this.interruptFired = false;
650
671
  if (this.state !== "speaking") {
651
672
  this.state = "speaking";
652
673
  this.speakStart = timestamp;
@@ -682,7 +703,8 @@ var TurnDetector = class {
682
703
  }
683
704
  break;
684
705
  case "interrupt_intent":
685
- if (confidence >= threshold) {
706
+ if (confidence >= threshold && !this.interruptFired) {
707
+ this.interruptFired = true;
686
708
  this.emit("interrupt", { timestamp });
687
709
  }
688
710
  break;
@@ -695,6 +717,7 @@ var TurnDetector = class {
695
717
  this.state = "idle";
696
718
  this.pauseStart = 0;
697
719
  this.speakStart = 0;
720
+ this.interruptFired = false;
698
721
  }
699
722
  emit(event, payload) {
700
723
  this.listeners.get(event)?.forEach((fn) => fn(payload));
package/dist/index.js CHANGED
@@ -337,7 +337,7 @@ var FeatureExtractor = class {
337
337
  };
338
338
 
339
339
  // src/types.ts
340
- var MODEL_CDN_URL = "https://pub-46a5feb0029246bcbc93fab6162cff94.r2.dev/v0.0.2/utterance-v1.onnx";
340
+ var MODEL_CDN_URL = "https://pub-46a5feb0029246bcbc93fab6162cff94.r2.dev/v2/utterance-v2.onnx";
341
341
  var DEFAULT_OPTIONS = {
342
342
  sensitivity: 0.5,
343
343
  pauseTolerance: 1500,
@@ -448,7 +448,7 @@ var ONNXModel = class {
448
448
  } else if (path === "bundled") {
449
449
  try {
450
450
  const getUrl = new Function("p", "b", "return new URL(p, b).href");
451
- const href = getUrl("../../models/utterance-v1.onnx", import.meta.url);
451
+ const href = getUrl("../../models/utterance-v2.onnx", import.meta.url);
452
452
  const response = await fetch(href);
453
453
  if (response.ok) {
454
454
  modelSource = await response.arrayBuffer();
@@ -534,6 +534,25 @@ var ONNXModel = class {
534
534
  const dstIdx = i * FEATURE_DIM;
535
535
  input.set(this.frameBuffer.subarray(srcIdx, srcIdx + FEATURE_DIM), dstIdx);
536
536
  }
537
+ for (let f = 0; f < 14; f++) {
538
+ let sum = 0;
539
+ for (let i = 0; i < CONTEXT_FRAMES; i++) {
540
+ sum += input[i * FEATURE_DIM + f];
541
+ }
542
+ const mean = sum / CONTEXT_FRAMES;
543
+ let varSum = 0;
544
+ for (let i = 0; i < CONTEXT_FRAMES; i++) {
545
+ const d = input[i * FEATURE_DIM + f] - mean;
546
+ varSum += d * d;
547
+ }
548
+ const std = Math.sqrt(varSum / CONTEXT_FRAMES) || 1;
549
+ for (let i = 0; i < CONTEXT_FRAMES; i++) {
550
+ input[i * FEATURE_DIM + f] = (input[i * FEATURE_DIM + f] - mean) / std;
551
+ }
552
+ }
553
+ for (let i = 0; i < CONTEXT_FRAMES; i++) {
554
+ input[i * FEATURE_DIM + 14] /= 500;
555
+ }
537
556
  const tensor = new ort.Tensor("float32", input, [1, CONTEXT_FRAMES, FEATURE_DIM]);
538
557
  const results = await session.run({ input: tensor });
539
558
  const output = results.output;
@@ -581,6 +600,7 @@ var TurnDetector = class {
581
600
  state = "idle";
582
601
  pauseStart = 0;
583
602
  speakStart = 0;
603
+ interruptFired = false;
584
604
  sensitivity;
585
605
  pauseTolerance;
586
606
  constructor(sensitivity = 0.5, pauseTolerance = 1500) {
@@ -610,6 +630,7 @@ var TurnDetector = class {
610
630
  const threshold = this.sensitivity;
611
631
  switch (label) {
612
632
  case "speaking":
633
+ this.interruptFired = false;
613
634
  if (this.state !== "speaking") {
614
635
  this.state = "speaking";
615
636
  this.speakStart = timestamp;
@@ -645,7 +666,8 @@ var TurnDetector = class {
645
666
  }
646
667
  break;
647
668
  case "interrupt_intent":
648
- if (confidence >= threshold) {
669
+ if (confidence >= threshold && !this.interruptFired) {
670
+ this.interruptFired = true;
649
671
  this.emit("interrupt", { timestamp });
650
672
  }
651
673
  break;
@@ -658,6 +680,7 @@ var TurnDetector = class {
658
680
  this.state = "idle";
659
681
  this.pauseStart = 0;
660
682
  this.speakStart = 0;
683
+ this.interruptFired = false;
661
684
  }
662
685
  emit(event, payload) {
663
686
  this.listeners.get(event)?.forEach((fn) => fn(payload));
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@utterance/core",
3
- "version": "0.0.3",
3
+ "version": "0.0.4",
4
4
  "description": "Client-side semantic endpointing. Know when they're done talking.",
5
5
  "type": "module",
6
6
  "main": "dist/index.cjs",
@@ -56,6 +56,8 @@
56
56
  "devDependencies": {
57
57
  "@eslint/js": "^9.0.0",
58
58
  "@tailwindcss/postcss": "^4.2.0",
59
+ "@types/d3-scale": "^4.0.9",
60
+ "@types/d3-shape": "^3.1.8",
59
61
  "@types/mdx": "^2.0.13",
60
62
  "@types/node": "^22.0.0",
61
63
  "@types/react": "^19.2.14",
@@ -79,6 +81,8 @@
79
81
  "@utterance/core": "^0.0.2",
80
82
  "class-variance-authority": "^0.7.1",
81
83
  "clsx": "^2.1.1",
84
+ "d3-scale": "^4.0.2",
85
+ "d3-shape": "^3.2.0",
82
86
  "fumadocs-core": "^16.6.3",
83
87
  "fumadocs-mdx": "^14.2.7",
84
88
  "fumadocs-ui": "^16.6.3",
package/models/.gitkeep DELETED
File without changes
Binary file