getpatter 0.6.6 → 0.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -2,6 +2,17 @@ import {
2
2
  startTunnel
3
3
  } from "./chunk-XS45BAQL.mjs";
4
4
  import {
5
+ TestSession
6
+ } from "./chunk-OV252D2V.mjs";
7
+ import {
8
+ EvalSession,
9
+ FakeAudioSender,
10
+ FakeSTT,
11
+ FakeTTS,
12
+ historyTranscript
13
+ } from "./chunk-3JNVSNLV.mjs";
14
+ import {
15
+ AGENT_BACKLOG_CAP_S,
5
16
  AuthenticationError,
6
17
  CallMetricsAccumulator,
7
18
  Carrier,
@@ -10,11 +21,14 @@ import {
10
21
  DeepgramModel,
11
22
  DeepgramSTT,
12
23
  DefaultToolExecutor,
24
+ ENV_FLAG,
13
25
  ElevenLabsConvAIAdapter,
14
26
  EmbeddedServer,
15
27
  ErrorCode,
16
28
  EventBus,
17
29
  LLMLoop,
30
+ LLM_STREAM_IDLE_TIMEOUT_MS,
31
+ LocalCallRecorder,
18
32
  MetricsStore,
19
33
  OpenAILLMProvider,
20
34
  PRICING_LAST_UPDATED,
@@ -26,6 +40,7 @@ import {
26
40
  PlivoAdapter,
27
41
  PricingUnit,
28
42
  ProvisionError,
43
+ RECORDING_SAMPLE_RATE,
29
44
  RateLimitError,
30
45
  RemoteMessageHandler,
31
46
  SPAN_BARGEIN,
@@ -36,7 +51,7 @@ import {
36
51
  SPAN_TOOL,
37
52
  SPAN_TTS,
38
53
  SentenceChunker,
39
- TestSession,
54
+ TwilioAdapter,
40
55
  VERSION,
41
56
  calculateRealtimeCost,
42
57
  calculateSttCost,
@@ -44,6 +59,7 @@ import {
44
59
  calculateTtsCost,
45
60
  callsToCsv,
46
61
  callsToJson,
62
+ createStreamIdleWatchdog,
47
63
  initTracing,
48
64
  isRemoteUrl,
49
65
  isTracingEnabled,
@@ -56,8 +72,10 @@ import {
56
72
  openclawConsult,
57
73
  openclawPostCallNotifier,
58
74
  resolveLogRoot,
59
- startSpan
60
- } from "./chunk-YJX2EKON.mjs";
75
+ shutdownTracing,
76
+ startSpan,
77
+ withSpan
78
+ } from "./chunk-YJ4HKJL6.mjs";
61
79
  import {
62
80
  OpenAIRealtime2Adapter,
63
81
  OpenAIRealtimeAdapter,
@@ -78,7 +96,7 @@ import {
78
96
  resample24kTo16k,
79
97
  resample8kTo16k,
80
98
  validateRealtimeTurnDetection
81
- } from "./chunk-BO227NTF.mjs";
99
+ } from "./chunk-I56S5MDJ.mjs";
82
100
  import {
83
101
  MinWordsStrategy,
84
102
  evaluateStrategies,
@@ -92,8 +110,9 @@ import {
92
110
  notifyDashboard
93
111
  } from "./chunk-6GR5MHHQ.mjs";
94
112
  import {
95
- SileroVAD
96
- } from "./chunk-3VVATR6A.mjs";
113
+ SileroVAD,
114
+ loadOnnxRuntime
115
+ } from "./chunk-C2LWB42T.mjs";
97
116
  import {
98
117
  __dirname,
99
118
  __require,
@@ -432,10 +451,20 @@ var cachedInstallId = null;
432
451
  function runId() {
433
452
  return RUN_ID;
434
453
  }
454
+ function stateDir() {
455
+ const override = process.env.PATTER_TELEMETRY_STATE_DIR;
456
+ if (override && override.length > 0) return override;
457
+ const xdg = process.env.XDG_STATE_HOME;
458
+ if (xdg && xdg.length > 0) return path.join(xdg, "getpatter");
459
+ return path.join(os.homedir(), ".getpatter");
460
+ }
461
+ function legacyStateDir() {
462
+ if (process.env.PATTER_TELEMETRY_STATE_DIR) return null;
463
+ const xdg = process.env.XDG_STATE_HOME;
464
+ return xdg && xdg.length > 0 ? xdg : null;
465
+ }
435
466
  function statePath() {
436
- const base = process.env.PATTER_TELEMETRY_STATE_DIR || process.env.XDG_STATE_HOME;
437
- const root = base && base.length > 0 ? base : path.join(os.homedir(), ".getpatter");
438
- return path.join(root, "install-id");
467
+ return path.join(stateDir(), "install-id");
439
468
  }
440
469
  function installId() {
441
470
  if (cachedInstallId !== null) return cachedInstallId;
@@ -448,6 +477,27 @@ function installId() {
448
477
  }
449
478
  } catch {
450
479
  }
480
+ const legacyDir = legacyStateDir();
481
+ if (legacyDir !== null) {
482
+ const legacy = path.join(legacyDir, "install-id");
483
+ let existing = "";
484
+ try {
485
+ existing = fs.readFileSync(legacy, "utf8").trim();
486
+ } catch {
487
+ existing = "";
488
+ }
489
+ if (HEX32.test(existing)) {
490
+ try {
491
+ fs.mkdirSync(path.dirname(p), { recursive: true });
492
+ fs.writeFileSync(p, existing, "utf8");
493
+ const stat = fs.statSync(legacy);
494
+ fs.utimesSync(p, stat.atime, stat.mtime);
495
+ } catch {
496
+ }
497
+ cachedInstallId = existing;
498
+ return cachedInstallId;
499
+ }
500
+ }
451
501
  const newId = randomUUID().replace(/-/g, "");
452
502
  try {
453
503
  fs.mkdirSync(path.dirname(p), { recursive: true });
@@ -469,6 +519,16 @@ function previousVersion(current) {
469
519
  } catch {
470
520
  prev = "";
471
521
  }
522
+ if (prev === "") {
523
+ const legacyDir = legacyStateDir();
524
+ if (legacyDir !== null) {
525
+ try {
526
+ prev = fs.readFileSync(path.join(legacyDir, "version"), "utf8").trim();
527
+ } catch {
528
+ prev = "";
529
+ }
530
+ }
531
+ }
472
532
  try {
473
533
  fs.mkdirSync(path.dirname(p), { recursive: true });
474
534
  fs.writeFileSync(p, current, "utf8");
@@ -499,6 +559,14 @@ function isFirstRun() {
499
559
  } catch {
500
560
  return false;
501
561
  }
562
+ const legacyDir = legacyStateDir();
563
+ if (legacyDir !== null) {
564
+ try {
565
+ if (fs.existsSync(path.join(legacyDir, "first-run"))) return false;
566
+ } catch {
567
+ return false;
568
+ }
569
+ }
502
570
  try {
503
571
  fs.mkdirSync(path.dirname(p), { recursive: true });
504
572
  fs.writeFileSync(p, "1", "utf8");
@@ -512,7 +580,13 @@ function optOutPath() {
512
580
  }
513
581
  function isOptedOut() {
514
582
  try {
515
- return fs.existsSync(optOutPath());
583
+ if (fs.existsSync(optOutPath())) return true;
584
+ } catch {
585
+ }
586
+ const legacyDir = legacyStateDir();
587
+ if (legacyDir === null) return false;
588
+ try {
589
+ return fs.existsSync(path.join(legacyDir, "telemetry-disabled"));
516
590
  } catch {
517
591
  return false;
518
592
  }
@@ -603,7 +677,7 @@ function stackDimensions(stt, tts, llm) {
603
677
  }
604
678
 
605
679
  // src/telemetry/events.ts
606
- var SCHEMA_VERSION = 5;
680
+ var SCHEMA_VERSION = 7;
607
681
  var EVENT_SDK_INITIALIZED = "sdk_initialized";
608
682
  var EVENT_FIRST_RUN = "first_run";
609
683
  var EVENT_CLI_COMMAND = "cli_command";
@@ -643,7 +717,7 @@ var DIMENSION_VALUES = {
643
717
  // call_started / call_completed: inbound vs outbound — a core usage split.
644
718
  direction: /* @__PURE__ */ new Set(["inbound", "outbound", "none"]),
645
719
  // cli_command: which CLI subcommand was invoked (never args/flags values).
646
- cli_command: /* @__PURE__ */ new Set(["dashboard", "eval", "telemetry", "none", "other"]),
720
+ cli_command: /* @__PURE__ */ new Set(["dashboard", "eval", "hermes", "openclaw", "telemetry", "none", "other"]),
647
721
  // call_completed: the call's terminal outcome
648
722
  outcome: /* @__PURE__ */ new Set(["completed", "error", "no_answer", "busy", "failed"]),
649
723
  // call_completed: terminal error code (mirrors ErrorCode, plus "other"). Never
@@ -698,11 +772,14 @@ var BOOL_DIMENSIONS = /* @__PURE__ */ new Set([
698
772
  "per_tool_timeouts_set",
699
773
  "llm_fallback_configured"
700
774
  ]);
775
+ var ID_RE = /^[0-9a-f]{32}$/;
776
+ var ID_DIMENSIONS = /* @__PURE__ */ new Set(["call_uid"]);
701
777
  var ALLOWED_DIMENSIONS = /* @__PURE__ */ new Set([
702
778
  ...Object.keys(DIMENSION_VALUES),
703
779
  ...NUMERIC_DIMENSIONS,
704
780
  ...STRING_DIMENSIONS,
705
- ...BOOL_DIMENSIONS
781
+ ...BOOL_DIMENSIONS,
782
+ ...ID_DIMENSIONS
706
783
  ]);
707
784
  function osFamily() {
708
785
  const p = os2.platform();
@@ -748,8 +825,14 @@ function buildEvent(name, opts) {
748
825
  if (!(typeof value === "string" && MODEL_TOKEN_RE.test(value))) {
749
826
  continue;
750
827
  }
828
+ } else if (ID_DIMENSIONS.has(key)) {
829
+ if (!(typeof value === "string" && ID_RE.test(value))) {
830
+ continue;
831
+ }
751
832
  } else if (BOOL_DIMENSIONS.has(key) && typeof value !== "boolean") {
752
833
  continue;
834
+ } else if (NUMERIC_DIMENSIONS.has(key) && typeof value !== "number") {
835
+ continue;
753
836
  }
754
837
  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
755
838
  event[key] = value;
@@ -762,14 +845,16 @@ function buildEvent(name, opts) {
762
845
  var DEFAULT_ENDPOINT = "https://telemetry.getpatter.com/v1/ingest";
763
846
  var TIMEOUT_MS = 3e3;
764
847
  var BUFFER_MAX = 256;
848
+ var MAX_EVENTS_PER_POST = 64;
765
849
  var noticeShown = false;
766
850
  var liveClients = /* @__PURE__ */ new Set();
767
851
  var exitHookRegistered = false;
852
+ var pendingFlush = /* @__PURE__ */ new Set();
768
853
  function showNoticeOnce() {
769
854
  if (noticeShown) return;
770
855
  noticeShown = true;
771
856
  getLogger().info(
772
- "Anonymous usage telemetry is on (no PII, no call content). Collected: a random anonymous install id, SDK version, language, OS family, runtime version, coarse feature flags, the composed stack (provider + model per layer), tool counts, integration category, and per-call duration, latency, cost, and error codes (no call content, no message text). Disable with PATTER_TELEMETRY_DISABLED=1, DO_NOT_TRACK=1, or telemetry: false. Details: https://docs.getpatter.com/telemetry"
857
+ "Anonymous usage telemetry is on (no PII, no call content). Collected: a random anonymous install id, SDK version, language, OS family, runtime version, coarse feature flags, the composed stack (provider + model per layer), tool counts, integration category, a random per-call correlation id, and per-call duration, latency, cost, and error codes (no call content, no message text). Disable with PATTER_TELEMETRY_DISABLED=1, DO_NOT_TRACK=1, or telemetry: false. Details: https://docs.getpatter.com/telemetry"
773
858
  );
774
859
  }
775
860
  function registerExitHook() {
@@ -789,7 +874,7 @@ var TelemetryClient = class {
789
874
  endpoint;
790
875
  debug;
791
876
  buffer = [];
792
- flushing = false;
877
+ inflight = null;
793
878
  closed = false;
794
879
  selfRef = new WeakRef(this);
795
880
  constructor(options) {
@@ -827,6 +912,7 @@ var TelemetryClient = class {
827
912
  try {
828
913
  if (this.buffer.length >= BUFFER_MAX) this.buffer.shift();
829
914
  this.buffer.push(event);
915
+ pendingFlush.add(this);
830
916
  this.scheduleFlush();
831
917
  } catch (err) {
832
918
  getLogger().debug("telemetry enqueue failed", err);
@@ -845,42 +931,68 @@ var TelemetryClient = class {
845
931
  getLogger().debug("telemetry flushPending failed", err);
846
932
  }
847
933
  }
934
+ /**
935
+ * Flush buffered events and wait for delivery. Unlike `close()` the client
936
+ * stays usable afterwards — for teardown paths that may serve again
937
+ * (`Patter.disconnect()`). Bounded by the flush's own per-POST abort timer.
938
+ * Mirrors Python's `drain()`.
939
+ */
940
+ async drain() {
941
+ if (!this.enabledFlag || this.debug || this.closed) return;
942
+ try {
943
+ if (this.inflight) await this.inflight;
944
+ if (this.buffer.length > 0) await this.flush();
945
+ } catch (err) {
946
+ getLogger().debug("telemetry drain failed", err);
947
+ }
948
+ }
848
949
  /** Flush remaining events (graceful shutdown). Never throws. */
849
950
  async close() {
850
951
  if (this.closed) return;
851
952
  this.closed = true;
852
953
  liveClients.delete(this.selfRef);
853
- if (!this.enabledFlag || this.debug) return;
954
+ if (!this.enabledFlag || this.debug) {
955
+ pendingFlush.delete(this);
956
+ return;
957
+ }
854
958
  try {
959
+ if (this.inflight) await this.inflight;
855
960
  await this.flush();
856
961
  } catch (err) {
857
962
  getLogger().debug("telemetry close flush failed", err);
858
963
  }
964
+ pendingFlush.delete(this);
859
965
  }
860
966
  scheduleFlush() {
861
- if (this.flushing) return;
862
- this.flushing = true;
863
- void this.flush().finally(() => {
864
- this.flushing = false;
967
+ if (this.inflight) return;
968
+ this.inflight = this.flush().finally(() => {
969
+ this.inflight = null;
970
+ if (this.buffer.length > 0 && !this.closed) this.scheduleFlush();
865
971
  });
972
+ void this.inflight;
866
973
  }
867
974
  async flush() {
868
975
  if (this.buffer.length === 0) return;
869
976
  const events = this.buffer.splice(0, this.buffer.length);
870
- const controller = new AbortController();
871
- const timer = setTimeout(() => controller.abort(), TIMEOUT_MS);
872
- timer.unref?.();
977
+ pendingFlush.delete(this);
873
978
  try {
874
- await fetch(this.endpoint, {
875
- method: "POST",
876
- headers: { "content-type": "application/json" },
877
- body: JSON.stringify(events),
878
- signal: controller.signal
879
- });
979
+ for (let start = 0; start < events.length; start += MAX_EVENTS_PER_POST) {
980
+ const controller = new AbortController();
981
+ const timer = setTimeout(() => controller.abort(), TIMEOUT_MS);
982
+ timer.unref?.();
983
+ try {
984
+ await fetch(this.endpoint, {
985
+ method: "POST",
986
+ headers: { "content-type": "application/json" },
987
+ body: JSON.stringify(events.slice(start, start + MAX_EVENTS_PER_POST)),
988
+ signal: controller.signal
989
+ });
990
+ } finally {
991
+ clearTimeout(timer);
992
+ }
993
+ }
880
994
  } catch (err) {
881
995
  getLogger().debug("telemetry flush failed", err);
882
- } finally {
883
- clearTimeout(timer);
884
996
  }
885
997
  }
886
998
  };
@@ -1212,7 +1324,7 @@ function resolvePersistRoot(persist) {
1212
1324
  if (typeof persist === "string") return resolveLogRoot(persist);
1213
1325
  const envRoot = resolveLogRoot();
1214
1326
  if (envRoot !== null) return envRoot;
1215
- return null;
1327
+ return resolveLogRoot("auto");
1216
1328
  }
1217
1329
  function closeParkedConnections(slot) {
1218
1330
  if (slot.stt) {
@@ -1552,7 +1664,10 @@ var Patter = class {
1552
1664
  const initDims = {
1553
1665
  carrier: carrierFamily(carrier),
1554
1666
  tunnel: tunnel instanceof Static ? "static" : options.tunnel ? "configured" : "none",
1555
- ...telemetryEnvironmentDims()
1667
+ // Environment dims only when telemetry is ENABLED: the helper's
1668
+ // previousVersion probe writes ~/.getpatter/version, violating the
1669
+ // documented invariant that opting out never touches the filesystem.
1670
+ ...this.telemetry.enabled ? telemetryEnvironmentDims() : {}
1556
1671
  };
1557
1672
  if (this.telemetry.enabled) {
1558
1673
  try {
@@ -1561,8 +1676,8 @@ var Patter = class {
1561
1676
  }
1562
1677
  }
1563
1678
  this.telemetry.record("sdk_initialized", initDims);
1564
- this._tunnelReady = new Promise((resolve, reject) => {
1565
- this._tunnelReadyResolve = resolve;
1679
+ this._tunnelReady = new Promise((resolve2, reject) => {
1680
+ this._tunnelReadyResolve = resolve2;
1566
1681
  this._tunnelReadyReject = reject;
1567
1682
  });
1568
1683
  this._tunnelReady.catch(() => {
@@ -1570,8 +1685,8 @@ var Patter = class {
1570
1685
  if (normalizedWebhook) {
1571
1686
  this._tunnelReadyResolve(normalizedWebhook);
1572
1687
  }
1573
- this._ready = new Promise((resolve, reject) => {
1574
- this._readyResolve = resolve;
1688
+ this._ready = new Promise((resolve2, reject) => {
1689
+ this._readyResolve = resolve2;
1575
1690
  this._readyReject = reject;
1576
1691
  });
1577
1692
  this._ready.catch(() => {
@@ -1581,7 +1696,17 @@ var Patter = class {
1581
1696
  /** Resolve user-supplied agent options against engine defaults and return the merged config. */
1582
1697
  agent(opts) {
1583
1698
  const family = telemetryEngineFamily(opts);
1584
- const stack = stackDimensions(opts.stt, opts.tts, opts.llm);
1699
+ let stack = { ...stackDimensions(opts.stt, opts.tts, opts.llm) };
1700
+ if (family === "realtime") {
1701
+ const engineModel = opts.engine?.model;
1702
+ stack = {
1703
+ ...stack,
1704
+ llm_model: modelToken(
1705
+ "openai",
1706
+ opts.model ?? engineModel ?? "gpt-realtime-mini"
1707
+ )
1708
+ };
1709
+ }
1585
1710
  const featureKey = family + "|" + Object.entries(stack).sort().map(([k, v]) => `${k}=${v}`).join(",");
1586
1711
  if (!this.telemetrySeenEngines.has(featureKey)) {
1587
1712
  this.telemetrySeenEngines.add(featureKey);
@@ -1672,11 +1797,45 @@ var Patter = class {
1672
1797
  throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${working.provider}'`);
1673
1798
  }
1674
1799
  }
1800
+ if (working.provider === "openai_realtime" && !working.engine && !this.localConfig.openaiKey) {
1801
+ const envKey = process.env.OPENAI_API_KEY;
1802
+ if (envKey) {
1803
+ this.localConfig = { ...this.localConfig, openaiKey: envKey };
1804
+ } else {
1805
+ throw new Error(
1806
+ "OpenAI Realtime mode requires an OpenAI API key. Pass engine: new OpenAIRealtime({ apiKey: 'sk-...' }) or set OPENAI_API_KEY in the environment."
1807
+ );
1808
+ }
1809
+ }
1675
1810
  if (working.consult && working.provider === "elevenlabs_convai") {
1676
1811
  getLogger().warn(
1677
1812
  "consult is set but provider is ElevenLabs ConvAI; the consult tool is only injected in Realtime and Pipeline modes and will be ignored for this agent."
1678
1813
  );
1679
1814
  }
1815
+ if (working.handoffs !== void 0) {
1816
+ if (typeof working.handoffs !== "object" || working.handoffs === null || Array.isArray(working.handoffs)) {
1817
+ throw new TypeError(
1818
+ `handoffs must be an object of { name: agentOptions }, got ${Array.isArray(working.handoffs) ? "array" : typeof working.handoffs}.`
1819
+ );
1820
+ }
1821
+ for (const [hName, hAgent] of Object.entries(working.handoffs)) {
1822
+ if (!hName) {
1823
+ throw new Error(
1824
+ "handoffs keys must be non-empty strings (the names the LLM passes to handoff_to)."
1825
+ );
1826
+ }
1827
+ if (typeof hAgent !== "object" || hAgent === null || Array.isArray(hAgent)) {
1828
+ throw new TypeError(
1829
+ `handoffs['${hName}'] must be an agent options object (build with phone.agent({...})), got ${Array.isArray(hAgent) ? "array" : typeof hAgent}.`
1830
+ );
1831
+ }
1832
+ }
1833
+ if (working.provider === "elevenlabs_convai") {
1834
+ getLogger().warn(
1835
+ "handoffs is set but provider is ElevenLabs ConvAI; the handoff_to tool is only injected in Realtime and Pipeline modes and will be ignored for this agent."
1836
+ );
1837
+ }
1838
+ }
1680
1839
  if (working.llm !== void 0) {
1681
1840
  const llm = working.llm;
1682
1841
  if (!llm || typeof llm.stream !== "function") {
@@ -1727,7 +1886,7 @@ var Patter = class {
1727
1886
  }
1728
1887
  if (opts.agent.echoCancellation) {
1729
1888
  try {
1730
- await import("./aec-PJJMUM5E.mjs");
1889
+ await import("./aec-ZZ5HGKS3.mjs");
1731
1890
  } catch (err) {
1732
1891
  getLogger().debug(`AEC pre-import failed at serve(): ${String(err)}`);
1733
1892
  }
@@ -1777,7 +1936,7 @@ var Patter = class {
1777
1936
  const telephonyProvider = carrier.kind;
1778
1937
  const wantsCarrierManagement = opts.manageWebhook !== false || wantsCloudflared;
1779
1938
  if (wantsCarrierManagement) {
1780
- const { autoConfigureCarrier } = await import("./carrier-config-7YGNRBPO.mjs");
1939
+ const { autoConfigureCarrier } = await import("./carrier-config-6L5NND7B.mjs");
1781
1940
  await autoConfigureCarrier({
1782
1941
  telephonyProvider,
1783
1942
  twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
@@ -1816,11 +1975,14 @@ var Patter = class {
1816
1975
  opts.pricing,
1817
1976
  opts.dashboard ?? true,
1818
1977
  opts.dashboardToken ?? "",
1819
- opts.allowInsecureDashboard ?? false
1978
+ opts.allowInsecureDashboard ?? false,
1979
+ opts.localRecording ?? false
1820
1980
  );
1821
1981
  this.embeddedServer.telemetry = this.telemetry;
1822
1982
  this.embeddedServer.popPrewarmAudio = this.popPrewarmAudio;
1823
1983
  this.embeddedServer.popPrewarmedConnections = this.popPrewarmedConnections;
1984
+ this.embeddedServer.aliasPrewarm = this.aliasPrewarm;
1985
+ this.embeddedServer.speechEvents = this.speechEvents;
1824
1986
  this.embeddedServer.recordPrewarmWaste = this.recordPrewarmWaste;
1825
1987
  try {
1826
1988
  await this.embeddedServer.start(port);
@@ -1837,7 +1999,7 @@ var Patter = class {
1837
1999
  }
1838
2000
  /** Run the agent in interactive terminal-test mode (no real telephony). */
1839
2001
  async test(opts) {
1840
- const { TestSession: TestSession2 } = await import("./test-mode-XFOADUNE.mjs");
2002
+ const { TestSession: TestSession2 } = await import("./test-mode-5CNXC447.mjs");
1841
2003
  const session = new TestSession2();
1842
2004
  await session.run({
1843
2005
  agent: opts.agent,
@@ -1915,6 +2077,25 @@ var Patter = class {
1915
2077
  * carrier ``start`` event instead of opening fresh ones — saving
1916
2078
  * ~150-900 ms of cold-start handshake on the first turn.
1917
2079
  */
2080
+ /**
2081
+ * Re-key prewarm caches from a dial-time id to the live carrier id.
2082
+ * Plivo issues ``request_uuid`` at dial time but the media stream and
2083
+ * webhooks carry ``CallUUID`` — without re-keying, prewarmed first-message
2084
+ * audio and parked provider sockets never matched and always TTL-evicted
2085
+ * as "wasted". Mirrors Python ``_alias_prewarm``.
2086
+ */
2087
+ aliasPrewarm = (oldId, newId) => {
2088
+ if (!oldId || !newId || oldId === newId) return;
2089
+ const rekey = (map) => {
2090
+ const v = map.get(oldId);
2091
+ if (v !== void 0 && !map.has(newId)) map.set(newId, v);
2092
+ map.delete(oldId);
2093
+ };
2094
+ rekey(this.prewarmAudio);
2095
+ rekey(this.prewarmTtlTimers);
2096
+ rekey(this.prewarmedConnections);
2097
+ rekey(this.prewarmedConnTimers);
2098
+ };
1918
2099
  popPrewarmedConnections = (callId) => {
1919
2100
  const slot = this.prewarmedConnections.get(callId);
1920
2101
  if (slot === void 0) return void 0;
@@ -2016,7 +2197,7 @@ var Patter = class {
2016
2197
  }
2017
2198
  if (wantsRealtimePark) {
2018
2199
  tasks.push((async () => {
2019
- const { OpenAIRealtime2Adapter: OpenAIRealtime2Adapter2 } = await import("./openai-realtime-2-L5EKAAUH.mjs");
2200
+ const { OpenAIRealtime2Adapter: OpenAIRealtime2Adapter2 } = await import("./openai-realtime-2-O4DP3LXN.mjs");
2020
2201
  const apiKey = process.env.OPENAI_API_KEY ?? "";
2021
2202
  if (!apiKey) {
2022
2203
  getLogger().debug(`Park OpenAI Realtime skipped for ${callId}: no OPENAI_API_KEY`);
@@ -2230,6 +2411,12 @@ var Patter = class {
2230
2411
  if (!options.to) {
2231
2412
  throw new Error("'to' phone number is required");
2232
2413
  }
2414
+ if (options.firstMessage) {
2415
+ options = {
2416
+ ...options,
2417
+ agent: { ...options.agent, firstMessage: options.firstMessage }
2418
+ };
2419
+ }
2233
2420
  if (!/^\+[1-9]\d{6,14}$/.test(options.to)) {
2234
2421
  throw new Error("'to' must be E.164 format (+<country><digits>). Got value with invalid format.");
2235
2422
  }
@@ -2374,6 +2561,9 @@ var Patter = class {
2374
2561
  this.parkProviderConnections(options.agent, plivoCallId);
2375
2562
  }
2376
2563
  }
2564
+ if (plivoCallId) {
2565
+ return this.maybeAwaitCompletion(options, plivoCallId, effectiveRingTimeout);
2566
+ }
2377
2567
  return;
2378
2568
  }
2379
2569
  const twilioSid = carrier.accountSid;
@@ -2511,7 +2701,7 @@ var Patter = class {
2511
2701
  * entries leak across ``serve`` / ``disconnect`` cycles. See FIX #93.
2512
2702
  */
2513
2703
  async disconnect() {
2514
- this.telemetry.flushPending();
2704
+ await this.telemetry.drain();
2515
2705
  for (const handle of this.prewarmTtlTimers.values()) {
2516
2706
  clearTimeout(handle);
2517
2707
  }
@@ -2519,7 +2709,7 @@ var Patter = class {
2519
2709
  if (this.prewarmTasks.size > 0) {
2520
2710
  const drain = Promise.allSettled(Array.from(this.prewarmTasks));
2521
2711
  const timer = new Promise(
2522
- (resolve) => setTimeout(resolve, 1e3).unref?.()
2712
+ (resolve2) => setTimeout(resolve2, 1e3).unref?.()
2523
2713
  );
2524
2714
  await Promise.race([drain, timer]);
2525
2715
  }
@@ -2551,8 +2741,8 @@ var Patter = class {
2551
2741
  this.localConfig = { ...this.localConfig, webhookUrl: void 0 };
2552
2742
  this.tunnelOwnsWebhookUrl = false;
2553
2743
  }
2554
- this._tunnelReady = new Promise((resolve, reject) => {
2555
- this._tunnelReadyResolve = resolve;
2744
+ this._tunnelReady = new Promise((resolve2, reject) => {
2745
+ this._tunnelReadyResolve = resolve2;
2556
2746
  this._tunnelReadyReject = reject;
2557
2747
  });
2558
2748
  this._tunnelReady.catch(() => {
@@ -2560,8 +2750,8 @@ var Patter = class {
2560
2750
  if (this.localConfig.webhookUrl) {
2561
2751
  this._tunnelReadyResolve(this.localConfig.webhookUrl);
2562
2752
  }
2563
- this._ready = new Promise((resolve, reject) => {
2564
- this._readyResolve = resolve;
2753
+ this._ready = new Promise((resolve2, reject) => {
2754
+ this._readyResolve = resolve2;
2565
2755
  this._readyReject = reject;
2566
2756
  });
2567
2757
  this._ready.catch(() => {
@@ -3298,134 +3488,426 @@ function resultFromCallResult(result) {
3298
3488
 
3299
3489
  // src/providers/gemini-live.ts
3300
3490
  init_esm_shims();
3301
- var GEMINI_DEFAULT_INPUT_SR = 16e3;
3302
- var GEMINI_DEFAULT_OUTPUT_SR = 24e3;
3303
- var GeminiLiveAdapter = class {
3304
- constructor(apiKey, options = {}) {
3305
- this.apiKey = apiKey;
3306
- this.model = options.model ?? "gemini-2.5-flash-native-audio-preview-09-2025";
3307
- this.voice = options.voice ?? "Puck";
3308
- this.instructions = options.instructions ?? "";
3309
- this.language = options.language ?? "en-US";
3310
- this.tools = options.tools;
3311
- this.inputSampleRate = options.inputSampleRate ?? GEMINI_DEFAULT_INPUT_SR;
3312
- this.outputSampleRate = options.outputSampleRate ?? GEMINI_DEFAULT_OUTPUT_SR;
3313
- this.temperature = options.temperature ?? 0.8;
3314
- }
3491
+
3492
+ // src/providers/google-llm.ts
3493
+ init_esm_shims();
3494
+ var GoogleModel = {
3495
+ GEMINI_2_5_FLASH: "gemini-2.5-flash",
3496
+ GEMINI_2_5_PRO: "gemini-2.5-pro",
3497
+ GEMINI_2_0_FLASH: "gemini-2.0-flash",
3498
+ GEMINI_2_0_FLASH_LITE: "gemini-2.0-flash-lite",
3499
+ GEMINI_1_5_FLASH: "gemini-1.5-flash",
3500
+ GEMINI_1_5_PRO: "gemini-1.5-pro"
3501
+ };
3502
+ var DEFAULT_MODEL = GoogleModel.GEMINI_2_5_FLASH;
3503
+ var DEFAULT_BASE_URL = "https://generativelanguage.googleapis.com/v1beta";
3504
+ var GoogleLLMProvider = class {
3505
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
3506
+ static providerKey = "google";
3315
3507
  apiKey;
3316
3508
  model;
3317
- voice;
3318
- instructions;
3319
- language;
3320
- tools;
3321
- inputSampleRate;
3322
- /** Output sample rate — exposed so callers can configure downstream transcoding. */
3323
- outputSampleRate;
3509
+ baseUrl;
3324
3510
  temperature;
3325
- client = null;
3326
- session = null;
3327
- receiveLoop = null;
3328
- handlers = [];
3329
- running = false;
3330
- /**
3331
- * Tracks call_id -> function name so tool responses can be sent back with
3332
- * the correct `name` field (Gemini expects the original function name,
3333
- * not the call_id).
3334
- */
3335
- pendingToolCalls = /* @__PURE__ */ new Map();
3336
- /** Lazily import @google/genai, open a Live session, and start the receive loop. */
3337
- async connect() {
3338
- let genaiModule;
3339
- try {
3340
- const modName = "@google/genai";
3341
- genaiModule = await import(modName);
3342
- } catch {
3511
+ maxOutputTokens;
3512
+ constructor(options) {
3513
+ if (!options.apiKey) {
3343
3514
  throw new Error(
3344
- '\nGemini Live requires the "@google/genai" package, which is not installed.\n\n Install: npm install @google/genai\n\nThis is an optional peer dependency of getpatter \u2014 it is only needed when\nyou use GeminiLive as an agent engine. Other LLM/engine providers do not\nrequire it.\n'
3515
+ "Google API key is required. Pass it via { apiKey } or read GOOGLE_API_KEY from the environment."
3345
3516
  );
3346
3517
  }
3347
- const { GoogleGenAI } = genaiModule;
3348
- this.client = new GoogleGenAI({
3349
- apiKey: this.apiKey,
3350
- httpOptions: { apiVersion: "v1alpha" }
3351
- });
3352
- const config = {
3353
- responseModalities: ["AUDIO"],
3354
- speechConfig: {
3355
- voiceConfig: { prebuiltVoiceConfig: { voiceName: this.voice } },
3356
- languageCode: this.language
3357
- },
3358
- temperature: this.temperature
3359
- };
3360
- if (this.instructions) {
3361
- config.systemInstruction = { parts: [{ text: this.instructions }] };
3362
- }
3363
- if (this.tools?.length) {
3364
- config.tools = [
3365
- {
3366
- functionDeclarations: this.tools.map((t) => ({
3367
- name: t.name,
3368
- description: t.description,
3369
- parameters: t.parameters
3370
- }))
3371
- }
3372
- ];
3373
- }
3374
- const liveApi = this.client.live;
3375
- if (!liveApi?.connect) {
3376
- throw new Error("@google/genai: live.connect is not available in this version");
3518
+ this.apiKey = options.apiKey;
3519
+ this.model = options.model ?? DEFAULT_MODEL;
3520
+ this.baseUrl = options.baseUrl ?? DEFAULT_BASE_URL;
3521
+ this.temperature = options.temperature;
3522
+ this.maxOutputTokens = options.maxOutputTokens;
3523
+ }
3524
+ /**
3525
+ * Pre-call DNS / TLS warmup for the Gemini API.
3526
+ * Issues a lightweight ``GET ${baseUrl}/models?key=...`` so DNS, TLS
3527
+ * and HTTP/2 are already up by the time the first
3528
+ * ``streamGenerateContent`` call lands. Best-effort: 5 s timeout, all
3529
+ * exceptions swallowed at debug level.
3530
+ */
3531
+ async warmup() {
3532
+ try {
3533
+ await fetch(`${this.baseUrl}/models?key=${encodeURIComponent(this.apiKey)}`, {
3534
+ method: "GET",
3535
+ signal: AbortSignal.timeout(5e3)
3536
+ });
3537
+ } catch (err) {
3538
+ getLogger().debug(`Google LLM warmup failed (best-effort): ${String(err)}`);
3377
3539
  }
3378
- this.session = await liveApi.connect({ model: this.model, config });
3379
- this.running = true;
3380
- this.receiveLoop = this.pumpReceive().catch((err) => {
3381
- getLogger().error(`Gemini Live receive loop error: ${String(err)}`);
3382
- });
3383
3540
  }
3384
- /** Send a PCM audio chunk to Gemini as base64 inline data. */
3385
- sendAudio(pcm) {
3386
- if (!this.session || !this.running) return;
3387
- const mime = `audio/pcm;rate=${this.inputSampleRate}`;
3388
- const sess = this.session;
3389
- const result = sess.sendRealtimeInput?.({
3390
- media: { data: pcm.toString("base64"), mimeType: mime }
3541
+ /** Stream Patter-format LLM chunks from the Gemini SSE endpoint. */
3542
+ async *stream(messages, tools, opts) {
3543
+ const { systemInstruction, contents } = toGeminiContents(messages);
3544
+ const geminiTools = tools ? toGeminiTools(tools) : null;
3545
+ const body = { contents };
3546
+ if (systemInstruction) {
3547
+ body.systemInstruction = { role: "system", parts: [{ text: systemInstruction }] };
3548
+ }
3549
+ if (geminiTools) body.tools = geminiTools;
3550
+ const generationConfig = {};
3551
+ if (this.temperature !== void 0) generationConfig.temperature = this.temperature;
3552
+ if (this.maxOutputTokens !== void 0)
3553
+ generationConfig.maxOutputTokens = this.maxOutputTokens;
3554
+ if (Object.keys(generationConfig).length > 0) body.generationConfig = generationConfig;
3555
+ const url = `${this.baseUrl}/models/${encodeURIComponent(this.model)}:streamGenerateContent?alt=sse&key=${encodeURIComponent(this.apiKey)}`;
3556
+ const idle = createStreamIdleWatchdog();
3557
+ const response = await fetch(url, {
3558
+ method: "POST",
3559
+ headers: { "Content-Type": "application/json" },
3560
+ body: JSON.stringify(body),
3561
+ signal: mergeAbortSignals(opts?.signal, idle.signal)
3391
3562
  });
3392
- if (result instanceof Promise) {
3393
- void result.catch(
3394
- (err) => getLogger().warn(`Gemini Live sendAudio error: ${String(err)}`)
3563
+ if (!response.ok) {
3564
+ const errText = await response.text();
3565
+ getLogger().error(`Gemini API error: ${response.status} ${errText.slice(0, 200)}`);
3566
+ throw new PatterConnectionError(
3567
+ `Gemini API returned ${response.status}: ${errText.slice(0, 200)}`
3395
3568
  );
3396
3569
  }
3397
- }
3398
- /** Send a text turn to Gemini and mark the turn complete. */
3399
- async sendText(text) {
3400
- if (!this.session) return;
3401
- const sess = this.session;
3402
- await sess.sendClientContent?.({
3403
- turns: { role: "user", parts: [{ text }] },
3404
- turnComplete: true
3405
- });
3406
- }
3407
- /** Send a tool/function-call result back to Gemini. */
3408
- async sendFunctionResult(callId, result) {
3409
- if (!this.session) return;
3410
- const sess = this.session;
3411
- const name = this.pendingToolCalls.get(callId) ?? callId;
3412
- this.pendingToolCalls.delete(callId);
3413
- await sess.sendToolResponse?.({
3414
- functionResponses: [
3415
- { id: callId, name, response: { result } }
3416
- ]
3417
- });
3418
- }
3419
- /** No-op — Gemini Live barge-in is VAD-driven, not client-cancelled. */
3420
- cancelResponse() {
3421
- getLogger().debug("Gemini Live: cancelResponse is implicit via VAD");
3422
- }
3423
- /** Register an event handler that receives every Gemini Live event. */
3424
- onEvent(handler) {
3425
- this.handlers.push(handler);
3426
- }
3427
- async emit(type, data) {
3428
- for (const h of this.handlers) {
3570
+ const reader = response.body?.getReader();
3571
+ if (!reader) return;
3572
+ const decoder = new TextDecoder();
3573
+ let buffer = "";
3574
+ let nextIndex = 0;
3575
+ let lastUsage;
3576
+ try {
3577
+ while (true) {
3578
+ const { done, value } = await reader.read();
3579
+ idle.touch();
3580
+ if (done) break;
3581
+ buffer += decoder.decode(value, { stream: true });
3582
+ const lines = buffer.split("\n");
3583
+ buffer = lines.pop() || "";
3584
+ for (const line of lines) {
3585
+ const trimmed = line.trim();
3586
+ if (!trimmed.startsWith("data: ")) continue;
3587
+ const data = trimmed.slice(6);
3588
+ if (!data) continue;
3589
+ let payload;
3590
+ try {
3591
+ payload = JSON.parse(data);
3592
+ } catch {
3593
+ continue;
3594
+ }
3595
+ if (payload.usageMetadata) {
3596
+ lastUsage = payload.usageMetadata;
3597
+ }
3598
+ const candidate = payload.candidates?.[0];
3599
+ const parts = candidate?.content?.parts ?? [];
3600
+ for (const part of parts) {
3601
+ if (part.functionCall) {
3602
+ const args = part.functionCall.args ?? {};
3603
+ const callId = part.functionCall.id ?? `gemini_call_${nextIndex}`;
3604
+ yield {
3605
+ type: "tool_call",
3606
+ index: nextIndex,
3607
+ id: callId,
3608
+ name: part.functionCall.name ?? "",
3609
+ arguments: JSON.stringify(args)
3610
+ };
3611
+ nextIndex++;
3612
+ continue;
3613
+ }
3614
+ if (part.text) {
3615
+ yield { type: "text", content: part.text };
3616
+ }
3617
+ }
3618
+ }
3619
+ }
3620
+ } catch (err) {
3621
+ if (idle.fired && !opts?.signal?.aborted) {
3622
+ throw new PatterConnectionError(
3623
+ `Gemini stream idle timeout \u2014 no data for ${LLM_STREAM_IDLE_TIMEOUT_MS / 1e3}s`
3624
+ );
3625
+ }
3626
+ throw err;
3627
+ } finally {
3628
+ idle.clear();
3629
+ reader.cancel().catch(() => {
3630
+ });
3631
+ }
3632
+ if (lastUsage) {
3633
+ const cached = lastUsage.cachedContentTokenCount ?? 0;
3634
+ yield {
3635
+ type: "usage",
3636
+ inputTokens: Math.max(0, (lastUsage.promptTokenCount ?? 0) - cached),
3637
+ outputTokens: lastUsage.candidatesTokenCount,
3638
+ cacheReadInputTokens: cached
3639
+ };
3640
+ }
3641
+ yield { type: "done" };
3642
+ }
3643
+ };
3644
/**
 * JSON-Schema keywords that are copied through to Gemini function
 * declarations. Any keyword not listed here (for example `$schema` or
 * `additionalProperties`) is dropped by `sanitizeGeminiSchema`.
 */
var GEMINI_SCHEMA_KEYS = /* @__PURE__ */ new Set([
  "type",
  "description",
  "properties",
  "items",
  "enum",
  "required",
  "nullable",
  "format",
  "minimum",
  "maximum",
  "minLength",
  "maxLength",
  "minItems",
  "maxItems",
  "pattern",
  "anyOf",
  "default",
  "title"
]);
/**
 * Allowed keywords whose values are plain data rather than nested schemas.
 * They are copied verbatim so that object-valued defaults / enum members do
 * not have their own keys filtered as if they were schema keywords.
 */
var GEMINI_SCHEMA_LITERAL_KEYS = /* @__PURE__ */ new Set(["enum", "required", "default"]);
/**
 * Sanitize the value of a `properties` keyword.
 *
 * The keys of a `properties` map are user-chosen property names, not schema
 * keywords, so every name is kept and only the per-property schemas are
 * sanitized. `Object.fromEntries` defines own data properties, so a property
 * literally named `__proto__` cannot alter the result's prototype.
 *
 * @param {unknown} properties - Value found under a `properties` keyword.
 * @returns {unknown} A new map with each property schema sanitized; a
 *   non-object (malformed) value is handed to `sanitizeGeminiSchema` as-is.
 */
function sanitizeGeminiProperties(properties) {
  if (properties === null || typeof properties !== "object" || Array.isArray(properties)) {
    return sanitizeGeminiSchema(properties);
  }
  return Object.fromEntries(
    Object.entries(properties).map(([name, propertySchema]) => [
      name,
      sanitizeGeminiSchema(propertySchema)
    ])
  );
}
/**
 * Recursively strip JSON-Schema keywords that are not in
 * `GEMINI_SCHEMA_KEYS`, returning a new structure (the input is not mutated).
 *
 * - Arrays are sanitized element by element.
 * - Primitives and `null` are returned unchanged.
 * - For schema objects, only allow-listed keywords survive. `properties` keeps
 *   every property name, data-valued keywords (`enum`, `required`, `default`)
 *   are copied verbatim, and all other kept keywords are sanitized recursively.
 *
 * @param {unknown} schema - A JSON-Schema node (object, array, or primitive).
 * @returns {unknown} The sanitized copy.
 */
function sanitizeGeminiSchema(schema) {
  if (Array.isArray(schema)) return schema.map(sanitizeGeminiSchema);
  if (schema === null || typeof schema !== "object") return schema;
  const out = {};
  for (const [key, value] of Object.entries(schema)) {
    if (!GEMINI_SCHEMA_KEYS.has(key)) continue;
    if (key === "properties") {
      // Property names are not keywords: never filter them against the allow-list.
      out[key] = sanitizeGeminiProperties(value);
    } else if (GEMINI_SCHEMA_LITERAL_KEYS.has(key)) {
      out[key] = value;
    } else {
      out[key] = sanitizeGeminiSchema(value);
    }
  }
  return out;
}
3675
/**
 * Build the Gemini `tools` payload from a list of tool definitions.
 *
 * Each definition is read from `tool.function` when that field is present,
 * otherwise from the tool object itself. Missing names/descriptions become
 * empty strings, and a missing `parameters` schema defaults to an empty
 * object schema before being passed through `sanitizeGeminiSchema`.
 *
 * @param {Array<object>} tools - Tool definitions to convert.
 * @returns {Array<object>} `[]` when there are no tools, otherwise a
 *   single-element array `[{ functionDeclarations }]`.
 */
function toGeminiTools(tools) {
  const functionDeclarations = tools.map((tool) => {
    const spec = tool.function ?? tool;
    const name = String(spec.name ?? "");
    const description = String(spec.description ?? "");
    const parameters = sanitizeGeminiSchema(
      spec.parameters ?? { type: "object", properties: {} }
    );
    return { name, description, parameters };
  });
  return functionDeclarations.length > 0 ? [{ functionDeclarations }] : [];
}
3687
/**
 * Convert a chat-style message list into Gemini request pieces.
 *
 * Role handling:
 * - `system`: non-empty string contents are collected and joined with a blank
 *   line into `systemInstruction`.
 * - `user`: non-empty string content becomes a `user` text entry.
 * - `assistant`: becomes a `model` entry holding the optional text plus one
 *   `functionCall` part per `tool_calls` item. Arguments that fail to parse,
 *   or that parse to anything other than a JSON object (including arrays),
 *   fall back to `{}`.
 * - `tool`: becomes a `user` entry with a `functionResponse` part. The
 *   response is always a JSON object: object content is used as-is,
 *   `null`/`undefined` becomes `{}`, and any other value is wrapped as
 *   `{ result: value }`. The function name comes from `name`, else from the
 *   matching earlier tool call, else from `tool_call_id`.
 * - Any other role, and any non-string or empty user/system content, is skipped.
 *
 * Consecutive entries that contain only function responses are merged into
 * one entry. If the conversation would start with a `model` entry, a
 * placeholder user turn `"(call connected)"` is inserted first (presumably
 * because Gemini expects the first turn to come from the user — confirm).
 *
 * @param {Array<object>} messages - Messages with `role`, `content`, and
 *   optionally `tool_calls`, `tool_call_id`, `name`.
 * @returns {{ systemInstruction: string, contents: Array<object> }}
 */
function toGeminiContents(messages) {
  const isJsonObject = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
  const isFunctionResponseOnly = (entry) => entry.role === "user" && entry.parts.every((p) => p.functionResponse !== void 0);
  const systemParts = [];
  const contents = [];
  // Remembers call id -> function name so tool results can be labelled.
  const fnNameByCallId = /* @__PURE__ */ new Map();
  for (const rawMsg of messages) {
    const role = rawMsg.role;
    if (role === "system") {
      if (typeof rawMsg.content === "string" && rawMsg.content) {
        systemParts.push(rawMsg.content);
      }
      continue;
    }
    if (role === "user") {
      if (typeof rawMsg.content === "string" && rawMsg.content) {
        contents.push({ role: "user", parts: [{ text: rawMsg.content }] });
      }
      continue;
    }
    if (role === "assistant") {
      const parts = [];
      if (typeof rawMsg.content === "string" && rawMsg.content) {
        parts.push({ text: rawMsg.content });
      }
      for (const tc of rawMsg.tool_calls ?? []) {
        let args = {};
        try {
          const parsed = JSON.parse(tc.function?.arguments ?? "{}");
          // Only a JSON object is usable as call arguments; arrays and
          // primitives fall back to an empty object.
          if (isJsonObject(parsed)) args = parsed;
        } catch {
          args = {};
        }
        if (tc.id && tc.function?.name) fnNameByCallId.set(tc.id, tc.function.name);
        parts.push({
          functionCall: {
            name: tc.function?.name ?? "",
            args,
            id: tc.id
          }
        });
      }
      if (parts.length > 0) contents.push({ role: "model", parts });
      continue;
    }
    if (role === "tool") {
      const raw = rawMsg.content;
      let response;
      if (typeof raw === "string") {
        try {
          const parsed = JSON.parse(raw);
          response = isJsonObject(parsed) ? parsed : { result: parsed };
        } catch {
          // Not JSON: send the raw text as the result.
          response = { result: raw };
        }
      } else if (raw === null || raw === void 0) {
        response = {};
      } else {
        // Keep the response an object even for array / number / boolean content.
        response = isJsonObject(raw) ? raw : { result: raw };
      }
      contents.push({
        role: "user",
        parts: [
          {
            functionResponse: {
              name: rawMsg.name ?? fnNameByCallId.get(rawMsg.tool_call_id ?? "") ?? rawMsg.tool_call_id ?? "",
              response,
              id: rawMsg.tool_call_id
            }
          }
        ]
      });
      continue;
    }
  }
  // Collapse runs of function-response-only entries into a single entry.
  const merged = [];
  for (const entry of contents) {
    const prev = merged[merged.length - 1];
    if (prev && isFunctionResponseOnly(prev) && isFunctionResponseOnly(entry)) {
      prev.parts.push(...entry.parts);
    } else {
      merged.push(entry);
    }
  }
  if (merged.length > 0 && merged[0].role === "model") {
    merged.unshift({ role: "user", parts: [{ text: "(call connected)" }] });
  }
  return { systemInstruction: systemParts.join("\n\n"), contents: merged };
}
3773
+
3774
+ // src/providers/gemini-live.ts
3775
+ var GEMINI_DEFAULT_INPUT_SR = 16e3;
3776
+ var GEMINI_DEFAULT_OUTPUT_SR = 24e3;
3777
+ var GeminiLiveAdapter = class {
3778
+ constructor(apiKey, options = {}) {
3779
+ this.apiKey = apiKey;
3780
+ this.model = options.model ?? "gemini-2.5-flash-native-audio-preview-09-2025";
3781
+ this.voice = options.voice ?? "Puck";
3782
+ this.instructions = options.instructions ?? "";
3783
+ this.language = options.language ?? "en-US";
3784
+ this.tools = options.tools;
3785
+ this.inputSampleRate = options.inputSampleRate ?? GEMINI_DEFAULT_INPUT_SR;
3786
+ this.outputSampleRate = options.outputSampleRate ?? GEMINI_DEFAULT_OUTPUT_SR;
3787
+ this.temperature = options.temperature ?? 0.8;
3788
+ }
3789
+ apiKey;
3790
+ model;
3791
+ voice;
3792
+ instructions;
3793
+ language;
3794
+ tools;
3795
+ inputSampleRate;
3796
+ /** Output sample rate — exposed so callers can configure downstream transcoding. */
3797
+ outputSampleRate;
3798
+ temperature;
3799
+ client = null;
3800
+ session = null;
3801
+ receiveLoop = null;
3802
+ handlers = [];
3803
+ running = false;
3804
+ /**
3805
+ * Tracks call_id -> function name so tool responses can be sent back with
3806
+ * the correct `name` field (Gemini expects the original function name,
3807
+ * not the call_id).
3808
+ */
3809
+ pendingToolCalls = /* @__PURE__ */ new Map();
3810
+ /** Lazily import @google/genai, open a Live session, and start the receive loop. */
3811
+ async connect() {
3812
+ let genaiModule;
3813
+ try {
3814
+ const modName = "@google/genai";
3815
+ genaiModule = await import(modName);
3816
+ } catch {
3817
+ throw new Error(
3818
+ '\nGemini Live requires the "@google/genai" package, which is not installed.\n\n Install: npm install @google/genai\n\nThis is an optional peer dependency of getpatter \u2014 it is only needed when\nyou use GeminiLive as an agent engine. Other LLM/engine providers do not\nrequire it.\n'
3819
+ );
3820
+ }
3821
+ const { GoogleGenAI } = genaiModule;
3822
+ this.client = new GoogleGenAI({
3823
+ apiKey: this.apiKey,
3824
+ httpOptions: { apiVersion: "v1alpha" }
3825
+ });
3826
+ const config = {
3827
+ responseModalities: ["AUDIO"],
3828
+ speechConfig: {
3829
+ voiceConfig: { prebuiltVoiceConfig: { voiceName: this.voice } },
3830
+ languageCode: this.language
3831
+ },
3832
+ temperature: this.temperature,
3833
+ // Without these, native-audio sessions produced NO user transcript
3834
+ // ever and no assistant transcript in AUDIO modality — logs/history/
3835
+ // metrics got nothing for Gemini Live calls. Mirrors Python.
3836
+ inputAudioTranscription: {},
3837
+ outputAudioTranscription: {}
3838
+ };
3839
+ if (this.instructions) {
3840
+ config.systemInstruction = { parts: [{ text: this.instructions }] };
3841
+ }
3842
+ if (this.tools?.length) {
3843
+ config.tools = [
3844
+ {
3845
+ functionDeclarations: this.tools.map((t) => ({
3846
+ name: t.name,
3847
+ description: t.description,
3848
+ // Strip JSON-Schema keys the Live API's proto Schema rejects
3849
+ // ($schema, additionalProperties — strict-mode and zod-derived
3850
+ // MCP tools): one such tool 400'd the whole session.
3851
+ parameters: sanitizeGeminiSchema(t.parameters)
3852
+ }))
3853
+ }
3854
+ ];
3855
+ }
3856
+ const liveApi = this.client.live;
3857
+ if (!liveApi?.connect) {
3858
+ throw new Error("@google/genai: live.connect is not available in this version");
3859
+ }
3860
+ this.session = await liveApi.connect({ model: this.model, config });
3861
+ this.running = true;
3862
+ this.receiveLoop = this.pumpReceive().catch((err) => {
3863
+ getLogger().error(`Gemini Live receive loop error: ${String(err)}`);
3864
+ });
3865
+ }
3866
+ /** Send a PCM audio chunk to Gemini as base64 inline data. */
3867
+ sendAudio(pcm) {
3868
+ if (!this.session || !this.running) return;
3869
+ const mime = `audio/pcm;rate=${this.inputSampleRate}`;
3870
+ const sess = this.session;
3871
+ const result = sess.sendRealtimeInput?.({
3872
+ media: { data: pcm.toString("base64"), mimeType: mime }
3873
+ });
3874
+ if (result instanceof Promise) {
3875
+ void result.catch(
3876
+ (err) => getLogger().warn(`Gemini Live sendAudio error: ${String(err)}`)
3877
+ );
3878
+ }
3879
+ }
3880
+ /** Send a text turn to Gemini and mark the turn complete. */
3881
+ async sendText(text) {
3882
+ if (!this.session) return;
3883
+ const sess = this.session;
3884
+ await sess.sendClientContent?.({
3885
+ turns: { role: "user", parts: [{ text }] },
3886
+ turnComplete: true
3887
+ });
3888
+ }
3889
+ /** Send a tool/function-call result back to Gemini. */
3890
+ async sendFunctionResult(callId, result) {
3891
+ if (!this.session) return;
3892
+ const sess = this.session;
3893
+ const name = this.pendingToolCalls.get(callId) ?? callId;
3894
+ this.pendingToolCalls.delete(callId);
3895
+ await sess.sendToolResponse?.({
3896
+ functionResponses: [
3897
+ { id: callId, name, response: { result } }
3898
+ ]
3899
+ });
3900
+ }
3901
+ /** No-op — Gemini Live barge-in is VAD-driven, not client-cancelled. */
3902
+ cancelResponse() {
3903
+ getLogger().debug("Gemini Live: cancelResponse is implicit via VAD");
3904
+ }
3905
+ /** Register an event handler that receives every Gemini Live event. */
3906
+ onEvent(handler) {
3907
+ this.handlers.push(handler);
3908
+ }
3909
+ async emit(type, data) {
3910
+ for (const h of this.handlers) {
3429
3911
  try {
3430
3912
  await h(type, data);
3431
3913
  } catch (err) {
@@ -3452,9 +3934,20 @@ var GeminiLiveAdapter = class {
3452
3934
  }
3453
3935
  if (part.text) await this.emit("transcript_output", part.text);
3454
3936
  }
3937
+ if (sc.inputTranscription?.text) {
3938
+ await this.emit("transcript_input", sc.inputTranscription.text);
3939
+ }
3940
+ if (sc.outputTranscription?.text) {
3941
+ await this.emit("transcript_output", sc.outputTranscription.text);
3942
+ }
3455
3943
  if (sc.turnComplete) await this.emit("response_done", null);
3456
3944
  if (sc.interrupted) await this.emit("speech_started", null);
3457
3945
  }
3946
+ if (r.goAway) {
3947
+ getLogger().warn(
3948
+ `Gemini Live goAway received \u2014 session ends in ${r.goAway.timeLeft ?? "unknown"}`
3949
+ );
3950
+ }
3458
3951
  if (r.toolCall) {
3459
3952
  for (const fn of r.toolCall.functionCalls ?? []) {
3460
3953
  const args = fn.args ?? {};
@@ -3524,6 +4017,10 @@ var UltravoxRealtimeAdapter = class {
3524
4017
  sampleRate;
3525
4018
  firstMessage;
3526
4019
  ws = null;
4020
+ /** Last Ultravox state string (turn-end transition detection). */
4021
+ lastUltravoxState = "";
4022
+ /** Whether the current agent turn streamed delta frames (dedupe finals). */
4023
+ agentStreamedDeltas = false;
3527
4024
  handlers = [];
3528
4025
  /** Exposed for diagnostics — true while the underlying socket is open. */
3529
4026
  running = false;
@@ -3575,7 +4072,7 @@ var UltravoxRealtimeAdapter = class {
3575
4072
  const call = await resp.json();
3576
4073
  if (!call.joinUrl) throw new Error("Ultravox response missing joinUrl");
3577
4074
  this.ws = new WebSocket(call.joinUrl);
3578
- await new Promise((resolve, reject) => {
4075
+ await new Promise((resolve2, reject) => {
3579
4076
  const ws = this.ws;
3580
4077
  let settled = false;
3581
4078
  const timer = setTimeout(() => {
@@ -3595,7 +4092,7 @@ var UltravoxRealtimeAdapter = class {
3595
4092
  settled = true;
3596
4093
  clearTimeout(timer);
3597
4094
  ws.off("error", onError);
3598
- resolve();
4095
+ resolve2();
3599
4096
  };
3600
4097
  const onError = (err) => {
3601
4098
  if (settled) return;
@@ -3678,10 +4175,20 @@ var UltravoxRealtimeAdapter = class {
3678
4175
  const etype = event.type ?? "";
3679
4176
  if (etype === "transcript") {
3680
4177
  const role = event.role;
3681
- const text = event.text ?? event.delta ?? "";
4178
+ const delta = event.delta ?? "";
4179
+ const fullText = event.text ?? "";
3682
4180
  const isFinal = Boolean(event.final);
3683
- if (role === "user" && isFinal && text) await this.emit("transcript_input", text);
3684
- else if (role === "agent" && text) await this.emit("transcript_output", text);
4181
+ if (role === "user" && isFinal && (fullText || delta)) {
4182
+ await this.emit("transcript_input", fullText || delta);
4183
+ } else if (role === "agent") {
4184
+ if (delta) {
4185
+ await this.emit("transcript_output", delta);
4186
+ this.agentStreamedDeltas = true;
4187
+ } else if (isFinal && fullText && !this.agentStreamedDeltas) {
4188
+ await this.emit("transcript_output", fullText);
4189
+ }
4190
+ if (isFinal) this.agentStreamedDeltas = false;
4191
+ }
3685
4192
  } else if (etype === "client_tool_invocation") {
3686
4193
  await this.emit("function_call", {
3687
4194
  call_id: event.invocationId ?? "",
@@ -3690,8 +4197,13 @@ var UltravoxRealtimeAdapter = class {
3690
4197
  });
3691
4198
  } else if (etype === "state") {
3692
4199
  const state = event.state;
3693
- if (state === "listening") await this.emit("speech_started", null);
3694
- else if (state === "idle") await this.emit("response_done", null);
4200
+ const prev = this.lastUltravoxState;
4201
+ this.lastUltravoxState = state ?? "";
4202
+ if (state === "listening" && prev === "speaking") {
4203
+ await this.emit("response_done", null);
4204
+ } else if (state === "idle") {
4205
+ await this.emit("response_done", null);
4206
+ }
3695
4207
  } else if (etype === "playback_clear_buffer") {
3696
4208
  await this.emit("speech_started", null);
3697
4209
  }
@@ -3789,14 +4301,23 @@ function scheduleCron(cron, callback) {
3789
4301
  };
3790
4302
  }
3791
4303
  function scheduleOnce(at, callback) {
3792
- const delayMs = at.getTime() - Date.now();
3793
4304
  let cancelled = false;
3794
4305
  let done = false;
3795
- const timer = setTimeout(() => {
3796
- if (cancelled) return;
3797
- done = true;
3798
- wrapCallback(callback)();
3799
- }, Math.max(0, delayMs));
4306
+ const MAX_TIMEOUT_MS = 2147483647;
4307
+ let timer;
4308
+ const arm = () => {
4309
+ const remaining = at.getTime() - Date.now();
4310
+ if (remaining > MAX_TIMEOUT_MS) {
4311
+ timer = setTimeout(arm, MAX_TIMEOUT_MS);
4312
+ return;
4313
+ }
4314
+ timer = setTimeout(() => {
4315
+ if (cancelled) return;
4316
+ done = true;
4317
+ wrapCallback(callback)();
4318
+ }, Math.max(0, remaining));
4319
+ };
4320
+ arm();
3800
4321
  return {
3801
4322
  jobId: `once-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
3802
4323
  cancel() {
@@ -3892,7 +4413,9 @@ var ELEVENLABS_VOICE_ID_BY_NAME = {
3892
4413
  var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
3893
4414
  var CARRIER_NATIVE_FORMAT = {
3894
4415
  twilio: "ulaw_8000",
3895
- telnyx: "pcm_16000",
4416
+ // The SDK's streaming_start pins the Telnyx wire to PCMU/μ-law @ 8 kHz —
4417
+ // 'pcm_16000' here shipped raw PCM16 onto a μ-law wire (static).
4418
+ telnyx: "ulaw_8000",
3896
4419
  // Plivo streams mulaw 8 kHz (we pin contentType in the answer XML).
3897
4420
  plivo: "ulaw_8000"
3898
4421
  };
@@ -4018,18 +4541,14 @@ var ElevenLabsTTS = class _ElevenLabsTTS {
4018
4541
  /**
4019
4542
  * Construct an instance pre-configured for Telnyx bidirectional media.
4020
4543
  *
4021
- * Telnyx's default media-streaming codec is L16 PCM @ 16 kHz, which
4022
- * matches our default Telnyx handler. We pick `pcm_16000` so the audio
4023
- * flows end-to-end with zero resampling or transcoding.
4024
- *
4025
- * Trade-off: if your Telnyx profile is pinned to PCMU/8000 (μ-law),
4026
- * construct `ElevenLabsTTS` directly with `outputFormat: 'ulaw_8000'`
4027
- * — Telnyx supports that natively too.
4544
+ * The SDK's ``streaming_start`` pins the Telnyx wire to PCMU/μ-law @
4545
+ * 8 kHz (stream_bidirectional_codec=PCMU), so μ-law output flows
4546
+ * end-to-end with zero resampling or transcoding.
4028
4547
  */
4029
4548
  static forTelnyx(apiKey, options = {}) {
4030
4549
  return new _ElevenLabsTTS(apiKey, {
4031
4550
  ...options,
4032
- outputFormat: ElevenLabsOutputFormat.PCM_16000
4551
+ outputFormat: ElevenLabsOutputFormat.ULAW_8000
4033
4552
  });
4034
4553
  }
4035
4554
  /**
@@ -4163,7 +4682,7 @@ var CartesiaTTS = class _CartesiaTTS {
4163
4682
  static forTwilio(apiKey, options = {}) {
4164
4683
  return new _CartesiaTTS(apiKey, {
4165
4684
  ...options,
4166
- sampleRate: CartesiaTTSSampleRate.HZ_8000
4685
+ sampleRate: CartesiaTTSSampleRate.HZ_16000
4167
4686
  });
4168
4687
  }
4169
4688
  /**
@@ -4507,7 +5026,10 @@ var WhisperSTT = class _WhisperSTT {
4507
5026
  * ``(apiKey, model, language, bufferSize, responseFormat)`` — callers using
4508
5027
  * the old order will need to swap ``language`` and ``model``.
4509
5028
  */
5029
+ /** Construction args replayed by clone(). */
5030
+ patterCtorArgs;
4510
5031
  constructor(apiKey, language, model = "whisper-1", bufferSize = DEFAULT_BUFFER_SIZE, responseFormat = "json") {
5032
+ this.patterCtorArgs = [apiKey, language, model, bufferSize, responseFormat];
4511
5033
  if (!ALLOWED_MODELS.has(model)) {
4512
5034
  throw new Error(
4513
5035
  `WhisperSTT: unsupported model "${model}". Expected one of ${[...ALLOWED_MODELS].join(", ")}.`
@@ -4524,6 +5046,15 @@ var WhisperSTT = class _WhisperSTT {
4524
5046
  return new _WhisperSTT(apiKey, language, model);
4525
5047
  }
4526
5048
  /** Reset the audio buffer and arm the adapter for incoming chunks. */
5049
+ /**
5050
+ * Fresh adapter built with this instance's construction arguments —
5051
+ * called per call by the stream handler so concurrent calls never share
5052
+ * connection state (sockets/queues; cross-call transcript bleed).
5053
+ */
5054
+ clone() {
5055
+ const ctor = this.constructor;
5056
+ return new ctor(...this.patterCtorArgs);
5057
+ }
4527
5058
  async connect() {
4528
5059
  this.running = true;
4529
5060
  this.chunks = [];
@@ -4666,6 +5197,11 @@ var OpenAITranscribeSTT = class extends WhisperSTT {
4666
5197
  `OpenAITranscribeSTT: unsupported model "${model}". Expected one of ${[...ALLOWED_MODELS2].join(", ")}. For "whisper-1", use WhisperSTT instead.`
4667
5198
  );
4668
5199
  }
5200
+ if (responseFormat === "verbose_json") {
5201
+ throw new Error(
5202
+ `OpenAITranscribeSTT: responseFormat "verbose_json" is only supported by whisper-1 (use WhisperSTT). "${model}" accepts "json".`
5203
+ );
5204
+ }
4669
5205
  super(apiKey, language, model, bufferSize, responseFormat);
4670
5206
  }
4671
5207
  };
@@ -4712,7 +5248,7 @@ var CartesiaSTTServerEvent = {
4712
5248
  var CartesiaSTTClientFrame = {
4713
5249
  FINALIZE: "finalize"
4714
5250
  };
4715
- var DEFAULT_BASE_URL = "https://api.cartesia.ai";
5251
+ var DEFAULT_BASE_URL2 = "https://api.cartesia.ai";
4716
5252
  var API_VERSION = "2025-04-16";
4717
5253
  var USER_AGENT = "Patter/1.0";
4718
5254
  var KEEPALIVE_INTERVAL_MS = 3e4;
@@ -4721,6 +5257,7 @@ var CartesiaSTT = class {
4721
5257
  constructor(apiKey, options = {}) {
4722
5258
  this.apiKey = apiKey;
4723
5259
  this.options = options;
5260
+ this.patterCtorArgs = [apiKey, options];
4724
5261
  if (!apiKey) {
4725
5262
  throw new Error("CartesiaSTT requires a non-empty apiKey");
4726
5263
  }
@@ -4737,6 +5274,8 @@ var CartesiaSTT = class {
4737
5274
  * `null` until the first transcript event arrives (matches Python's `None`).
4738
5275
  */
4739
5276
  requestId = null;
5277
+ /** Construction args replayed by clone(). */
5278
+ patterCtorArgs;
4740
5279
  /**
4741
5280
  * Open a fresh WebSocket without arming any message / keepalive handlers
4742
5281
  * and without taking ownership on `this.ws`. Returns the OPEN socket so
@@ -4752,14 +5291,14 @@ var CartesiaSTT = class {
4752
5291
  const ws = new WebSocket2(url, {
4753
5292
  headers: { "User-Agent": USER_AGENT }
4754
5293
  });
4755
- await new Promise((resolve, reject) => {
5294
+ await new Promise((resolve2, reject) => {
4756
5295
  const timer = setTimeout(
4757
5296
  () => reject(new Error("Cartesia STT park connect timeout")),
4758
5297
  CONNECT_TIMEOUT_MS
4759
5298
  );
4760
5299
  ws.once("open", () => {
4761
5300
  clearTimeout(timer);
4762
- resolve();
5301
+ resolve2();
4763
5302
  });
4764
5303
  ws.once("error", (err) => {
4765
5304
  clearTimeout(timer);
@@ -4770,7 +5309,7 @@ var CartesiaSTT = class {
4770
5309
  }
4771
5310
  buildWsUrl() {
4772
5311
  const opts = this.options;
4773
- const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL;
5312
+ const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL2;
4774
5313
  let base;
4775
5314
  if (rawBase.startsWith("http://")) {
4776
5315
  base = `ws://${rawBase.slice("http://".length)}`;
@@ -4809,7 +5348,7 @@ var CartesiaSTT = class {
4809
5348
  const url = this.buildWsUrl();
4810
5349
  let ws = null;
4811
5350
  try {
4812
- ws = await new Promise((resolve, reject) => {
5351
+ ws = await new Promise((resolve2, reject) => {
4813
5352
  const sock = new WebSocket2(url, {
4814
5353
  headers: { "User-Agent": USER_AGENT }
4815
5354
  });
@@ -4822,7 +5361,7 @@ var CartesiaSTT = class {
4822
5361
  }, 5e3);
4823
5362
  sock.once("open", () => {
4824
5363
  clearTimeout(timer);
4825
- resolve(sock);
5364
+ resolve2(sock);
4826
5365
  });
4827
5366
  sock.once("error", (err) => {
4828
5367
  clearTimeout(timer);
@@ -4844,19 +5383,28 @@ var CartesiaSTT = class {
4844
5383
  }
4845
5384
  }
4846
5385
  /** Open the streaming WebSocket and arm message + keepalive handlers. */
5386
+ /**
5387
+ * Fresh adapter built with this instance's construction arguments —
5388
+ * called per call by the stream handler so concurrent calls never share
5389
+ * connection state (sockets/queues; cross-call transcript bleed).
5390
+ */
5391
+ clone() {
5392
+ const ctor = this.constructor;
5393
+ return new ctor(...this.patterCtorArgs);
5394
+ }
4847
5395
  async connect() {
4848
5396
  const url = this.buildWsUrl();
4849
5397
  this.ws = new WebSocket2(url, {
4850
5398
  headers: { "User-Agent": USER_AGENT }
4851
5399
  });
4852
- await new Promise((resolve, reject) => {
5400
+ await new Promise((resolve2, reject) => {
4853
5401
  const timer = setTimeout(
4854
5402
  () => reject(new Error("Cartesia STT connect timeout")),
4855
5403
  CONNECT_TIMEOUT_MS
4856
5404
  );
4857
5405
  this.ws.once("open", () => {
4858
5406
  clearTimeout(timer);
4859
- resolve();
5407
+ resolve2();
4860
5408
  });
4861
5409
  this.ws.once("error", (err) => {
4862
5410
  clearTimeout(timer);
@@ -4920,7 +5468,13 @@ var CartesiaSTT = class {
4920
5468
  }
4921
5469
  emit(transcript) {
4922
5470
  for (const cb of this.callbacks) {
4923
- cb(transcript);
5471
+ try {
5472
+ Promise.resolve(cb(transcript)).catch(
5473
+ (err) => getLogger().error(`STT transcript callback failed: ${String(err)}`)
5474
+ );
5475
+ } catch (err) {
5476
+ getLogger().error(`STT transcript callback threw: ${String(err)}`);
5477
+ }
4924
5478
  }
4925
5479
  }
4926
5480
  /** Send a binary PCM16-LE audio chunk to Cartesia for transcription. */
@@ -4944,12 +5498,12 @@ var CartesiaSTT = class {
4944
5498
  */
4945
5499
  async finalize() {
4946
5500
  if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
4947
- await new Promise((resolve) => {
5501
+ await new Promise((resolve2) => {
4948
5502
  this.ws.send(CartesiaSTTClientFrame.FINALIZE, (err) => {
4949
5503
  if (err) {
4950
5504
  getLogger().debug(`Cartesia finalize send failed: ${String(err)}`);
4951
5505
  }
4952
- resolve();
5506
+ resolve2();
4953
5507
  });
4954
5508
  });
4955
5509
  }
@@ -4998,10 +5552,10 @@ var CartesiaSTT = class {
4998
5552
  if (!ws) return;
4999
5553
  if (ws.readyState === WebSocket2.OPEN) {
5000
5554
  try {
5001
- await new Promise((resolve) => {
5555
+ await new Promise((resolve2) => {
5002
5556
  ws.send(CartesiaSTTClientFrame.FINALIZE, (err) => {
5003
5557
  if (err) getLogger().warn(`CartesiaSTT finalize send failed: ${String(err)}`);
5004
- resolve();
5558
+ resolve2();
5005
5559
  });
5006
5560
  });
5007
5561
  } catch (err) {
@@ -5009,18 +5563,18 @@ var CartesiaSTT = class {
5009
5563
  }
5010
5564
  }
5011
5565
  if (ws.readyState === WebSocket2.OPEN || ws.readyState === WebSocket2.CONNECTING) {
5012
- await new Promise((resolve) => {
5566
+ await new Promise((resolve2) => {
5013
5567
  const done = () => {
5014
5568
  ws.off("close", done);
5015
5569
  ws.off("error", done);
5016
- resolve();
5570
+ resolve2();
5017
5571
  };
5018
5572
  ws.once("close", done);
5019
5573
  ws.once("error", done);
5020
5574
  try {
5021
5575
  ws.close();
5022
5576
  } catch {
5023
- resolve();
5577
+ resolve2();
5024
5578
  }
5025
5579
  });
5026
5580
  }
@@ -5120,6 +5674,7 @@ var SonioxSTT = class _SonioxSTT {
5120
5674
  ws = null;
5121
5675
  callbacks = /* @__PURE__ */ new Set();
5122
5676
  final = new TokenAccumulator();
5677
+ lastInterimText = "";
5123
5678
  keepaliveTimer = null;
5124
5679
  apiKey;
5125
5680
  model;
@@ -5132,7 +5687,10 @@ var SonioxSTT = class _SonioxSTT {
5132
5687
  maxEndpointDelayMs;
5133
5688
  clientReferenceId;
5134
5689
  baseUrl;
5690
+ /** Construction args replayed by clone(). */
5691
+ patterCtorArgs;
5135
5692
  constructor(apiKey, options = {}) {
5693
+ this.patterCtorArgs = [apiKey, options];
5136
5694
  if (!apiKey) {
5137
5695
  throw new Error("Soniox apiKey is required");
5138
5696
  }
@@ -5181,14 +5739,23 @@ var SonioxSTT = class _SonioxSTT {
5181
5739
  return config;
5182
5740
  }
5183
5741
  /** Open the streaming WebSocket and send the initial config payload. */
5742
+ /**
5743
+ * Fresh adapter built with this instance's construction arguments —
5744
+ * called per call by the stream handler so concurrent calls never share
5745
+ * connection state (sockets/queues; cross-call transcript bleed).
5746
+ */
5747
+ clone() {
5748
+ const ctor = this.constructor;
5749
+ return new ctor(...this.patterCtorArgs);
5750
+ }
5184
5751
  async connect() {
5185
5752
  this.final.reset();
5186
5753
  this.ws = new WebSocket3(this.baseUrl);
5187
- await new Promise((resolve, reject) => {
5754
+ await new Promise((resolve2, reject) => {
5188
5755
  const timer = setTimeout(() => reject(new Error("Soniox connect timeout")), 1e4);
5189
5756
  this.ws.once("open", () => {
5190
5757
  clearTimeout(timer);
5191
- resolve();
5758
+ resolve2();
5192
5759
  });
5193
5760
  this.ws.once("error", (err) => {
5194
5761
  clearTimeout(timer);
@@ -5252,7 +5819,8 @@ var SonioxSTT = class _SonioxSTT {
5252
5819
  }
5253
5820
  if (!emittedFinalThisMsg) {
5254
5821
  const text = (this.final.text + nonFinal.text).trim();
5255
- if (text) {
5822
+ if (text && text !== this.lastInterimText) {
5823
+ this.lastInterimText = text;
5256
5824
  const { sum: fSum, count: fCount } = this.final.raw;
5257
5825
  const { sum: nSum, count: nCount } = nonFinal.raw;
5258
5826
  const total = fCount + nCount;
@@ -5271,7 +5839,13 @@ var SonioxSTT = class _SonioxSTT {
5271
5839
  }
5272
5840
  emit(transcript) {
5273
5841
  for (const cb of this.callbacks) {
5274
- cb(transcript);
5842
+ try {
5843
+ Promise.resolve(cb(transcript)).catch(
5844
+ (err) => getLogger().error(`STT transcript callback failed: ${String(err)}`)
5845
+ );
5846
+ } catch (err) {
5847
+ getLogger().error(`STT transcript callback threw: ${String(err)}`);
5848
+ }
5275
5849
  }
5276
5850
  }
5277
5851
  /** Send a binary PCM16-LE audio chunk to Soniox for transcription. */
@@ -5280,6 +5854,19 @@ var SonioxSTT = class _SonioxSTT {
5280
5854
  if (audio.length === 0) return;
5281
5855
  this.ws.send(audio);
5282
5856
  }
5857
+ /**
5858
+ * Ask Soniox to finalize buffered audio immediately. The pipeline's VAD
5859
+ * ``speech_end`` fast-path duck-types ``stt.finalize`` — without this
5860
+ * every Soniox turn waited out the full endpointing delay. Mirrors Python.
5861
+ */
5862
+ finalize() {
5863
+ if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
5864
+ try {
5865
+ this.ws.send(JSON.stringify({ type: "finalize" }));
5866
+ } catch (err) {
5867
+ getLogger().debug(`Soniox finalize failed: ${String(err)}`);
5868
+ }
5869
+ }
5283
5870
  /** Register a transcript listener. */
5284
5871
  onTranscript(callback) {
5285
5872
  this.callbacks.add(callback);
@@ -5356,7 +5943,7 @@ var AssemblyAIClientFrame = {
5356
5943
  FORCE_ENDPOINT: "ForceEndpoint",
5357
5944
  TERMINATE: "Terminate"
5358
5945
  };
5359
- var DEFAULT_BASE_URL2 = "wss://streaming.assemblyai.com";
5946
+ var DEFAULT_BASE_URL3 = "wss://streaming.assemblyai.com";
5360
5947
  var DEFAULT_MIN_TURN_SILENCE_MS = 400;
5361
5948
  var CONNECT_TIMEOUT_MS2 = 1e4;
5362
5949
  var TERMINATION_WAIT_TIMEOUT_MS = 500;
@@ -5371,6 +5958,7 @@ var AssemblyAISTT = class _AssemblyAISTT {
5371
5958
  constructor(apiKey, options = {}) {
5372
5959
  this.apiKey = apiKey;
5373
5960
  this.options = options;
5961
+ this.patterCtorArgs = [apiKey, options];
5374
5962
  if (!apiKey) {
5375
5963
  throw new Error("AssemblyAISTT requires a non-empty apiKey");
5376
5964
  }
@@ -5412,6 +6000,8 @@ var AssemblyAISTT = class _AssemblyAISTT {
5412
6000
  sessionId = null;
5413
6001
  /** Unix timestamp when the AssemblyAI session expires. */
5414
6002
  expiresAt = null;
6003
+ /** Construction args replayed by clone(). */
6004
+ patterCtorArgs;
5415
6005
  /** Factory for Twilio calls — mulaw 8 kHz. */
5416
6006
  static forTwilio(apiKey, model = AssemblyAIModel.UNIVERSAL_STREAMING_ENGLISH) {
5417
6007
  return new _AssemblyAISTT(apiKey, {
@@ -5463,7 +6053,7 @@ var AssemblyAISTT = class _AssemblyAISTT {
5463
6053
  params.set(key, String(value));
5464
6054
  }
5465
6055
  }
5466
- const base = opts.baseUrl ?? DEFAULT_BASE_URL2;
6056
+ const base = opts.baseUrl ?? DEFAULT_BASE_URL3;
5467
6057
  return `${base}/v3/ws?${params.toString()}`;
5468
6058
  }
5469
6059
  buildHeaders() {
@@ -5496,7 +6086,7 @@ var AssemblyAISTT = class _AssemblyAISTT {
5496
6086
  const headers = this.buildHeaders();
5497
6087
  let ws = null;
5498
6088
  try {
5499
- ws = await new Promise((resolve, reject) => {
6089
+ ws = await new Promise((resolve2, reject) => {
5500
6090
  const sock = new WebSocket4(url, { headers });
5501
6091
  const timer = setTimeout(() => {
5502
6092
  try {
@@ -5507,7 +6097,7 @@ var AssemblyAISTT = class _AssemblyAISTT {
5507
6097
  }, 5e3);
5508
6098
  sock.once("open", () => {
5509
6099
  clearTimeout(timer);
5510
- resolve(sock);
6100
+ resolve2(sock);
5511
6101
  });
5512
6102
  sock.once("error", (err) => {
5513
6103
  clearTimeout(timer);
@@ -5533,6 +6123,15 @@ var AssemblyAISTT = class _AssemblyAISTT {
5533
6123
  }
5534
6124
  }
5535
6125
  /** Open the streaming WebSocket and arm message handlers. */
6126
+ /**
6127
+ * Fresh adapter built with this instance's construction arguments —
6128
+ * called per call by the stream handler so concurrent calls never share
6129
+ * connection state (sockets/queues; cross-call transcript bleed).
6130
+ */
6131
+ clone() {
6132
+ const ctor = this.constructor;
6133
+ return new ctor(...this.patterCtorArgs);
6134
+ }
5536
6135
  async connect() {
5537
6136
  this.closing = false;
5538
6137
  const url = this.buildUrl();
@@ -5541,14 +6140,14 @@ var AssemblyAISTT = class _AssemblyAISTT {
5541
6140
  this.attachHandlers(this.ws);
5542
6141
  }
5543
6142
  async awaitOpen(ws) {
5544
- await new Promise((resolve, reject) => {
6143
+ await new Promise((resolve2, reject) => {
5545
6144
  const timer = setTimeout(
5546
6145
  () => reject(new Error("AssemblyAI connect timeout")),
5547
6146
  CONNECT_TIMEOUT_MS2
5548
6147
  );
5549
6148
  ws.once("open", () => {
5550
6149
  clearTimeout(timer);
5551
- resolve();
6150
+ resolve2();
5552
6151
  });
5553
6152
  ws.once("error", (err) => {
5554
6153
  clearTimeout(timer);
@@ -5635,7 +6234,13 @@ var AssemblyAISTT = class _AssemblyAISTT {
5635
6234
  }
5636
6235
  emit(transcript) {
5637
6236
  for (const cb of this.callbacks) {
5638
- cb(transcript);
6237
+ try {
6238
+ Promise.resolve(cb(transcript)).catch(
6239
+ (err) => getLogger().error(`STT transcript callback failed: ${String(err)}`)
6240
+ );
6241
+ } catch (err) {
6242
+ getLogger().error(`STT transcript callback threw: ${String(err)}`);
6243
+ }
5639
6244
  }
5640
6245
  }
5641
6246
  /** Send a binary PCM/mu-law audio chunk to AssemblyAI for transcription. */
@@ -5745,14 +6350,14 @@ var AssemblyAISTT = class _AssemblyAISTT {
5745
6350
  this.ws.send(JSON.stringify({ type: AssemblyAIClientFrame.TERMINATE }));
5746
6351
  } catch {
5747
6352
  }
5748
- await new Promise((resolve) => {
6353
+ await new Promise((resolve2) => {
5749
6354
  const timer = setTimeout(() => {
5750
6355
  this.terminationResolve = null;
5751
- resolve();
6356
+ resolve2();
5752
6357
  }, TERMINATION_WAIT_TIMEOUT_MS);
5753
6358
  this.terminationResolve = () => {
5754
6359
  clearTimeout(timer);
5755
- resolve();
6360
+ resolve2();
5756
6361
  };
5757
6362
  });
5758
6363
  try {
@@ -5859,7 +6464,10 @@ var SpeechmaticsSTT = class {
5859
6464
  operatingPoint;
5860
6465
  domain;
5861
6466
  outputLocale;
6467
+ /** Construction args replayed by clone(). */
6468
+ patterCtorArgs;
5862
6469
  constructor(apiKey, options = {}) {
6470
+ this.patterCtorArgs = [apiKey, options];
5863
6471
  if (!apiKey) {
5864
6472
  throw new Error("Speechmatics apiKey is required");
5865
6473
  }
@@ -5931,13 +6539,22 @@ var SpeechmaticsSTT = class {
5931
6539
  };
5932
6540
  }
5933
6541
  /** Open the streaming WebSocket and send the `StartRecognition` frame. */
6542
+ /**
6543
+ * Fresh adapter built with this instance's construction arguments —
6544
+ * called per call by the stream handler so concurrent calls never share
6545
+ * connection state (sockets/queues; cross-call transcript bleed).
6546
+ */
6547
+ clone() {
6548
+ const ctor = this.constructor;
6549
+ return new ctor(...this.patterCtorArgs);
6550
+ }
5934
6551
  async connect() {
5935
6552
  if (this.ws !== null) return;
5936
6553
  const ws = new WebSocket5(this.baseUrl, {
5937
6554
  headers: { Authorization: `Bearer ${this.apiKey}` }
5938
6555
  });
5939
6556
  this.ws = ws;
5940
- await new Promise((resolve, reject) => {
6557
+ await new Promise((resolve2, reject) => {
5941
6558
  let settled = false;
5942
6559
  const settle = (fn) => {
5943
6560
  if (settled) return;
@@ -5951,7 +6568,7 @@ var SpeechmaticsSTT = class {
5951
6568
  ),
5952
6569
  CONNECT_TIMEOUT_MS3
5953
6570
  );
5954
- ws.once("open", () => settle(resolve));
6571
+ ws.once("open", () => settle(resolve2));
5955
6572
  ws.once("error", (err) => settle(() => reject(err)));
5956
6573
  ws.once("unexpected-response", (_req, res) => {
5957
6574
  const status = res?.statusCode ?? 0;
@@ -6083,7 +6700,9 @@ var SpeechmaticsSTT = class {
6083
6700
  emitTranscript(transcript) {
6084
6701
  for (const cb of this.transcriptCallbacks) {
6085
6702
  try {
6086
- cb(transcript);
6703
+ Promise.resolve(cb(transcript)).catch(
6704
+ (err) => getLogger().error(`SpeechmaticsSTT transcript callback failed: ${String(err)}`)
6705
+ );
6087
6706
  } catch (err) {
6088
6707
  getLogger().error(`SpeechmaticsSTT transcript callback threw: ${String(err)}`);
6089
6708
  }
@@ -6206,7 +6825,9 @@ function sanitiseLogStr(value, limit = 200) {
6206
6825
  }
6207
6826
  var CARRIER_NATIVE_FORMAT2 = {
6208
6827
  twilio: "ulaw_8000",
6209
- telnyx: "pcm_16000",
6828
+ // The SDK's streaming_start pins the Telnyx wire to PCMU/μ-law @ 8 kHz —
6829
+ // 'pcm_16000' here shipped raw PCM16 onto a μ-law wire (static).
6830
+ telnyx: "ulaw_8000",
6210
6831
  // Plivo streams mulaw 8 kHz (we pin contentType in the answer XML).
6211
6832
  plivo: "ulaw_8000"
6212
6833
  };
@@ -6334,11 +6955,11 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
6334
6955
  }
6335
6956
  });
6336
6957
  }
6337
- /** Pre-configured for Telnyx (`pcm_16000`). */
6958
+ /** Pre-configured for Telnyx (μ-law 8 kHz wire). */
6338
6959
  static forTelnyx(opts) {
6339
6960
  return new _ElevenLabsWebSocketTTS({
6340
6961
  ...opts,
6341
- outputFormat: "pcm_16000"
6962
+ outputFormat: "ulaw_8000"
6342
6963
  });
6343
6964
  }
6344
6965
  buildUrl() {
@@ -6478,7 +7099,7 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
6478
7099
  ws.on("error", onError);
6479
7100
  try {
6480
7101
  if (!adopted) {
6481
- await new Promise((resolve, reject) => {
7102
+ await new Promise((resolve2, reject) => {
6482
7103
  connectTimer = setTimeout(
6483
7104
  () => reject(new Error("ElevenLabs WS connect timeout")),
6484
7105
  CONNECT_TIMEOUT_MS4
@@ -6486,7 +7107,7 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
6486
7107
  ws.once("open", () => {
6487
7108
  if (connectTimer) clearTimeout(connectTimer);
6488
7109
  connectTimer = void 0;
6489
- resolve();
7110
+ resolve2();
6490
7111
  });
6491
7112
  ws.once("error", (err) => {
6492
7113
  if (connectTimer) clearTimeout(connectTimer);
@@ -6571,14 +7192,14 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
6571
7192
  headers: { "xi-api-key": this.apiKey }
6572
7193
  });
6573
7194
  try {
6574
- await new Promise((resolve, reject) => {
7195
+ await new Promise((resolve2, reject) => {
6575
7196
  const timer = setTimeout(
6576
7197
  () => reject(new Error("ElevenLabs WS TTS warmup connect timeout")),
6577
7198
  CONNECT_TIMEOUT_MS4
6578
7199
  );
6579
7200
  ws.once("open", () => {
6580
7201
  clearTimeout(timer);
6581
- resolve();
7202
+ resolve2();
6582
7203
  });
6583
7204
  ws.once("error", (err) => {
6584
7205
  clearTimeout(timer);
@@ -6622,14 +7243,14 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
6622
7243
  const ws = new WebSocket6(this.buildUrl(), {
6623
7244
  headers: { "xi-api-key": this.apiKey }
6624
7245
  });
6625
- await new Promise((resolve, reject) => {
7246
+ await new Promise((resolve2, reject) => {
6626
7247
  const timer = setTimeout(
6627
7248
  () => reject(new Error("ElevenLabs WS park connect timeout")),
6628
7249
  CONNECT_TIMEOUT_MS4
6629
7250
  );
6630
7251
  ws.once("open", () => {
6631
7252
  clearTimeout(timer);
6632
- resolve();
7253
+ resolve2();
6633
7254
  });
6634
7255
  ws.once("error", (err) => {
6635
7256
  clearTimeout(timer);
@@ -6970,7 +7591,7 @@ var TTS4 = class _TTS extends CartesiaTTS {
6970
7591
  }
6971
7592
  static forTwilio(arg1, arg2) {
6972
7593
  const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
6973
- return new _TTS({ ...opts, sampleRate: 8e3 });
7594
+ return new _TTS({ ...opts, sampleRate: 16e3 });
6974
7595
  }
6975
7596
  static forTelnyx(arg1, arg2) {
6976
7597
  const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
@@ -7353,7 +7974,7 @@ var AnthropicModel = {
7353
7974
  CLAUDE_3_5_SONNET_20241022: "claude-3-5-sonnet-20241022",
7354
7975
  CLAUDE_3_5_HAIKU_20241022: "claude-3-5-haiku-20241022"
7355
7976
  };
7356
- var DEFAULT_MODEL = AnthropicModel.CLAUDE_HAIKU_4_5_20251001;
7977
+ var DEFAULT_MODEL2 = AnthropicModel.CLAUDE_HAIKU_4_5_20251001;
7357
7978
  var DEFAULT_MAX_TOKENS = 1024;
7358
7979
  var PROMPT_CACHING_BETA = "prompt-caching-2024-07-31";
7359
7980
  var AnthropicLLMProvider = class {
@@ -7373,7 +7994,7 @@ var AnthropicLLMProvider = class {
7373
7994
  );
7374
7995
  }
7375
7996
  this.apiKey = options.apiKey;
7376
- this.model = options.model ?? DEFAULT_MODEL;
7997
+ this.model = options.model ?? DEFAULT_MODEL2;
7377
7998
  this.maxTokens = options.maxTokens ?? DEFAULT_MAX_TOKENS;
7378
7999
  this.temperature = options.temperature;
7379
8000
  this.url = options.baseUrl ?? DEFAULT_ANTHROPIC_URL;
@@ -7446,16 +8067,19 @@ var AnthropicLLMProvider = class {
7446
8067
  if (this.promptCaching) {
7447
8068
  headers["anthropic-beta"] = PROMPT_CACHING_BETA;
7448
8069
  }
8070
+ const idle = createStreamIdleWatchdog();
7449
8071
  const response = await fetch(this.url, {
7450
8072
  method: "POST",
7451
8073
  headers,
7452
8074
  body: JSON.stringify(body),
7453
- signal: mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4))
8075
+ signal: mergeAbortSignals(opts?.signal, idle.signal)
7454
8076
  });
7455
8077
  if (!response.ok) {
7456
8078
  const errText = await response.text();
7457
- getLogger().error(`Anthropic API error: ${response.status} ${errText}`);
7458
- return;
8079
+ getLogger().error(`Anthropic API error: ${response.status} ${errText.slice(0, 200)}`);
8080
+ throw new PatterConnectionError(
8081
+ `Anthropic API returned ${response.status}: ${errText.slice(0, 200)}`
8082
+ );
7459
8083
  }
7460
8084
  const reader = response.body?.getReader();
7461
8085
  if (!reader) return;
@@ -7471,6 +8095,7 @@ var AnthropicLLMProvider = class {
7471
8095
  try {
7472
8096
  while (true) {
7473
8097
  const { done, value } = await reader.read();
8098
+ idle.touch();
7474
8099
  if (done) break;
7475
8100
  buffer += decoder.decode(value, { stream: true });
7476
8101
  const lines = buffer.split("\n");
@@ -7486,6 +8111,15 @@ var AnthropicLLMProvider = class {
7486
8111
  } catch {
7487
8112
  continue;
7488
8113
  }
8114
+ if (event.type === "error") {
8115
+ const errPayload = event.error;
8116
+ const detail = `${errPayload?.type ?? "unknown"}: ${errPayload?.message ?? ""}`.slice(
8117
+ 0,
8118
+ 200
8119
+ );
8120
+ getLogger().error(`Anthropic in-stream error event: ${detail}`);
8121
+ throw new PatterConnectionError(`Anthropic stream error \u2014 ${detail}`);
8122
+ }
7489
8123
  if (event.type === "message_start" && event.message?.usage) {
7490
8124
  const u = event.message.usage;
7491
8125
  if (u.input_tokens) inputTokens = u.input_tokens;
@@ -7533,7 +8167,15 @@ var AnthropicLLMProvider = class {
7533
8167
  }
7534
8168
  }
7535
8169
  }
8170
+ } catch (err) {
8171
+ if (idle.fired && !opts?.signal?.aborted) {
8172
+ throw new PatterConnectionError(
8173
+ `Anthropic stream idle timeout \u2014 no data for ${LLM_STREAM_IDLE_TIMEOUT_MS / 1e3}s`
8174
+ );
8175
+ }
8176
+ throw err;
7536
8177
  } finally {
8178
+ idle.clear();
7537
8179
  reader.cancel().catch(() => {
7538
8180
  });
7539
8181
  }
@@ -7618,6 +8260,9 @@ function toAnthropicMessages(messages) {
7618
8260
  continue;
7619
8261
  }
7620
8262
  }
8263
+ if (out.length > 0 && out[0].role === "assistant") {
8264
+ out.unshift({ role: "user", content: "(call connected)" });
8265
+ }
7621
8266
  return { system: systemParts.join("\n\n"), messages: out };
7622
8267
  }
7623
8268
 
@@ -7658,7 +8303,7 @@ var GroqModel = {
7658
8303
  MIXTRAL_8X7B: "mixtral-8x7b-32768",
7659
8304
  GEMMA2_9B: "gemma2-9b-it"
7660
8305
  };
7661
- var DEFAULT_MODEL2 = GroqModel.LLAMA_3_3_70B_VERSATILE;
8306
+ var DEFAULT_MODEL3 = GroqModel.LLAMA_3_3_70B_VERSATILE;
7662
8307
  var GroqLLMProvider = class {
7663
8308
  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
7664
8309
  static providerKey = "groq";
@@ -7682,7 +8327,7 @@ var GroqLLMProvider = class {
7682
8327
  );
7683
8328
  }
7684
8329
  this.apiKey = options.apiKey;
7685
- this.model = options.model ?? DEFAULT_MODEL2;
8330
+ this.model = options.model ?? DEFAULT_MODEL3;
7686
8331
  this.baseUrl = options.baseUrl ?? GROQ_BASE_URL;
7687
8332
  this.temperature = options.temperature;
7688
8333
  this.maxTokens = options.maxTokens;
@@ -7731,6 +8376,7 @@ var GroqLLMProvider = class {
7731
8376
  if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
7732
8377
  if (this.stop !== void 0) body.stop = this.stop;
7733
8378
  if (tools) body.tools = tools;
8379
+ const idle = createStreamIdleWatchdog();
7734
8380
  const response = await fetch(`${this.baseUrl}/chat/completions`, {
7735
8381
  method: "POST",
7736
8382
  headers: {
@@ -7739,17 +8385,30 @@ var GroqLLMProvider = class {
7739
8385
  "User-Agent": `getpatter/${VERSION}`
7740
8386
  },
7741
8387
  body: JSON.stringify(body),
7742
- signal: mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4))
8388
+ signal: mergeAbortSignals(opts?.signal, idle.signal)
7743
8389
  });
7744
8390
  if (!response.ok) {
7745
8391
  const errText = await response.text();
7746
- getLogger().error(`Groq API error: ${response.status} ${errText}`);
7747
- return;
8392
+ getLogger().error(`Groq API error: ${response.status} ${errText.slice(0, 200)}`);
8393
+ throw new PatterConnectionError(
8394
+ `Groq API returned ${response.status}: ${errText.slice(0, 200)}`
8395
+ );
8396
+ }
8397
+ try {
8398
+ yield* parseOpenAISseStream(response, idle.touch);
8399
+ } catch (err) {
8400
+ if (idle.fired && !opts?.signal?.aborted) {
8401
+ throw new PatterConnectionError(
8402
+ `Groq stream idle timeout \u2014 no data for ${LLM_STREAM_IDLE_TIMEOUT_MS / 1e3}s`
8403
+ );
8404
+ }
8405
+ throw err;
8406
+ } finally {
8407
+ idle.clear();
7748
8408
  }
7749
- yield* parseOpenAISseStream(response);
7750
8409
  }
7751
8410
  };
7752
- async function* parseOpenAISseStream(response) {
8411
+ async function* parseOpenAISseStream(response, onRead) {
7753
8412
  const reader = response.body?.getReader();
7754
8413
  if (!reader) return;
7755
8414
  const decoder = new TextDecoder();
@@ -7757,6 +8416,7 @@ async function* parseOpenAISseStream(response) {
7757
8416
  try {
7758
8417
  while (true) {
7759
8418
  const { done, value } = await reader.read();
8419
+ onRead?.();
7760
8420
  if (done) break;
7761
8421
  buffer += decoder.decode(value, { stream: true });
7762
8422
  const lines = buffer.split("\n");
@@ -7777,7 +8437,7 @@ async function* parseOpenAISseStream(response) {
7777
8437
  const cached = chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0;
7778
8438
  yield {
7779
8439
  type: "usage",
7780
- inputTokens: usage.prompt_tokens,
8440
+ inputTokens: Math.max(0, (usage.prompt_tokens ?? 0) - cached),
7781
8441
  outputTokens: usage.completion_tokens,
7782
8442
  cacheReadInputTokens: cached
7783
8443
  };
@@ -7833,468 +8493,248 @@ var LLM3 = class extends GroqLLMProvider {
7833
8493
  });
7834
8494
  }
7835
8495
  };
7836
-
7837
- // src/llm/cerebras.ts
7838
- init_esm_shims();
7839
-
7840
- // src/providers/cerebras-llm.ts
7841
- init_esm_shims();
7842
- var CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1";
7843
- var CerebrasModel = {
7844
- GPT_OSS_120B: "gpt-oss-120b",
7845
- LLAMA_3_1_8B: "llama3.1-8b",
7846
- LLAMA_3_3_70B: "llama-3.3-70b",
7847
- QWEN_3_235B_INSTRUCT: "qwen-3-235b-a22b-instruct-2507",
7848
- ZAI_GLM_4_7: "zai-glm-4.7"
7849
- };
7850
- var DEFAULT_MODEL3 = CerebrasModel.GPT_OSS_120B;
7851
- var RETRY_BACKOFF_BASE_MS = 500;
7852
- var CerebrasLLMProvider = class {
7853
- /** Stable pricing/dashboard key — read by stream-handler/metrics. */
7854
- static providerKey = "cerebras";
7855
- apiKey;
7856
- model;
7857
- baseUrl;
7858
- gzipCompression;
7859
- temperature;
7860
- maxTokens;
7861
- responseFormat;
7862
- parallelToolCalls;
7863
- toolChoice;
7864
- seed;
7865
- topP;
7866
- frequencyPenalty;
7867
- presencePenalty;
7868
- stop;
7869
- constructor(options) {
7870
- if (!options.apiKey) {
7871
- throw new Error(
7872
- "Cerebras API key is required. Pass it via { apiKey } or read CEREBRAS_API_KEY from the environment."
7873
- );
7874
- }
7875
- this.apiKey = options.apiKey;
7876
- this.model = options.model ?? DEFAULT_MODEL3;
7877
- this.baseUrl = options.baseUrl ?? CEREBRAS_BASE_URL;
7878
- this.gzipCompression = options.gzipCompression ?? true;
7879
- this.temperature = options.temperature;
7880
- this.maxTokens = options.maxTokens;
7881
- this.responseFormat = options.responseFormat;
7882
- this.parallelToolCalls = options.parallelToolCalls;
7883
- this.toolChoice = options.toolChoice;
7884
- this.seed = options.seed;
7885
- this.topP = options.topP;
7886
- this.frequencyPenalty = options.frequencyPenalty;
7887
- this.presencePenalty = options.presencePenalty;
7888
- this.stop = options.stop;
7889
- }
7890
- /**
7891
- * Pre-call DNS / TLS warmup for the Cerebras inference endpoint.
7892
- * Best-effort: 5 s timeout, all exceptions swallowed at debug level.
7893
- */
7894
- async warmup() {
7895
- try {
7896
- await fetch(`${this.baseUrl}/models`, {
7897
- method: "GET",
7898
- headers: { Authorization: `Bearer ${this.apiKey}` },
7899
- signal: AbortSignal.timeout(5e3)
7900
- });
7901
- } catch (err) {
7902
- getLogger().debug(`Cerebras LLM warmup failed (best-effort): ${String(err)}`);
7903
- }
7904
- }
7905
- /** Stream Patter-format LLM chunks from the Cerebras chat completions API. */
7906
- async *stream(messages, tools, opts) {
7907
- const body = {
7908
- model: this.model,
7909
- messages,
7910
- stream: true,
7911
- stream_options: { include_usage: true }
7912
- };
7913
- if (this.temperature !== void 0) body.temperature = this.temperature;
7914
- if (this.maxTokens !== void 0) {
7915
- body.max_completion_tokens = this.maxTokens;
7916
- }
7917
- if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
7918
- if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
7919
- if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
7920
- if (this.seed !== void 0) body.seed = this.seed;
7921
- if (this.topP !== void 0) body.top_p = this.topP;
7922
- if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
7923
- if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
7924
- if (this.stop !== void 0) body.stop = this.stop;
7925
- if (tools) body.tools = tools;
7926
- const headers = {
7927
- "Content-Type": "application/json",
7928
- Authorization: `Bearer ${this.apiKey}`,
7929
- // Identify the SDK in upstream logs/rate-limit attribution.
7930
- "User-Agent": `getpatter/${VERSION}`
7931
- };
7932
- let payload = JSON.stringify(body);
7933
- if (this.gzipCompression) {
7934
- const compressed = await gzipEncode(payload);
7935
- if (compressed) {
7936
- payload = compressed;
7937
- headers["Content-Encoding"] = "gzip";
7938
- }
7939
- }
7940
- const maxAttempts = 2;
7941
- let lastErrText = "";
7942
- let lastStatus = 0;
7943
- for (let attempt = 0; attempt < maxAttempts; attempt++) {
7944
- const response = await fetch(`${this.baseUrl}/chat/completions`, {
7945
- method: "POST",
7946
- headers,
7947
- body: payload,
7948
- signal: mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4))
7949
- });
7950
- if (response.ok) {
7951
- yield* parseOpenAISseStream(response);
7952
- return;
7953
- }
7954
- lastStatus = response.status;
7955
- lastErrText = await response.text().catch(() => "");
7956
- const isRetriable = response.status === 429 || response.status >= 500;
7957
- const isLastAttempt = attempt >= maxAttempts - 1;
7958
- if (!isRetriable || isLastAttempt) {
7959
- if (response.status === 404 && lastErrText.includes("model_not_found")) {
7960
- getLogger().error(
7961
- `Cerebras: model "${this.model}" not available on your tier. Override via \`new CerebrasLLM({ model: '<id>' })\` and list tier-available ids with \`GET ${this.baseUrl}/models\` (common: llama3.1-8b, qwen-3-235b-a22b-instruct-2507, llama-3.3-70b on paid). Raw response: ${lastErrText}`
7962
- );
7963
- } else {
7964
- getLogger().error(`Cerebras API error: ${response.status} ${lastErrText}`);
7965
- }
7966
- return;
7967
- }
7968
- const advisoryMs = parseRateLimitResetMs(response.headers);
7969
- const exponentialMs = RETRY_BACKOFF_BASE_MS * Math.pow(2, attempt);
7970
- const delayMs = Math.min(5e3, Math.max(advisoryMs, exponentialMs));
7971
- getLogger().warn(
7972
- `Cerebras API ${response.status} (attempt ${attempt + 1}/${maxAttempts}); retrying after ${delayMs}ms`
7973
- );
7974
- await new Promise((resolve, reject) => {
7975
- const t = setTimeout(resolve, delayMs);
7976
- opts?.signal?.addEventListener(
7977
- "abort",
7978
- () => {
7979
- clearTimeout(t);
7980
- reject(opts.signal.reason);
7981
- },
7982
- { once: true }
7983
- );
7984
- });
7985
- }
7986
- throw new PatterError(`Cerebras API error ${lastStatus}: ${lastErrText || "request failed"}`);
7987
- }
7988
- };
7989
- async function gzipEncode(data) {
7990
- const CompressionCtor = globalThis.CompressionStream;
7991
- if (!CompressionCtor) return null;
7992
- const stream = new CompressionCtor("gzip");
7993
- const writer = stream.writable.getWriter();
7994
- const encoder = new TextEncoder();
7995
- await writer.write(encoder.encode(data));
7996
- await writer.close();
7997
- const chunks = [];
7998
- const reader = stream.readable.getReader();
7999
- while (true) {
8000
- const { done, value } = await reader.read();
8001
- if (done) break;
8002
- if (value) chunks.push(value);
8003
- }
8004
- const total = chunks.reduce((n, c) => n + c.length, 0);
8005
- const out = new Uint8Array(total);
8006
- let offset = 0;
8007
- for (const c of chunks) {
8008
- out.set(c, offset);
8009
- offset += c.length;
8010
- }
8011
- return out;
8012
- }
8013
- function parseRateLimitResetMs(headers) {
8014
- const candidates = [
8015
- headers.get("x-ratelimit-reset-tokens-minute"),
8016
- headers.get("x-ratelimit-reset-requests-minute"),
8017
- // Some upstreams send the standard ``retry-after`` (seconds).
8018
- headers.get("retry-after")
8019
- ];
8020
- let bestMs = 0;
8021
- for (const raw of candidates) {
8022
- if (!raw) continue;
8023
- const parsed = Number.parseFloat(raw);
8024
- if (Number.isFinite(parsed) && parsed > 0) {
8025
- const ms = parsed * 1e3;
8026
- if (ms > bestMs) bestMs = ms;
8027
- }
8028
- }
8029
- return bestMs;
8030
- }
8031
-
8032
- // src/llm/cerebras.ts
8033
- var LLM4 = class extends CerebrasLLMProvider {
8034
- static providerKey = "cerebras";
8035
- constructor(opts = {}) {
8036
- const key = opts.apiKey ?? process.env.CEREBRAS_API_KEY;
8037
- if (!key) {
8038
- throw new Error(
8039
- "Cerebras LLM requires an apiKey. Pass { apiKey: 'csk-...' } or set CEREBRAS_API_KEY."
8040
- );
8041
- }
8042
- super({
8043
- apiKey: key,
8044
- model: opts.model,
8045
- baseUrl: opts.baseUrl,
8046
- gzipCompression: opts.gzipCompression,
8047
- temperature: opts.temperature,
8048
- maxTokens: opts.maxTokens,
8049
- responseFormat: opts.responseFormat,
8050
- parallelToolCalls: opts.parallelToolCalls,
8051
- toolChoice: opts.toolChoice,
8052
- seed: opts.seed,
8053
- topP: opts.topP,
8054
- frequencyPenalty: opts.frequencyPenalty,
8055
- presencePenalty: opts.presencePenalty,
8056
- stop: opts.stop
8057
- });
8058
- }
8059
- };
8060
-
8061
- // src/llm/google.ts
8062
- init_esm_shims();
8063
-
8064
- // src/providers/google-llm.ts
8065
- init_esm_shims();
8066
- var GoogleModel = {
8067
- GEMINI_2_5_FLASH: "gemini-2.5-flash",
8068
- GEMINI_2_5_PRO: "gemini-2.5-pro",
8069
- GEMINI_2_0_FLASH: "gemini-2.0-flash",
8070
- GEMINI_2_0_FLASH_LITE: "gemini-2.0-flash-lite",
8071
- GEMINI_1_5_FLASH: "gemini-1.5-flash",
8072
- GEMINI_1_5_PRO: "gemini-1.5-pro"
8496
+
8497
+ // src/llm/cerebras.ts
8498
+ init_esm_shims();
8499
+
8500
+ // src/providers/cerebras-llm.ts
8501
+ init_esm_shims();
8502
/** Root of the Cerebras REST API; `/models` and `/chat/completions` are appended to it. */
var CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1";

/** Model identifiers that can be passed as the provider's `model` option. */
var CerebrasModel = {
  GPT_OSS_120B: "gpt-oss-120b",
  LLAMA_3_1_8B: "llama3.1-8b",
  LLAMA_3_3_70B: "llama-3.3-70b",
  QWEN_3_235B_INSTRUCT: "qwen-3-235b-a22b-instruct-2507",
  ZAI_GLM_4_7: "zai-glm-4.7"
};

/** Model used when the caller does not choose one. */
var DEFAULT_MODEL4 = CerebrasModel.GPT_OSS_120B;

/** Base of the exponential retry back-off, in milliseconds (doubled per attempt). */
var RETRY_BACKOFF_BASE_MS = 500;
8512
/**
 * Streaming LLM provider for the Cerebras chat-completions API.
 *
 * All sampling options are optional; an option left `undefined` is omitted
 * from the request body so the upstream default applies.
 */
var CerebrasLLMProvider = class {
  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
  static providerKey = "cerebras";
  apiKey;
  model;
  baseUrl;
  gzipCompression;
  temperature;
  maxTokens;
  responseFormat;
  parallelToolCalls;
  toolChoice;
  seed;
  topP;
  frequencyPenalty;
  presencePenalty;
  stop;
  /**
   * @param options Provider options. `apiKey` is mandatory; `model` defaults
   *   to `DEFAULT_MODEL4`, `baseUrl` to `CEREBRAS_BASE_URL` and
   *   `gzipCompression` to `true`.
   * @throws {Error} When `options.apiKey` is missing or empty.
   */
  constructor(options) {
    if (!options.apiKey) {
      throw new Error(
        "Cerebras API key is required. Pass it via { apiKey } or read CEREBRAS_API_KEY from the environment."
      );
    }
    this.apiKey = options.apiKey;
    this.model = options.model ?? DEFAULT_MODEL4;
    this.baseUrl = options.baseUrl ?? CEREBRAS_BASE_URL;
    this.gzipCompression = options.gzipCompression ?? true;
    this.temperature = options.temperature;
    this.maxTokens = options.maxTokens;
    this.responseFormat = options.responseFormat;
    this.parallelToolCalls = options.parallelToolCalls;
    this.toolChoice = options.toolChoice;
    this.seed = options.seed;
    this.topP = options.topP;
    this.frequencyPenalty = options.frequencyPenalty;
    this.presencePenalty = options.presencePenalty;
    this.stop = options.stop;
  }
  /**
   * Pre-call DNS / TLS warmup for the Cerebras inference endpoint.
   * Best-effort: 5 s timeout, all exceptions swallowed at debug level.
   */
  async warmup() {
    try {
      await fetch(`${this.baseUrl}/models`, {
        method: "GET",
        headers: { Authorization: `Bearer ${this.apiKey}` },
        signal: AbortSignal.timeout(5e3)
      });
    } catch (err) {
      getLogger().debug(`Cerebras LLM warmup failed (best-effort): ${String(err)}`);
    }
  }
  /**
   * Stream Patter-format LLM chunks from the Cerebras chat completions API.
   *
   * Makes at most two attempts. A 429 or 5xx response on the first attempt
   * is retried after `max(server advisory, exponential back-off)` capped at
   * 5 s; every other non-OK response is logged and thrown right away.
   *
   * @param messages Chat messages, forwarded verbatim as `messages`.
   * @param tools Optional tool definitions, forwarded verbatim as `tools`.
   * @param opts Optional `{ signal }` used to cancel the request and the
   *   retry delay.
   * @throws {PatterConnectionError} On a non-retriable / final non-OK
   *   response, or when the stream trips the idle watchdog.
   */
  async *stream(messages, tools, opts) {
    const body = {
      model: this.model,
      messages,
      stream: true,
      stream_options: { include_usage: true }
    };
    if (this.temperature !== void 0) body.temperature = this.temperature;
    if (this.maxTokens !== void 0) {
      body.max_completion_tokens = this.maxTokens;
    }
    if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
    if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
    if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
    if (this.seed !== void 0) body.seed = this.seed;
    if (this.topP !== void 0) body.top_p = this.topP;
    if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
    if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
    if (this.stop !== void 0) body.stop = this.stop;
    if (tools) body.tools = tools;
    const headers = {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.apiKey}`,
      // Identify the SDK in upstream logs/rate-limit attribution.
      "User-Agent": `getpatter/${VERSION}`
    };
    let payload = JSON.stringify(body);
    if (this.gzipCompression) {
      // gzipEncode() yields null when the runtime has no CompressionStream;
      // the request then goes out uncompressed.
      const compressed = await gzipEncode(payload);
      if (compressed) {
        payload = compressed;
        headers["Content-Encoding"] = "gzip";
      }
    }
    const maxAttempts = 2;
    let lastErrText = "";
    let lastStatus = 0;
    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      const idle = createStreamIdleWatchdog();
      let response;
      try {
        response = await fetch(`${this.baseUrl}/chat/completions`, {
          method: "POST",
          headers,
          body: payload,
          signal: mergeAbortSignals(opts?.signal, idle.signal)
        });
      } catch (err) {
        // The request never produced a response: release the watchdog so it
        // is not left armed, then surface the original failure unchanged.
        idle.clear();
        throw err;
      }
      if (response.ok) {
        try {
          yield* parseOpenAISseStream(response, idle.touch);
        } catch (err) {
          // Report a watchdog abort as an idle timeout, unless the caller
          // cancelled the request themselves.
          if (idle.fired && !opts?.signal?.aborted) {
            throw new PatterConnectionError(
              `Cerebras stream idle timeout \u2014 no data for ${LLM_STREAM_IDLE_TIMEOUT_MS / 1e3}s`
            );
          }
          throw err;
        } finally {
          idle.clear();
        }
        return;
      }
      idle.clear();
      lastStatus = response.status;
      lastErrText = await response.text().catch(() => "");
      const isRetriable = response.status === 429 || response.status >= 500;
      const isLastAttempt = attempt >= maxAttempts - 1;
      if (!isRetriable || isLastAttempt) {
        if (response.status === 404 && lastErrText.includes("model_not_found")) {
          getLogger().error(
            `Cerebras: model "${this.model}" not available on your tier. Override via \`new CerebrasLLM({ model: '<id>' })\` and list tier-available ids with \`GET ${this.baseUrl}/models\` (common: llama3.1-8b, qwen-3-235b-a22b-instruct-2507, llama-3.3-70b on paid). Raw response: ${lastErrText.slice(0, 200)}`
          );
        } else {
          getLogger().error(`Cerebras API error: ${response.status} ${lastErrText.slice(0, 200)}`);
        }
        throw new PatterConnectionError(
          `Cerebras API returned ${response.status}: ${lastErrText.slice(0, 200)}`
        );
      }
      const advisoryMs = parseRateLimitResetMs(response.headers);
      const exponentialMs = RETRY_BACKOFF_BASE_MS * Math.pow(2, attempt);
      const delayMs = Math.min(5e3, Math.max(advisoryMs, exponentialMs));
      getLogger().warn(
        `Cerebras API ${response.status} (attempt ${attempt + 1}/${maxAttempts}); retrying after ${delayMs}ms`
      );
      await new Promise((resolve2, reject) => {
        const signal = opts?.signal;
        // An already-aborted signal never dispatches "abort" again, so it
        // has to be checked up front or the cancellation would be ignored
        // for the whole delay.
        if (signal?.aborted) {
          reject(signal.reason);
          return;
        }
        const onAbort = () => {
          clearTimeout(timer);
          reject(signal.reason);
        };
        const timer = setTimeout(() => {
          // Detach the listener so a long-lived signal does not accumulate
          // one stale handler per retry.
          signal?.removeEventListener("abort", onAbort);
          resolve2();
        }, delayMs);
        signal?.addEventListener("abort", onAbort, { once: true });
      });
    }
    // Defensive: every loop iteration above either returns, throws or
    // continues to the next attempt, and the last attempt always throws.
    throw new PatterError(`Cerebras API error ${lastStatus}: ${lastErrText || "request failed"}`);
  }
};
8664
/**
 * Gzip-compress a string with the runtime's `CompressionStream`.
 *
 * @param data Text to compress; encoded as UTF-8 first.
 * @returns The gzip bytes as one `Uint8Array`, or `null` when the runtime
 *   does not provide `CompressionStream`.
 */
async function gzipEncode(data) {
  const CompressionCtor = globalThis.CompressionStream;
  if (!CompressionCtor) return null;
  const stream = new CompressionCtor("gzip");
  const writer = stream.writable.getWriter();
  const reader = stream.readable.getReader();
  // Feed the writable side WITHOUT awaiting it before reading: a transform
  // stream applies backpressure until its readable side is consumed, so
  // awaiting write()/close() first can stall on larger payloads.
  const writing = (async () => {
    await writer.write(new TextEncoder().encode(data));
    await writer.close();
  })();
  // A write failure also errors the readable side and is re-thrown by the
  // `await writing` below; this only avoids an unhandled-rejection report
  // while the read loop is still running.
  writing.catch(() => {
  });
  const chunks = [];
  let total = 0;
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    if (value) {
      chunks.push(value);
      total += value.length;
    }
  }
  await writing;
  const out = new Uint8Array(total);
  let offset = 0;
  for (const chunk of chunks) {
    out.set(chunk, offset);
    offset += chunk.length;
  }
  return out;
}
8688
/**
 * Derive how long the server asks us to wait before retrying.
 *
 * Reads `x-ratelimit-reset-tokens-minute`, `x-ratelimit-reset-requests-minute`
 * (both parsed as seconds) and the standard `retry-after` header, which may
 * be either a number of seconds or an HTTP-date.
 *
 * @param headers Object with a `get(name)` method returning a header value
 *   or `null` (e.g. a fetch `Headers` instance).
 * @returns The largest advertised wait in milliseconds, or 0 when no header
 *   carries a usable positive value.
 */
function parseRateLimitResetMs(headers) {
  const secondsToMs = (raw) => {
    if (!raw) return 0;
    const seconds = Number.parseFloat(raw);
    return Number.isFinite(seconds) && seconds > 0 ? seconds * 1e3 : 0;
  };
  const retryAfterToMs = (raw) => {
    if (!raw) return 0;
    // Only fall back to date parsing when the value is not numeric at all;
    // Date.parse() would happily misread a bare number such as "0".
    if (!Number.isNaN(Number.parseFloat(raw))) return secondsToMs(raw);
    const retryAt = Date.parse(raw);
    return Number.isNaN(retryAt) ? 0 : Math.max(0, retryAt - Date.now());
  };
  return Math.max(
    0,
    secondsToMs(headers.get("x-ratelimit-reset-tokens-minute")),
    secondsToMs(headers.get("x-ratelimit-reset-requests-minute")),
    // Some upstreams send the standard ``retry-after`` (seconds or HTTP-date).
    retryAfterToMs(headers.get("retry-after"))
  );
}
8296
8706
 
8707
+ // src/llm/cerebras.ts
8708
/**
 * Cerebras LLM entry point: resolves the API key from the options or the
 * `CEREBRAS_API_KEY` environment variable, then forwards the supported
 * options to `CerebrasLLMProvider`.
 */
var LLM4 = class extends CerebrasLLMProvider {
  static providerKey = "cerebras";
  /**
   * @param opts Optional provider options; `apiKey` falls back to
   *   `process.env.CEREBRAS_API_KEY`.
   * @throws {Error} When no API key is available from either source.
   */
  constructor(opts = {}) {
    const {
      apiKey,
      model,
      baseUrl,
      gzipCompression,
      temperature,
      maxTokens,
      responseFormat,
      parallelToolCalls,
      toolChoice,
      seed,
      topP,
      frequencyPenalty,
      presencePenalty,
      stop
    } = opts;
    const resolvedKey = apiKey ?? process.env.CEREBRAS_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Cerebras LLM requires an apiKey. Pass { apiKey: 'csk-...' } or set CEREBRAS_API_KEY."
      );
    }
    super({
      apiKey: resolvedKey,
      model,
      baseUrl,
      gzipCompression,
      temperature,
      maxTokens,
      responseFormat,
      parallelToolCalls,
      toolChoice,
      seed,
      topP,
      frequencyPenalty,
      presencePenalty,
      stop
    });
  }
};
8735
+
8297
8736
  // src/llm/google.ts
8737
+ init_esm_shims();
8298
8738
  var LLM5 = class extends GoogleLLMProvider {
8299
8739
  static providerKey = "google";
8300
8740
  constructor(opts = {}) {
@@ -8499,11 +8939,12 @@ var OpenAICompatibleLLMProvider = class {
8499
8939
  const caller = opts?.caller;
8500
8940
  const callee = opts?.callee;
8501
8941
  const body = this.buildBody(messages, tools, callId);
8942
+ const idle = createStreamIdleWatchdog(this.timeoutMs);
8502
8943
  const response = await fetch(`${this.baseUrl}/chat/completions`, {
8503
8944
  method: "POST",
8504
8945
  headers: this.buildHeaders(callId, caller, callee),
8505
8946
  body: JSON.stringify(body),
8506
- signal: mergeAbortSignals(opts?.signal, AbortSignal.timeout(this.timeoutMs))
8947
+ signal: mergeAbortSignals(opts?.signal, idle.signal)
8507
8948
  });
8508
8949
  if (!response.ok) {
8509
8950
  const errText = await response.text();
@@ -8514,7 +8955,18 @@ var OpenAICompatibleLLMProvider = class {
8514
8955
  `LLM API returned ${response.status}: ${errText.slice(0, 200)}`
8515
8956
  );
8516
8957
  }
8517
- yield* parseOpenAISseStream(response);
8958
+ try {
8959
+ yield* parseOpenAISseStream(response, idle.touch);
8960
+ } catch (err) {
8961
+ if (idle.fired && !opts?.signal?.aborted) {
8962
+ throw new PatterConnectionError(
8963
+ `LLM stream idle timeout \u2014 no data for ${Math.round(this.timeoutMs / 1e3)}s`
8964
+ );
8965
+ }
8966
+ throw err;
8967
+ } finally {
8968
+ idle.clear();
8969
+ }
8518
8970
  }
8519
8971
  };
8520
8972
  var LLM6 = class extends OpenAICompatibleLLMProvider {
@@ -8618,13 +9070,437 @@ var LLM9 = class extends OpenAICompatibleLLMProvider {
8618
9070
  }
8619
9071
  };
8620
9072
 
9073
+ // src/providers/smart-turn.ts
9074
+ init_esm_shims();
9075
+ import * as fs3 from "fs";
9076
+ import * as path2 from "path";
9077
/** Environment variable consulted for the model path when none is passed in. */
var SMART_TURN_MODEL_ENV_VAR = "PATTER_SMART_TURN_MODEL";

/** Sample rate of the audio fed to the model, in Hz. */
var SMART_TURN_SAMPLE_RATE = 16000;

/** Length of the audio window handed to the model, in seconds. */
var SMART_TURN_MAX_SECONDS = 8;

/** The same window expressed in samples. */
var SMART_TURN_MAX_SAMPLES = SMART_TURN_SAMPLE_RATE * SMART_TURN_MAX_SECONDS;

/** Threshold used when the caller does not supply one. */
var DEFAULT_SMART_TURN_THRESHOLD = 0.5;

// Feature-extraction geometry used by computeWhisperLogMelFeatures().
var N_FFT = 400; // samples per analysis frame
var HOP_LENGTH = 160; // samples between consecutive frame starts
var N_MELS = 80; // mel bands per frame
var N_FRAMES = 800; // frames per feature matrix

/** Lower clamp applied to mel energies before taking log10. */
var MEL_FLOOR = 1e-10;

/** Added to the variance in prepareInputWindow() to avoid dividing by zero. */
var NORM_EPS = 1e-7;

/** Shared tail of the "model file missing" error messages. */
var DOWNLOAD_HINT = [
  "Download a smart-turn-v3 ONNX file from https://huggingface.co/pipecat-ai/smart-turn-v3",
  `and either set the ${SMART_TURN_MODEL_ENV_VAR} environment variable to its path`,
  "or pass modelPath to SmartTurnDetector.load().",
  "The model is not bundled with the SDK (~30 MB)."
].join(" ");
9089
/**
 * Work out which smart-turn model file to load.
 *
 * @param modelPath Explicit path; when falsy, the trimmed value of the
 *   `SMART_TURN_MODEL_ENV_VAR` environment variable is used instead.
 * @returns The absolute form of the chosen path.
 * @throws {Error} When no path is configured, the path does not exist, or
 *   it is not a regular file. Each message ends with the download hint.
 */
function resolveSmartTurnModelPath(modelPath) {
  const candidate = modelPath || (process.env[SMART_TURN_MODEL_ENV_VAR] ?? "").trim();
  if (!candidate) {
    throw new Error(
      `SmartTurnDetector has no model file configured. ${DOWNLOAD_HINT}`
    );
  }
  const exists = fs3.existsSync(candidate);
  if (!exists) {
    throw new Error(`Smart-turn model file not found: ${candidate}. ${DOWNLOAD_HINT}`);
  }
  const isRegularFile = fs3.statSync(candidate).isFile();
  if (!isRegularFile) {
    throw new Error(`Smart-turn model path is not a file: ${candidate}. ${DOWNLOAD_HINT}`);
  }
  return path2.resolve(candidate);
}
9107
/**
 * Convert a frequency in hertz to the mel scale used by this file's filter
 * bank: linear (3 mel per 200 Hz) below 1 kHz, logarithmic from 1 kHz up.
 *
 * @param freq Frequency in Hz.
 * @returns The corresponding mel value.
 */
function hertzToMelSlaney(freq) {
  const breakHz = 1e3;
  const breakMel = 15;
  const melsPerLogUnit = 27 / Math.log(6.4);
  return freq >= breakHz ? breakMel + Math.log(freq / breakHz) * melsPerLogUnit : 3 * freq / 200;
}
9116
/**
 * Inverse of `hertzToMelSlaney`: linear below 15 mel, exponential from
 * 15 mel (1 kHz) up.
 *
 * @param mels Value on the mel scale.
 * @returns The corresponding frequency in Hz.
 */
function melToHertzSlaney(mels) {
  const breakHz = 1e3;
  const breakMel = 15;
  const logUnitsPerMel = Math.log(6.4) / 27;
  return mels >= breakMel ? breakHz * Math.exp(logUnitsPerMel * (mels - breakMel)) : 200 * mels / 3;
}
9125
var melFilterbankCache = null;
/**
 * Build (once) and return the triangular mel filter bank.
 *
 * Each entry is stored sparsely as `{ startBin, weights }`: `weights[j]`
 * applies to FFT bin `startBin + j`. A filter with no positive weight is
 * stored as `{ startBin: 0, weights: <empty> }`.
 *
 * @returns Array of `N_MELS` sparse filters; the same array on every call.
 */
function melFilterbank() {
  if (melFilterbankCache) return melFilterbankCache;
  const binCount = 1 + N_FFT / 2;
  const nyquist = SMART_TURN_SAMPLE_RATE / 2;
  // Centre frequency of every FFT bin, evenly spaced from 0 Hz to Nyquist.
  const binHz = new Float64Array(binCount);
  for (let bin = 0; bin < binCount; bin++) {
    binHz[bin] = bin * nyquist / (binCount - 1);
  }
  // N_MELS + 2 band edges, evenly spaced on the mel axis.
  const melLow = hertzToMelSlaney(0);
  const melHigh = hertzToMelSlaney(nyquist);
  const edgesHz = new Float64Array(N_MELS + 2);
  for (let e = 0; e < N_MELS + 2; e++) {
    edgesHz[e] = melToHertzSlaney(melLow + (melHigh - melLow) * e / (N_MELS + 1));
  }
  const bank = [];
  for (let band = 0; band < N_MELS; band++) {
    const lower = edgesHz[band];
    const center = edgesHz[band + 1];
    const upper = edgesHz[band + 2];
    // Scale each triangle by 2 / bandwidth.
    const enorm = 2 / (upper - lower);
    const dense = new Float64Array(binCount);
    let firstActive = -1;
    let lastActive = -1;
    binHz.forEach((hz, bin) => {
      const rising = (hz - lower) / (center - lower);
      const falling = (upper - hz) / (upper - center);
      const weight = Math.max(0, Math.min(rising, falling)) * enorm;
      dense[bin] = weight;
      if (weight > 0) {
        if (firstActive === -1) firstActive = bin;
        lastActive = bin;
      }
    });
    bank.push(
      firstActive === -1 ? { startBin: 0, weights: new Float64Array(0) } : { startBin: firstActive, weights: dense.slice(firstActive, lastActive + 1) }
    );
  }
  melFilterbankCache = bank;
  return bank;
}
9167
var hannWindowCache = null;
/**
 * Return the cached `N_FFT`-point window `0.5 - 0.5 * cos(2πn / N_FFT)`,
 * building it on first use.
 *
 * @returns The shared window; callers must not mutate it.
 */
function hannWindow() {
  if (hannWindowCache) return hannWindowCache;
  hannWindowCache = Float64Array.from(
    { length: N_FFT },
    (_, n) => 0.5 - 0.5 * Math.cos(2 * Math.PI * n / N_FFT)
  );
  return hannWindowCache;
}
9178
var dft25Cos = null;
var dft25Sin = null;
/**
 * Return the cached cosine/sine tables for a direct 25-point DFT.
 *
 * Entry `k * 25 + j` holds cos / sin of `-2πkj / 25`.
 *
 * @returns `{ cos, sin }`, each a 625-element `Float64Array` shared between
 *   calls.
 */
function dft25Tables() {
  if (dft25Cos && dft25Sin) {
    return { cos: dft25Cos, sin: dft25Sin };
  }
  const points = 25;
  const cosTable = new Float64Array(points * points);
  const sinTable = new Float64Array(points * points);
  for (let idx = 0; idx < points * points; idx++) {
    const k = Math.floor(idx / points);
    const j = idx % points;
    const angle = -2 * Math.PI * k * j / 25;
    cosTable[idx] = Math.cos(angle);
    sinTable[idx] = Math.sin(angle);
  }
  dft25Cos = cosTable;
  dft25Sin = sinTable;
  return { cos: dft25Cos, sin: dft25Sin };
}
9194
// Per-size caches shared by the FFT helpers, keyed by transform length.
var fftTwiddleCos = /* @__PURE__ */ new Map();
var fftTwiddleSin = /* @__PURE__ */ new Map();
var fftScratch = /* @__PURE__ */ new Map();
/**
 * Return the cached twiddle factors for a length-`n` butterfly stage.
 *
 * @param n Transform length.
 * @returns `{ cos, sin }` where entry `k` (for `k < n / 2`) is the cosine /
 *   sine of `-2πk / n`.
 */
function fftTables(n) {
  const cachedCos = fftTwiddleCos.get(n);
  const cachedSin = fftTwiddleSin.get(n);
  if (cachedCos && cachedSin) {
    return { cos: cachedCos, sin: cachedSin };
  }
  const count = n / 2;
  const cosTable = new Float64Array(count);
  const sinTable = new Float64Array(count);
  for (let k = 0; k < count; k++) {
    const angle = -2 * Math.PI * k / n;
    cosTable[k] = Math.cos(angle);
    sinTable[k] = Math.sin(angle);
  }
  fftTwiddleCos.set(n, cosTable);
  fftTwiddleSin.set(n, sinTable);
  return { cos: cosTable, sin: sinTable };
}
9214
/**
 * Return the four reusable work buffers of length `n`, allocating them the
 * first time a given size is requested.
 *
 * @param n Buffer length.
 * @returns Array of four `Float64Array(n)`; the same array on later calls.
 */
function fftScratchFor(n) {
  const existing = fftScratch.get(n);
  if (existing) return existing;
  const fresh = Array.from({ length: 4 }, () => new Float64Array(n));
  fftScratch.set(n, fresh);
  return fresh;
}
9227
// Output staging for the 25-point base case, so it can overwrite its input.
var dft25OutRe = new Float64Array(25);
var dft25OutIm = new Float64Array(25);
/**
 * In-place complex FFT over parallel real/imaginary arrays.
 *
 * The input is split in half recursively until a block has length 25 (done
 * as a direct DFT) or 1 (left as is).
 * NOTE(review): lengths that do not halve down to 25 or 1 are not handled —
 * the visible caller only uses `N_FFT`.
 *
 * @param re Real parts; overwritten with the transform.
 * @param im Imaginary parts, same length as `re`; overwritten likewise.
 */
function fftComplex(re, im) {
  const n = re.length;
  if (n === 25) {
    const tables = dft25Tables();
    for (let k = 0; k < 25; k++) {
      const base = k * 25;
      let accRe = 0;
      let accIm = 0;
      for (let j = 0; j < 25; j++) {
        const c = tables.cos[base + j];
        const s = tables.sin[base + j];
        accRe += re[j] * c - im[j] * s;
        accIm += re[j] * s + im[j] * c;
      }
      dft25OutRe[k] = accRe;
      dft25OutIm[k] = accIm;
    }
    re.set(dft25OutRe);
    im.set(dft25OutIm);
    return;
  }
  if (n === 1) return;
  const half = n / 2;
  const [evenRe, evenIm, oddRe, oddIm] = fftScratchFor(half);
  // De-interleave into even- and odd-indexed halves.
  for (let src = 0, dst = 0; dst < half; src += 2, dst++) {
    evenRe[dst] = re[src];
    evenIm[dst] = im[src];
    oddRe[dst] = re[src + 1];
    oddIm[dst] = im[src + 1];
  }
  fftComplex(evenRe.subarray(0, half), evenIm.subarray(0, half));
  fftComplex(oddRe.subarray(0, half), oddIm.subarray(0, half));
  // Butterfly: combine the two half-size transforms with the twiddles.
  const twiddle = fftTables(n);
  for (let k = 0; k < half; k++) {
    const wr = twiddle.cos[k];
    const wi = twiddle.sin[k];
    const tr = wr * oddRe[k] - wi * oddIm[k];
    const ti = wr * oddIm[k] + wi * oddRe[k];
    const upper = k + half;
    re[k] = evenRe[k] + tr;
    im[k] = evenIm[k] + ti;
    re[upper] = evenRe[k] - tr;
    im[upper] = evenIm[k] - ti;
  }
}
9273
/**
 * Fit audio to exactly `SMART_TURN_MAX_SAMPLES` samples and normalise it.
 *
 * Longer input keeps only its most recent samples; shorter input is
 * left-padded with zeros. The result is then shifted to zero mean and
 * scaled by `1 / sqrt(variance + NORM_EPS)`.
 *
 * @param samples Indexable sequence of numeric samples.
 * @returns A new `Float64Array` of length `SMART_TURN_MAX_SAMPLES`.
 */
function prepareInputWindow(samples) {
  const size = SMART_TURN_MAX_SAMPLES;
  const out = new Float64Array(size);
  const available = samples.length;
  // Right-align the input: skip leading samples when there are too many,
  // leave leading zeros when there are too few.
  const skip = available >= size ? available - size : 0;
  const pad = available >= size ? 0 : size - available;
  const copied = available >= size ? size : available;
  for (let i = 0; i < copied; i++) {
    out[pad + i] = samples[skip + i];
  }
  const mean = out.reduce((sum, v) => sum + v, 0) / size;
  const variance = out.reduce((sum, v) => {
    const d = v - mean;
    return sum + d * d;
  }, 0) / size;
  const scale = 1 / Math.sqrt(variance + NORM_EPS);
  for (let i = 0; i < size; i++) {
    out[i] = (out[i] - mean) * scale;
  }
  return out;
}
9296
/**
 * Turn a prepared audio window into the log-mel feature matrix fed to the
 * model.
 *
 * Steps: reflect-pad by `N_FFT / 2` on both sides, window each frame with
 * `hannWindow()`, take the power spectrum via `fftComplex()`, project onto
 * `melFilterbank()`, take `log10` (clamped below at `MEL_FLOOR`), clamp to
 * within 8 of the global maximum, then map with `(x + 4) / 4`.
 *
 * @param window Output of `prepareInputWindow()`; must hold exactly
 *   `SMART_TURN_MAX_SAMPLES` samples.
 * @returns `Float32Array` of length `N_MELS * N_FRAMES`, laid out mel-major
 *   (`index = mel * N_FRAMES + frame`).
 * @throws {Error} When `window` has the wrong length.
 */
async function computeWhisperLogMelFeatures(window) {
  if (window.length !== SMART_TURN_MAX_SAMPLES) {
    throw new Error(
      `expected ${SMART_TURN_MAX_SAMPLES} samples, got ${window.length}; run prepareInputWindow() first`
    );
  }
  const reflect = N_FFT / 2;
  const paddedLen = SMART_TURN_MAX_SAMPLES + N_FFT;
  const binCount = 1 + N_FFT / 2;
  const padded = new Float64Array(paddedLen);
  padded.set(window, reflect);
  // Mirror the signal around its first and last sample (edge not repeated).
  for (let i = 0; i < reflect; i++) {
    padded[i] = window[reflect - i];
    padded[reflect + SMART_TURN_MAX_SAMPLES + i] = window[SMART_TURN_MAX_SAMPLES - 2 - i];
  }
  const hann = hannWindow();
  const filters = melFilterbank();
  // One frame fewer than fits in the padded signal: the last one is dropped.
  const frameCount = Math.floor((paddedLen - N_FFT) / HOP_LENGTH);
  const logSpec = new Float64Array(N_MELS * N_FRAMES);
  const re = new Float64Array(N_FFT);
  const im = new Float64Array(N_FFT);
  const power = new Float64Array(binCount);
  let peak = -Infinity;
  for (let frame = 0; frame < frameCount; frame++) {
    const offset = frame * HOP_LENGTH;
    for (let j = 0; j < N_FFT; j++) {
      re[j] = padded[offset + j] * hann[j];
      im[j] = 0;
    }
    fftComplex(re, im);
    for (let bin = 0; bin < binCount; bin++) {
      power[bin] = re[bin] * re[bin] + im[bin] * im[bin];
    }
    filters.forEach(({ startBin, weights }, mel) => {
      let energy = 0;
      for (let j = 0; j < weights.length; j++) {
        energy += power[startBin + j] * weights[j];
      }
      const logEnergy = Math.log10(Math.max(energy, MEL_FLOOR));
      logSpec[mel * N_FRAMES + frame] = logEnergy;
      if (logEnergy > peak) peak = logEnergy;
    });
    // Hand control back to the event loop every 128 frames so this
    // CPU-bound loop does not monopolise it.
    if (frame % 128 === 127) {
      await new Promise((resolve2) => setImmediate(resolve2));
    }
  }
  const floor = peak - 8;
  return Float32Array.from(logSpec, (v) => (Math.max(v, floor) + 4) / 4);
}
9350
/**
 * Decode little-endian 16-bit PCM and compute its model features.
 *
 * @param pcm16Window Buffer-like object exposing `length` and
 *   `readInt16LE(offset)`; a trailing odd byte is ignored.
 * @returns Promise for the result of `computeWhisperLogMelFeatures()`.
 */
async function featuresFromPcm16(pcm16Window) {
  const sampleCount = Math.floor(pcm16Window.length / 2);
  // Scale int16 values into [-1, 1).
  const samples = Float64Array.from(
    { length: sampleCount },
    (_, i) => pcm16Window.readInt16LE(i * 2) / 32768
  );
  const prepared = prepareInputWindow(samples);
  return computeWhisperLogMelFeatures(prepared);
}
9358
/**
 * Semantic end-of-turn detector backed by a smart-turn v3 ONNX model.
 *
 * Build instances with {@link SmartTurnDetector.load} or
 * {@link SmartTurnDetector.maybeLoad}; call {@link SmartTurnDetector#close}
 * when done so the ONNX session is released.
 */
var SmartTurnDetector = class _SmartTurnDetector {
  runtime;
  session;
  thresholdValue;
  closed = false;
  /**
   * @param runtime ONNX runtime module (its `Tensor` class is used).
   * @param session Inference session already created for the model.
   * @param thresholdValue End-of-turn probability threshold.
   */
  constructor(runtime, session, thresholdValue) {
    this.runtime = runtime;
    this.session = session;
    this.thresholdValue = thresholdValue;
  }
  /**
   * Load the smart-turn v3 ONNX model and return a ready detector.
   * Throws with download instructions when no model file is configured
   * (see {@link SMART_TURN_MODEL_ENV_VAR}), and with install instructions
   * when `onnxruntime-node` is missing.
   *
   * @param options Optional `{ threshold, modelPath, forceCpu }`. The CPU
   *   execution provider is forced unless `forceCpu` is exactly `false`.
   * @throws {Error} When `threshold` is outside `[0, 1]` (NaN included).
   */
  static async load(options = {}) {
    const threshold = options.threshold ?? DEFAULT_SMART_TURN_THRESHOLD;
    // Negated range test so that NaN is rejected as well.
    if (!(threshold >= 0 && threshold <= 1)) {
      throw new Error("threshold must be within [0.0, 1.0]");
    }
    const modelPath = resolveSmartTurnModelPath(options.modelPath);
    const runtime = await loadOnnxRuntime("SmartTurnDetector");
    const session = await runtime.InferenceSession.create(modelPath, {
      interOpNumThreads: 1,
      intraOpNumThreads: 1,
      executionMode: "sequential",
      graphOptimizationLevel: "all",
      executionProviders: options.forceCpu === false ? void 0 : ["cpu"]
    });
    return new _SmartTurnDetector(runtime, session, threshold);
  }
  /**
   * Like {@link load}, but degrade instead of throw.
   *
   * Resolves to `undefined` — after a single clear warning — when semantic
   * turn detection is not provisioned: the optional `onnxruntime-node`
   * dependency is missing, no model file is configured, or the configured
   * file cannot be loaded. Intended for deployments where the detector is
   * a soft upgrade:
   *
   * ```ts
   * const agent = phone.agent({
   *   ...,
   *   turnDetector: await SmartTurnDetector.maybeLoad(),
   * });
   * ```
   *
   * `turnDetector: undefined` keeps the plain VAD-silence endpointing, so
   * the agent starts (and the call behaves) exactly as if the feature were
   * never enabled — it never crashes the app.
   *
   * An out-of-range `threshold` still throws: that is a configuration bug,
   * not a provisioning gap. Mirror of the Python
   * `SmartTurnDetector.maybe_load`.
   */
  static async maybeLoad(options = {}) {
    const threshold = options.threshold ?? DEFAULT_SMART_TURN_THRESHOLD;
    // Validated outside the try block so a bad threshold is not downgraded
    // to a warning.
    if (!(threshold >= 0 && threshold <= 1)) {
      throw new Error("threshold must be within [0.0, 1.0]");
    }
    try {
      return await _SmartTurnDetector.load(options);
    } catch (err) {
      getLogger().warn(
        `Semantic turn detection unavailable \u2014 falling back to plain VAD-silence endpointing: ${err instanceof Error ? err.message : String(err)}`
      );
      return void 0;
    }
  }
  /**
   * Internal factory used by tests — bypasses onnxruntime-node loading.
   * The threshold is NOT range-checked here.
   * @internal
   */
  static fromOnnxSession(runtime, session, options = {}) {
    return new _SmartTurnDetector(
      runtime,
      session,
      options.threshold ?? DEFAULT_SMART_TURN_THRESHOLD
    );
  }
  /** Identifier of the underlying model (`smart-turn-v3`). */
  get model() {
    return "smart-turn-v3";
  }
  /** Identifier of the runtime backend (`ONNX`). */
  get provider() {
    return "ONNX";
  }
  /** Input sample rate the model expects (16 000 Hz). */
  get sampleRate() {
    return SMART_TURN_SAMPLE_RATE;
  }
  /** Maximum audio context the model consumes per prediction (8 s). */
  get maxWindowSeconds() {
    return SMART_TURN_MAX_SECONDS;
  }
  /** End-of-turn probability at/above which the turn is complete. */
  get threshold() {
    return this.thresholdValue;
  }
  /**
   * End-of-turn probability for the given recent-audio window.
   *
   * @param pcm16Window Mono int16 little-endian PCM at 16 kHz — ideally
   *   the full audio of the caller's current turn, up to 8 s (the
   *   handler keeps a rolling 8 s buffer). Longer input is truncated to
   *   the most recent 8 s; shorter input is left-padded with silence,
   *   matching the reference preprocessing exactly.
   * @returns Probability in `[0, 1]` that the turn is COMPLETE (the
   *   graph applies the sigmoid internally). Returns 0 for an empty
   *   window, and 0 when the model output is missing or NaN.
   * @throws {Error} When the detector has been closed.
   */
  async predict(pcm16Window) {
    if (this.closed || this.session === null) {
      throw new Error("SmartTurnDetector is closed");
    }
    // Fewer than two bytes cannot hold a single int16 sample.
    if (pcm16Window.length < 2) {
      return 0;
    }
    const features = await featuresFromPcm16(pcm16Window);
    const { Tensor } = this.runtime;
    const feeds = {
      input_features: new Tensor("float32", features, [1, N_MELS, N_FRAMES])
    };
    const results = await this.session.run(feeds);
    // The first output of the graph is taken as the probability.
    const first = Object.values(results)[0];
    const data = first?.data;
    const probability = data?.[0] ?? 0;
    // Math.min/Math.max propagate NaN, which would break the documented
    // [0, 1] contract; treat an unusable output as "turn not complete".
    if (Number.isNaN(probability)) return 0;
    return Math.min(1, Math.max(0, probability));
  }
  /**
   * Release the ONNX session. Idempotent.
   *
   * Calls the session's `release()` when it provides one; a failure there
   * is logged as a warning rather than thrown.
   */
  async close() {
    if (this.closed) return;
    this.closed = true;
    const session = this.session;
    this.session = null;
    if (typeof session?.release === "function") {
      try {
        await session.release();
      } catch (err) {
        getLogger().warn(
          `SmartTurnDetector: failed to release ONNX session: ${err instanceof Error ? err.message : String(err)}`
        );
      }
    }
  }
};
9496
+
8621
9497
  // src/providers/deepfilternet-filter.ts
8622
9498
  init_esm_shims();
8623
9499
  function log() {
8624
9500
  return getLogger();
8625
9501
  }
8626
9502
  var DEEPFILTERNET_SR = 48e3;
8627
- async function loadOnnxRuntime() {
9503
+ async function loadOnnxRuntime2() {
8628
9504
  try {
8629
9505
  const specifier = "onnxruntime-node";
8630
9506
  const mod = await import(specifier);
@@ -8731,7 +9607,7 @@ var DeepFilterNetFilter = class {
8731
9607
  return null;
8732
9608
  }
8733
9609
  if (this.ort === null) {
8734
- this.ort = await loadOnnxRuntime();
9610
+ this.ort = await loadOnnxRuntime2();
8735
9611
  }
8736
9612
  if (this.ort === null) {
8737
9613
  if (!this.warned && !this.silenceWarnings) {
@@ -9000,6 +9876,10 @@ var ChatContext = class _ChatContext {
9000
9876
  } else {
9001
9877
  this.items = maxMessages > 0 ? [...this.items.slice(-maxMessages)] : [];
9002
9878
  }
9879
+ const start = this.items.length > 0 && this.items[0].role === "system" ? 1 : 0;
9880
+ while (this.items.length > start && this.items[start].role === "tool") {
9881
+ this.items.splice(start, 1);
9882
+ }
9003
9883
  }
9004
9884
  // -------------------------------------------------------------------------
9005
9885
  // Provider format conversion
@@ -9035,6 +9915,10 @@ var ChatContext = class _ChatContext {
9035
9915
  }
9036
9916
  continue;
9037
9917
  }
9918
+ if (msg.role === "tool") {
9919
+ messages.push({ role: "user", content: `[tool result] ${msg.content}` });
9920
+ continue;
9921
+ }
9038
9922
  messages.push({ role: msg.role, content: msg.content });
9039
9923
  }
9040
9924
  return { system, messages };
@@ -9249,11 +10133,13 @@ var IVRActivity = class {
9249
10133
  }
9250
10134
  /** Record the current user-turn state (e.g. `"listening"`, `"away"`). */
9251
10135
  noteUserState(state) {
10136
+ if (!this.started) return;
9252
10137
  this.currentUserState = state;
9253
10138
  this.scheduleSilenceCheck();
9254
10139
  }
9255
10140
  /** Record the current agent-turn state (e.g. `"idle"`, `"listening"`). */
9256
10141
  noteAgentState(state) {
10142
+ if (!this.started) return;
9257
10143
  this.currentAgentState = state;
9258
10144
  this.scheduleSilenceCheck();
9259
10145
  }
@@ -9333,8 +10219,8 @@ var IVRActivity = class {
9333
10219
 
9334
10220
  // src/audio/background-audio.ts
9335
10221
  init_esm_shims();
9336
- import { promises as fs3 } from "fs";
9337
- import path2 from "path";
10222
+ import { promises as fs4 } from "fs";
10223
+ import path3 from "path";
9338
10224
  import { fileURLToPath } from "url";
9339
10225
  var BuiltinAudioClip = {
9340
10226
  CITY_AMBIENCE: "city-ambience.ogg",
@@ -9347,8 +10233,8 @@ var BuiltinAudioClip = {
9347
10233
  };
9348
10234
/**
 * Resolve the absolute path of a bundled audio clip.
 *
 * The base directory is the directory of this module (via `import.meta.url`),
 * falling back to `__dirname`, then to the process working directory. The clip
 * is looked up under `../resources/audio/` relative to that base.
 */
function builtinClipPath(clip) {
  const meta = typeof import.meta !== "undefined" ? import.meta : void 0;
  let baseDir;
  if (meta?.url) {
    baseDir = path3.dirname(fileURLToPath(meta.url));
  } else if (typeof __dirname !== "undefined") {
    baseDir = __dirname;
  } else {
    baseDir = process.cwd();
  }
  return path3.resolve(baseDir, "..", "resources", "audio", clip);
}
9353
10239
  var INT16_MIN = -32768;
9354
10240
  var INT16_MAX = 32767;
@@ -9517,7 +10403,7 @@ var BackgroundAudioPlayer = class {
9517
10403
  return source.decode(source.path);
9518
10404
  case "builtin": {
9519
10405
  const p = builtinClipPath(source.clip);
9520
- const header = await fs3.readFile(p, { flag: "r" }).then((buf) => buf.subarray(0, 4));
10406
+ const header = await fs4.readFile(p, { flag: "r" }).then((buf) => buf.subarray(0, 4));
9521
10407
  if (header.toString("ascii") !== "OggS") {
9522
10408
  throw new Error(`Bundled clip ${source.clip} is not a valid Ogg file`);
9523
10409
  }
@@ -9528,170 +10414,25 @@ var BackgroundAudioPlayer = class {
9528
10414
  applyGain(pcm, gain) {
9529
10415
  if (gain === 1) return pcm;
9530
10416
  const n = pcm.length >> 1;
9531
- const out = Buffer.allocUnsafe(pcm.length);
9532
- for (let i = 0; i < n; i++) {
9533
- out.writeInt16LE(clipInt16(Math.round(pcm.readInt16LE(i * 2) * gain)), i * 2);
9534
- }
9535
- return out;
9536
- }
9537
- resampleTo(dstSr) {
9538
- if (this.pcm === null) return Buffer.alloc(0);
9539
- if (dstSr === this.sourceSr) return this.pcm;
9540
- const cached = this.resampleCache.get(dstSr);
9541
- if (cached) return cached;
9542
- const resampled = resamplePcm(this.pcm, this.sourceSr, dstSr);
9543
- this.resampleCache.set(dstSr, resampled);
9544
- return resampled;
9545
- }
9546
- };
9547
- function isAudioConfig(value) {
9548
- return typeof value === "object" && value !== null && "source" in value && typeof value.source === "object";
9549
- }
9550
-
9551
- // src/providers/twilio-adapter.ts
9552
- init_esm_shims();
9553
- var TWILIO_API_BASE = "https://api.twilio.com/2010-04-01";
9554
- var TwilioAdapter = class _TwilioAdapter {
9555
- accountSid;
9556
- region;
9557
- baseUrl;
9558
- authHeader;
9559
- constructor(accountSid, authToken, opts = {}) {
9560
- if (!accountSid) throw new Error("TwilioAdapter: accountSid is required");
9561
- if (!authToken) throw new Error("TwilioAdapter: authToken is required");
9562
- this.accountSid = accountSid;
9563
- this.region = opts.region;
9564
- this.baseUrl = opts.region ? `https://api.${opts.region}.twilio.com/2010-04-01` : TWILIO_API_BASE;
9565
- this.authHeader = `Basic ${Buffer.from(`${accountSid}:${authToken}`).toString("base64")}`;
9566
- }
9567
- async request(method, path3, body) {
9568
- const url = `${this.baseUrl}/Accounts/${encodeURIComponent(this.accountSid)}${path3}`;
9569
- const headers = { Authorization: this.authHeader };
9570
- if (body) headers["Content-Type"] = "application/x-www-form-urlencoded";
9571
- const response = await fetch(url, {
9572
- method,
9573
- headers,
9574
- body: body ? body.toString() : void 0,
9575
- signal: AbortSignal.timeout(3e4)
9576
- });
9577
- const text = await response.text();
9578
- if (!response.ok) {
9579
- throw new Error(`Twilio ${method} ${path3} failed: ${response.status} ${text}`);
9580
- }
9581
- if (!text) return {};
9582
- try {
9583
- return JSON.parse(text);
9584
- } catch (e) {
9585
- throw new Error(`Twilio returned non-JSON response: ${String(e)}`);
9586
- }
9587
- }
9588
- /**
9589
- * Provision a local phone number in the given country.
9590
- *
9591
- * Lists available local numbers, then purchases the first match.
9592
- */
9593
- async provisionNumber(opts) {
9594
- const country = encodeURIComponent(opts.countryCode);
9595
- const queryParts = ["PageSize=1"];
9596
- if (opts.areaCode) queryParts.push(`AreaCode=${encodeURIComponent(opts.areaCode)}`);
9597
- const path3 = `/AvailablePhoneNumbers/${country}/Local.json?${queryParts.join("&")}`;
9598
- const available = await this.request("GET", path3);
9599
- const first = available.available_phone_numbers?.[0]?.phone_number;
9600
- if (!first) {
9601
- throw new Error(`TwilioAdapter: no numbers available for country ${opts.countryCode}`);
9602
- }
9603
- const body = new URLSearchParams({ PhoneNumber: first });
9604
- const purchased = await this.request(
9605
- "POST",
9606
- "/IncomingPhoneNumbers.json",
9607
- body
9608
- );
9609
- if (!purchased.sid || !purchased.phone_number) {
9610
- throw new Error("TwilioAdapter: malformed response from IncomingPhoneNumbers.create");
9611
- }
9612
- return { phoneNumber: purchased.phone_number, sid: purchased.sid };
9613
- }
9614
- /** Update an already-purchased number to point at our voice webhook. */
9615
- async configureNumber(phoneNumberSid, opts) {
9616
- if (!phoneNumberSid) throw new Error("TwilioAdapter: phoneNumberSid is required");
9617
- const body = new URLSearchParams({
9618
- VoiceUrl: opts.voiceUrl,
9619
- VoiceMethod: "POST"
9620
- });
9621
- if (opts.statusCallback) body.set("StatusCallback", opts.statusCallback);
9622
- await this.request(
9623
- "POST",
9624
- `/IncomingPhoneNumbers/${encodeURIComponent(phoneNumberSid)}.json`,
9625
- body
9626
- );
9627
- }
9628
- /** Place an outbound call. Returns the Twilio call SID. */
9629
- async initiateCall(opts) {
9630
- if (!opts.url && !opts.streamUrl) {
9631
- throw new Error("TwilioAdapter: initiateCall requires either url or streamUrl");
9632
- }
9633
- const body = new URLSearchParams({
9634
- From: opts.from,
9635
- To: opts.to
9636
- });
9637
- if (opts.url) {
9638
- body.set("Url", opts.url);
9639
- } else if (opts.streamUrl) {
9640
- body.set("Twiml", _TwilioAdapter.generateStreamTwiml(opts.streamUrl));
9641
- }
9642
- if (opts.statusCallback) body.set("StatusCallback", opts.statusCallback);
9643
- if (opts.machineDetection) body.set("MachineDetection", opts.machineDetection);
9644
- if (opts.extraParams) {
9645
- for (const [key, value] of Object.entries(opts.extraParams)) {
9646
- body.set(key, value);
9647
- }
9648
- }
9649
- const call = await this.request("POST", "/Calls.json", body);
9650
- if (!call.sid) {
9651
- throw new Error("TwilioAdapter: Calls.create returned no SID");
9652
- }
9653
- return { callSid: call.sid };
9654
- }
9655
- /**
9656
- * Build a ``<Response><Connect><Stream url="...">`` TwiML document.
9657
- *
9658
- * ``parameters`` is forwarded as ``<Parameter name="..." value="..."/>``
9659
- * children of ``<Stream>``. Twilio Media Streams strips query-string params
9660
- * from the ``<Stream url=...>`` before the WS handshake, so
9661
- * ``<Parameter>`` tags are the supported way to pre-populate
9662
- * ``start.customParameters`` on the WS ``start`` frame. Used by the
9663
- * inbound path to carry caller / callee through to the bridge.
9664
- *
9665
- * Mirrors the Python adapter's ``generate_stream_twiml``.
9666
- */
9667
- static generateStreamTwiml(streamUrl, parameters) {
9668
- const esc = (s) => s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
9669
- const escapedUrl = esc(streamUrl);
9670
- let paramTags = "";
9671
- if (parameters) {
9672
- for (const [name, value] of Object.entries(parameters)) {
9673
- if (value == null) continue;
9674
- paramTags += `<Parameter name="${esc(name)}" value="${esc(String(value))}"/>`;
9675
- }
10417
+ const out = Buffer.allocUnsafe(pcm.length);
10418
+ for (let i = 0; i < n; i++) {
10419
+ out.writeInt16LE(clipInt16(Math.round(pcm.readInt16LE(i * 2) * gain)), i * 2);
9676
10420
  }
9677
- return `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${escapedUrl}">${paramTags}</Stream></Connect></Response>`;
10421
+ return out;
9678
10422
  }
9679
- /** Force-complete an in-progress call. */
9680
- async endCall(callSid) {
9681
- if (!callSid) throw new Error("TwilioAdapter: callSid is required");
9682
- const body = new URLSearchParams({ Status: "completed" });
9683
- try {
9684
- await this.request(
9685
- "POST",
9686
- `/Calls/${encodeURIComponent(callSid)}.json`,
9687
- body
9688
- );
9689
- } catch (err) {
9690
- getLogger().warn(`[TwilioAdapter] endCall failed for ${callSid}: ${String(err)}`);
9691
- throw err;
9692
- }
10423
+ resampleTo(dstSr) {
10424
+ if (this.pcm === null) return Buffer.alloc(0);
10425
+ if (dstSr === this.sourceSr) return this.pcm;
10426
+ const cached = this.resampleCache.get(dstSr);
10427
+ if (cached) return cached;
10428
+ const resampled = resamplePcm(this.pcm, this.sourceSr, dstSr);
10429
+ this.resampleCache.set(dstSr, resampled);
10430
+ return resampled;
9693
10431
  }
9694
10432
  };
10433
/**
 * True when `value` is a non-null object carrying a `source` property whose
 * `typeof` is "object" (which, as in the plain `typeof` check, includes null).
 */
function isAudioConfig(value) {
  if (typeof value !== "object" || value === null) return false;
  if (!("source" in value)) return false;
  return typeof value.source === "object";
}
9695
10436
 
9696
10437
  // src/providers/telnyx-adapter.ts
9697
10438
  init_esm_shims();
@@ -9706,8 +10447,8 @@ var TelnyxAdapter = class {
9706
10447
  this.apiKey = apiKey;
9707
10448
  this.connectionId = connectionId;
9708
10449
  }
9709
- async request(method, path3, body) {
9710
- const url = `${this.baseUrl}${path3}`;
10450
+ async request(method, path4, body) {
10451
+ const url = `${this.baseUrl}${path4}`;
9711
10452
  const headers = {
9712
10453
  Authorization: `Bearer ${this.apiKey}`
9713
10454
  };
@@ -9720,7 +10461,7 @@ var TelnyxAdapter = class {
9720
10461
  });
9721
10462
  const text = await response.text();
9722
10463
  if (!response.ok) {
9723
- throw new Error(`Telnyx ${method} ${path3} failed: ${response.status} ${text}`);
10464
+ throw new Error(`Telnyx ${method} ${path4} failed: ${response.status} ${text}`);
9724
10465
  }
9725
10466
  if (!text) return {};
9726
10467
  try {
@@ -9761,10 +10502,15 @@ var TelnyxAdapter = class {
9761
10502
  if (!phoneNumber) throw new Error("TelnyxAdapter: phoneNumber is required");
9762
10503
  if (!opts.connectionId) throw new Error("TelnyxAdapter: connectionId is required");
9763
10504
  try {
10505
+ await this.request(
10506
+ "PATCH",
10507
+ `/phone_numbers/${encodeURIComponent(phoneNumber)}`,
10508
+ { connection_id: opts.connectionId }
10509
+ );
9764
10510
  await this.request(
9765
10511
  "PATCH",
9766
10512
  `/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
9767
- { connection_id: opts.connectionId, tech_prefix_enabled: false }
10513
+ { tech_prefix_enabled: false }
9768
10514
  );
9769
10515
  } catch (err) {
9770
10516
  const status = err instanceof Error ? err.message.replace(/\+\d{7,15}/g, "[REDACTED]") : String(err);
@@ -9864,6 +10610,7 @@ var TelnyxSTT = class {
9864
10610
  this.transcriptionEngine = transcriptionEngine;
9865
10611
  this.sampleRate = sampleRate;
9866
10612
  this.baseUrl = baseUrl;
10613
+ this.patterCtorArgs = [apiKey, language, transcriptionEngine, sampleRate, baseUrl];
9867
10614
  }
9868
10615
  apiKey;
9869
10616
  language;
@@ -9875,6 +10622,17 @@ var TelnyxSTT = class {
9875
10622
  ws = null;
9876
10623
  callbacks = /* @__PURE__ */ new Set();
9877
10624
  headerSent = false;
10625
+ /** Construction args replayed by clone(). */
10626
+ patterCtorArgs;
10627
+ /**
10628
+ * Fresh adapter built with this instance's construction arguments —
10629
+ * called per call by the stream handler so concurrent calls never share
10630
+ * connection state (sockets/queues; cross-call transcript bleed).
10631
+ */
10632
+ clone() {
10633
+ const ctor = this.constructor;
10634
+ return new ctor(...this.patterCtorArgs);
10635
+ }
9878
10636
  /** Open the streaming WebSocket and arm message handlers. */
9879
10637
  async connect() {
9880
10638
  const params = new URLSearchParams({
@@ -9886,11 +10644,11 @@ var TelnyxSTT = class {
9886
10644
  this.ws = new WebSocket7(url, {
9887
10645
  headers: { Authorization: `Bearer ${this.apiKey}` }
9888
10646
  });
9889
- await new Promise((resolve, reject) => {
10647
+ await new Promise((resolve2, reject) => {
9890
10648
  const timer = setTimeout(() => reject(new Error("Telnyx STT connect timeout")), 1e4);
9891
10649
  this.ws.once("open", () => {
9892
10650
  clearTimeout(timer);
9893
- resolve();
10651
+ resolve2();
9894
10652
  });
9895
10653
  this.ws.once("error", (err) => {
9896
10654
  clearTimeout(timer);
@@ -9912,7 +10670,13 @@ var TelnyxSTT = class {
9912
10670
  confidence: data.confidence ?? 0
9913
10671
  };
9914
10672
  for (const cb of this.callbacks) {
9915
- cb(transcript);
10673
+ try {
10674
+ Promise.resolve(cb(transcript)).catch(
10675
+ (err) => getLogger().error(`STT transcript callback failed: ${String(err)}`)
10676
+ );
10677
+ } catch (err) {
10678
+ getLogger().error(`STT transcript callback threw: ${String(err)}`);
10679
+ }
9916
10680
  }
9917
10681
  });
9918
10682
  this.ws.on("error", (err) => {
@@ -10009,11 +10773,11 @@ var TelnyxTTS = class {
10009
10773
  ws = new WebSocket8(url, {
10010
10774
  headers: { Authorization: `Bearer ${this.apiKey}` }
10011
10775
  });
10012
- await new Promise((resolve, reject) => {
10776
+ await new Promise((resolve2, reject) => {
10013
10777
  const timer = setTimeout(() => reject(new Error("Telnyx TTS connect timeout")), 1e4);
10014
10778
  ws.once("open", () => {
10015
10779
  clearTimeout(timer);
10016
- resolve();
10780
+ resolve2();
10017
10781
  });
10018
10782
  ws.once("error", (err) => {
10019
10783
  clearTimeout(timer);
@@ -10052,7 +10816,7 @@ var TelnyxTTS = class {
10052
10816
  while (true) {
10053
10817
  let frameTimer;
10054
10818
  const item = queue.length > 0 ? queue.shift() : await Promise.race([
10055
- new Promise((resolve) => waiters.push(resolve)),
10819
+ new Promise((resolve2) => waiters.push(resolve2)),
10056
10820
  new Promise((_, reject) => {
10057
10821
  frameTimer = setTimeout(
10058
10822
  () => reject(new Error("Telnyx TTS frame timeout")),
@@ -10076,15 +10840,682 @@ var TelnyxTTS = class {
10076
10840
  }
10077
10841
  };
10078
10842
 
10843
+ // src/evals/index.ts
10844
+ init_esm_shims();
10845
+
10846
+ // src/evals/case.ts
10847
+ init_esm_shims();
10848
/**
 * Flatten an eval result into a plain, JSON-friendly record.
 *
 * Transcript turns are reduced to `{ role, text }` and the duration is
 * rounded to millisecond precision (three decimals of a second).
 */
function evalResultToDict(result) {
  const { judge } = result;
  const turns = [];
  for (const turn of result.transcript) {
    turns.push({ role: turn.role, text: turn.text });
  }
  const roundedSeconds = Math.round(result.durationS * 1e3) / 1e3;
  return {
    case: result.caseName,
    score: judge.score,
    passed: judge.passed,
    reasoning: judge.reasoning,
    transcript: turns,
    duration_s: roundedSeconds,
    error: result.error
  };
}
10859
+
10860
+ // src/evals/llm-judge.ts
10861
+ init_esm_shims();
10862
+ var JUDGE_SYSTEM = 'You are a strict but fair evaluator of voice-AI agents. You will be given: (1) the expected behavior for the agent, (2) a rubric, (3) a transcript of the conversation. Return a JSON object with exactly three keys:\n - "score": float between 0.0 and 1.0\n - "passed": boolean (true when score >= threshold)\n - "reasoning": short string explaining the score\nDo not return any text outside the JSON object.';
10863
var LLMJudge = class {
  /** Chat model requested from OpenAI when no custom backend is supplied. */
  model;
  /** Minimum score (inclusive) at which a case counts as passed. */
  passThreshold;
  /** Explicit API key; when unset, `OPENAI_API_KEY` is read at call time. */
  apiKey;
  /** Optional custom backend exposing `judge(prompt)`; replaces the OpenAI call. */
  backend;
  /**
   * @param options Optional `model` (default "gpt-4o-mini"), `apiKey`,
   *   `passThreshold` (default 0.7) and `backend`.
   */
  constructor(options = {}) {
    this.model = options.model ?? "gpt-4o-mini";
    this.apiKey = options.apiKey;
    this.passThreshold = options.passThreshold ?? 0.7;
    this.backend = options.backend;
  }
  /** Return a {@link JudgeResult} for the given transcript. */
  async judgeCase(evalCase, transcript) {
    const prompt = this.buildPrompt(evalCase, transcript);
    const raw = this.backend ? await this.backend.judge(prompt) : await this.callOpenAI(prompt);
    return this.parse(raw);
  }
  /** Render expected behavior, rubric, threshold and transcript as one prompt string. */
  buildPrompt(evalCase, transcript) {
    const lines = [
      `EXPECTED BEHAVIOR: ${evalCase.expectedBehavior}`,
      `RUBRIC: ${evalCase.rubric}`,
      `PASS THRESHOLD: ${this.passThreshold}`,
      "TRANSCRIPT:"
    ];
    for (const turn of transcript) {
      lines.push(` ${turn.role || "?"}: ${turn.text ?? ""}`);
    }
    return lines.join("\n");
  }
  /**
   * Call OpenAI chat completions directly over fetch (no SDK dependency).
   *
   * @throws Error when no API key is available, the HTTP status is not OK,
   *   or the response carries no message content.
   */
  async callOpenAI(prompt) {
    const apiKey = this.apiKey || process.env.OPENAI_API_KEY;
    if (!apiKey) {
      throw new Error(
        "LLMJudge requires an OpenAI API key. Set OPENAI_API_KEY or pass apiKey to the LLMJudge constructor."
      );
    }
    const response = await fetch("https://api.openai.com/v1/chat/completions", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`
      },
      body: JSON.stringify({
        model: this.model,
        messages: [
          { role: "system", content: JUDGE_SYSTEM },
          { role: "user", content: prompt }
        ],
        response_format: { type: "json_object" },
        temperature: 0
      })
    });
    if (!response.ok) {
      const errText = await response.text();
      throw new Error(`LLMJudge OpenAI call failed: ${response.status} ${errText.slice(0, 200)}`);
    }
    const data = await response.json();
    const content = data.choices?.[0]?.message?.content;
    if (!content) {
      throw new Error(
        `LLMJudge response had no choices/content: ${JSON.stringify(data).slice(0, 200)}`
      );
    }
    return content;
  }
  /**
   * Parse the judge's JSON — tolerant of extra whitespace / code fences.
   *
   * Anything that is not a JSON object (unparseable text, `null`, a bare
   * number or string, an array) is treated as an invalid verdict and yields
   * a failing result instead of throwing. `passed` is always derived from
   * the clamped score and this judge's threshold.
   */
  parse(raw) {
    const rawText = typeof raw === "string" ? raw : String(raw ?? "");
    let text = rawText.trim();
    if (text.startsWith("```")) {
      text = text.replace(/^```(?:json)?\s*/, "").replace(/\s*```$/, "");
    }
    let data;
    try {
      data = JSON.parse(text);
    } catch {
      data = void 0;
    }
    // Valid JSON is not enough: `null` or a scalar has no `score` to read.
    if (data === null || typeof data !== "object" || Array.isArray(data)) {
      getLogger().warn(`LLMJudge: invalid JSON, defaulting to fail: ${JSON.stringify(raw)}`);
      return {
        score: 0,
        passed: false,
        reasoning: `Judge returned invalid JSON: ${rawText.slice(0, 200)}`
      };
    }
    const scoreRaw = Number(data.score ?? 0);
    let score = Number.isFinite(scoreRaw) ? scoreRaw : 0;
    score = Math.max(0, Math.min(1, score));
    const passed = score >= this.passThreshold;
    const reasoning = String(data.reasoning ?? "");
    return { score, passed, reasoning };
  }
};
10954
+
10955
+ // src/evals/runner.ts
10956
+ init_esm_shims();
10957
+ import { readFile } from "fs/promises";
10958
+ import { extname, basename } from "path";
10959
var EvalRunner = class {
  /** Judge used to score each transcript. */
  judge;
  /** @param options Optional `judge`; a default {@link LLMJudge} is built when omitted. */
  constructor(options = {}) {
    this.judge = options.judge ?? new LLMJudge();
  }
  /**
   * Execute the suite's cases one after another and collect their results.
   *
   * ``agentFactory`` is only needed by cases that carry no ``agent`` of
   * their own (the legacy ``reply()`` path).
   */
  async run(suite, agentFactory) {
    const collected = [];
    for (const evalCase of suite.cases) {
      const outcome = await this.runCase(evalCase, agentFactory);
      collected.push(outcome);
    }
    return collected;
  }
  /**
   * Execute one case and produce its {@link EvalResult}.
   *
   * A case with ``evalCase.agent`` goes through the real-pipeline
   * {@link EvalSession}; any other case is driven through the legacy
   * ``reply()``-callable obtained from ``agentFactory``.
   */
  async runCase(evalCase, agentFactory) {
    const startedAt = Date.now();
    const transcript = [];
    // Every exit path produces the same result shape; only judge/error vary.
    const finish = (judge, error) => ({
      caseName: evalCase.name,
      transcript,
      judge,
      durationS: (Date.now() - startedAt) / 1e3,
      error
    });
    let runError = null;
    try {
      if (evalCase.agent !== void 0) {
        await this.runTurnsWithSession(evalCase, transcript);
      } else if (agentFactory === void 0) {
        throw new Error(
          `case ${JSON.stringify(evalCase.name)} has no agent and no agentFactory was supplied`
        );
      } else {
        await this.runTurnsWithReply(evalCase, agentFactory, transcript);
      }
    } catch (exc) {
      runError = formatError(exc);
      getLogger().error(`eval case=${JSON.stringify(evalCase.name)} raised: ${runError}`);
    }
    // Nothing was said before the failure: there is nothing for the judge to score.
    if (runError !== null && transcript.length === 0) {
      return finish({ score: 0, passed: false, reasoning: runError }, runError);
    }
    let verdict;
    try {
      verdict = await this.judge.judgeCase(evalCase, transcript);
    } catch (exc) {
      const detail = exc instanceof Error ? exc.message : String(exc);
      const judgeError = `judge error: ${detail}`;
      return finish({ score: 0, passed: false, reasoning: judgeError }, judgeError);
    }
    return finish(verdict, runError);
  }
  /**
   * Legacy path — drives the case against a ``reply()`` callable.
   *
   * Turns are appended to ``transcript`` in place, so an exception raised
   * part-way through still leaves the partial transcript for the judge.
   */
  async runTurnsWithReply(evalCase, agentFactory, transcript) {
    const agent = await agentFactory();
    const callable = typeof agent === "function";
    if (evalCase.firstMessage) {
      transcript.push({ role: "agent", text: evalCase.firstMessage });
    }
    for (const turn of evalCase.turns) {
      transcript.push({ role: "user", text: turn.user });
      // A non-callable agent contributes an empty reply.
      const answer = (callable ? await agent(turn.user) : "") || "";
      transcript.push({ role: "agent", text: answer });
      logMissingExpected(evalCase, turn, answer);
    }
  }
  /**
   * Real-pipeline path — drives the case through {@link EvalSession}.
   *
   * A non-empty ``evalCase.firstMessage`` replaces the agent's own
   * ``firstMessage``. Turns are appended to ``transcript`` in place
   * (partial-on-error, same as the legacy path), and the session is closed
   * whether or not the turns complete.
   */
  async runTurnsWithSession(evalCase, transcript) {
    const sessionModule = await import("./session-N3CBCYYN.mjs");
    if (!evalCase.agent) {
      throw new Error(`case ${JSON.stringify(evalCase.name)} has no agent \u2014 use the reply-factory path`);
    }
    const agent = evalCase.firstMessage
      ? { ...evalCase.agent, firstMessage: evalCase.firstMessage }
      : evalCase.agent;
    const session = await sessionModule.EvalSession.create({
      agent,
      llmProvider: evalCase.llmProvider
    });
    try {
      if (agent.firstMessage) {
        transcript.push({ role: "agent", text: agent.firstMessage });
      }
      for (const turn of evalCase.turns) {
        transcript.push({ role: "user", text: turn.user });
        const { agentText } = await session.userSays(turn.user);
        transcript.push({ role: "agent", text: agentText });
        logMissingExpected(evalCase, turn, agentText);
      }
    } finally {
      await session.close();
    }
  }
  /** Render a JSON report suitable for CI artefacts. */
  report(suite, results) {
    const total = results.length;
    let passed = 0;
    for (const entry of results) {
      if (entry.judge.passed) passed += 1;
    }
    return JSON.stringify(
      {
        suite: suite.name,
        total,
        passed,
        failed: total - passed,
        pass_rate: total > 0 ? passed / total : 0,
        cases: results.map((entry) => evalResultToDict(entry))
      },
      null,
      2
    );
  }
};
11102
/** Describe a thrown value: `Name: message` for Error instances, `String(value)` otherwise. */
function formatError(exc) {
  return exc instanceof Error ? `${exc.name}: ${exc.message}` : String(exc);
}
11108
/**
 * Log (at info level) every `expectedContains` needle of the turn that does
 * not occur in the reply. Matching is case-insensitive; nothing is thrown.
 */
function logMissingExpected(evalCase, turn, reply) {
  const needles = turn.expectedContains ?? [];
  for (const needle of needles) {
    const found = reply.toLowerCase().includes(needle.toLowerCase());
    if (found) continue;
    getLogger().info(
      `case=${JSON.stringify(evalCase.name)} expectedContains=${JSON.stringify(needle)} missing in reply`
    );
  }
}
11117
/**
 * Read an eval suite from a JSON or YAML file.
 *
 * `.yaml` / `.yml` files are parsed with the optional `yaml` package (loaded
 * on demand); every other extension is parsed as JSON. Case and turn fields
 * accept both snake_case and camelCase spellings, with snake_case winning
 * when both are present. Turns that are not objects are dropped.
 *
 * @param path4 Path to the suite file.
 * @returns `{ name, cases, metadata }`; the name falls back to the file's
 *   base name without its extension.
 * @throws Error when the YAML package is missing, the top level is not a
 *   mapping, `cases` is not a list, or a case is not a mapping.
 */
async function loadSuite(path4) {
  const text = await readFile(path4, "utf-8");
  const ext = extname(path4).toLowerCase();
  const isYaml = ext === ".yaml" || ext === ".yml";
  let data;
  if (isYaml) {
    let yaml;
    try {
      // Indirect specifier: the dependency is optional and resolved at run time.
      const moduleName = "yaml";
      yaml = await import(moduleName);
    } catch {
      throw new Error(
        "Loading YAML suites requires the optional 'yaml' package. Install with: npm install yaml \u2014 or use a JSON suite file."
      );
    }
    data = yaml.parse(text);
  } else {
    data = JSON.parse(text);
  }
  const isMapping = (v) => v !== null && typeof v === "object" && !Array.isArray(v);
  if (!isMapping(data)) {
    throw new Error(`Eval suite ${path4} must be a mapping, got ${typeOf(data)}`);
  }
  const record = data;
  const casesRaw = record.cases ?? [];
  if (!Array.isArray(casesRaw)) {
    throw new Error(`Eval suite ${path4}: 'cases' must be a list`);
  }
  const toTurn = (t) => ({
    user: String(t.user ?? ""),
    expectedContains: toStringArray(t.expected_contains ?? t.expectedContains)
  });
  const cases = casesRaw.map((caseRecord, i) => {
    if (!isMapping(caseRecord)) {
      throw new Error(`Eval suite ${path4}: case ${i} must be a mapping`);
    }
    const turnsRaw = caseRecord.turns ?? [];
    const turnList = Array.isArray(turnsRaw) ? turnsRaw : [];
    const turns = turnList.filter((t) => t !== null && typeof t === "object").map((t) => toTurn(t));
    return {
      name: String(caseRecord.name ?? `case_${i}`),
      turns,
      expectedBehavior: String(caseRecord.expected_behavior ?? caseRecord.expectedBehavior ?? ""),
      rubric: String(caseRecord.rubric ?? ""),
      tags: toStringArray(caseRecord.tags),
      firstMessage: String(caseRecord.first_message ?? caseRecord.firstMessage ?? "")
    };
  });
  const fallbackName = basename(path4, extname(path4));
  return {
    name: String(record.name ?? fallbackName),
    cases,
    metadata: record.metadata ?? {}
  };
}
11168
/** Stringify every element of an array; any non-array input yields an empty array. */
function toStringArray(value) {
  return Array.isArray(value) ? value.map((item) => String(item)) : [];
}
11172
/** `typeof`, except that null reports "null" and arrays report "array". */
function typeOf(value) {
  const kind = typeof value;
  // Only the "object" bucket needs splitting; everything else is already precise.
  if (kind !== "object") return kind;
  if (value === null) return "null";
  return Array.isArray(value) ? "array" : kind;
}
11177
+
11178
+ // src/evals/scripted-llm.ts
11179
+ init_esm_shims();
11180
/**
 * Build a scripted turn that emits `text` followed by a usage chunk.
 *
 * @param text Content of the text chunk.
 * @param options Optional `inputTokens` / `outputTokens`; each defaults to 8.
 */
function textTurn(text, options = {}) {
  const textChunk = { type: "text", content: text };
  const usageChunk = {
    type: "usage",
    inputTokens: options.inputTokens ?? 8,
    outputTokens: options.outputTokens ?? 8
  };
  return [textChunk, usageChunk];
}
11190
/**
 * Build a scripted turn that requests one tool call, followed by a fixed
 * usage chunk (8 input / 4 output tokens).
 *
 * @param name Tool name.
 * @param args Tool arguments; serialized to JSON (`{}` when nullish).
 * @param options Optional `callId`; defaults to "call_1".
 */
function toolCallTurn(name, args, options = {}) {
  const callChunk = {
    type: "tool_call",
    index: 0,
    id: options.callId ?? "call_1",
    name,
    arguments: JSON.stringify(args ?? {})
  };
  const usageChunk = { type: "usage", inputTokens: 8, outputTokens: 4 };
  return [callChunk, usageChunk];
}
11202
var ScriptedLLMProvider = class {
  /** Stable pricing/dashboard key (no real pricing entry — cost is 0). */
  static providerKey = "scripted";
  /** One record per `stream()` invocation: copied messages, copied tools, call id. */
  calls = [];
  /** Queue of pending scripted turns; each `stream()` call consumes the first one. */
  scripts;
  /** @param turns Optional list of turns, each a list of chunks; chunks are shallow-copied. */
  constructor(turns) {
    const initial = turns ?? [];
    this.scripts = initial.map((turn) => turn.map((chunk) => ({ ...chunk })));
  }
  /** Queue one more scripted turn; its chunks are shallow-copied. */
  addTurn(chunks) {
    const copied = chunks.map((chunk) => ({ ...chunk }));
    this.scripts.push(copied);
  }
  /**
   * Record the call, then replay the next scripted turn chunk by chunk.
   *
   * When the queue is empty a single `{ type: "done" }` chunk is yielded.
   * Replay stops early once `opts.signal` reports `aborted`.
   */
  async *stream(messages, tools, opts) {
    const record = {
      messages: messages.map((message) => ({ ...message })),
      tools: tools ? tools.map((tool) => ({ ...tool })) : null,
      callId: opts?.callId ?? null
    };
    this.calls.push(record);
    const nextTurn = this.scripts.shift();
    if (nextTurn === void 0) {
      yield { type: "done" };
      return;
    }
    for (const chunk of nextTurn) {
      // Re-checked before every chunk so an abort takes effect mid-turn.
      if (opts?.signal?.aborted) {
        return;
      }
      yield { ...chunk };
    }
  }
};
11231
+
11232
+ // src/evals/assertions.ts
11233
+ init_esm_shims();
11234
+ import { AssertionError } from "assert";
11235
/** Wrap a turn result in a {@link TurnExpectation} so assertions can be chained on it. */
function expect(result) {
  const expectation = new TurnExpectation(result);
  return expectation;
}
11238
/**
 * Structural equality for JSON-like values.
 *
 * Primitives (and identical references) compare with `===`. Arrays must have
 * the same length and pairwise-equal elements. Plain objects must have the
 * same number of own enumerable keys, every key of `a` must also be an own
 * key of `b`, and the values must be recursively equal. An array never
 * equals a non-array.
 *
 * @returns true when the two values are structurally equal.
 */
function deepEqual(a, b) {
  if (a === b) return true;
  const aIsArray = Array.isArray(a);
  const bIsArray = Array.isArray(b);
  if (aIsArray || bIsArray) {
    return aIsArray && bIsArray && a.length === b.length && a.every((v, i) => deepEqual(v, b[i]));
  }
  if (a === null || b === null || typeof a !== "object" || typeof b !== "object") {
    return false;
  }
  const ak = Object.keys(a);
  const bk = Object.keys(b);
  // The own-key check matters: without it `{ a: undefined }` would equal
  // `{ b: undefined }`, because a missing key also reads as undefined.
  return ak.length === bk.length && ak.every((k) => Object.hasOwn(b, k) && deepEqual(a[k], b[k]));
}
11252
/**
 * Recursive containment check used for tool-argument matching.
 *
 * When `subset` is a plain (non-array) object, every one of its entries must
 * be present in `actual` (which must itself be a plain object) with a value
 * that is in turn a subset match. Any other `subset` value — primitives,
 * arrays, null — is compared with `deepEqual`.
 */
function isSubset(subset, actual) {
  const isPlainObject = (v) => v !== null && typeof v === "object" && !Array.isArray(v);
  if (!isPlainObject(subset)) {
    return deepEqual(subset, actual);
  }
  if (!isPlainObject(actual)) {
    return false;
  }
  for (const [key, expected] of Object.entries(subset)) {
    if (!(key in actual) || !isSubset(expected, actual[key])) {
      return false;
    }
  }
  return true;
}
11264
/**
 * Fluent assertion wrapper around one turn result.
 *
 * The wrapped object is read through `toolCalls` (entries with `name` and
 * `arguments`), `agentText` and `historySnapshot`. Synchronous assertions
 * return `this` so they can be chained; `judge` is async and ends the chain.
 * Every failed assertion throws Node's `AssertionError`.
 */
var TurnExpectation = class {
  turnResult;
  constructor(result) {
    this.turnResult = result;
  }
  /** The wrapped {@link TurnResult} (escape hatch for ad-hoc asserts). */
  get result() {
    return this.turnResult;
  }
  // -- tools -----------------------------------------------------------------
  /**
   * Assert that tool ``name`` ran this turn.
   *
   * ``argsSubset`` (optional) must be recursively contained in the args of
   * at least one matching invocation — extra argument keys are allowed,
   * listed keys must match exactly.
   *
   * @param {string} name Tool name to look for.
   * @param {object} [argsSubset] Arguments that one invocation must contain.
   * @returns {this} The expectation, for chaining.
   * @throws {AssertionError} When the tool did not run, or no invocation
   *   contains ``argsSubset``.
   */
  toolCalled(name, argsSubset) {
    const matches = this.turnResult.toolCalls.filter((tc) => tc.name === name);
    if (matches.length === 0) {
      const called = this.turnResult.toolCalls.map((tc) => tc.name);
      throw new AssertionError({
        message: `expected tool ${JSON.stringify(name)} to be called this turn; tools called: ${called.length > 0 ? JSON.stringify(called) : "none"}`
      });
    }
    if (argsSubset !== undefined && !matches.some((tc) => isSubset(argsSubset, tc.arguments))) {
      throw new AssertionError({
        message: `tool ${JSON.stringify(name)} was called, but no invocation matched argsSubset=${JSON.stringify(argsSubset)}; observed args: ` + JSON.stringify(matches.map((tc) => tc.arguments))
      });
    }
    return this;
  }
  /**
   * Assert that no tool ran this turn (or that ``name`` did not).
   *
   * @param {string} [name] When given, only this tool is forbidden.
   * @returns {this} The expectation, for chaining.
   * @throws {AssertionError} When a forbidden tool call is present.
   */
  noToolCalled(name) {
    if (name === undefined) {
      if (this.turnResult.toolCalls.length > 0) {
        throw new AssertionError({
          message: "expected no tool calls this turn; tools called: " + JSON.stringify(this.turnResult.toolCalls.map((tc) => tc.name))
        });
      }
      return this;
    }
    const offenders = this.turnResult.toolCalls.filter((tc) => tc.name === name);
    if (offenders.length > 0) {
      throw new AssertionError({
        message: `expected tool ${JSON.stringify(name)} NOT to be called this turn; it ran ${offenders.length} time(s) with args ` + JSON.stringify(offenders.map((tc) => tc.arguments))
      });
    }
    return this;
  }
  // -- text --------------------------------------------------------------------
  /**
   * Assert that the agent's text contains every given substring.
   *
   * Two call shapes are accepted:
   * - ``agentTextContains(["a", "b"], { caseSensitive: true })``
   * - ``agentTextContains("a", "b")``, optionally followed by the same
   *   options object as the last argument.
   *
   * Matching is case-insensitive unless ``caseSensitive`` is true.
   *
   * @returns {this} The expectation, for chaining.
   * @throws {AssertionError} Listing the substrings that were not found.
   */
  agentTextContains(first, ...rest) {
    let needles;
    let options;
    if (Array.isArray(first)) {
      needles = [...first];
      options = rest[0];
    } else {
      const args = [first, ...rest];
      needles = args.filter((n) => typeof n === "string");
      // A trailing options object used to be filtered out with the other
      // non-strings and silently ignored; honour it in this form as well.
      const last = args[args.length - 1];
      if (last !== null && typeof last === "object" && !Array.isArray(last)) {
        options = last;
      }
    }
    const caseSensitive = options?.caseSensitive ?? false;
    const haystack = this.turnResult.agentText;
    const cmpHaystack = caseSensitive ? haystack : haystack.toLowerCase();
    const missing = needles.filter(
      (n) => !cmpHaystack.includes(caseSensitive ? n : n.toLowerCase())
    );
    if (missing.length > 0) {
      throw new AssertionError({
        message: `agent text is missing ${JSON.stringify(missing)}; agent said: ` + JSON.stringify(haystack)
      });
    }
    return this;
  }
  // -- semantic judge ----------------------------------------------------------
  /**
   * Score this turn against ``intent`` with the LLM judge.
   *
   * Builds a synthetic {@link EvalCase} whose ``expectedBehavior`` is
   * ``intent`` and judges the turn's full history snapshot. Throws
   * ``AssertionError`` when the judge fails the turn; returns the
   * {@link JudgeResult} otherwise (chain-ending, async).
   *
   * @param {object} llmJudge Object exposing ``judgeCase(evalCase, transcript)``.
   * @param {{intent: string, rubric?: string}} options ``rubric`` defaults to
   *   a sentence derived from ``intent``.
   */
  async judge(llmJudge, options) {
    const { intent, rubric } = options;
    const evalCase = {
      name: "inline-judge",
      turns: [],
      expectedBehavior: intent,
      rubric: rubric ?? `Pass when the agent's behavior matches: ${intent}`
    };
    const transcript = historyTranscript(this.turnResult.historySnapshot);
    const verdict = await llmJudge.judgeCase(evalCase, transcript);
    getLogger().info(
      `judge intent=${JSON.stringify(intent)} score=${verdict.score.toFixed(2)} passed=${verdict.passed}`
    );
    if (!verdict.passed) {
      throw new AssertionError({
        message: `LLM judge failed the turn (score=${verdict.score.toFixed(2)}): ${verdict.reasoning} \u2014 intent was ${JSON.stringify(intent)}; agent said ` + JSON.stringify(this.turnResult.agentText)
      });
    }
    return verdict;
  }
};
11368
+
10079
11369
  // src/observability/index.ts
10080
11370
  init_esm_shims();
10081
11371
 
11372
+ // src/observability/attributes.ts
11373
+ init_esm_shims();
11374
+ var DEFAULT_SIDE = "uut";
11375
+ var _scopeStack = [];
11376
/**
 * Return the most recently pushed call-scope frame, or null when no
 * `patterCallScope` is currently open.
 */
function _currentScope() {
  const depth = _scopeStack.length;
  if (depth === 0) {
    return null;
  }
  return _scopeStack[depth - 1];
}
11379
/**
 * Load `@opentelemetry/api` on demand.
 *
 * @returns The loaded module, or null when loading it throws (for example
 *   because the package is not installed).
 */
function _tryLoadOtelApi() {
  let otelApi = null;
  try {
    otelApi = __require("@opentelemetry/api");
  } catch {
    // Any load failure is treated as "not available".
    otelApi = null;
  }
  return otelApi;
}
11386
/**
 * Record `attrs` on the current trace, tagged with the open call scope.
 *
 * Does nothing unless tracing is enabled, a `patterCallScope` is open and
 * `@opentelemetry/api` can be loaded. `patter.call_id` and `patter.side` are
 * filled in from the scope when the caller did not provide them.
 *
 * If there is an active span that is recording (or that has no
 * `isRecording` method), the attributes are set on it. Otherwise a
 * zero-length `patter.billable` span carrying the attributes is started and
 * ended immediately. All OpenTelemetry failures are swallowed on purpose:
 * this is best-effort telemetry and must never break the caller.
 */
function recordPatterAttrs(attrs) {
  if (!isTracingEnabled()) return;
  const scope = _currentScope();
  if (scope === null) return;
  const otel = _tryLoadOtelApi();
  if (!otel) return;

  const stamped = { ...attrs };
  if (stamped["patter.call_id"] === undefined) {
    stamped["patter.call_id"] = scope.callId;
  }
  if (stamped["patter.side"] === undefined) {
    stamped["patter.side"] = scope.side;
  }

  let attachedToActiveSpan = false;
  try {
    const current = otel.trace.getActiveSpan?.() ?? null;
    const usable = current && (current.isRecording === undefined || current.isRecording());
    if (usable) {
      Object.entries(stamped).forEach(([key, value]) => {
        try {
          current.setAttribute(key, value);
        } catch {
          // One rejected attribute must not prevent the remaining ones.
        }
      });
      attachedToActiveSpan = true;
    }
  } catch {
    // Fall through to the standalone span below.
  }
  if (attachedToActiveSpan) return;

  try {
    otel.trace
      .getTracer("getpatter.observability")
      .startSpan("patter.billable", { attributes: stamped })
      .end();
  } catch {
    // Best effort: nothing else to do when the tracer itself fails.
  }
}
11418
/**
 * Run `fn` inside a call scope so attributes recorded while it runs are
 * tagged with `options.callId` and `options.side` (default: DEFAULT_SIDE).
 *
 * The frame is pushed before `fn` is invoked and removed again once `fn`
 * settles, whether it resolved or threw.
 *
 * NOTE(review): frames live on a single module-level stack, so while several
 * scopes are awaiting at the same time the "current" scope is whichever
 * frame was pushed last — confirm this is acceptable for concurrent calls.
 *
 * @returns Whatever `fn` resolves to.
 * @throws {Error} When `options.callId` is empty.
 */
async function patterCallScope(options, fn) {
  const { callId } = options;
  if (!callId) {
    throw new Error("patterCallScope requires non-empty callId");
  }
  const side = options.side ?? DEFAULT_SIDE;
  const frame = { callId, side };
  _scopeStack.push(frame);
  try {
    const outcome = await fn();
    return outcome;
  } finally {
    // Remove exactly this frame; it may no longer be on top of the stack.
    const position = _scopeStack.lastIndexOf(frame);
    if (position !== -1) {
      _scopeStack.splice(position, 1);
    }
  }
}
11434
/**
 * Attach `exporter` to the OpenTelemetry tracer provider and remember the
 * `side` label on `patterInstance`.
 *
 * Steps:
 * 1. `patterInstance._patterSide` is always set (default: DEFAULT_SIDE),
 *    even when tracing is disabled.
 * 2. When tracing is enabled, the optional SDK packages are loaded; without
 *    `@opentelemetry/sdk-trace-base` nothing can be exported.
 * 3. The registered provider is reused when it can accept span processors.
 *    The API returns a proxy from `getTracerProvider()`, so the proxy's
 *    delegate is inspected as well. Otherwise a `NodeTracerProvider` is
 *    created and registered globally.
 * 4. The exporter is wrapped in a `SimpleSpanProcessor` and added once per
 *    provider; repeated calls with the same exporter are no-ops.
 *
 * Failures are logged and never thrown.
 *
 * @param {object} patterInstance Object that receives `_patterSide`.
 * @param {object} exporter Span exporter to register.
 * @param {{side?: string}} [options] Optional side label.
 * @returns {void}
 */
function attachSpanExporter(patterInstance, exporter, options = {}) {
  const side = options.side ?? DEFAULT_SIDE;
  patterInstance._patterSide = side;
  if (!isTracingEnabled()) {
    getLogger().debug(
      `attachSpanExporter: ${ENV_FLAG} not enabled or tracer unavailable; only side= stored`
    );
    return;
  }
  let sdkTraceBase = null;
  let sdkTraceNode = null;
  try {
    sdkTraceBase = __require("@opentelemetry/sdk-trace-base");
  } catch {
    sdkTraceBase = null;
  }
  try {
    sdkTraceNode = __require("@opentelemetry/sdk-trace-node");
  } catch {
    sdkTraceNode = null;
  }
  if (!sdkTraceBase) {
    getLogger().warn(
      "attachSpanExporter: @opentelemetry/sdk-trace-base is not installed; spans will not be exported. Install @opentelemetry/sdk-trace-base + @opentelemetry/sdk-trace-node."
    );
    return;
  }
  const api = _tryLoadOtelApi();
  if (!api) return;
  let provider = null;
  try {
    const tracerApi = api.trace;
    const existing = tracerApi.getTracerProvider?.() ?? null;
    if (existing && typeof existing.addSpanProcessor === "function") {
      provider = existing;
    } else if (existing && typeof existing.getDelegate === "function") {
      // The global provider is a proxy; the SDK provider that can take span
      // processors (if one is registered) is its delegate.
      const delegate = existing.getDelegate();
      if (delegate && typeof delegate.addSpanProcessor === "function") {
        provider = delegate;
      }
    }
  } catch {
    provider = null;
  }
  if (!provider) {
    if (!sdkTraceNode) {
      getLogger().warn(
        "attachSpanExporter: no SDK TracerProvider registered and @opentelemetry/sdk-trace-node is not installed; cannot wire exporter."
      );
      return;
    }
    try {
      provider = new sdkTraceNode.NodeTracerProvider();
      const trace = api.trace;
      trace.setGlobalTracerProvider?.(provider);
    } catch (e) {
      getLogger().debug(
        `attachSpanExporter: failed to construct NodeTracerProvider: ${String(
          e?.message ?? e
        )}`
      );
      return;
    }
  }
  if (typeof provider.addSpanProcessor !== "function") {
    // Previously this case was a silent no-op that still marked the exporter
    // as attached. NOTE(review): presumably hit by SDK versions that only
    // accept span processors via the constructor — confirm.
    getLogger().warn(
      "attachSpanExporter: TracerProvider does not expose addSpanProcessor(); exporter was not attached."
    );
    return;
  }
  // Per-provider registry so the same exporter is never wrapped twice.
  let seen = provider._patterAttachedExporters;
  if (!seen) {
    seen = /* @__PURE__ */ new Set();
    provider._patterAttachedExporters = seen;
  }
  if (seen.has(exporter)) return;
  try {
    const processor = new sdkTraceBase.SimpleSpanProcessor(exporter);
    provider.addSpanProcessor(processor);
    seen.add(exporter);
  } catch (e) {
    getLogger().debug(
      `attachSpanExporter: failed to register exporter: ${String(
        e?.message ?? e
      )}`
    );
  }
}
11511
+
10082
11512
  // src/index.ts
10083
11513
  var hermes = Object.freeze({ LLM: LLM8 });
10084
11514
  var openclaw = Object.freeze({ LLM: LLM9 });
10085
11515
  var openaiCompatible = Object.freeze({ LLM: LLM6 });
10086
11516
  var custom = Object.freeze({ LLM: LLM7 });
10087
11517
  export {
11518
+ AGENT_BACKLOG_CAP_S,
10088
11519
  AllProvidersFailedError,
10089
11520
  LLM2 as AnthropicLLM,
10090
11521
  STT6 as AssemblyAISTT,
@@ -10115,7 +11546,12 @@ export {
10115
11546
  TTS as ElevenLabsTTS,
10116
11547
  TTS2 as ElevenLabsWebSocketTTS,
10117
11548
  ErrorCode,
11549
+ EvalRunner,
11550
+ EvalSession,
10118
11551
  EventBus,
11552
+ FakeAudioSender,
11553
+ FakeSTT,
11554
+ FakeTTS,
10119
11555
  FallbackLLMProvider,
10120
11556
  GEMINI_DEFAULT_INPUT_SR,
10121
11557
  GEMINI_DEFAULT_OUTPUT_SR,
@@ -10129,8 +11565,10 @@ export {
10129
11565
  KrispFrameDuration,
10130
11566
  KrispSampleRate,
10131
11567
  KrispVivaFilter,
11568
+ LLMJudge,
10132
11569
  LLMLoop,
10133
11570
  TTS6 as LMNTTTS,
11571
+ LocalCallRecorder,
10134
11572
  MetricsStore,
10135
11573
  MinWordsStrategy,
10136
11574
  Ngrok,
@@ -10164,11 +11602,13 @@ export {
10164
11602
  PlivoAdapter,
10165
11603
  PricingUnit,
10166
11604
  ProvisionError,
11605
+ RECORDING_SAMPLE_RATE,
10167
11606
  RateLimitError,
10168
11607
  RemoteMessageHandler,
10169
11608
  RimeAudioFormat,
10170
11609
  RimeModel,
10171
11610
  TTS5 as RimeTTS,
11611
+ SMART_TURN_MODEL_ENV_VAR,
10172
11612
  SPAN_BARGEIN,
10173
11613
  SPAN_CALL,
10174
11614
  SPAN_ENDPOINT,
@@ -10176,8 +11616,10 @@ export {
10176
11616
  SPAN_STT,
10177
11617
  SPAN_TOOL,
10178
11618
  SPAN_TTS,
11619
+ ScriptedLLMProvider,
10179
11620
  SentenceChunker,
10180
11621
  SileroVAD,
11622
+ SmartTurnDetector,
10181
11623
  STT5 as SonioxSTT,
10182
11624
  SpeechEvents,
10183
11625
  SpeechmaticsAudioEncoding,
@@ -10199,6 +11641,7 @@ export {
10199
11641
  TestSession,
10200
11642
  TfidfLoopDetector,
10201
11643
  Tool,
11644
+ TurnExpectation,
10202
11645
  Carrier2 as Twilio,
10203
11646
  TwilioAdapter,
10204
11647
  ULTRAVOX_DEFAULT_API_BASE,
@@ -10206,6 +11649,7 @@ export {
10206
11649
  UltravoxRealtimeAdapter,
10207
11650
  STT2 as WhisperSTT,
10208
11651
  assemblyai,
11652
+ attachSpanExporter,
10209
11653
  builtinClipPath,
10210
11654
  calculateRealtimeCost,
10211
11655
  calculateSttCost,
@@ -10222,7 +11666,9 @@ export {
10222
11666
  deepgram,
10223
11667
  defineTool,
10224
11668
  elevenlabs,
11669
+ evalResultToDict,
10225
11670
  evaluateStrategies as evaluateBargeInStrategies,
11671
+ expect,
10226
11672
  filterEmoji,
10227
11673
  filterForTTS,
10228
11674
  filterMarkdown,
@@ -10232,11 +11678,13 @@ export {
10232
11678
  guardrail,
10233
11679
  hashCaller,
10234
11680
  hermes,
11681
+ historyTranscript,
10235
11682
  initTracing,
10236
11683
  isRemoteUrl,
10237
11684
  isTracingEnabled,
10238
11685
  isWebSocketUrl,
10239
11686
  lmnt,
11687
+ loadSuite,
10240
11688
  makeAuthMiddleware,
10241
11689
  mergePricing,
10242
11690
  mixPcm,
@@ -10249,7 +11697,9 @@ export {
10249
11697
  openclaw,
10250
11698
  openclawConsult,
10251
11699
  openclawPostCallNotifier,
11700
+ patterCallScope,
10252
11701
  pcm16ToMulaw,
11702
+ recordPatterAttrs,
10253
11703
  resample16kTo8k,
10254
11704
  resample24kTo16k,
10255
11705
  resample8kTo16k,
@@ -10261,11 +11711,15 @@ export {
10261
11711
  scheduleOnce,
10262
11712
  selectSoundFromList,
10263
11713
  setLogger,
11714
+ shutdownTracing,
10264
11715
  soniox,
10265
11716
  speechmatics,
10266
11717
  startSpan,
10267
11718
  startTunnel,
11719
+ textTurn,
10268
11720
  tool,
11721
+ toolCallTurn,
10269
11722
  ultravox,
10270
- whisper
11723
+ whisper,
11724
+ withSpan
10271
11725
  };