getpatter 0.6.7 → 0.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -498,8 +498,9 @@ var OpenAIRealtimeAdapter = class {
498
498
  } else if (t === "response.audio_transcript.delta") {
499
499
  dispatch("transcript_output", data.delta);
500
500
  } else if (t === "response.content_part.added" || t === "response.output_item.added") {
501
+ const itemType = data.item?.type;
501
502
  const itemId = data.item?.id ?? data.item_id ?? null;
502
- if (itemId) {
503
+ if (itemId && (itemType === void 0 || itemType === "message")) {
503
504
  this.currentResponseItemId = itemId;
504
505
  this.currentResponseAudioMs = 0;
505
506
  this.currentResponseFirstAudioAt = null;
@@ -683,6 +684,50 @@ var OpenAIRealtimeAdapter = class {
683
684
  }));
684
685
  this.ws?.send(JSON.stringify({ type: "response.create" }));
685
686
  }
687
+ /**
688
+ * Build the partial `session` body for a mid-session swap.
689
+ *
690
+ * v1-beta wire shape: flat `{ instructions, tools }`. The GA adapter
691
+ * overrides this to add the mandatory `"type": "realtime"` envelope.
692
+ * OpenAI merges partial `session.update` payloads server-side, so only the
693
+ * swapped fields are sent — audio formats, VAD tuning, and voice are
694
+ * untouched. Mirrors Python `_build_session_update_patch`.
695
+ */
696
+ buildSessionUpdatePatch(instructions, tools) {
697
+ const session = {};
698
+ if (instructions !== void 0) session.instructions = instructions;
699
+ if (tools !== void 0) {
700
+ session.tools = tools.map((t) => {
701
+ const def = {
702
+ type: "function",
703
+ name: t.name,
704
+ description: t.description,
705
+ parameters: t.parameters
706
+ };
707
+ if (t.strict === true) def.strict = true;
708
+ return def;
709
+ });
710
+ }
711
+ return session;
712
+ }
713
+ /**
714
+ * Apply a mid-session `instructions` / `tools` swap (multi-agent handoff).
715
+ *
716
+ * Sends a partial `session.update` carrying only the supplied fields and
717
+ * records them on the adapter so reconnect/warmup paths rebuild the
718
+ * session with the post-handoff config. Voice is intentionally NOT
719
+ * updatable here — OpenAI Realtime rejects a voice change once the session
720
+ * has produced audio, so the session keeps the voice established at call
721
+ * start (documented limitation). Mirrors Python
722
+ * `OpenAIRealtimeAdapter.update_session`.
723
+ */
724
+ async updateSession(update) {
725
+ if (update.instructions !== void 0) this.instructions = update.instructions;
726
+ if (update.tools !== void 0) this.tools = update.tools;
727
+ const session = this.buildSessionUpdatePatch(update.instructions, update.tools);
728
+ if (Object.keys(session).length === 0 || !this.ws) return;
729
+ this.ws.send(JSON.stringify({ type: "session.update", session }));
730
+ }
686
731
  /** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
687
732
  close() {
688
733
  if (this.heartbeat) {
@@ -801,10 +846,18 @@ var StatefulResampler = class {
801
846
  firHistory = new Int16Array(2);
802
847
  // [s_{-2}, s_{-1}]
803
848
  firHistoryValid = false;
849
+ /**
850
+ * Samples after the last emitted FIR center, carried to the next chunk
851
+ * (1–2 samples: the next center and/or its missing lookahead). Replaces
852
+ * the old single ``firPendingSample`` — that design wrote the pending
853
+ * sample into history too (processed twice, true s-2 lost) and edge-
854
+ * replicated the +2 tap at every chunk end, producing audible crackle at
855
+ * chunk boundaries.
856
+ */
857
+ firCarry = new Int16Array(0);
804
858
  // Pending sample carried from odd-count chunks (not the byte carry —
805
859
  // this is a complete Int16 sample that becomes the first input for the
806
860
  // next call).
807
- firPendingSample = null;
808
861
  // 8k→16k: last input sample deferred across chunk boundaries.
809
862
  upsampleLast = 0;
810
863
  upsampleHasHistory = false;
@@ -863,13 +916,17 @@ var StatefulResampler = class {
863
916
  "[patter] StatefulResampler.flush: trailing odd byte discarded \u2014 upstream produced odd-length PCM stream"
864
917
  );
865
918
  }
866
- if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firPendingSample !== null) {
867
- const s = this.firPendingSample;
868
- const tmp = Buffer.alloc(4);
869
- tmp.writeInt16LE(s, 0);
870
- tmp.writeInt16LE(s, 2);
871
- const out = this._downsample16kTo8k(tmp);
872
- this.firPendingSample = null;
919
+ if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firCarry.length > 0) {
920
+ const carry = this.firCarry;
921
+ this.firCarry = new Int16Array(0);
922
+ const s0 = carry[0];
923
+ const sP1 = carry.length > 1 ? carry[1] : s0;
924
+ const sP2 = sP1;
925
+ const sM2 = this.firHistoryValid ? this.firHistory[0] : 0;
926
+ const sM1 = this.firHistoryValid ? this.firHistory[1] : 0;
927
+ const filtered = sM2 + 4 * sM1 + 6 * s0 + 4 * sP1 + sP2 + 8 >> 4;
928
+ const out = Buffer.alloc(2);
929
+ out.writeInt16LE(Math.max(-32768, Math.min(32767, filtered)), 0);
873
930
  return out;
874
931
  }
875
932
  if (this.srcRate === 8e3 && this.dstRate === 16e3 && this.upsampleHasHistory) {
@@ -886,7 +943,7 @@ var StatefulResampler = class {
886
943
  reset() {
887
944
  this.firHistory = new Int16Array(2);
888
945
  this.firHistoryValid = false;
889
- this.firPendingSample = null;
946
+ this.firCarry = new Int16Array(0);
890
947
  this.upsampleLast = 0;
891
948
  this.upsampleHasHistory = false;
892
949
  this.resample24Last = 0;
@@ -915,48 +972,37 @@ var StatefulResampler = class {
915
972
  */
916
973
  _downsample16kTo8k(buf) {
917
974
  const newSampleCount = buf.length >> 1;
918
- const hasPending = this.firPendingSample !== null;
919
- const totalInput = newSampleCount + (hasPending ? 1 : 0);
920
- const input = new Int16Array(totalInput);
921
- if (hasPending) {
922
- input[0] = this.firPendingSample;
923
- for (let j = 0; j < newSampleCount; j++) input[j + 1] = buf.readInt16LE(j * 2);
924
- } else {
925
- for (let j = 0; j < newSampleCount; j++) input[j] = buf.readInt16LE(j * 2);
975
+ const carried = this.firCarry;
976
+ const total = carried.length + newSampleCount;
977
+ const all = new Int16Array(total);
978
+ all.set(carried, 0);
979
+ for (let j = 0; j < newSampleCount; j++) {
980
+ all[carried.length + j] = buf.readInt16LE(j * 2);
926
981
  }
927
- this.firPendingSample = null;
928
- if (totalInput === 0) return Buffer.alloc(0);
982
+ if (total === 0) return Buffer.alloc(0);
929
983
  if (!this.firHistoryValid) {
930
984
  this.firHistory[0] = 0;
931
985
  this.firHistory[1] = 0;
932
986
  this.firHistoryValid = true;
933
987
  }
934
- const extended = new Int16Array(totalInput + 2);
935
- extended[0] = this.firHistory[0];
936
- extended[1] = this.firHistory[1];
937
- for (let j = 0; j < totalInput; j++) extended[j + 2] = input[j];
938
- const outSamples = totalInput >> 1;
939
- const out = Buffer.alloc(outSamples * 2);
940
- for (let i = 0; i < outSamples; i++) {
941
- const c = 2 + i * 2;
942
- const sM2 = extended[c - 2];
943
- const sM1 = extended[c - 1];
944
- const s0 = extended[c];
945
- const sP1 = c + 1 < extended.length ? extended[c + 1] : extended[extended.length - 1];
946
- const sP2 = c + 2 < extended.length ? extended[c + 2] : extended[extended.length - 1];
988
+ const nOut = total >= 3 ? (total - 3 >> 1) + 1 : 0;
989
+ const out = Buffer.alloc(nOut * 2);
990
+ for (let i = 0; i < nOut; i++) {
991
+ const c = i * 2;
992
+ const sM2 = c >= 2 ? all[c - 2] : this.firHistory[0];
993
+ const sM1 = c >= 1 ? all[c - 1] : this.firHistory[1];
994
+ const s0 = all[c];
995
+ const sP1 = all[c + 1];
996
+ const sP2 = all[c + 2];
947
997
  const filtered = sM2 + 4 * sM1 + 6 * s0 + 4 * sP1 + sP2 + 8 >> 4;
948
998
  out.writeInt16LE(Math.max(-32768, Math.min(32767, filtered)), i * 2);
949
999
  }
950
- if (totalInput % 2 === 1) {
951
- this.firPendingSample = input[totalInput - 1];
952
- }
953
- if (totalInput >= 2) {
954
- this.firHistory[0] = input[totalInput - 2];
955
- this.firHistory[1] = input[totalInput - 1];
956
- } else {
957
- this.firHistory[0] = this.firHistory[1];
958
- this.firHistory[1] = input[0];
1000
+ const nextCenter = nOut * 2;
1001
+ if (nextCenter >= 2) {
1002
+ this.firHistory[0] = all[nextCenter - 2];
1003
+ this.firHistory[1] = all[nextCenter - 1];
959
1004
  }
1005
+ this.firCarry = all.slice(nextCenter);
960
1006
  return out;
961
1007
  }
962
1008
  // ---------------------------------------------------------------------------
@@ -1194,7 +1240,11 @@ var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
1194
1240
  },
1195
1241
  instructions: this.instructions || "You are a helpful voice assistant. Be concise."
1196
1242
  };
1197
- if (opts.temperature !== void 0) config.temperature = opts.temperature;
1243
+ if (opts.temperature !== void 0) {
1244
+ getLogger().warn(
1245
+ `OpenAI Realtime GA does not accept 'temperature' \u2014 ignoring ${opts.temperature}`
1246
+ );
1247
+ }
1198
1248
  if (opts.maxResponseOutputTokens !== void 0) {
1199
1249
  config.max_output_tokens = opts.maxResponseOutputTokens;
1200
1250
  }
@@ -1216,6 +1266,20 @@ var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
1216
1266
  }
1217
1267
  return config;
1218
1268
  }
1269
+ /**
1270
+ * GA-shape partial `session.update` body for a mid-session swap.
1271
+ *
1272
+ * Identical to the base v1 patch plus the mandatory `"type": "realtime"`
1273
+ * discriminator — the GA endpoint rejects a `session.update` without it.
1274
+ * Used by {@link OpenAIRealtimeAdapter.updateSession} (inherited unchanged)
1275
+ * for the multi-agent `handoff_to` flow. Mirrors the Python
1276
+ * `OpenAIRealtime2Adapter._build_session_update_patch`.
1277
+ */
1278
+ buildSessionUpdatePatch(instructions, tools) {
1279
+ const session = super.buildSessionUpdatePatch(instructions, tools);
1280
+ if (Object.keys(session).length > 0) session.type = "realtime";
1281
+ return session;
1282
+ }
1219
1283
  /**
1220
1284
  * Open the Realtime WebSocket against the GA endpoint and apply the GA
1221
1285
  * session configuration. Header `OpenAI-Beta: realtime=v1` is OMITTED
@@ -1229,6 +1293,18 @@ var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
1229
1293
  });
1230
1294
  const wsRef = this.ws;
1231
1295
  const originalOn = wsRef.on.bind(this.ws);
1296
+ const originalOff = wsRef.off.bind(this.ws);
1297
+ const wrappedByHandler = /* @__PURE__ */ new Map();
1298
+ wsRef.off = (event, handler) => {
1299
+ if (event === "message") {
1300
+ const wrapped = wrappedByHandler.get(handler);
1301
+ if (wrapped) {
1302
+ wrappedByHandler.delete(handler);
1303
+ return originalOff(event, wrapped);
1304
+ }
1305
+ }
1306
+ return originalOff(event, handler);
1307
+ };
1232
1308
  wsRef.on = (event, handler) => {
1233
1309
  if (event !== "message") return originalOn(event, handler);
1234
1310
  const wrapped = (raw, ...rest) => {
@@ -1250,6 +1326,7 @@ var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
1250
1326
  }
1251
1327
  handler(raw, ...rest);
1252
1328
  };
1329
+ wrappedByHandler.set(handler, wrapped);
1253
1330
  return originalOn(event, wrapped);
1254
1331
  };
1255
1332
  await new Promise((resolve, reject) => {
@@ -1257,6 +1334,7 @@ var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
1257
1334
  let settled = false;
1258
1335
  const ws = this.ws;
1259
1336
  const onSetupMessage = (raw) => {
1337
+ if (settled) return;
1260
1338
  let msg;
1261
1339
  try {
1262
1340
  msg = JSON.parse(raw.toString());
@@ -0,0 +1,198 @@
1
+ import {
2
+ LLMLoop
3
+ } from "./chunk-YJ4HKJL6.mjs";
4
+ import {
5
+ getLogger
6
+ } from "./chunk-MVOQFAEO.mjs";
7
+ import {
8
+ init_esm_shims
9
+ } from "./chunk-N565J3CF.mjs";
10
+
11
+ // src/test-mode.ts
12
+ init_esm_shims();
13
+ import { createInterface } from "readline";
14
+ var TestSession = class {
15
+ /** Run a REPL-style session that loops user input through the agent's LLM/onMessage handler. */
16
+ async run(opts) {
17
+ const { agent, openaiKey, onMessage, onCallStart, onCallEnd } = opts;
18
+ const callId = `test_${Date.now().toString(36)}${Math.random().toString(36).slice(2, 8)}`;
19
+ const caller = "+15550000001";
20
+ const callee = "+15550000002";
21
+ const conversationHistory = [];
22
+ const log = getLogger();
23
+ log.info("");
24
+ log.info("=".repeat(60));
25
+ log.info(" PATTER TEST MODE");
26
+ log.info("=".repeat(60));
27
+ log.info(` Agent: ${agent.model || "default"} / ${agent.voice || "default"}`);
28
+ log.info(` Provider: ${agent.provider || "openai_realtime"}`);
29
+ log.info(` Call ID: ${callId}`);
30
+ log.info(` Caller: ${caller} -> Callee: ${callee}`);
31
+ log.info("-".repeat(60));
32
+ log.info(" Commands: /quit /transfer <number> /hangup /history");
33
+ log.info("=".repeat(60));
34
+ log.info("");
35
+ if (onCallStart) {
36
+ await onCallStart({
37
+ call_id: callId,
38
+ caller,
39
+ callee,
40
+ direction: "test"
41
+ });
42
+ }
43
+ if (agent.firstMessage) {
44
+ log.info(` Agent: ${agent.firstMessage}`);
45
+ log.info("");
46
+ conversationHistory.push({
47
+ role: "assistant",
48
+ text: agent.firstMessage,
49
+ timestamp: Date.now()
50
+ });
51
+ }
52
+ let llmLoop = null;
53
+ if (!onMessage && openaiKey) {
54
+ let llmModel = agent.model || "gpt-4o-mini";
55
+ if (llmModel.includes("realtime")) llmModel = "gpt-4o-mini";
56
+ let resolvedPrompt = agent.systemPrompt;
57
+ if (agent.variables) {
58
+ for (const [k, v] of Object.entries(agent.variables)) {
59
+ resolvedPrompt = resolvedPrompt.replaceAll(`{${k}}`, v);
60
+ }
61
+ }
62
+ llmLoop = new LLMLoop(
63
+ openaiKey,
64
+ llmModel,
65
+ resolvedPrompt,
66
+ agent.tools,
67
+ void 0,
68
+ agent.disablePhonePreamble ?? false
69
+ );
70
+ }
71
+ let ended = false;
72
+ const _callControl = {
73
+ callId,
74
+ caller,
75
+ callee,
76
+ transfer: async (number) => {
77
+ ended = true;
78
+ log.info(` [Transfer -> ${number}]`);
79
+ },
80
+ hangup: async () => {
81
+ ended = true;
82
+ log.info(" [Call ended by agent]");
83
+ },
84
+ sendDtmf: async (digits, _opts) => {
85
+ log.info(` [DTMF -> ${digits}]`);
86
+ }
87
+ };
88
+ void _callControl;
89
+ const rl = createInterface({
90
+ input: process.stdin,
91
+ output: process.stdout
92
+ });
93
+ const askQuestion = (prompt) => new Promise((resolve) => rl.question(prompt, resolve));
94
+ try {
95
+ while (!ended) {
96
+ let userInput;
97
+ try {
98
+ userInput = await askQuestion(" You: ");
99
+ } catch {
100
+ log.info("\n [Session ended]");
101
+ break;
102
+ }
103
+ userInput = userInput.trim();
104
+ if (!userInput) continue;
105
+ if (userInput === "/quit") {
106
+ log.info(" [Session ended]");
107
+ break;
108
+ } else if (userInput === "/hangup") {
109
+ log.info(" [You hung up]");
110
+ break;
111
+ } else if (userInput.startsWith("/transfer ")) {
112
+ const number = userInput.slice(10).trim();
113
+ log.info(` [Transfer -> ${number}]`);
114
+ break;
115
+ } else if (userInput === "/history") {
116
+ for (const entry of conversationHistory) {
117
+ const role = entry.role.charAt(0).toUpperCase() + entry.role.slice(1);
118
+ log.info(` ${role}: ${entry.text}`);
119
+ }
120
+ continue;
121
+ }
122
+ if (onMessage) {
123
+ conversationHistory.push({
124
+ role: "user",
125
+ text: userInput,
126
+ timestamp: Date.now()
127
+ });
128
+ try {
129
+ const responseText = await onMessage({
130
+ text: userInput,
131
+ call_id: callId,
132
+ caller,
133
+ history: [...conversationHistory]
134
+ });
135
+ if (responseText) {
136
+ log.info(` Agent: ${responseText}`);
137
+ conversationHistory.push({
138
+ role: "assistant",
139
+ text: responseText,
140
+ timestamp: Date.now()
141
+ });
142
+ log.info("");
143
+ }
144
+ } catch (e) {
145
+ log.error(` [Error: ${String(e)}]`);
146
+ }
147
+ } else if (llmLoop) {
148
+ const callCtx = { call_id: callId, caller, callee };
149
+ const parts = [];
150
+ process.stdout.write(" Agent: ");
151
+ try {
152
+ for await (const token of llmLoop.run(userInput, conversationHistory, callCtx)) {
153
+ parts.push(token);
154
+ process.stdout.write(token);
155
+ }
156
+ } catch (err) {
157
+ log.info("");
158
+ log.error(`LLM error: ${String(err)}`);
159
+ continue;
160
+ }
161
+ log.info("");
162
+ const responseText = parts.join("");
163
+ conversationHistory.push({
164
+ role: "user",
165
+ text: userInput,
166
+ timestamp: Date.now()
167
+ });
168
+ if (responseText) {
169
+ conversationHistory.push({
170
+ role: "assistant",
171
+ text: responseText,
172
+ timestamp: Date.now()
173
+ });
174
+ }
175
+ log.info("");
176
+ } else {
177
+ log.info(" [No onMessage handler or LLM loop configured]");
178
+ }
179
+ if (ended) break;
180
+ }
181
+ } finally {
182
+ rl.close();
183
+ }
184
+ if (onCallEnd) {
185
+ await onCallEnd({
186
+ call_id: callId,
187
+ caller,
188
+ callee,
189
+ direction: "test",
190
+ transcript: conversationHistory
191
+ });
192
+ }
193
+ }
194
+ };
195
+
196
+ export {
197
+ TestSession
198
+ };