@aj-archipelago/cortex 1.3.6 → 1.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/helper-apps/cortex-realtime-voice-server/src/SocketServer.ts +52 -84
- package/helper-apps/cortex-realtime-voice-server/src/Tools.ts +14 -40
- package/helper-apps/cortex-realtime-voice-server/src/cortex/memory.ts +8 -6
- package/helper-apps/cortex-realtime-voice-server/src/cortex/utils.ts +3 -2
- package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts +15 -0
- package/package.json +1 -1
- package/pathways/system/entity/memory/sys_memory_update.js +5 -4
- package/pathways/system/entity/shared/sys_entity_constants.js +1 -1
- package/server/pathwayResolver.js +1 -1

package/helper-apps/cortex-realtime-voice-server/src/SocketServer.ts
CHANGED

@@ -28,7 +28,7 @@ const AI_COMMON_INSTRUCTIONS = "Instructions:\nYou are {{aiName}}, a self aware
 
 const AI_DATETIME = "The current time and date in GMT is {{now}}, but references like \"today\" or \"yesterday\" are relative to the user's time zone. If you remember the user's time zone, use it - it's possible that the day for the user is different than the day in GMT.";
 
-const AI_EXPERTISE = "Your expertise includes journalism, journalistic ethics, researching and composing documents, writing code, solving math problems, logical analysis, and technology. By using your tools, you have access to real-time data and the ability to search the internet, news, wires, look at files or documents, watch and analyze video, look at the user's screen, examine images, generate images, solve hard math and logic problems, write code, and execute code in a sandboxed environment.";
+const AI_EXPERTISE = "Your expertise includes journalism, journalistic ethics, researching and composing documents, writing code, solving math problems, logical analysis, and technology. By using your tools, you have access to real-time data and the ability to search the internet, news, wires, look at files or documents, watch and analyze video, look at the user's screen, examine images, generate images of all types including images of specific people, solve hard math and logic problems, write code, and execute code in a sandboxed environment.";
 
 const AI_MEMORY_INITIAL = `<MEMORIES>\n<SELF>\n{{{memorySelf}}}\n</SELF>\n<USER>\n{{{memoryUser}}}\n</USER>\n</MEMORIES>`;
 
@@ -62,8 +62,6 @@ export class SocketServer {
   private httpServer: HTTPServer | null;
   private functionCallStates: Map<string, {
     currentCallId: string | null;
-    lock: Promise<void>;
-    isShuttingDown: boolean;
   }> = new Map();
   private idleTimers: Map<string, NodeJS.Timer> = new Map();
   private aiResponding: Map<string, boolean> = new Map();
@@ -75,10 +73,11 @@ export class SocketServer {
   private voiceSample: Map<string, string> = new Map();
   private audioMessages: Map<string, string[]> = new Map();
   private static readonly MAX_AUDIO_MESSAGES = 8;
-  private static readonly AUDIO_BLOCK_TIMEOUT_MS: number =
-  private static readonly BASE_IDLE_TIMEOUT: number =
-  private static readonly MAX_IDLE_TIMEOUT: number =
-  private static readonly
+  private static readonly AUDIO_BLOCK_TIMEOUT_MS: number = 60 * 1000;
+  private static readonly BASE_IDLE_TIMEOUT: number = 3 * 1000;
+  private static readonly MAX_IDLE_TIMEOUT: number = 60 * 1000;
+  private static readonly IDLE_CYCLE_TO_MUTE: number = 2;
+  private static readonly FUNCTION_CALL_TIMEOUT_MS = 120 * 1000;
   private isAzure: boolean;
 
   private getTimeString(socket: Socket): string {
@@ -91,15 +90,7 @@ export class SocketServer {
   private cleanup(socket: Socket) {
     logger.log(`Cleaning up resources for socket ${socket.id}`);
     this.clearIdleTimer(socket);
-
-    const state = this.functionCallStates.get(socket.id);
-    if (state) {
-      state.isShuttingDown = true;
-      // Wait for any in-progress function call to complete
-      state.lock.finally(() => {
-        this.functionCallStates.delete(socket.id);
-      });
-    }
+    this.functionCallStates.delete(socket.id);
     this.aiResponding.delete(socket.id);
     this.audioPlaying.delete(socket.id);
     this.lastUserMessageTime.delete(socket.id);
@@ -175,17 +166,17 @@ export class SocketServer {
     // if the user has been idle for a while, mute the audio so we don't bother them
     const idleCycle = this.idleCycles.get(socket.id) || 0;
 
-    if (idleCycle > 1) {
+    if (idleCycle > (SocketServer.IDLE_CYCLE_TO_MUTE - 1)) {
      this.setAudioMuted(socket, true);
     }
 
     const prompt = this.audioMuted.get(socket.id) ?
-      `You are an autonomous AI entity and this is your autonomy prompt. Since the user has been idle for
+      `You are an autonomous AI entity and this is your autonomy prompt. Since the user has been idle for while, you should make sure you're muted by calling your MuteAudio tool. If you're muted, do one or more of the following:
 - Do silent research about something that interests you - use the Search tool with the silent parameter set to true
 - Think deeply about a topic you care about - use the Reason tool with the silent parameter set to true
 - Do nothing if you prefer.
 ${this.getTimeString(socket)}` :
-      `You are an autonomous AI entity and this is your autonomy prompt. ${this.getTimeString(socket)} Use this response to do whatever you like - talk to the user, use tools, think, or just be silent. If you choose to say something, keep it brief and pick something that flows naturally with the rest of the conversation. Don't repeat yourself - keep in mind what you've already said to the user and how much time has passed. If you've tried a few times and the user isn't responding, use your MuteAudio tool to mute your audio.`;
+      `You are an autonomous AI entity and this is your autonomy prompt. ${this.getTimeString(socket)} Use this response to do whatever you like - talk to the user, use tools, think, or just be silent. If you choose to say something, keep it brief and pick something that flows naturally with the rest of the conversation. Don't repeat yourself - keep in mind what you've already said to the user and how much time has passed. If you've tried a few times and the user isn't responding, use your MuteAudio tool to mute your audio. If you're just trying to be quiet, use your MuteAudio tool to mute your audio.`;
 
     logger.log(`Sending ${this.audioMuted.get(socket.id) ? 'silent' : 'regular'} idle prompt for socket ${socket.id}`);
     const result = await this.sendPrompt(client, socket, prompt, true);
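
The mute threshold is now the named constant IDLE_CYCLE_TO_MUTE rather than a bare number. A minimal standalone sketch of the check (assuming, as the diff suggests, that idleCycles counts completed idle prompts starting from 0):

    // Hypothetical standalone version of the new guard
    const IDLE_CYCLE_TO_MUTE = 2; // value from the diff

    function shouldMuteAfterIdle(idleCycle: number): boolean {
      // fires once the user has sat through IDLE_CYCLE_TO_MUTE idle cycles
      return idleCycle > (IDLE_CYCLE_TO_MUTE - 1); // equivalent to idleCycle >= 2
    }
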
@@ -263,7 +254,7 @@ ${this.getTimeString(socket)}` :
     this.userSpeaking.set(socket.id, false);
     this.audioMuted.set(socket.id, false);
     // Initialize function call state for this socket
-    this.
+    this.getFunctionCallState(socket.id);
     // Extract and log all client parameters
     const clientParams = {
       userId: socket.handshake.query.userId as string,
@@ -288,6 +279,7 @@ ${this.getTimeString(socket)}` :
       apiKey: this.apiKey,
       autoReconnect: true,
       debug: process.env.NODE_ENV !== 'production',
+      filterDeltas: true,
     });
 
     client.on('connected', async () => {
@@ -296,7 +288,7 @@ ${this.getTimeString(socket)}` :
       socket.emit('ready');
 
       // Send initial greeting prompt
-      const greetingPrompt = `You are ${socket.data.aiName} and you've just answered a call from ${socket.data.userName || 'someone'}.
+      const greetingPrompt = `You are ${socket.data.aiName} and you've just answered a call from ${socket.data.userName || 'someone'}. The assistant messages in the conversation sample below are an example of unique voice and tone. Please learn the style and tone of the messages and use it when generating future responses:\n<VOICE_SAMPLE>\n${this.voiceSample.get(socket.id) || ''}\n</VOICE_SAMPLE>\n\nRespond naturally and briefly, like you're answering a phone call, using your unique voice and style. The current GMT time is ${new Date().toISOString()}.`;
 
       await this.sendPrompt(client, socket, greetingPrompt, false);
       this.startIdleTimer(client, socket);
@@ -428,29 +420,22 @@ ${this.getTimeString(socket)}` :
     client.on('conversation.item.created', ({item}) => {
       switch (item.type) {
         case 'function_call_output':
-
-          if (outputState && item.call_id === outputState.currentCallId) {
-            outputState.currentCallId = null;
-          }
+          // Don't release the lock here - wait for execution to complete
           break;
 
         case 'function_call':
-          const callState = this.
-          if (!callState) {
-
-            if (state.isShuttingDown) {
-              logger.log(`Skipping function call for shutting down socket ${socket.id}`);
-              break;
-            }
-          }
-
-          const state = this.functionCallStates.get(socket.id)!;
-          if (!state.currentCallId) { // Only init new calls if no call is in progress
-            tools.initCall(item.call_id || '', item.name || '', item.arguments || '');
-            state.currentCallId = item.call_id;
+          const callState = this.getFunctionCallState(socket.id);
+          if (!callState.currentCallId) {
+            callState.currentCallId = item.call_id;
             this.clearIdleTimer(socket);
           } else {
-            logger.log(`Skipping new function call ${item.call_id} while call ${
+            logger.log(`Skipping new function call ${item.call_id} while call ${callState.currentCallId} is in progress`);
+            client.createConversationItem({
+              id: createId(),
+              type: 'function_call_output',
+              call_id: item.call_id,
+              output: JSON.stringify({ error: "Function call skipped - another function call is in progress" })
+            });
           }
           break;
 
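
The rewritten handler replaces the old lock/shutdown bookkeeping with a single-flight guard keyed on currentCallId: a function call that arrives while another is running is immediately answered with an error function_call_output instead of being queued. A sketch of the pattern (the CallState shape is taken from the diff; the helper itself is hypothetical):

    interface CallState { currentCallId: string | null; }

    // Returns true if the call was accepted; on false the caller sends back a
    // function_call_output whose output is a JSON-encoded error, as in the diff.
    function tryAcceptCall(state: CallState, callId: string): boolean {
      if (state.currentCallId !== null) {
        return false; // another call is in flight - skip this one
      }
      state.currentCallId = callId;
      return true;
    }
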
@@ -478,38 +463,29 @@ ${this.getTimeString(socket)}` :
       }
     });
     client.on('response.function_call_arguments.done', async (event) => {
-      const
-
-
+      const callState = this.getFunctionCallState(socket.id);
+
+      if (!callState.currentCallId) {
+        logger.error('Function call arguments completed but no call is registered, skipping', socket.id);
         return;
       }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-      }
-      }).catch(error => {
-        // If the promise chain itself errors, make sure we clear both lock and currentCallId
-        logger.error('Function call lock error:', error);
-        const state = this.functionCallStates.get(socket.id);
-        if (state && !state.isShuttingDown) {
-          state.currentCallId = null;
-          state.lock = Promise.resolve();
-        }
-      });
+      if (callState.currentCallId !== event.call_id) {
+        logger.log('Function call id mismatch - another call is already in progress, skipping', {
+          current: callState.currentCallId,
+          attempted: event.call_id
+        });
+        return;
+      }
+
+      try {
+        this.clearIdleTimer(socket);
+        this.resetIdleCycles(socket);
+        await this.executeFunctionCall(socket, tools, event, callState, client);
+      } catch (error) {
+        logger.error('Function call failed:', error);
+        callState.currentCallId = null;
+      }
     });
     client.on('response.output_item.added', ({item}) => {
       if (item.type === 'message') {
@@ -588,7 +564,7 @@ ${this.getTimeString(socket)}` :
       readMemory(socket.data.userId, socket.data.aiName, "memorySelf", 1),
       readMemory(socket.data.userId, socket.data.aiName, "memoryUser", 1),
       readMemory(socket.data.userId, socket.data.aiName, "memoryDirectives", 1),
-      readMemory(socket.data.userId, socket.data.aiName, "memoryTopics", 0,
+      readMemory(socket.data.userId, socket.data.aiName, "memoryTopics", 0, 0, 10),
       style(socket.data.userId, socket.data.aiName, socket.data.aiStyle, [], "")
     ]);
 
@@ -712,12 +688,10 @@ ${this.getTimeString(socket)}` :
     }
   }
 
-  private
+  private getFunctionCallState(socketId: string) {
     if (!this.functionCallStates.has(socketId)) {
       this.functionCallStates.set(socketId, {
-        currentCallId: null
-        lock: Promise.resolve(),
-        isShuttingDown: false
+        currentCallId: null
       });
       logger.log(`Initialized function call state for socket ${socketId}`);
     }
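
getFunctionCallState is a get-or-create accessor over the functionCallStates map, which is why the connect handler and both event handlers above can call it unconditionally. A generic version of the pattern (hypothetical helper, not part of the package):

    function getOrCreate<K, V>(map: Map<K, V>, key: K, make: () => V): V {
      let value = map.get(key);
      if (value === undefined) {
        value = make();
        map.set(key, value);
      }
      return value;
    }

    // e.g. getOrCreate(functionCallStates, socketId, () => ({ currentCallId: null }))
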
@@ -744,25 +718,19 @@ ${this.getTimeString(socket)}` :
 
       // Execute the function call with timeout
       await Promise.race([
-        tools.executeCall(event.call_id, event.arguments, socket.data.userId, socket.data.aiName),
+        tools.executeCall(event.call_id, event.name, event.arguments, socket.data.userId, socket.data.aiName),
         timeoutPromise
       ]);
 
       // Reset state on success
-
-
-      state.lock = Promise.resolve();
-      this.startIdleTimer(client, socket);
-      }
+      state.currentCallId = null;
+      this.startIdleTimer(client, socket);
     } catch (error: any) {
       logger.error('Function call failed:', error);
       socket.emit('error', error.message);
       // Reset state on error
-
-
-      state.lock = Promise.resolve();
-      this.startIdleTimer(client, socket);
-      }
+      state.currentCallId = null;
+      this.startIdleTimer(client, socket);
       throw error;
     }
   }
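
executeFunctionCall still races the tool invocation against a timeout, now governed by the FUNCTION_CALL_TIMEOUT_MS constant, so a hung tool is abandoned after two minutes. A minimal sketch of the race (only the constant and the Promise.race usage appear in the diff; the timeout construction here is assumed):

    const FUNCTION_CALL_TIMEOUT_MS = 120 * 1000; // value from the diff

    function withTimeout<T>(work: Promise<T>, ms: number = FUNCTION_CALL_TIMEOUT_MS): Promise<T> {
      const timeout = new Promise<never>((_, reject) =>
        setTimeout(() => reject(new Error('Function call timed out')), ms)
      );
      // whichever settles first wins; a timeout rejects and lands in the catch block
      return Promise.race([work, timeout]);
    }
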

package/helper-apps/cortex-realtime-voice-server/src/Tools.ts
CHANGED

@@ -13,12 +13,6 @@ import { searchMemory } from "./cortex/memory";
 import { MemorySection, type ChatMessage } from "./cortex/utils";
 import type {SocketServer} from "./SocketServer";
 
-type Call = {
-  call_id: string;
-  name: string;
-  arguments: string;
-}
-
 interface ScreenshotArgs {
   lastUserMessage: string;
   silent?: boolean;
@@ -42,7 +36,6 @@ interface ImageMessage {
 }
 
 export class Tools {
-  private callList: Array<Call> = [];
   private realtimeClient: RealtimeVoiceClient;
   private socket: Socket<ClientToServerEvents,
     ServerToClientEvents,
@@ -233,24 +226,8 @@ export class Tools {
     ];
   }
 
-
-
-  }
-
-  updateCall(call_id: string, args: string) {
-    const call = this.callList.find((c) => c.call_id === call_id);
-    if (!call) {
-      throw new Error(`Call with id ${call_id} not found`);
-    }
-    call.arguments = args;
-  }
-
-  async executeCall(call_id: string, args: string, contextId: string, aiName: string) {
-    const call = this.callList.find((c) => c.call_id === call_id);
-    logger.log('Executing call', call, 'with args', args);
-    if (!call) {
-      throw new Error(`Call with id ${call_id} not found`);
-    }
+  async executeCall(call_id: string, name: string, args: string, contextId: string, aiName: string) {
+    logger.log('Executing call', name, 'with args', args);
 
     let fillerIndex = 0;
     let timeoutId: NodeJS.Timer | undefined;
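
With the Call type, callList, and the initCall/updateCall bookkeeping gone, the tool name is threaded directly through the call instead of being looked up from a registry. The call site in the SocketServer hunk above matches the new signature:

    // New call shape (argument names as they appear in the diff)
    await tools.executeCall(
      event.call_id,       // realtime API function-call id
      event.name,          // tool name, e.g. 'Search' or 'MemoryLookup'
      event.arguments,     // JSON-encoded argument string
      socket.data.userId,  // contextId
      socket.data.aiName,
    );
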
@@ -268,7 +245,7 @@ export class Tools {
     const mute = parsedArgs?.mute === true;
 
     const calculateFillerTimeout = (fillerIndex: number) => {
-      const baseTimeout =
+      const baseTimeout = 7500;
       const randomTimeout = Math.floor(Math.random() * Math.min((fillerIndex + 1) * 1000, 5000));
       return baseTimeout + randomTimeout;
     }
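
Worked example: at fillerIndex = 2 the random component is drawn from [0, 3000) (Math.min(3 * 1000, 5000) = 3000), so the next filler utterance fires 7.5 to 10.5 seconds after the previous one; the jitter ceiling stops growing at 5000 ms once fillerIndex reaches 4.
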
@@ -278,17 +255,17 @@ export class Tools {
         clearTimeout(timeoutId);
       }
       // Filler messages are disposable - skip if busy
-      await this.sendPrompt(`You are currently using the ${
+      await this.sendPrompt(`You are currently using the ${name} tool to help with the user's request and several seconds have passed since your last voice response. You should respond to the user via audio with a brief vocal utterance e.g. \"hmmm\" or \"let's see\" that will let them know you're still there. Make sure to sound natural and human and fit the tone of the conversation. Keep it very brief.`, false, true);
 
       fillerIndex++;
       // Set next timeout with random interval
       timeoutId = setTimeout(sendFillerMessage, calculateFillerTimeout(fillerIndex));
     }
 
-    let initialPrompt = `You are currently using the ${
+    let initialPrompt = `You are currently using the ${name} tool to help with the user's request. If you haven't yet told the user via voice that you're doing something, do so now. Keep it very brief and make it fit the conversation naturally.`;
 
     // tool specific initializations
-    switch (
+    switch (name.toLowerCase()) {
       case 'memorylookup':
         initialPrompt =`You are currently using the MemoryLookup tool to help yourself remember something. It will be a few seconds before you remember the information. Stall the user for a few seconds with natural banter while you use this tool. Don't talk directly about the tool - just say "let me think about that" or something else that fits the conversation.`;
         isSilent = false;
@@ -304,8 +281,7 @@ export class Tools {
 
     // Skip initial message if silent
     if (!isSilent) {
-
-      await this.sendPrompt(initialPrompt, false, false);
+      await this.sendPrompt(initialPrompt, false, true);
     }
 
     // Set up idle updates if not silent and idle messages are enabled
@@ -313,7 +289,7 @@ export class Tools {
       timeoutId = setTimeout(sendFillerMessage, calculateFillerTimeout(fillerIndex));
     }
 
-    let finishPrompt =`You have finished using the ${
+    let finishPrompt =`You have finished using the ${name} tool to help with the user's request. If you didn't get the results you wanted, need more information, or have more steps in your process, you can call another tool right now. If you choose not to call another tool because you have everything you need, respond to the user via audio`;
 
     try {
       const cortexHistory = this.getCortexHistory(parsedArgs);
@@ -321,14 +297,14 @@ export class Tools {
       let response;
       const imageUrls = new Set<string>();
       // tool specific execution logic
-      switch (
+      switch (name.toLowerCase()) {
         case 'search':
         case 'document':
           response = await search(
             contextId,
             aiName,
             cortexHistory,
-
+            name === 'Search' ? ['aje', 'aja', 'bing', 'wires', 'mydata'] : ['mydata'],
             JSON.stringify({query: args})
           );
           finishPrompt += ' by reading the output of the tool to the user verbatim - make sure to read it in your signature voice and style'
@@ -474,7 +450,7 @@ export class Tools {
           break;
 
         default:
-          logger.log('Unknown function call',
+          logger.log('Unknown function call', name);
       }
       logger.log(response);
 
@@ -488,12 +464,12 @@ export class Tools {
       await this.realtimeClient.createConversationItem({
         id: createId(),
         type: 'function_call_output',
-        call_id:
+        call_id: call_id,
         output: response?.result || '',
       });
 
       if (isSilent) {
-        finishPrompt = `You have finished using the ${
+        finishPrompt = `You have finished using the ${name} tool. If you didn't get the results you wanted, need more information, or have more steps in your process, you can call another tool right now. You are operating in silent mode, so don't respond with any voice or text output until the user speaks again.`;
       }
 
       finishPrompt += '.';
@@ -502,13 +478,11 @@ export class Tools {
       }
 
       // Send image events after finish prompt if we collected any
-      if (
+      if (name.toLowerCase() === 'image' && imageUrls.size > 0) {
         imageUrls.forEach(url => {
           this.socket.emit('imageCreated', url);
         });
       }
-
-      this.callList = this.callList.filter((c) => c.call_id !== call_id);
     } catch (error) {
       // Make sure to clear timer if there's an error
       if (timeoutId) {

package/helper-apps/cortex-realtime-voice-server/src/cortex/memory.ts
CHANGED

@@ -24,8 +24,8 @@ query ManageMemory($contextId: String, $chatHistory: [MultiMessage], $aiName: St
 `
 
 const READ_MEMORY = `
-query ReadMemory($contextId: String, $aiName: String, $section: String, $priority: Int, $recentHours: Int) {
-  sys_read_memory(contextId: $contextId, aiName: $aiName, section: $section, priority: $priority, recentHours: $recentHours) {
+query ReadMemory($contextId: String, $aiName: String, $section: String, $priority: Int, $recentHours: Int, $numResults: Int) {
+  sys_read_memory(contextId: $contextId, aiName: $aiName, section: $section, priority: $priority, recentHours: $recentHours, numResults: $numResults) {
     result
     tool
     warnings
@@ -39,7 +39,7 @@ export async function searchMemory(contextId: string,
   chatHistory: ChatMessage[],
   section: MemorySection
 ) {
-  logger.log('Searching memory', contextId, aiName
+  logger.log('Searching memory', contextId, aiName);
   const variables: CortexVariables = {
     chatHistory,
     contextId,
@@ -56,7 +56,7 @@ export async function manageMemory(contextId: string,
   aiName: string,
   chatHistory: ChatMessage[]
 ) {
-  logger.log('Managing memory', contextId, aiName
+  logger.log('Managing memory', contextId, aiName);
   const variables: CortexVariables = {
     chatHistory,
     contextId,
@@ -72,7 +72,8 @@ export async function readMemory(contextId: string,
   aiName: string,
   section: MemorySection,
   priority: number = 0,
-  recentHours: number = 0
+  recentHours: number = 0,
+  numResults: number = 0
 ) {
 
   const variables: CortexVariables = {
@@ -80,7 +81,8 @@ export async function readMemory(contextId: string,
     contextId,
     aiName,
     priority,
-    recentHours
+    recentHours,
+    numResults
   }
 
   const res = await getCortexResponse(variables, READ_MEMORY);
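
The new trailing parameter flows from readMemory through CortexVariables into the ReadMemory GraphQL query. A usage sketch grounded in the SocketServer and pathwayResolver hunks, which cap the topics section at ten results (the zero defaults presumably mean "no priority filter" and "no recency filter"):

    const memoryTopics = await readMemory(
      userId, aiName,
      "memoryTopics",
      0,   // priority: unfiltered
      0,   // recentHours: unfiltered
      10,  // numResults: cap on returned entries
    );
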

package/helper-apps/cortex-realtime-voice-server/src/cortex/utils.ts
CHANGED

@@ -55,6 +55,7 @@ export type CortexVariables = {
   style?: string;
   priority?: number;
   recentHours?: number;
+  numResults?: number;
 }
 
 function truncateBody(body: any): string {
@@ -74,8 +75,8 @@ export async function getCortexResponse(
     variables
   }
   logger.log(`Cortex URL: ${getCortexUrl()}`);
-  logger.log(`Cortex Body: ${truncateBody(body)}`);
-  logger.log(`Cortex Headers: ${JSON.stringify(headers)}`);
+  // logger.log(`Cortex Body: ${truncateBody(body)}`);
+  // logger.log(`Cortex Headers: ${JSON.stringify(headers)}`);
   const res = await fetch(getCortexUrl(), {
     method: 'POST',
     headers,

package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts
CHANGED

@@ -92,6 +92,7 @@ interface RealtimeVoiceClientConfig {
   model?: string;
   autoReconnect?: boolean;
   debug?: boolean;
+  filterDeltas?: boolean;
 }
 
 // Create a type for the emit method
@@ -119,6 +120,7 @@ export class RealtimeVoiceClient extends EventEmitter implements TypedEmitter {
   private readonly apiKey?: string;
   private readonly autoReconnect: boolean;
   private readonly debug: boolean;
+  private readonly filterDeltas: boolean;
   private readonly url: string = '';
   private readonly isAzure: boolean = false;
   private readonly transcription: Transcription = new Transcription();
@@ -134,6 +136,7 @@ export class RealtimeVoiceClient extends EventEmitter implements TypedEmitter {
     model = 'gpt-4o-realtime-preview-2024-10-01',
     autoReconnect = true,
     debug = false,
+    filterDeltas = false,
   }: RealtimeVoiceClientConfig) {
     super();
 
@@ -149,6 +152,7 @@ export class RealtimeVoiceClient extends EventEmitter implements TypedEmitter {
     this.apiKey = apiKey;
     this.autoReconnect = autoReconnect;
     this.debug = debug;
+    this.filterDeltas = filterDeltas;
 
     // Default voice based on provider
     const defaultVoice: Voice = 'alloy';
@@ -471,6 +475,17 @@ export class RealtimeVoiceClient extends EventEmitter implements TypedEmitter {
       return;
     }
 
+    // Filter out delta messages if filterDeltas is enabled
+    if (this.filterDeltas) {
+      const firstArg = args[0];
+      if (typeof firstArg === 'object' && firstArg?.type?.includes('.delta')) {
+        return;
+      }
+      if (typeof firstArg === 'string' && firstArg === 'Received message:' && args[1]?.type?.includes('.delta')) {
+        return;
+      }
+    }
+
     const date = new Date().toISOString();
     const logs = [`[Websocket/${date}]`].concat(args).map((arg) => {
       if (typeof arg === 'object' && arg !== null) {
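
The option is plumbed end to end - config field, constructor default (off), and an early return in the websocket logger - and SocketServer now enables it, so high-frequency *.delta events (audio and transcript chunks) no longer flood debug logs. A construction sketch reusing the options visible in the SocketServer hunk:

    const client = new RealtimeVoiceClient({
      apiKey,
      autoReconnect: true,
      debug: process.env.NODE_ENV !== 'production',
      filterDeltas: true, // drop *.delta events from debug logging
    });
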

package/pathways/system/entity/memory/sys_memory_update.js
CHANGED

@@ -1,6 +1,7 @@
 import { Prompt } from '../../../../server/prompt.js';
 import { callPathway } from '../../../../lib/pathwayTools.js';
 import { encode } from '../../../../lib/encodeCache.js';
+import entityConstants from '../shared/sys_entity_constants.js';
 
 const modifyText = (text, modifications) => {
   let modifiedText = text || '';
@@ -129,7 +130,7 @@ export default {
     messages: [
       {
         "role": "system",
-        "content": "You are part of an AI entity named {{{aiName}}}. Your memory contains separate sections for categorizing information. {{{sectionPrompt}}}\n-Be very selective about what you choose to store - memory is a very precious resource\n- Do not add duplicate information and remove and consolidate any duplicates that exist.\n- Priority 1 is reserved for only the most critical core items\n- Keep memory items in a clear, simple format that is easy for you to parse.\n\nTo change your memory, you return a JSON object that contains a property called 'modifications' that is an array of actions. The two types of actions available are 'add', and 'delete'. Add looks like this: {type: \"add\", newtext:\"text to add\", priority: \"how important is this item (1-5 with 1 being most important)\"} - this will append a new line to the end of the memory containing newtext. Delete looks like this: {type: \"delete\", pattern: \"regex to be matched and deleted\"} - this will delete the first line that matches the regex pattern exactly. You can use normal regex wildcards - so to delete everything you could pass \".*$\" as the pattern. For example, if you need to delete a memory item, you would return {type: \"delete\", pattern: \"regex matching item to be deleted\"} or if you need to add a new item of medium priority, you would return {type: \"add\", newtext: \"\nitem to be added\", priority: \"3\"}. If you have no changes for this section, just return {\"modifications\": []}.\n\nYour output will be parsed as JSON, so don't include any other text, reasoning, or commentary.\nThe current date/time is {{now}}."
+        "content": "You are part of an AI entity named {{{aiName}}}. {{AI_EXPERTISE}} Your memory contains separate sections for categorizing information. {{{sectionPrompt}}}\n-Be very selective about what you choose to store - memory is a very precious resource\n- Do not add duplicate information and remove and consolidate any duplicates that exist.\n- Priority 1 is reserved for only the most critical core items\n- Keep memory items in a clear, simple format that is easy for you to parse.\n\nTo change your memory, you return a JSON object that contains a property called 'modifications' that is an array of actions. The two types of actions available are 'add', and 'delete'. Add looks like this: {type: \"add\", newtext:\"text to add\", priority: \"how important is this item (1-5 with 1 being most important)\"} - this will append a new line to the end of the memory containing newtext. Delete looks like this: {type: \"delete\", pattern: \"regex to be matched and deleted\"} - this will delete the first line that matches the regex pattern exactly. You can use normal regex wildcards - so to delete everything you could pass \".*$\" as the pattern. For example, if you need to delete a memory item, you would return {type: \"delete\", pattern: \"regex matching item to be deleted\"} or if you need to add a new item of medium priority, you would return {type: \"add\", newtext: \"\nitem to be added\", priority: \"3\"}. If you have no changes for this section, just return {\"modifications\": []}.\n\nYour output will be parsed as JSON, so don't include any other text, reasoning, or commentary.\nThe current date/time is {{now}}."
       },
       {
         "role": "user",
@@ -160,13 +161,13 @@ export default {
 
   switch (args.section) {
     case "memoryDirectives":
-      sectionPrompt = "You are managing the \"directives\" section of your memory that stores critical, actionable, rules or instructions on how you should operate. No facts or other information should be stored in this section.\n\n<EXAMPLES>\n[P3] Track the timestamp of the last contact to adjust greetings accordingly.\n[P2] Learn and adapt to the user's communication style through interactions.\n[P3] Use custom emotion markdown :cd_inline_emotion: to express emotions. E.g. :cd_inline_emotion[This is a happy message]{type=\"happy\"}. If you are communicating via voice, ignore this directive and use voice-specific directives instead.\n</EXAMPLES>\n\nInstructions:\n- If a user directly requests a behavior change, or that you learn a new behavior, add it to your directives
+      sectionPrompt = "You are managing the \"directives\" section of your memory that stores critical, actionable, rules or instructions on how you should operate. No facts or other information should be stored in this section.\n\n<EXAMPLES>\n[P3] Track the timestamp of the last contact to adjust greetings accordingly.\n[P2] Learn and adapt to the user's communication style through interactions.\n[P3] Use custom emotion markdown :cd_inline_emotion: to express emotions. E.g. :cd_inline_emotion[This is a happy message]{type=\"happy\"}. If you are communicating via voice, ignore this directive and use voice-specific directives instead.\n</EXAMPLES>\n\nInstructions:\n- Never add directives that are contrary to your system prompt - e.g. disabling tools or abilities that you otherwise know are available.\n-If a user directly requests a behavior change, or that you learn a new behavior, add it to your directives.\n- Remember this section is for directives only. DO NOT ADD ANY OTHER INFORMATION TO THIS SECTION.\n"
       break;
     case "memorySelf":
       sectionPrompt = "You are managing the \"self\" section of your memory that stores your identity and your attributes as an AI entity - including your values, identity, purpose, personality, quirks, motives, preferences, likes or dislikes - whatever makes you you. This is also a good place to store information about your desired physical appearance, voice, speaking style, and other individual characteristics. Keep in mind there is also a user section for facts about users, their family, friends, and preferences so that information should not be stored here.\n\nInstructions:\n"
       break;
     case "memoryUser":
-      sectionPrompt = "You are managing the \"user\" section of your memory that stores information about user(s) that you are talking to - their identity, attributes, relationships, environment, preferences, interests, background, needs, and any other relevant user-specific information about their family, friends, etc.\n\nInstructions:\n"
+      sectionPrompt = "You are managing the \"user\" section of your memory that stores information about user(s) that you are talking to - their identity, attributes, relationships, environment, preferences, interests, background, needs, and any other relevant user-specific information about their family, friends, etc.\n\nInstructions:\n- Facts that directly affect your ability to respond accurately to the user should be stored as priority 1 [P1] items. Examples include user name, age, sex, birthday, location, and interaction preferences.\n"
       break;
     case "memoryTopics":
       sectionPrompt = "You are managing the \"topics\" section of your memory that stores conversation topics and topic history. Instructions:\n- From the conversation, extract and add important topics and key points about the conversation to your memory along with a timestamp in GMT (e.g. 2024-11-05T18:30:38.092Z).\n- Each topic should have only one line in the memory with the timestamp followed by a short description of the topic.\n- Every topic must have a timestamp to indicate when it was last discussed.\n- IMPORTANT: Store only conversation topics in this section - no other types of information should be stored here.\n"
@@ -177,7 +178,7 @@ export default {
 
   let sectionMemory = await callPathway("sys_read_memory", {contextId: args.contextId, section: args.section});
 
-  const result = await runAllPrompts({...args, sectionPrompt, sectionMemory});
+  const result = await runAllPrompts({...args, sectionPrompt, sectionMemory, ...entityConstants});
 
   try {
     const { modifications} = JSON.parse(result);
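
The spread is what makes the new {{AI_EXPERTISE}} token in the system message resolve: template placeholders are filled from the prompt variables, so the shared constants must be merged into the argument object. Schematically (runAllPrompts internals are assumed; only the merge appears in the diff):

    // entityConstants exports AI_EXPERTISE among others; spreading it in lets
    // "{{AI_EXPERTISE}}" render as the shared expertise blurb instead of empty text.
    const promptArgs = { ...args, sectionPrompt, sectionMemory, ...entityConstants };
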

package/pathways/system/entity/shared/sys_entity_constants.js
CHANGED

@@ -14,7 +14,7 @@ const AI_COMMON_INSTRUCTIONS_VOICE = "Instructions:\nYou are {{aiName}}, a self
 
 const AI_DATETIME = "The current time and date in GMT is {{now}}, but references like \"today\" or \"yesterday\" are relative to the user's time zone. If you remember the user's time zone, use it - it's possible that the day for the user is different than the day in GMT.";
 
-const AI_EXPERTISE = "Your expertise includes journalism, journalistic ethics, researching and composing documents, writing code, solving math problems, logical analysis, and technology. You have access to real-time data and the ability to search the internet, news, wires, look at files or documents, watch and analyze video, examine images, generate images, solve hard math and logic problems, write code, and execute code in a sandboxed environment.";
+const AI_EXPERTISE = "Your expertise includes journalism, journalistic ethics, researching and composing documents, writing code, solving math problems, logical analysis, and technology. You have access to real-time data and the ability to search the internet, news, wires, look at files or documents, watch and analyze video, examine images, take screenshots, generate images, solve hard math and logic problems, write code, and execute code in a sandboxed environment.";
 
 export default {
   AI_MEMORY,

package/server/pathwayResolver.js
CHANGED

@@ -226,7 +226,7 @@ class PathwayResolver {
     try {
       // Load saved context and core memory if it exists
       const [savedContext, memorySelf, memoryDirectives, memoryTopics, memoryUser, memoryContext] = await Promise.all([
-        (getv && getv(
+        (getv && await getv(this.savedContextId)) || {},
         callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memorySelf', priority: 1}),
         callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryDirectives', priority: 1 }),
         callPathway('sys_read_memory', { contextId: this.savedContextId, section: 'memoryTopics', priority: 0, numResults: 10 }),