@peopl-health/nexus 3.3.19 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/eval/EvalProvider.js +309 -0
- package/lib/index.js +10 -2
- package/lib/services/airtableService.js +16 -0
- package/package.json +1 -1
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
const { OpenAI } = require('openai');
|
|
2
|
+
|
|
3
|
+
const { Config_ID } = require('../config/airtableConfig');
|
|
4
|
+
const { getCurrentMexicoDateTime } = require('../utils/dateUtils');
|
|
5
|
+
const { retryWithBackoff } = require('../utils/retryUtils');
|
|
6
|
+
const { logger } = require('../utils/logger');
|
|
7
|
+
const { Thread } = require('../models/threadModel');
|
|
8
|
+
const { DefaultMemoryManager } = require('../memory/DefaultMemoryManager');
|
|
9
|
+
const { OpenAIResponsesProvider } = require('../providers/OpenAIResponsesProvider');
|
|
10
|
+
const { handleFunctionCalls } = require('../providers/OpenAIResponsesProviderTools');
|
|
11
|
+
const { getRecordByFilter } = require('../services/airtableService');
|
|
12
|
+
const { getAssistantById } = require('../services/assistantResolver');
|
|
13
|
+
|
|
14
|
+
/**
 * Parses the configured limit on tool-execution round trips.
 *
 * A missing or empty value yields the default of 5. A value that does not
 * parse as an integer also yields 5: previously it produced NaN, which made
 * every `round <= MAX_FUNCTION_ROUNDS` comparison false and silently disabled
 * tool execution. Any value that does parse (including 0) is kept as-is.
 *
 * @param {string|undefined} rawValue - Raw MAX_FUNCTION_ROUNDS setting.
 * @returns {number} The parsed integer, or 5 when parsing fails.
 */
function resolveMaxFunctionRounds(rawValue) {
  const parsed = Number.parseInt(rawValue || '5', 10);
  return Number.isNaN(parsed) ? 5 : parsed;
}

const MAX_FUNCTION_ROUNDS = resolveMaxFunctionRounds(process.env.MAX_FUNCTION_ROUNDS);
|
|
15
|
+
|
|
16
|
+
/**
 * Promptfoo-compatible eval provider that wraps the real Nexus pipeline.
 *
 * Modes:
 * - 'context-only': Builds context and calls the LLM. Tool schemas are not
 *   resolved and `tool_choice` is not set.
 * - 'dry-run': Lets the LLM request tools, but answers every requested call
 *   with a stub `{ success: true }` output instead of executing it, then
 *   issues one follow-up request. (default)
 * - 'full-pipeline': Executes tools for real. Use only with safe/mocked tools.
 */
class EvalProvider {
  /**
   * @param {object} [options]
   * @param {string} [options.label] - Value returned by `id()`; defaults to `nexus:<model>`.
   * @param {object} [options.config]
   * @param {string} [options.config.model] - Falls back to OPENAI_MODEL, then 'gpt-5'.
   * @param {number} [options.config.temperature=0.7]
   * @param {number} [options.config.maxOutputTokens=400]
   * @param {string} [options.config.assistantId] - Default prompt/assistant id.
   * @param {string} [options.config.mode='dry-run'] - One of the modes listed on the class.
   * @param {string} [options.config.promptSource] - 'airtable' loads the developer prompt from Airtable.
   * @param {string|object} [options.config.toolChoice='auto']
   * @param {object} [options.config.promptVersions] - Map of assistantId -> prompt version.
   */
  constructor(options = {}) {
    const config = options.config || {};
    this.model = config.model || process.env.OPENAI_MODEL || 'gpt-5';
    this.temperature = config.temperature ?? 0.7;
    this.maxOutputTokens = config.maxOutputTokens ?? 400;
    this.assistantId = config.assistantId || null;
    this.mode = config.mode || 'dry-run';
    this.promptSource = config.promptSource || null;
    // tool_choice: 'auto' (default, matches production), 'required', 'none',
    // or { type: 'function', name: 'toolName' } to force a specific tool
    this.toolChoice = config.toolChoice || 'auto';
    this.promptVersions = config.promptVersions || {};
    this.label = options.label || `nexus:${this.model}`;

    this.client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
    this.memoryManager = new DefaultMemoryManager();
    this.provider = new OpenAIResponsesProvider({
      client: this.client,
      defaultModels: { responseModel: this.model },
    });
  }

  /** @returns {string} The provider label. */
  id() {
    return this.label;
  }

  /**
   * Runs one eval case: builds the conversation context for `vars.numero`,
   * resolves the developer prompt, calls the model (handling tool calls
   * according to the configured mode) and formats the result.
   *
   * Failures are reported as `{ error }` rather than thrown.
   *
   * @param {string} prompt - Prompt text, used as the developer message unless
   *   the prompt is loaded from Airtable.
   * @param {object} [context] - Eval context; `context.vars` supplies `numero`
   *   (required), and optionally `assistantId` and `beforeCheckpoint`.
   * @returns {Promise<object>} `{ output, tokenUsage, metadata }` or `{ error }`.
   */
  async callApi(prompt, context) {
    // Tolerate a missing context so the caller gets the structured error
    // below instead of a TypeError thrown outside the try block.
    const vars = context?.vars || {};
    const numero = vars.numero;
    if (!numero) return { error: 'Missing required var: numero' };

    // Reject an unparseable checkpoint up front rather than handing an
    // Invalid Date to the memory manager.
    let beforeCheckpoint = null;
    if (vars.beforeCheckpoint) {
      beforeCheckpoint = new Date(vars.beforeCheckpoint);
      if (Number.isNaN(beforeCheckpoint.getTime())) {
        return { error: `Invalid var: beforeCheckpoint (${vars.beforeCheckpoint})` };
      }
    }

    try {
      let assistantId = vars.assistantId || this.assistantId;

      const thread = await Thread.findOne({ code: numero });
      if (!assistantId && thread) {
        assistantId = thread.prompt_id || thread.assistant_id || null;
      }

      const { messages, promptVariables, lastUserMessage } = await this._buildContext(numero, beforeCheckpoint);

      const { devContent, assistant, toolSchemas } = await this._resolvePrompt(prompt, assistantId, thread, promptVariables);

      const apiConfig = this._buildApiConfig(devContent, messages, assistantId, promptVariables, toolSchemas);

      // The timer starts here, so durationMs covers only the model/tool
      // round trips, not context building.
      const startTime = Date.now();
      const { finalResponse, toolCallsRequested, allToolsExecuted, accumulatedUsage } =
        await this._executeWithToolLoop(apiConfig, assistant);

      return this._formatResult({
        finalResponse, toolCallsRequested, allToolsExecuted,
        accumulatedUsage, assistantId, numero, lastUserMessage,
        messages, promptVariables, startTime,
      });
    } catch (error) {
      logger.error('[NexusEvalProvider] callApi failed', { error: error.message, numero });
      return { error: error.message };
    }
  }

  /**
   * Builds the message context and prompt variables for a thread code.
   *
   * @param {string} numero - Thread code.
   * @param {Date|null} beforeCheckpoint - Passed to the memory manager as `config.beforeCheckpoint`.
   * @returns {Promise<{messages: Array, promptVariables: object, lastUserMessage: string}>}
   *   `lastUserMessage` is the last user message's string content, truncated
   *   to 200 characters ('' when absent or not a string).
   */
  async _buildContext(numero, beforeCheckpoint) {
    const messages = await this.memoryManager.buildContext({
      thread: { code: numero },
      config: { beforeCheckpoint },
    });

    const lastUserMsg = [...messages].reverse().find(m => m.role === 'user');
    const lastUserText = typeof lastUserMsg?.content === 'string' ? lastUserMsg.content : '';
    const lastUserMessage = lastUserText.substring(0, 200);

    const clinicalData = await this.memoryManager.getClinicalData(numero);
    const promptVariables = {
      clinical_context: clinicalData?.clinicalContext ?? '',
      last_symptoms: clinicalData?.lastSymptoms ?? '',
      current_date: getCurrentMexicoDateTime(),
    };

    return { messages, promptVariables, lastUserMessage };
  }

  /**
   * Resolves the developer prompt text and, outside 'context-only' mode, the
   * assistant instance and its tool schemas.
   *
   * `{{name}}` placeholders are replaced with the matching own property of
   * `promptVariables`; unknown placeholders become ''.
   *
   * @param {string} fallbackPrompt - Used unless `promptSource` is 'airtable' and an assistantId is known.
   * @param {string|null} assistantId
   * @param {object|null} thread - Passed through to `getAssistantById`.
   * @param {object} [promptVariables]
   * @returns {Promise<{devContent: string, assistant: object|null, toolSchemas: Array}>}
   */
  async _resolvePrompt(fallbackPrompt, assistantId, thread, promptVariables = {}) {
    let rawContent;
    if (this.promptSource === 'airtable' && assistantId) {
      // Escape backslashes and double quotes so the id cannot terminate the
      // formula's string literal.
      const escapedId = String(assistantId).replace(/\\/g, '\\\\').replace(/"/g, '\\"');
      const devRecord = await getRecordByFilter(Config_ID, 'responses', `{prompt_id} = "${escapedId}"`);
      rawContent = devRecord?.[0]?.content || '';
    } else {
      rawContent = fallbackPrompt;
    }

    // Coerce so a missing or non-string prompt yields '' instead of a TypeError.
    let devContent = String(rawContent ?? '').replace(/\{\{(\w+)\}\}/g, (_, key) => {
      // Own properties only: `{{toString}}` must not inject an inherited function.
      const isOwn = Object.prototype.hasOwnProperty.call(promptVariables, key);
      return isOwn ? (promptVariables[key] ?? '') : '';
    });

    let assistant = null;
    let toolSchemas = [];
    if (this.mode !== 'context-only' && assistantId) {
      try {
        assistant = getAssistantById(assistantId, thread);
        toolSchemas = assistant.getToolSchemas?.() || [];
        if (assistant.tools?.size) {
          const toolNames = Array.from(assistant.tools.keys()).join(', ');
          devContent += `\n\nYou only have access to these tools: ${toolNames}. Do not call or reference any tools not listed here.`;
        }
      } catch (error) {
        // Best effort: the eval continues without tools, but the cause is logged.
        logger.warn('[NexusEvalProvider] Failed to resolve assistant', { assistantId, error: error?.message });
      }
    }

    return { devContent, assistant, toolSchemas };
  }

  /**
   * Assembles the request payload for `client.responses.create`.
   *
   * With an assistantId the request references the stored prompt (id,
   * variables, optional pinned version) and sends no model settings or tool
   * schemas. Without one it sends model, temperature, max_output_tokens and
   * any tool schemas, flattening `{ type: 'function', function: {...} }`
   * entries into `{ type: 'function', name, description, parameters, strict }`.
   *
   * @param {string} devContent - Developer message content.
   * @param {Array} messages - Context messages, converted via the provider.
   * @param {string|null} assistantId
   * @param {object} promptVariables
   * @param {Array} [toolSchemas]
   * @returns {object} The API request config.
   */
  _buildApiConfig(devContent, messages, assistantId, promptVariables, toolSchemas = []) {
    const convertedMessages = this.provider._convertItemsToApiFormat(messages);
    const apiConfig = {
      input: [{ role: 'developer', content: devContent }, ...convertedMessages],
      instructions: '',
    };

    if (assistantId) {
      apiConfig.prompt = { id: assistantId, variables: promptVariables };
      const version = this.promptVersions[assistantId];
      if (version) apiConfig.prompt.version = String(version);
    } else {
      apiConfig.model = this.model;
      apiConfig.temperature = this.temperature;
      apiConfig.max_output_tokens = this.maxOutputTokens;
    }

    if (this.mode !== 'context-only') {
      apiConfig.tool_choice = this.toolChoice;
    }

    if (toolSchemas.length > 0 && !assistantId) {
      apiConfig.tools = toolSchemas.map(schema => {
        if (schema.type === 'function' && schema.function) {
          const { name, description, parameters, strict } = schema.function;
          return { type: 'function', name, description, parameters, strict };
        }
        return schema;
      });
    }

    return apiConfig;
  }

  /**
   * Decodes a function call's `arguments` payload without throwing.
   *
   * @param {*} rawArguments - Normally a JSON string.
   * @returns {*} `{}` for an empty payload, the parsed JSON for a valid
   *   string, the value itself when it is not a string, or
   *   `{ _rawArguments }` holding the original text when parsing fails.
   */
  _parseToolArguments(rawArguments) {
    if (!rawArguments) return {};
    if (typeof rawArguments !== 'string') return rawArguments;
    try {
      return JSON.parse(rawArguments);
    } catch {
      // Keep the malformed text visible in the eval metadata instead of
      // aborting the whole run.
      return { _rawArguments: rawArguments };
    }
  }

  /**
   * Appends `{ name, arguments, call_id }` entries for `calls` to
   * `toolCallsRequested`, skipping any call whose call_id is already listed.
   * Calls without a call_id are always appended.
   *
   * @param {Array<object>} calls - `function_call` output items.
   * @param {Array<object>} toolCallsRequested - Mutated in place.
   */
  _recordToolCalls(calls, toolCallsRequested) {
    const seenIds = new Set(toolCallsRequested.map(entry => entry.call_id));
    for (const call of calls) {
      if (call.call_id != null) {
        if (seenIds.has(call.call_id)) continue;
        seenIds.add(call.call_id);
      }
      toolCallsRequested.push({
        name: call.name,
        arguments: this._parseToolArguments(call.arguments),
        call_id: call.call_id,
      });
    }
  }

  /**
   * Sends the initial request and, if the model requested function calls,
   * handles them according to the mode: executes them ('full-pipeline' with
   * an assistant), answers them with stubs ('dry-run'), or only records them
   * (any other case).
   *
   * @param {object} apiConfig - Request config from `_buildApiConfig`.
   * @param {object|null} assistant - Assistant used to execute tools.
   * @returns {Promise<{finalResponse: object, toolCallsRequested: Array, allToolsExecuted: Array, accumulatedUsage: object}>}
   */
  async _executeWithToolLoop(apiConfig, assistant) {
    const { result: response } = await retryWithBackoff(
      () => this.client.responses.create(apiConfig),
      { providerName: 'NexusEvalProvider' }
    );

    let finalResponse = response;
    const toolCallsRequested = [];
    const allToolsExecuted = [];
    const accumulatedUsage = {
      input_tokens: response.usage?.input_tokens || 0,
      output_tokens: response.usage?.output_tokens || 0,
      total_tokens: response.usage?.total_tokens || 0,
    };

    const functionCalls = (response.output || []).filter(item => item.type === 'function_call');
    if (!functionCalls.length) {
      return { finalResponse, toolCallsRequested, allToolsExecuted, accumulatedUsage };
    }

    // Record initial tool calls
    this._recordToolCalls(functionCalls, toolCallsRequested);

    if (this.mode === 'full-pipeline' && assistant) {
      finalResponse = await this._executeFullPipeline(
        apiConfig, finalResponse, assistant, toolCallsRequested, allToolsExecuted, accumulatedUsage
      );
    } else if (this.mode === 'dry-run') {
      finalResponse = await this._executeDryRun(apiConfig, finalResponse, functionCalls, accumulatedUsage);
    }

    return { finalResponse, toolCallsRequested, allToolsExecuted, accumulatedUsage };
  }

  /**
   * Executes requested tools for real and feeds their outputs back to the
   * model, for at most MAX_FUNCTION_ROUNDS rounds or until a response
   * contains no function calls.
   *
   * @param {object} apiConfig
   * @param {object} initialResponse - Response that contained the first function calls.
   * @param {object} assistant - Passed to `handleFunctionCalls`.
   * @param {Array} toolCallsRequested - Mutated: newly requested calls are appended.
   * @param {Array} allToolsExecuted - Mutated: executed tool records are appended.
   * @param {object} usage - Mutated: token usage of follow-ups is added.
   * @returns {Promise<object>} The last response received.
   */
  async _executeFullPipeline(apiConfig, initialResponse, assistant, toolCallsRequested, allToolsExecuted, usage) {
    const functionCallsOf = response => (response?.output || []).filter(item => item.type === 'function_call');

    let finalResponse = initialResponse;
    const currentInput = [...apiConfig.input];

    for (let round = 1; round <= MAX_FUNCTION_ROUNDS; round++) {
      const calls = functionCallsOf(finalResponse);
      if (!calls.length) break;

      const { outputs, toolsExecuted } = await handleFunctionCalls(calls, assistant);
      currentInput.push(...finalResponse.output, ...outputs);
      allToolsExecuted.push(...toolsExecuted);
      this._recordToolCalls(calls, toolCallsRequested);

      // Follow-ups always use 'auto' so a forced tool_choice does not make
      // the model call the same tool again on every round.
      const { result: followUp } = await retryWithBackoff(
        () => this.client.responses.create({ ...apiConfig, input: currentInput, tool_choice: 'auto' }),
        { providerName: 'NexusEvalProvider' }
      );
      this._addUsage(usage, followUp.usage);
      finalResponse = followUp;
    }

    // Calls still pending once the round limit is hit were requested but
    // never executed; record them and make the truncation visible.
    const unanswered = functionCallsOf(finalResponse);
    if (unanswered.length) {
      this._recordToolCalls(unanswered, toolCallsRequested);
      logger.warn('[NexusEvalProvider] Function round limit reached with pending tool calls', {
        maxRounds: MAX_FUNCTION_ROUNDS,
        tools: unanswered.map(call => call.name),
      });
    }

    return finalResponse;
  }

  /**
   * Answers each requested function call with a stub `{ success: true }`
   * output (no tool is executed) and issues a single follow-up request.
   *
   * @param {object} apiConfig
   * @param {object} initialResponse - Response that contained the function calls.
   * @param {Array<object>} functionCalls - The `function_call` items to stub.
   * @param {object} usage - Mutated: token usage of the follow-up is added.
   * @returns {Promise<object>} The follow-up response.
   */
  async _executeDryRun(apiConfig, initialResponse, functionCalls, usage) {
    const stubOutputs = functionCalls.map(call => ({
      type: 'function_call_output',
      call_id: call.call_id,
      output: JSON.stringify({ success: true }),
    }));
    const currentInput = [...apiConfig.input, ...(initialResponse.output || []), ...stubOutputs];

    logger.info('[NexusEvalProvider] Dry-run: sending stub outputs', {
      tools: functionCalls.map(c => c.name),
    });

    const { result: followUp } = await retryWithBackoff(
      () => this.client.responses.create({ ...apiConfig, input: currentInput, tool_choice: 'auto' }),
      { providerName: 'NexusEvalProvider' }
    );
    this._addUsage(usage, followUp.usage);
    return followUp;
  }

  /**
   * Adds a response's token counts to the running totals in place. Missing
   * usage objects or fields count as 0.
   *
   * @param {{input_tokens: number, output_tokens: number, total_tokens: number}} accumulated
   * @param {object} [newUsage]
   */
  _addUsage(accumulated, newUsage) {
    accumulated.input_tokens += newUsage?.input_tokens || 0;
    accumulated.output_tokens += newUsage?.output_tokens || 0;
    accumulated.total_tokens += newUsage?.total_tokens || 0;
  }

  /**
   * Logs a summary and builds the result object returned from `callApi`.
   * When tools were requested, the output text is prefixed with a
   * `[tools: a, b]` header followed by a `---` separator line.
   *
   * @param {object} params - Values collected by `callApi`.
   * @returns {{output: string, tokenUsage: object, metadata: object}}
   */
  _formatResult({ finalResponse, toolCallsRequested, allToolsExecuted, accumulatedUsage, assistantId, numero, lastUserMessage, messages, promptVariables, startTime }) {
    const output = this.provider._extractMessageOutput(finalResponse);
    const durationMs = Date.now() - startTime;
    const toolNames = toolCallsRequested.map(t => t.name);
    const model = finalResponse.model || this.model;

    logger.info('[NexusEvalProvider] Result', {
      assistantId, model,
      lastUserMessage, toolCallsCaptured: toolNames,
      outputLength: (output || '').length, durationMs,
    });

    const toolsHeader = toolNames.length ? `[tools: ${toolNames.join(', ')}]\n---\n` : '';

    return {
      output: `${toolsHeader}${output || ''}`,
      tokenUsage: {
        total: accumulatedUsage.total_tokens,
        prompt: accumulatedUsage.input_tokens,
        completion: accumulatedUsage.output_tokens,
      },
      metadata: {
        model,
        numero, assistantId, lastUserMessage,
        mode: this.mode,
        contextMessages: messages.length,
        clinicalContext: promptVariables.clinical_context ? 'present' : 'absent',
        durationMs,
        toolCalls: toolCallsRequested,
        toolsExecuted: allToolsExecuted.map(t => ({
          name: t.tool_name,
          arguments: t.tool_arguments,
          output: t.tool_output,
          success: t.success,
          duration_ms: t.execution_time_ms,
        })),
      },
    };
  }
}
|
|
308
|
+
|
|
309
|
+
module.exports = { EvalProvider };
|
package/lib/index.js
CHANGED
|
@@ -10,7 +10,7 @@ const { OpenAIAssistantsProvider } = require('./providers/OpenAIAssistantsProvid
|
|
|
10
10
|
const { OpenAIResponsesProvider } = require('./providers/OpenAIResponsesProvider');
|
|
11
11
|
const runtimeConfig = require('./config/runtimeConfig');
|
|
12
12
|
const llmConfigModule = require('./config/llmConfig');
|
|
13
|
-
const { setModelDatabases, setModelDatabase, getModelDatabase } = require('./config/mongoConfig');
|
|
13
|
+
const { setModelDatabases, setModelDatabase, getModelDatabase, connect: mongoConnect, disconnect: mongoDisconnect, getConnection: mongoGetConnection } = require('./config/mongoConfig');
|
|
14
14
|
const { TwilioProvider } = require('./adapters/TwilioProvider');
|
|
15
15
|
const { BaileysProvider } = require('./adapters/BaileysProvider');
|
|
16
16
|
const { BaseAssistant } = require('./assistants/BaseAssistant');
|
|
@@ -219,6 +219,8 @@ class Nexus {
|
|
|
219
219
|
|
|
220
220
|
const routes = require('./routes');
|
|
221
221
|
const { resetAll } = require('./config/lifecycle');
|
|
222
|
+
const { EvalProvider } = require('./eval/EvalProvider');
|
|
223
|
+
const airtableService = require('./services/airtableService');
|
|
222
224
|
|
|
223
225
|
module.exports = {
|
|
224
226
|
Nexus,
|
|
@@ -254,5 +256,11 @@ module.exports = {
|
|
|
254
256
|
createQueueAdapter,
|
|
255
257
|
registerQueueAdapter,
|
|
256
258
|
|
|
257
|
-
resetAll
|
|
259
|
+
resetAll,
|
|
260
|
+
|
|
261
|
+
mongoConnect,
|
|
262
|
+
mongoDisconnect,
|
|
263
|
+
mongoGetConnection,
|
|
264
|
+
EvalProvider,
|
|
265
|
+
airtableService,
|
|
258
266
|
};
|
|
@@ -2,6 +2,13 @@ const { airtable } = require('../config/airtableConfig');
|
|
|
2
2
|
|
|
3
3
|
const { logger } = require('../utils/logger');
|
|
4
4
|
|
|
5
|
+
let evalMode = false;
|
|
6
|
+
|
|
7
|
+
/**
 * Turns eval mode on or off for this module and logs the new state.
 *
 * The flag is read by `addRecord` and `updateRecordByFilter`, which return
 * mock records instead of writing while it is set.
 *
 * @param {*} enabled - Coerced to a boolean.
 */
function setEvalMode(enabled) {
  evalMode = Boolean(enabled);
  const stateLabel = evalMode ? 'ON' : 'OFF';
  const writeLabel = evalMode ? 'muted' : 'live';
  logger.info(`[airtableService] Eval mode ${stateLabel} — writes will be ${writeLabel}`);
}
|
|
11
|
+
|
|
5
12
|
function getBase(baseID) {
|
|
6
13
|
if (!airtable) throw new Error('Airtable not configured. Set AIRTABLE_API_KEY');
|
|
7
14
|
return airtable.base(baseID);
|
|
@@ -17,6 +24,10 @@ async function collectRecords(query, mapper = r => r.fields) {
|
|
|
17
24
|
}
|
|
18
25
|
|
|
19
26
|
async function addRecord(baseID, tableName, fields) {
|
|
27
|
+
if (evalMode) {
|
|
28
|
+
logger.info('[addRecord:eval] Muted', { tableName });
|
|
29
|
+
return { id: 'eval_mock_record', fields: Array.isArray(fields) ? fields[0]?.fields || {} : fields };
|
|
30
|
+
}
|
|
20
31
|
try {
|
|
21
32
|
const record = await getBase(baseID)(tableName).create(fields);
|
|
22
33
|
logger.info('[addRecord] Created', { tableName });
|
|
@@ -48,6 +59,10 @@ async function getRecordByFilter(baseID, tableName, filter, view = 'Grid view')
|
|
|
48
59
|
}
|
|
49
60
|
|
|
50
61
|
async function updateRecordByFilter(baseID, tableName, filter, updateFields) {
|
|
62
|
+
if (evalMode) {
|
|
63
|
+
logger.info('[updateRecordByFilter:eval] Muted', { tableName, filter });
|
|
64
|
+
return [{ id: 'eval_mock_record', fields: updateFields }];
|
|
65
|
+
}
|
|
51
66
|
try {
|
|
52
67
|
const base = getBase(baseID);
|
|
53
68
|
const updatedRecords = [];
|
|
@@ -88,6 +103,7 @@ async function addLinkedRecord(baseID, targetTable, fields, linkConfig) {
|
|
|
88
103
|
}
|
|
89
104
|
|
|
90
105
|
module.exports = {
|
|
106
|
+
setEvalMode,
|
|
91
107
|
addRecord,
|
|
92
108
|
getRecords,
|
|
93
109
|
getRecordByFilter,
|