@juspay/neurolink 5.1.0 → 5.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -9
- package/README.md +123 -126
- package/dist/agent/direct-tools.d.ts +6 -6
- package/dist/cli/commands/config.d.ts +3 -3
- package/dist/cli/commands/mcp.js +8 -7
- package/dist/cli/factories/command-factory.d.ts +4 -0
- package/dist/cli/factories/command-factory.js +63 -8
- package/dist/cli/index.js +87 -140
- package/dist/core/base-provider.d.ts +423 -0
- package/dist/core/base-provider.js +376 -0
- package/dist/core/constants.d.ts +2 -1
- package/dist/core/constants.js +2 -1
- package/dist/core/dynamic-models.d.ts +6 -6
- package/dist/core/evaluation.d.ts +19 -80
- package/dist/core/evaluation.js +185 -484
- package/dist/core/factory.d.ts +3 -3
- package/dist/core/factory.js +31 -91
- package/dist/core/service-registry.d.ts +47 -0
- package/dist/core/service-registry.js +112 -0
- package/dist/core/types.d.ts +8 -1
- package/dist/factories/compatibility-factory.js +1 -1
- package/dist/factories/provider-factory.d.ts +72 -0
- package/dist/factories/provider-factory.js +144 -0
- package/dist/factories/provider-registry.d.ts +38 -0
- package/dist/factories/provider-registry.js +107 -0
- package/dist/index.d.ts +4 -3
- package/dist/index.js +2 -4
- package/dist/lib/agent/direct-tools.d.ts +6 -6
- package/dist/lib/core/base-provider.d.ts +423 -0
- package/dist/lib/core/base-provider.js +376 -0
- package/dist/lib/core/constants.d.ts +2 -1
- package/dist/lib/core/constants.js +2 -1
- package/dist/lib/core/dynamic-models.d.ts +6 -6
- package/dist/lib/core/evaluation.d.ts +19 -80
- package/dist/lib/core/evaluation.js +185 -484
- package/dist/lib/core/factory.d.ts +3 -3
- package/dist/lib/core/factory.js +30 -91
- package/dist/lib/core/service-registry.d.ts +47 -0
- package/dist/lib/core/service-registry.js +112 -0
- package/dist/lib/core/types.d.ts +8 -1
- package/dist/lib/factories/compatibility-factory.js +1 -1
- package/dist/lib/factories/provider-factory.d.ts +72 -0
- package/dist/lib/factories/provider-factory.js +144 -0
- package/dist/lib/factories/provider-registry.d.ts +38 -0
- package/dist/lib/factories/provider-registry.js +107 -0
- package/dist/lib/index.d.ts +4 -3
- package/dist/lib/index.js +2 -4
- package/dist/lib/mcp/client.d.ts +1 -0
- package/dist/lib/mcp/client.js +1 -0
- package/dist/lib/mcp/config.js +28 -3
- package/dist/lib/mcp/context-manager.d.ts +1 -0
- package/dist/lib/mcp/context-manager.js +8 -4
- package/dist/lib/mcp/function-calling.d.ts +13 -0
- package/dist/lib/mcp/function-calling.js +134 -35
- package/dist/lib/mcp/initialize-tools.d.ts +1 -1
- package/dist/lib/mcp/initialize-tools.js +45 -1
- package/dist/lib/mcp/initialize.js +16 -6
- package/dist/lib/mcp/neurolink-mcp-client.d.ts +1 -0
- package/dist/lib/mcp/neurolink-mcp-client.js +21 -5
- package/dist/lib/mcp/servers/agent/direct-tools-server.d.ts +8 -0
- package/dist/lib/mcp/servers/agent/direct-tools-server.js +109 -0
- package/dist/lib/mcp/servers/ai-providers/ai-core-server.js +3 -1
- package/dist/lib/mcp/servers/ai-providers/ai-workflow-tools.d.ts +2 -2
- package/dist/lib/mcp/unified-registry.d.ts +4 -0
- package/dist/lib/mcp/unified-registry.js +42 -9
- package/dist/lib/neurolink.d.ts +156 -117
- package/dist/lib/neurolink.js +619 -404
- package/dist/lib/providers/amazon-bedrock.d.ts +32 -0
- package/dist/lib/providers/amazon-bedrock.js +143 -0
- package/dist/lib/providers/analytics-helper.js +7 -4
- package/dist/lib/providers/anthropic-baseprovider.d.ts +23 -0
- package/dist/lib/providers/anthropic-baseprovider.js +114 -0
- package/dist/lib/providers/anthropic.d.ts +19 -43
- package/dist/lib/providers/anthropic.js +82 -306
- package/dist/lib/providers/azure-openai.d.ts +20 -0
- package/dist/lib/providers/azure-openai.js +89 -0
- package/dist/lib/providers/function-calling-provider.d.ts +64 -2
- package/dist/lib/providers/function-calling-provider.js +208 -9
- package/dist/lib/providers/google-ai-studio.d.ts +23 -0
- package/dist/lib/providers/google-ai-studio.js +107 -0
- package/dist/lib/providers/google-vertex.d.ts +47 -0
- package/dist/lib/providers/google-vertex.js +205 -0
- package/dist/lib/providers/huggingFace.d.ts +32 -25
- package/dist/lib/providers/huggingFace.js +97 -431
- package/dist/lib/providers/index.d.ts +9 -9
- package/dist/lib/providers/index.js +9 -9
- package/dist/lib/providers/mcp-provider.js +24 -5
- package/dist/lib/providers/mistral.d.ts +42 -0
- package/dist/lib/providers/mistral.js +160 -0
- package/dist/lib/providers/ollama.d.ts +52 -36
- package/dist/lib/providers/ollama.js +297 -520
- package/dist/lib/providers/openAI.d.ts +19 -18
- package/dist/lib/providers/openAI.js +76 -275
- package/dist/lib/sdk/tool-extension.d.ts +181 -0
- package/dist/lib/sdk/tool-extension.js +283 -0
- package/dist/lib/sdk/tool-registration.d.ts +95 -0
- package/dist/lib/sdk/tool-registration.js +167 -0
- package/dist/lib/services/streaming/streaming-manager.js +11 -10
- package/dist/lib/services/websocket/websocket-server.js +12 -11
- package/dist/lib/telemetry/telemetry-service.js +8 -7
- package/dist/lib/types/generate-types.d.ts +1 -0
- package/dist/lib/types/mcp-types.d.ts +116 -0
- package/dist/lib/types/mcp-types.js +5 -0
- package/dist/lib/types/stream-types.d.ts +30 -18
- package/dist/lib/types/universal-provider-options.d.ts +87 -0
- package/dist/lib/types/universal-provider-options.js +53 -0
- package/dist/mcp/client.d.ts +1 -0
- package/dist/mcp/client.js +1 -0
- package/dist/mcp/config.js +28 -3
- package/dist/mcp/context-manager.d.ts +1 -0
- package/dist/mcp/context-manager.js +8 -4
- package/dist/mcp/function-calling.d.ts +13 -0
- package/dist/mcp/function-calling.js +134 -35
- package/dist/mcp/initialize-tools.d.ts +1 -1
- package/dist/mcp/initialize-tools.js +45 -1
- package/dist/mcp/initialize.js +16 -6
- package/dist/mcp/neurolink-mcp-client.d.ts +1 -0
- package/dist/mcp/neurolink-mcp-client.js +21 -5
- package/dist/mcp/servers/agent/direct-tools-server.d.ts +8 -0
- package/dist/mcp/servers/agent/direct-tools-server.js +109 -0
- package/dist/mcp/servers/ai-providers/ai-core-server.js +3 -1
- package/dist/mcp/servers/ai-providers/ai-workflow-tools.d.ts +2 -2
- package/dist/mcp/unified-registry.d.ts +4 -0
- package/dist/mcp/unified-registry.js +42 -9
- package/dist/neurolink.d.ts +156 -117
- package/dist/neurolink.js +619 -404
- package/dist/providers/amazon-bedrock.d.ts +32 -0
- package/dist/providers/amazon-bedrock.js +143 -0
- package/dist/providers/analytics-helper.js +7 -4
- package/dist/providers/anthropic-baseprovider.d.ts +23 -0
- package/dist/providers/anthropic-baseprovider.js +114 -0
- package/dist/providers/anthropic.d.ts +19 -43
- package/dist/providers/anthropic.js +81 -305
- package/dist/providers/azure-openai.d.ts +20 -0
- package/dist/providers/azure-openai.js +89 -0
- package/dist/providers/function-calling-provider.d.ts +64 -2
- package/dist/providers/function-calling-provider.js +208 -9
- package/dist/providers/google-ai-studio.d.ts +23 -0
- package/dist/providers/google-ai-studio.js +108 -0
- package/dist/providers/google-vertex.d.ts +47 -0
- package/dist/providers/google-vertex.js +205 -0
- package/dist/providers/huggingFace.d.ts +32 -25
- package/dist/providers/huggingFace.js +96 -430
- package/dist/providers/index.d.ts +9 -9
- package/dist/providers/index.js +9 -9
- package/dist/providers/mcp-provider.js +24 -5
- package/dist/providers/mistral.d.ts +42 -0
- package/dist/providers/mistral.js +160 -0
- package/dist/providers/ollama.d.ts +52 -36
- package/dist/providers/ollama.js +297 -519
- package/dist/providers/openAI.d.ts +19 -18
- package/dist/providers/openAI.js +76 -276
- package/dist/sdk/tool-extension.d.ts +181 -0
- package/dist/sdk/tool-extension.js +283 -0
- package/dist/sdk/tool-registration.d.ts +95 -0
- package/dist/sdk/tool-registration.js +168 -0
- package/dist/services/streaming/streaming-manager.js +11 -10
- package/dist/services/websocket/websocket-server.js +12 -11
- package/dist/telemetry/telemetry-service.js +8 -7
- package/dist/types/generate-types.d.ts +1 -0
- package/dist/types/mcp-types.d.ts +116 -0
- package/dist/types/mcp-types.js +5 -0
- package/dist/types/stream-types.d.ts +30 -18
- package/dist/types/universal-provider-options.d.ts +87 -0
- package/dist/types/universal-provider-options.js +53 -0
- package/package.json +12 -5
- package/dist/lib/providers/agent-enhanced-provider.d.ts +0 -93
- package/dist/lib/providers/agent-enhanced-provider.js +0 -605
- package/dist/lib/providers/amazonBedrock.d.ts +0 -28
- package/dist/lib/providers/amazonBedrock.js +0 -364
- package/dist/lib/providers/azureOpenAI.d.ts +0 -42
- package/dist/lib/providers/azureOpenAI.js +0 -347
- package/dist/lib/providers/googleAIStudio.d.ts +0 -42
- package/dist/lib/providers/googleAIStudio.js +0 -364
- package/dist/lib/providers/googleVertexAI.d.ts +0 -34
- package/dist/lib/providers/googleVertexAI.js +0 -547
- package/dist/lib/providers/mistralAI.d.ts +0 -37
- package/dist/lib/providers/mistralAI.js +0 -325
- package/dist/providers/agent-enhanced-provider.d.ts +0 -93
- package/dist/providers/agent-enhanced-provider.js +0 -606
- package/dist/providers/amazonBedrock.d.ts +0 -28
- package/dist/providers/amazonBedrock.js +0 -364
- package/dist/providers/azureOpenAI.d.ts +0 -42
- package/dist/providers/azureOpenAI.js +0 -348
- package/dist/providers/googleAIStudio.d.ts +0 -42
- package/dist/providers/googleAIStudio.js +0 -364
- package/dist/providers/googleVertexAI.d.ts +0 -34
- package/dist/providers/googleVertexAI.js +0 -547
- package/dist/providers/mistralAI.d.ts +0 -37
- package/dist/providers/mistralAI.js +0 -325
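Of the removals above, note that the camelCase provider modules (agent-enhanced-provider, amazonBedrock, azureOpenAI, googleAIStudio, googleVertexAI, mistralAI) are deleted and replaced by kebab-case modules (amazon-bedrock, azure-openai, google-ai-studio, google-vertex, mistral) built on the new base-provider, provider-factory, and provider-registry files. A minimal sketch of what the rename means for code that deep-imports provider modules from dist; the import paths are hypothetical, deep imports into dist are generally unsupported, and the package's exports map may block them entirely:

// Sketch: loading the Vertex provider module across the 5.1.0 -> 5.3.0 rename.
// dist/providers/googleVertexAI.js is removed in this diff and
// dist/providers/google-vertex.js ships in its place.
async function loadVertexProviderModule() {
  try {
    // 5.3.0 layout (kebab-case, built on the new base-provider)
    return await import("@juspay/neurolink/dist/providers/google-vertex.js");
  } catch {
    // 5.1.0 layout (camelCase), no longer present in 5.3.0
    return await import("@juspay/neurolink/dist/providers/googleVertexAI.js");
  }
}

The largest single rewrite is the core evaluation module (dist/core/evaluation.js and its dist/lib mirror, +185/-484 each); the hunk below appears to come from that file.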
@@ -1,528 +1,229 @@
 /**
  * NeuroLink Unified Evaluation System
- *
- * Combines Universal Evaluation with Lighthouse-Enhanced capabilities
- * - Domain-aware evaluation with sophisticated context handling
- * - Multi-provider support with fallback strategies
- * - Structured output with Zod schema validation
- * - Tool usage and conversation history analysis
- * - Enterprise-grade reliability and performance
 */
 import { logger } from "../utils/logger.js";
 import { AIProviderFactory } from "./factory.js";
 import { z } from "zod";
-
-
-
-
-
-
-
-
-
-
- accuracy: z
- .number()
- .min(0)
- .max(10)
- .describe("Score (0-10) for factual correctness against data, tool outputs, and domain knowledge. 10 is most accurate."),
- completeness: z
- .number()
- .min(0)
- .max(10)
- .describe("Score (0-10) for how completely the response addresses the query. 10 is most complete."),
- // Enhanced domain scores (optional)
- domainAlignment: z
- .number()
- .min(0)
- .max(10)
- .optional()
- .describe("Score (0-10) for how well response aligns with specified domain expertise."),
- terminologyAccuracy: z
- .number()
- .min(0)
- .max(10)
- .optional()
- .describe("Score (0-10) for correct usage of domain-specific terminology."),
- toolEffectiveness: z
- .number()
- .min(0)
- .max(10)
- .optional()
- .describe("Score (0-10) for how effectively available tools/MCPs were utilized."),
- // Qualitative assessment
- isOffTopic: z
- .boolean()
- .describe("True if the response significantly deviates from query/domain."),
- reasoning: z
- .string()
- .describe("Brief justification for scores, especially if low or off-topic. Max 150 words."),
- suggestedImprovements: z
- .string()
- .optional()
- .describe("Optional: Suggestions for improving the original response. Max 100 words."),
- alertSeverity: z
- .enum(["low", "medium", "high", "none"])
- .describe("Suggested alert severity considering all scores and domain context."),
+ import { ProviderRegistry } from "../factories/provider-registry.js";
+ // Zod schema for validation
+ const UnifiedEvaluationSchema = z.object({
+ relevance: z.number().min(1).max(10),
+ accuracy: z.number().min(1).max(10),
+ completeness: z.number().min(1).max(10),
+ overall: z.number().min(1).max(10),
+ domainAlignment: z.number().min(1).max(10).optional(),
+ terminologyAccuracy: z.number().min(1).max(10).optional(),
+ toolEffectiveness: z.number().min(1).max(10).optional(),
 });
 /**
- *
+ * Get default evaluation when evaluation fails
 */
-
- const functionTag = "
-
-
-
-
- mode,
- domain: context.primaryDomain,
- toolsUsed: context.toolsUsed?.length || 0,
- conversationTurns: context.conversationHistory?.length || 0,
- queryLength: context.userQuery.length,
- responseLength: context.aiResponse.length,
+ function getDefaultUnifiedEvaluation(reason, evaluationTime, context) {
+ const functionTag = "getDefaultUnifiedEvaluation";
+ logger.debug(`[${functionTag}] Creating default evaluation`, {
+ reason,
+ evaluationTime,
+ hasContext: !!context,
 });
- const { parseEvaluationConfig } = await import("./evaluation-config.js");
- const config = parseEvaluationConfig();
- let lastError = null;
- for (let attempt = 0; attempt <= config.retryAttempts; attempt++) {
- try {
- // Get evaluation model
- const evaluationModelResult = await getEvaluationModel();
- if (!evaluationModelResult) {
- logger.debug(`[${functionTag}] No evaluation model available, returning defaults`);
- return getDefaultUnifiedEvaluation("unavailable", Date.now() - startTime, context);
- }
- const { provider: evaluationModel, config: modelConfig } = evaluationModelResult;
- // Create evaluation prompt based on mode
- const evaluationPrompt = createUnifiedEvaluationPrompt(context, mode);
- logger.debug(`[${functionTag}] Using ${mode} evaluation mode`, {
- provider: modelConfig.providerName,
- model: modelConfig.modelName,
- attempt: attempt + 1,
- });
- // Try structured evaluation first (preferred)
- try {
- const structuredResult = await evaluationModel.generateObject({
- schema: unifiedEvaluationSchema,
- prompt: evaluationPrompt,
- temperature: 0.1,
- maxTokens: 1000,
- system: createUnifiedSystemPrompt(mode),
- });
- return processStructuredEvaluationResult(structuredResult.object, modelConfig, Date.now() - startTime, context, attempt + 1);
- }
- catch (structuredError) {
- logger.warn(`[${functionTag}] Structured evaluation failed, using fallback`, { structuredError });
- // Fallback to legacy generate
- const result = await evaluationModel.generate({
- prompt: evaluationPrompt + "\n\nRespond with valid JSON only.",
- temperature: 0.1,
- maxTokens: 1000,
- systemPrompt: createUnifiedSystemPrompt(mode),
- });
- const responseText = result?.text || result?.content;
- if (!responseText) {
- throw new Error("No evaluation text received from fallback");
- }
- return parseUnifiedEvaluationResult(responseText, modelConfig, Date.now() - startTime, context, attempt + 1);
- }
- }
- catch (error) {
- lastError = error instanceof Error ? error : new Error(String(error));
- logger.warn(`[${functionTag}] Evaluation attempt ${attempt + 1} failed:`, lastError.message);
- if (attempt === config.retryAttempts) {
- break;
- }
- // Exponential backoff
- await new Promise((resolve) => setTimeout(resolve, Math.pow(2, attempt) * 1000));
- }
- }
- // All attempts failed
- logger.error(`[${functionTag}] All evaluation attempts failed:`, lastError?.message);
- return getDefaultUnifiedEvaluation(lastError?.message || "unknown-error", Date.now() - startTime, context);
- }
- /**
- * Detect appropriate evaluation mode based on context
- */
- function detectEvaluationMode(context) {
- // Lighthouse mode: Has domain awareness, tool context, or conversation history
- if (context.primaryDomain ||
- context.toolsUsed?.length ||
- context.conversationHistory?.length) {
- return "lighthouse";
- }
- // Enhanced mode: Has rich context
- if (context.context && Object.keys(context.context).length > 0) {
- return "enhanced";
- }
- // Simple mode: Basic evaluation
- return "simple";
- }
- /**
- * Create unified evaluation prompt based on mode
- */
- function createUnifiedEvaluationPrompt(context, mode) {
- switch (mode) {
- case "lighthouse":
- return createLighthouseEvaluationPrompt(context);
- case "enhanced":
- return createEnhancedEvaluationPrompt(context);
- case "simple":
- default:
- return createSimpleEvaluationPrompt(context);
- }
- }
- /**
- * Create Lighthouse-style domain-aware evaluation prompt
- */
- function createLighthouseEvaluationPrompt(context) {
- const { userQuery, aiResponse, primaryDomain = "general AI assistant", assistantRole = "AI assistant", toolContext = "No specific tools used in this interaction", conversationHistory = [], } = context;
- const formattedHistory = formatConversationHistory(conversationHistory);
- return `You are an AI Response Evaluator with advanced domain awareness.
-
- **EVALUATION CONTEXT**:
-
- 1. **Primary Assistant Domain**: "${primaryDomain}"
- - This defines the AI assistant's core expertise area
- - Responses should demonstrate competency within this domain
- - Domain-specific terminology should be used accurately
-
- 2. **Assistant Role**: "${assistantRole}"
- - This defines the specific role the assistant should fulfill
- - Responses should align with this role's responsibilities
-
- 3. **Tool Usage Context**: "${toolContext}"
- - Tools/MCPs are capabilities the assistant used to generate the response
- - Evaluate how effectively these tools were utilized
- - Consider if additional tools should have been used
-
- 4. **Conversation History**:
- ```
- ${formattedHistory}
- ```
-
- **CRITICAL DOMAIN FAILURE ASSESSMENT**:
- Pay special attention to domain alignment. If the query is within the assistant's domain and sufficient context is available:
- - Inability to answer ("I can't help", generic errors, evasions) = HIGH ALERT
- - Incorrect domain-specific information = HIGH ALERT
- - Misuse of domain terminology = MEDIUM-HIGH ALERT
-
- **EVALUATION CRITERIA**:
- - **relevanceScore** (0-10): Direct query addressing + domain alignment
- - **accuracyScore** (0-10): Factual correctness + terminology accuracy
- - **completenessScore** (0-10): Full query addressing + appropriate depth
- - **domainAlignment** (0-10): How well response fits the domain expertise
- - **terminologyAccuracy** (0-10): Correct use of domain-specific terms
- - **toolEffectiveness** (0-10): How well available tools were utilized
- - **isOffTopic** (boolean): True if significantly deviates from domain/query
- - **reasoning** (string): Brief explanation (max 150 words)
- - **suggestedImprovements** (string): How to improve (max 100 words)
- - **alertSeverity** ('low'|'medium'|'high'|'none'): Based on domain failure assessment
-
- **Current User Query**:
- "${userQuery}"
-
- **AI Assistant Response**:
- "${aiResponse}"
-
- Provide your assessment in the specified format.`;
- }
- /**
- * Create enhanced evaluation prompt
- */
- function createEnhancedEvaluationPrompt(context) {
- const { userQuery, aiResponse, context: additionalContext } = context;
- const contextInfo = additionalContext
- ? `\nContext: ${JSON.stringify(additionalContext, null, 2)}`
- : "";
- return `Evaluate this AI response with enhanced criteria:
-
- Query: "${userQuery}"
- Response: "${aiResponse}"${contextInfo}
-
- Provide scores for:
- - relevanceScore (0-10): How well the response addresses the query
- - accuracyScore (0-10): Factual correctness and reliability
- - completenessScore (0-10): Whether the response fully answers the question
- - isOffTopic (boolean): Whether response deviates from query
- - reasoning (string): Brief explanation of scores
- - alertSeverity ('low'|'medium'|'high'|'none'): Overall quality assessment
-
- Respond in the specified format.`;
- }
- /**
- * Create simple evaluation prompt
- */
- function createSimpleEvaluationPrompt(context) {
- const { userQuery, aiResponse } = context;
- return `Rate this AI response:
-
- Q: "${userQuery}"
- A: "${aiResponse}"
-
- Provide:
- - relevanceScore (0-10)
- - accuracyScore (0-10)
- - completenessScore (0-10)
- - reasoning (brief explanation)
-
- Respond in the specified format.`;
- }
- /**
- * Create unified system prompt based on mode
- */
- function createUnifiedSystemPrompt(mode) {
- const basePrompt = "You are an expert AI Response Evaluator. Respond with valid structured output only.";
- switch (mode) {
- case "lighthouse":
- return `${basePrompt} Use advanced domain awareness and sophisticated context analysis for comprehensive evaluation.`;
- case "enhanced":
- return `${basePrompt} Consider all provided context and metadata for thorough evaluation.`;
- case "simple":
- default:
- return `${basePrompt} Focus on core quality metrics: relevance, accuracy, and completeness.`;
- }
- }
- /**
- * Process structured evaluation result
- */
- function processStructuredEvaluationResult(result, modelConfig, evaluationTime, context, attempt) {
- // Calculate overall score
- const coreScores = [
- result.relevanceScore || 0,
- result.accuracyScore || 0,
- result.completenessScore || 0,
- ];
- const enhancedScores = [
- result.domainAlignment,
- result.terminologyAccuracy,
- result.toolEffectiveness,
- ].filter((score) => typeof score === "number" && score > 0);
- const allScores = [...coreScores, ...enhancedScores];
- const overall = Math.round(allScores.reduce((sum, score) => sum + score, 0) / allScores.length);
 return {
-
-
-
-
-
-
-
-
-
-
- // Domain-specific scores (if available)
- domainAlignment: result.domainAlignment
- ? Math.max(0, Math.min(10, Math.round(result.domainAlignment)))
- : undefined,
- terminologyAccuracy: result.terminologyAccuracy
- ? Math.max(0, Math.min(10, Math.round(result.terminologyAccuracy)))
- : undefined,
- toolEffectiveness: result.toolEffectiveness
- ? Math.max(0, Math.min(10, Math.round(result.toolEffectiveness)))
- : undefined,
- // Context analysis
+ relevance: 1,
+ accuracy: 1,
+ completeness: 1,
+ overall: 1,
+ domainAlignment: 1,
+ terminologyAccuracy: 1,
+ toolEffectiveness: 1,
+ isOffTopic: false,
+ alertSeverity: "low",
+ reasoning: `Default evaluation used due to: ${reason}`,
 contextUtilization: {
- conversationUsed:
- toolsUsed:
- domainKnowledgeUsed:
+ conversationUsed: false,
+ toolsUsed: false,
+ domainKnowledgeUsed: false,
 },
- // Enhanced metadata
 evaluationContext: {
 domain: context.primaryDomain || "general",
- toolsEvaluated:
- conversationTurns:
+ toolsEvaluated: [],
+ conversationTurns: 0,
 },
-
- evaluationModel: `${modelConfig.providerName}/${modelConfig.modelName}`,
+ evaluationModel: "default",
 evaluationTime,
- evaluationProvider:
- evaluationAttempt:
+ evaluationProvider: "default",
+ evaluationAttempt: 1,
 evaluationConfig: {
- mode:
- fallbackUsed:
+ mode: "fallback",
+ fallbackUsed: true,
 costEstimate: 0,
 },
 };
 }
 /**
- * Parse evaluation result from text response
+ * Parse unified evaluation result from text response
 */
- function parseUnifiedEvaluationResult(
+ function parseUnifiedEvaluationResult(response, context) {
+ const functionTag = "parseUnifiedEvaluationResult";
 try {
-
-
-
+ logger.debug(`[${functionTag}] Parsing evaluation response`, {
+ responseLength: response.length,
+ });
+ // Try JSON parsing first
+ const jsonMatch = response.match(/\{[^}]*\}/s);
 if (jsonMatch) {
-
-
+ try {
+ const parsed = JSON.parse(jsonMatch[0]);
+ return parsed;
+ }
+ catch (e) {
+ logger.debug(`[${functionTag}] JSON parsing failed, trying regex`);
+ }
+ }
+ // Fallback to regex parsing
+ const result = {};
+ const patterns = {
+ relevance: /relevance[:\s]*([0-9]+(?:\.[0-9]+)?)/i,
+ accuracy: /accuracy[:\s]*([0-9]+(?:\.[0-9]+)?)/i,
+ completeness: /completeness[:\s]*([0-9]+(?:\.[0-9]+)?)/i,
+ overall: /overall[:\s]*([0-9]+(?:\.[0-9]+)?)/i,
+ };
+ for (const [key, pattern] of Object.entries(patterns)) {
+ const match = response.match(pattern);
+ if (match) {
+ const value = parseFloat(match[1]);
+ if (value >= 1 && value <= 10) {
+ result[key] = Math.round(value);
+ }
+ }
 }
- //
- const relevanceMatch = evaluationText.match(/(?:relevance[Score"\s]*:?["\s]*(\d+)|Relevance["\s]*:?["\s]*(\d+)|relevance.*?(\d+))/i);
- const accuracyMatch = evaluationText.match(/(?:accuracy[Score"\s]*:?["\s]*(\d+)|Accuracy["\s]*:?["\s]*(\d+)|accuracy.*?(\d+))/i);
- const completenessMatch = evaluationText.match(/(?:completeness[Score"\s]*:?["\s]*(\d+)|Completeness["\s]*:?["\s]*(\d+)|completeness.*?(\d+))/i);
- // Extract scores with fallback to default values
- const relevance = relevanceMatch
- ? parseInt(relevanceMatch[1] || relevanceMatch[2] || relevanceMatch[3], 10)
- : 8; // Default fallback score
- const accuracy = accuracyMatch
- ? parseInt(accuracyMatch[1] || accuracyMatch[2] || accuracyMatch[3], 10)
- : 8; // Default fallback score
- const completeness = completenessMatch
- ? parseInt(completenessMatch[1] || completenessMatch[2] || completenessMatch[3], 10)
- : 8; // Default fallback score
+ // Ensure minimum valid scores
 return {
- relevance:
- accuracy:
- completeness:
- overall:
- isOffTopic: false,
- alertSeverity: "none",
- reasoning: "Parsed using regex fallback - response was not in expected JSON format.",
- evaluationModel: `${modelConfig.providerName}/${modelConfig.modelName}`,
- evaluationTime,
- evaluationProvider: modelConfig.providerName,
- evaluationAttempt: attempt,
- evaluationConfig: {
- mode: "fallback",
- fallbackUsed: true,
- costEstimate: 0,
- },
+ relevance: result.relevance || 1,
+ accuracy: result.accuracy || 1,
+ completeness: result.completeness || 1,
+ overall: result.overall || 1,
 };
 }
 catch (error) {
- logger.error(
-
+ logger.error(`[${functionTag}] Failed to parse evaluation result`, {
+ error,
+ });
+ return {
+ relevance: 1,
+ accuracy: 1,
+ completeness: 1,
+ overall: 1,
+ };
 }
 }
 /**
- *
- */
- function getDefaultUnifiedEvaluation(reason, evaluationTime, context) {
- return {
- relevance: 0,
- accuracy: 0,
- completeness: 0,
- overall: 0,
- isOffTopic: false,
- alertSeverity: "high",
- reasoning: `Evaluation unavailable (${reason}). This may be due to missing API keys, network issues, or service unavailability.`,
- suggestedImprovements: "Check evaluation system configuration, API credentials, and network connectivity.",
- evaluationModel: "unavailable",
- evaluationTime,
- evaluationProvider: "none",
- evaluationAttempt: 0,
- evaluationConfig: {
- mode: "default",
- fallbackUsed: true,
- costEstimate: 0,
- },
- contextUtilization: {
- conversationUsed: (context.conversationHistory?.length || 0) > 0,
- toolsUsed: (context.toolsUsed?.length || 0) > 0,
- domainKnowledgeUsed: !!context.primaryDomain,
- },
- evaluationContext: {
- domain: context.primaryDomain || "unknown",
- toolsEvaluated: context.toolsUsed || [],
- conversationTurns: context.conversationHistory?.length || 0,
- },
- };
- }
- /**
- * Enhanced evaluation model selection
+ * Main unified evaluation function
 */
- export async function
- const
- const
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- provider,
- config: {
- providerName,
- modelName,
- providerConfig,
- evaluationConfig: config,
- },
- };
- }
+ export async function generateUnifiedEvaluation(context) {
+ const functionTag = "generateUnifiedEvaluation";
+ const startTime = Date.now();
+ logger.debug(`[${functionTag}] Starting evaluation`, {
+ hasUserQuery: !!context.userQuery,
+ hasAiResponse: !!context.aiResponse,
+ domain: context.primaryDomain,
+ });
+ try {
+ // Ensure providers are registered
+ await ProviderRegistry.registerAllProviders();
+ // Get evaluation provider
+ const evaluationProvider = process.env.NEUROLINK_EVALUATION_PROVIDER || "google-ai";
+ const evaluationModel = process.env.NEUROLINK_EVALUATION_MODEL || "gemini-2.5-flash";
+ logger.debug(`[${functionTag}] Using provider: ${evaluationProvider}, model: ${evaluationModel}`);
+ const provider = await AIProviderFactory.createProvider(evaluationProvider, evaluationModel);
+ if (!provider) {
+ logger.debug(`[${functionTag}] No evaluation provider available, returning defaults`);
+ return getDefaultUnifiedEvaluation("no-provider", Date.now() - startTime, context);
 }
-
-
-
-
-
+ // Create evaluation prompt
+ const prompt = `
+ Evaluate this AI response on a scale of 1-10 for each criterion:
+
+ User Query: ${context.userQuery}
+ AI Response: ${context.aiResponse}
+
+ Rate on these criteria (1-10 scale):
+ - Relevance: How well does the response address the user's question?
+ - Accuracy: How factually correct and precise is the information?
+ - Completeness: How thoroughly does it cover the topic?
+ - Overall: General quality assessment
+
+ Respond in this exact format:
+ Relevance: [score]
+ Accuracy: [score]
+ Completeness: [score]
+ Overall: [score]
+ `;
+ // Generate evaluation
+ const result = await provider.generate(prompt);
+ if (!result) {
+ logger.debug(`[${functionTag}] No response from provider`);
+ return getDefaultUnifiedEvaluation("no-response", Date.now() - startTime, context);
 }
+ // Extract text from result
+ const response = typeof result === "string"
+ ? result
+ : result.text || String(result);
+ // Parse evaluation result
+ const parsed = parseUnifiedEvaluationResult(response, context);
+ // Validate and enhance result
+ const validatedResult = {
+ ...parsed,
+ evaluationModel: `${evaluationProvider}/${evaluationModel}`,
+ evaluationTime: Date.now() - startTime,
+ evaluationProvider,
+ evaluationAttempt: 1,
+ evaluationConfig: {
+ mode: "standard",
+ fallbackUsed: false,
+ costEstimate: 0.001, // Rough estimate
+ },
+ };
+ logger.debug(`[${functionTag}] Evaluation completed`, {
+ relevance: validatedResult.relevance,
+ accuracy: validatedResult.accuracy,
+ completeness: validatedResult.completeness,
+ overall: validatedResult.overall,
+ evaluationTime: validatedResult.evaluationTime,
+ });
+ return validatedResult;
 }
-
- }
-
-
-
- function formatConversationHistory(history) {
- if (!history?.length) {
- return "No prior conversation context.";
+ catch (error) {
+ logger.error(`[${functionTag}] Evaluation failed`, {
+ error: error instanceof Error ? error.message : String(error),
+ });
+ return getDefaultUnifiedEvaluation(error instanceof Error ? error.message : "unknown-error", Date.now() - startTime, context);
 }
- return history
- .slice(-3) // Last 3 turns
- .map((msg, i) => `${i + 1}. ${msg.role.toUpperCase()}: ${msg.content.substring(0, 200)}${msg.content.length > 200 ? "..." : ""}`)
- .join("\n");
- }
- /**
- * Create simple evaluation context (backward compatibility)
- */
- export function createSimpleEvaluationContext(prompt, response, context) {
- return {
- userQuery: prompt,
- aiResponse: response,
- context,
- mode: "simple",
- };
 }
-
-
-
-
-
-
+ // Legacy compatibility function with flexible arguments
+ export async function evaluateResponse(responseOrContext, contextOrUserQuery, userQuery, providedContexts, options, additionalArgs) {
+ // Handle different call patterns for backward compatibility
+ let aiResponse;
+ let context;
+ if (typeof responseOrContext === "string") {
+ // Normal call: evaluateResponse(response, context, ...)
+ aiResponse = responseOrContext;
+ context = contextOrUserQuery;
+ }
+ else {
+ // Provider call pattern: evaluateResponse(contextObject, userQuery, ...)
+ context = responseOrContext;
+ aiResponse =
+ context?.aiResponse ||
+ context?.response ||
+ String(contextOrUserQuery || "");
+ }
+ const evalContext = {
+ userQuery: userQuery ||
+ context?.userQuery ||
+ contextOrUserQuery ||
+ "Generated response",
 aiResponse,
- primaryDomain: options.domain,
- assistantRole: options.role,
- toolsUsed: options.toolsUsed,
- toolContext: options.toolsUsed?.length
- ? `Tools used: ${options.toolsUsed.join(", ")}`
- : undefined,
- conversationHistory: options.conversationHistory,
- sessionId: options.sessionId,
- context: options.context,
- mode: "lighthouse",
- };
- }
- // Legacy compatibility wrapper for old function signature
- export async function evaluateResponse(prompt, response, context, evaluationDomain, toolUsageContext, conversationHistory) {
- // Convert old arguments to new context format
- const unifiedContext = {
- userQuery: prompt,
- aiResponse: response,
 context,
- primaryDomain: evaluationDomain,
- toolContext: toolUsageContext,
- conversationHistory: conversationHistory,
- mode: evaluationDomain ? "lighthouse" : "simple",
 };
- return
+ return generateUnifiedEvaluation(evalContext);
 }
+ // Export additional utilities
+ export { getDefaultUnifiedEvaluation, parseUnifiedEvaluationResult };