@juspay/neurolink 3.0.1 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. package/CHANGELOG.md +66 -6
  2. package/README.md +318 -27
  3. package/dist/agent/direct-tools.d.ts +6 -6
  4. package/dist/chat/client-utils.d.ts +92 -0
  5. package/dist/chat/client-utils.js +298 -0
  6. package/dist/chat/index.d.ts +27 -0
  7. package/dist/chat/index.js +41 -0
  8. package/dist/chat/session-storage.d.ts +77 -0
  9. package/dist/chat/session-storage.js +233 -0
  10. package/dist/chat/session.d.ts +95 -0
  11. package/dist/chat/session.js +257 -0
  12. package/dist/chat/sse-handler.d.ts +49 -0
  13. package/dist/chat/sse-handler.js +266 -0
  14. package/dist/chat/types.d.ts +73 -0
  15. package/dist/chat/types.js +5 -0
  16. package/dist/chat/websocket-chat-handler.d.ts +36 -0
  17. package/dist/chat/websocket-chat-handler.js +262 -0
  18. package/dist/cli/commands/config.js +12 -12
  19. package/dist/cli/commands/mcp.js +3 -4
  20. package/dist/cli/index.d.ts +0 -7
  21. package/dist/cli/index.js +247 -28
  22. package/dist/config/configManager.d.ts +60 -0
  23. package/dist/config/configManager.js +300 -0
  24. package/dist/config/types.d.ts +136 -0
  25. package/dist/config/types.js +43 -0
  26. package/dist/core/analytics.d.ts +23 -0
  27. package/dist/core/analytics.js +131 -0
  28. package/dist/core/constants.d.ts +41 -0
  29. package/dist/core/constants.js +50 -0
  30. package/dist/core/defaults.d.ts +18 -0
  31. package/dist/core/defaults.js +29 -0
  32. package/dist/core/evaluation-config.d.ts +29 -0
  33. package/dist/core/evaluation-config.js +144 -0
  34. package/dist/core/evaluation-providers.d.ts +30 -0
  35. package/dist/core/evaluation-providers.js +187 -0
  36. package/dist/core/evaluation.d.ts +117 -0
  37. package/dist/core/evaluation.js +528 -0
  38. package/dist/core/factory.js +33 -25
  39. package/dist/core/types.d.ts +165 -6
  40. package/dist/core/types.js +3 -4
  41. package/dist/index.d.ts +9 -4
  42. package/dist/index.js +25 -4
  43. package/dist/lib/agent/direct-tools.d.ts +6 -6
  44. package/dist/lib/chat/client-utils.d.ts +92 -0
  45. package/dist/lib/chat/client-utils.js +298 -0
  46. package/dist/lib/chat/index.d.ts +27 -0
  47. package/dist/lib/chat/index.js +41 -0
  48. package/dist/lib/chat/session-storage.d.ts +77 -0
  49. package/dist/lib/chat/session-storage.js +233 -0
  50. package/dist/lib/chat/session.d.ts +95 -0
  51. package/dist/lib/chat/session.js +257 -0
  52. package/dist/lib/chat/sse-handler.d.ts +49 -0
  53. package/dist/lib/chat/sse-handler.js +266 -0
  54. package/dist/lib/chat/types.d.ts +73 -0
  55. package/dist/lib/chat/types.js +5 -0
  56. package/dist/lib/chat/websocket-chat-handler.d.ts +36 -0
  57. package/dist/lib/chat/websocket-chat-handler.js +262 -0
  58. package/dist/lib/config/configManager.d.ts +60 -0
  59. package/dist/lib/config/configManager.js +300 -0
  60. package/dist/lib/config/types.d.ts +136 -0
  61. package/dist/lib/config/types.js +43 -0
  62. package/dist/lib/core/analytics.d.ts +23 -0
  63. package/dist/lib/core/analytics.js +131 -0
  64. package/dist/lib/core/constants.d.ts +41 -0
  65. package/dist/lib/core/constants.js +50 -0
  66. package/dist/lib/core/defaults.d.ts +18 -0
  67. package/dist/lib/core/defaults.js +29 -0
  68. package/dist/lib/core/evaluation-config.d.ts +29 -0
  69. package/dist/lib/core/evaluation-config.js +144 -0
  70. package/dist/lib/core/evaluation-providers.d.ts +30 -0
  71. package/dist/lib/core/evaluation-providers.js +187 -0
  72. package/dist/lib/core/evaluation.d.ts +117 -0
  73. package/dist/lib/core/evaluation.js +528 -0
  74. package/dist/lib/core/factory.js +33 -26
  75. package/dist/lib/core/types.d.ts +165 -6
  76. package/dist/lib/core/types.js +3 -4
  77. package/dist/lib/index.d.ts +9 -4
  78. package/dist/lib/index.js +25 -4
  79. package/dist/lib/mcp/contracts/mcpContract.d.ts +118 -0
  80. package/dist/lib/mcp/contracts/mcpContract.js +5 -0
  81. package/dist/lib/mcp/dynamic-chain-executor.d.ts +201 -0
  82. package/dist/lib/mcp/dynamic-chain-executor.js +489 -0
  83. package/dist/lib/mcp/dynamic-orchestrator.d.ts +109 -0
  84. package/dist/lib/mcp/dynamic-orchestrator.js +351 -0
  85. package/dist/lib/mcp/error-manager.d.ts +254 -0
  86. package/dist/lib/mcp/error-manager.js +501 -0
  87. package/dist/lib/mcp/error-recovery.d.ts +158 -0
  88. package/dist/lib/mcp/error-recovery.js +405 -0
  89. package/dist/lib/mcp/function-calling.js +11 -3
  90. package/dist/lib/mcp/health-monitor.d.ts +256 -0
  91. package/dist/lib/mcp/health-monitor.js +621 -0
  92. package/dist/lib/mcp/logging.js +5 -0
  93. package/dist/lib/mcp/neurolink-mcp-client.js +2 -1
  94. package/dist/lib/mcp/orchestrator.d.ts +136 -5
  95. package/dist/lib/mcp/orchestrator.js +332 -16
  96. package/dist/lib/mcp/registry.d.ts +71 -16
  97. package/dist/lib/mcp/registry.js +104 -6
  98. package/dist/lib/mcp/semaphore-manager.d.ts +137 -0
  99. package/dist/lib/mcp/semaphore-manager.js +329 -0
  100. package/dist/lib/mcp/servers/ai-providers/ai-workflow-tools.d.ts +2 -2
  101. package/dist/lib/mcp/servers/ai-providers/ai-workflow-tools.js +5 -4
  102. package/dist/lib/mcp/session-manager.d.ts +186 -0
  103. package/dist/lib/mcp/session-manager.js +400 -0
  104. package/dist/lib/mcp/session-persistence.d.ts +93 -0
  105. package/dist/lib/mcp/session-persistence.js +298 -0
  106. package/dist/lib/mcp/tool-integration.js +1 -1
  107. package/dist/lib/mcp/tool-registry.d.ts +55 -34
  108. package/dist/lib/mcp/tool-registry.js +111 -97
  109. package/dist/lib/mcp/transport-manager.d.ts +153 -0
  110. package/dist/lib/mcp/transport-manager.js +330 -0
  111. package/dist/lib/mcp/unified-mcp.js +6 -1
  112. package/dist/lib/mcp/unified-registry.d.ts +54 -5
  113. package/dist/lib/mcp/unified-registry.js +139 -6
  114. package/dist/lib/neurolink.d.ts +101 -0
  115. package/dist/lib/neurolink.js +147 -1
  116. package/dist/lib/providers/agent-enhanced-provider.d.ts +11 -2
  117. package/dist/lib/providers/agent-enhanced-provider.js +86 -15
  118. package/dist/lib/providers/amazonBedrock.d.ts +9 -1
  119. package/dist/lib/providers/amazonBedrock.js +26 -2
  120. package/dist/lib/providers/analytics-helper.d.ts +53 -0
  121. package/dist/lib/providers/analytics-helper.js +151 -0
  122. package/dist/lib/providers/anthropic.d.ts +11 -1
  123. package/dist/lib/providers/anthropic.js +29 -4
  124. package/dist/lib/providers/azureOpenAI.d.ts +3 -1
  125. package/dist/lib/providers/azureOpenAI.js +28 -4
  126. package/dist/lib/providers/function-calling-provider.d.ts +9 -1
  127. package/dist/lib/providers/function-calling-provider.js +14 -1
  128. package/dist/lib/providers/googleAIStudio.d.ts +15 -1
  129. package/dist/lib/providers/googleAIStudio.js +32 -2
  130. package/dist/lib/providers/googleVertexAI.d.ts +9 -1
  131. package/dist/lib/providers/googleVertexAI.js +31 -2
  132. package/dist/lib/providers/huggingFace.d.ts +3 -1
  133. package/dist/lib/providers/huggingFace.js +26 -3
  134. package/dist/lib/providers/mcp-provider.d.ts +9 -1
  135. package/dist/lib/providers/mcp-provider.js +12 -0
  136. package/dist/lib/providers/mistralAI.d.ts +3 -1
  137. package/dist/lib/providers/mistralAI.js +25 -2
  138. package/dist/lib/providers/ollama.d.ts +3 -1
  139. package/dist/lib/providers/ollama.js +27 -4
  140. package/dist/lib/providers/openAI.d.ts +15 -1
  141. package/dist/lib/providers/openAI.js +32 -2
  142. package/dist/lib/proxy/proxy-fetch.js +8 -7
  143. package/dist/lib/services/streaming/streaming-manager.d.ts +29 -0
  144. package/dist/lib/services/streaming/streaming-manager.js +244 -0
  145. package/dist/lib/services/types.d.ts +155 -0
  146. package/dist/lib/services/types.js +2 -0
  147. package/dist/lib/services/websocket/websocket-server.d.ts +34 -0
  148. package/dist/lib/services/websocket/websocket-server.js +304 -0
  149. package/dist/lib/telemetry/index.d.ts +15 -0
  150. package/dist/lib/telemetry/index.js +22 -0
  151. package/dist/lib/telemetry/telemetry-service.d.ts +47 -0
  152. package/dist/lib/telemetry/telemetry-service.js +259 -0
  153. package/dist/lib/utils/streaming-utils.d.ts +67 -0
  154. package/dist/lib/utils/streaming-utils.js +201 -0
  155. package/dist/mcp/contracts/mcpContract.d.ts +118 -0
  156. package/dist/mcp/contracts/mcpContract.js +5 -0
  157. package/dist/mcp/dynamic-chain-executor.d.ts +201 -0
  158. package/dist/mcp/dynamic-chain-executor.js +489 -0
  159. package/dist/mcp/dynamic-orchestrator.d.ts +109 -0
  160. package/dist/mcp/dynamic-orchestrator.js +351 -0
  161. package/dist/mcp/error-manager.d.ts +254 -0
  162. package/dist/mcp/error-manager.js +501 -0
  163. package/dist/mcp/error-recovery.d.ts +158 -0
  164. package/dist/mcp/error-recovery.js +405 -0
  165. package/dist/mcp/function-calling.js +11 -3
  166. package/dist/mcp/health-monitor.d.ts +256 -0
  167. package/dist/mcp/health-monitor.js +621 -0
  168. package/dist/mcp/logging.js +5 -0
  169. package/dist/mcp/neurolink-mcp-client.js +2 -1
  170. package/dist/mcp/orchestrator.d.ts +136 -5
  171. package/dist/mcp/orchestrator.js +332 -16
  172. package/dist/mcp/plugins/core/neurolink-mcp.json +15 -15
  173. package/dist/mcp/registry.d.ts +71 -16
  174. package/dist/mcp/registry.js +104 -6
  175. package/dist/mcp/semaphore-manager.d.ts +137 -0
  176. package/dist/mcp/semaphore-manager.js +329 -0
  177. package/dist/mcp/servers/ai-providers/ai-workflow-tools.d.ts +2 -2
  178. package/dist/mcp/servers/ai-providers/ai-workflow-tools.js +5 -4
  179. package/dist/mcp/session-manager.d.ts +186 -0
  180. package/dist/mcp/session-manager.js +400 -0
  181. package/dist/mcp/session-persistence.d.ts +93 -0
  182. package/dist/mcp/session-persistence.js +299 -0
  183. package/dist/mcp/tool-integration.js +1 -1
  184. package/dist/mcp/tool-registry.d.ts +55 -34
  185. package/dist/mcp/tool-registry.js +111 -97
  186. package/dist/mcp/transport-manager.d.ts +153 -0
  187. package/dist/mcp/transport-manager.js +331 -0
  188. package/dist/mcp/unified-mcp.js +6 -1
  189. package/dist/mcp/unified-registry.d.ts +54 -5
  190. package/dist/mcp/unified-registry.js +139 -6
  191. package/dist/neurolink.d.ts +101 -0
  192. package/dist/neurolink.js +147 -1
  193. package/dist/providers/agent-enhanced-provider.d.ts +11 -2
  194. package/dist/providers/agent-enhanced-provider.js +86 -15
  195. package/dist/providers/amazonBedrock.d.ts +9 -1
  196. package/dist/providers/amazonBedrock.js +26 -2
  197. package/dist/providers/analytics-helper.d.ts +53 -0
  198. package/dist/providers/analytics-helper.js +151 -0
  199. package/dist/providers/anthropic.d.ts +11 -1
  200. package/dist/providers/anthropic.js +29 -4
  201. package/dist/providers/azureOpenAI.d.ts +3 -1
  202. package/dist/providers/azureOpenAI.js +29 -4
  203. package/dist/providers/function-calling-provider.d.ts +9 -1
  204. package/dist/providers/function-calling-provider.js +14 -1
  205. package/dist/providers/googleAIStudio.d.ts +15 -1
  206. package/dist/providers/googleAIStudio.js +32 -2
  207. package/dist/providers/googleVertexAI.d.ts +9 -1
  208. package/dist/providers/googleVertexAI.js +31 -2
  209. package/dist/providers/huggingFace.d.ts +3 -1
  210. package/dist/providers/huggingFace.js +26 -3
  211. package/dist/providers/mcp-provider.d.ts +9 -1
  212. package/dist/providers/mcp-provider.js +12 -0
  213. package/dist/providers/mistralAI.d.ts +3 -1
  214. package/dist/providers/mistralAI.js +25 -2
  215. package/dist/providers/ollama.d.ts +3 -1
  216. package/dist/providers/ollama.js +27 -4
  217. package/dist/providers/openAI.d.ts +15 -1
  218. package/dist/providers/openAI.js +33 -2
  219. package/dist/proxy/proxy-fetch.js +8 -7
  220. package/dist/services/streaming/streaming-manager.d.ts +29 -0
  221. package/dist/services/streaming/streaming-manager.js +244 -0
  222. package/dist/services/types.d.ts +155 -0
  223. package/dist/services/types.js +2 -0
  224. package/dist/services/websocket/websocket-server.d.ts +34 -0
  225. package/dist/services/websocket/websocket-server.js +304 -0
  226. package/dist/telemetry/index.d.ts +15 -0
  227. package/dist/telemetry/index.js +22 -0
  228. package/dist/telemetry/telemetry-service.d.ts +47 -0
  229. package/dist/telemetry/telemetry-service.js +261 -0
  230. package/dist/utils/streaming-utils.d.ts +67 -0
  231. package/dist/utils/streaming-utils.js +201 -0
  232. package/package.json +245 -228
@@ -0,0 +1,528 @@
1
+ /**
2
+ * NeuroLink Unified Evaluation System
3
+ *
4
+ * Combines Universal Evaluation with Lighthouse-Enhanced capabilities
5
+ * - Domain-aware evaluation with sophisticated context handling
6
+ * - Multi-provider support with fallback strategies
7
+ * - Structured output with Zod schema validation
8
+ * - Tool usage and conversation history analysis
9
+ * - Enterprise-grade reliability and performance
10
+ */
11
+ import { logger } from "../utils/logger.js";
12
+ import { AIProviderFactory } from "./factory.js";
13
+ import { z } from "zod";
14
// Fresh 0-10 numeric validator; each score field below starts from one of these.
const tenPointScore = () => z.number().min(0).max(10);
/**
 * Unified Evaluation Schema (Lighthouse-compatible with extensions).
 *
 * Shape of the structured object requested from the evaluator model:
 * three required core scores, three optional domain scores, and a
 * qualitative assessment (off-topic flag, reasoning, suggestions, severity).
 */
export const unifiedEvaluationSchema = z.object({
  // Core evaluation scores
  relevanceScore: tenPointScore().describe(
    "Score (0-10) for how well the response addresses query intent and aligns with domain/role. 10 is most relevant.",
  ),
  accuracyScore: tenPointScore().describe(
    "Score (0-10) for factual correctness against data, tool outputs, and domain knowledge. 10 is most accurate.",
  ),
  completenessScore: tenPointScore().describe(
    "Score (0-10) for how completely the response addresses the query. 10 is most complete.",
  ),
  // Enhanced domain scores (optional)
  domainAlignment: tenPointScore()
    .optional()
    .describe("Score (0-10) for how well response aligns with specified domain expertise."),
  terminologyAccuracy: tenPointScore()
    .optional()
    .describe("Score (0-10) for correct usage of domain-specific terminology."),
  toolEffectiveness: tenPointScore()
    .optional()
    .describe("Score (0-10) for how effectively available tools/MCPs were utilized."),
  // Qualitative assessment
  isOffTopic: z
    .boolean()
    .describe("True if the response significantly deviates from query/domain."),
  reasoning: z
    .string()
    .describe("Brief justification for scores, especially if low or off-topic. Max 150 words."),
  suggestedImprovements: z
    .string()
    .optional()
    .describe("Optional: Suggestions for improving the original response. Max 100 words."),
  alertSeverity: z
    .enum(["low", "medium", "high", "none"])
    .describe("Suggested alert severity considering all scores and domain context."),
});
68
/**
 * Main unified evaluation function.
 *
 * Picks an evaluation mode (explicit `context.mode`, otherwise detected from
 * the context), then asks an evaluator model to score the response. Each
 * attempt first tries structured output (`generateObject` with
 * `unifiedEvaluationSchema`) and, if that throws, falls back to
 * `generateText` with a JSON-only instruction. Failed attempts are retried
 * with exponential backoff (1s, 2s, 4s, ...).
 *
 * @param {object} context - Evaluation context (`userQuery`, `aiResponse`,
 *   and optional `mode`, `primaryDomain`, `toolsUsed`, `conversationHistory`, ...).
 * @returns {Promise<object>} The evaluation result, or the default
 *   "unavailable" evaluation when no model is available or all attempts fail.
 */
export async function performUnifiedEvaluation(context) {
  const functionTag = "performUnifiedEvaluation";
  const startTime = Date.now();
  // Determine evaluation mode
  const mode = context.mode || detectEvaluationMode(context);
  logger.debug(`[${functionTag}] Starting unified evaluation`, {
    mode,
    domain: context.primaryDomain,
    toolsUsed: context.toolsUsed?.length || 0,
    conversationTurns: context.conversationHistory?.length || 0,
    // Guarded like the fields above: a missing query/response must not make
    // this diagnostic log throw before any evaluation is attempted.
    queryLength: context.userQuery?.length ?? 0,
    responseLength: context.aiResponse?.length ?? 0,
  });
  const { parseEvaluationConfig } = await import("./evaluation-config.js");
  const config = parseEvaluationConfig();
  // Normalize the retry budget so a missing or non-numeric value still
  // yields one attempt instead of silently skipping the loop.
  const configuredRetries = Number(config?.retryAttempts);
  const retryAttempts = Number.isFinite(configuredRetries)
    ? Math.max(0, Math.floor(configuredRetries))
    : 0;
  let lastError = null;
  for (let attempt = 0; attempt <= retryAttempts; attempt++) {
    try {
      // Get evaluation model
      const evaluationModelResult = await getEvaluationModel();
      if (!evaluationModelResult) {
        logger.debug(`[${functionTag}] No evaluation model available, returning defaults`);
        return getDefaultUnifiedEvaluation("unavailable", Date.now() - startTime, context);
      }
      const { provider: evaluationModel, config: modelConfig } = evaluationModelResult;
      // Create evaluation prompt based on mode
      const evaluationPrompt = createUnifiedEvaluationPrompt(context, mode);
      logger.debug(`[${functionTag}] Using ${mode} evaluation mode`, {
        provider: modelConfig.providerName,
        model: modelConfig.modelName,
        attempt: attempt + 1,
      });
      // Try structured evaluation first (preferred)
      try {
        const structuredResult = await evaluationModel.generateObject({
          schema: unifiedEvaluationSchema,
          prompt: evaluationPrompt,
          temperature: 0.1,
          maxTokens: 1000,
          system: createUnifiedSystemPrompt(mode),
        });
        return processStructuredEvaluationResult(
          structuredResult.object,
          modelConfig,
          Date.now() - startTime,
          context,
          attempt + 1,
        );
      } catch (structuredError) {
        logger.warn(`[${functionTag}] Structured evaluation failed, using fallback`, { structuredError });
        // Fallback to legacy generateText
        const result = await evaluationModel.generateText({
          prompt: evaluationPrompt + "\n\nRespond with valid JSON only.",
          temperature: 0.1,
          maxTokens: 1000,
          systemPrompt: createUnifiedSystemPrompt(mode),
        });
        const responseText = result?.text || result?.content;
        if (!responseText) {
          throw new Error("No evaluation text received from fallback");
        }
        return parseUnifiedEvaluationResult(
          responseText,
          modelConfig,
          Date.now() - startTime,
          context,
          attempt + 1,
        );
      }
    } catch (error) {
      lastError = error instanceof Error ? error : new Error(String(error));
      logger.warn(`[${functionTag}] Evaluation attempt ${attempt + 1} failed:`, lastError.message);
      if (attempt === retryAttempts) {
        break;
      }
      // Exponential backoff
      await new Promise((resolve) => setTimeout(resolve, 2 ** attempt * 1000));
    }
  }
  // All attempts failed
  logger.error(`[${functionTag}] All evaluation attempts failed:`, lastError?.message);
  return getDefaultUnifiedEvaluation(lastError?.message || "unknown-error", Date.now() - startTime, context);
}
144
/**
 * Choose the evaluation mode implied by the fields present on the context.
 *
 * @param {object} context - Evaluation context.
 * @returns {"lighthouse"|"enhanced"|"simple"} "lighthouse" when a domain,
 *   used tools, or conversation history is present; "enhanced" when a
 *   non-empty `context.context` is present; otherwise "simple".
 */
function detectEvaluationMode(context) {
  const hasDomainSignals = Boolean(
    context.primaryDomain ||
      context.toolsUsed?.length ||
      context.conversationHistory?.length,
  );
  if (hasDomainSignals) {
    return "lighthouse";
  }
  const extraContext = context.context;
  const hasExtraContext = Boolean(extraContext) && Object.keys(extraContext).length > 0;
  return hasExtraContext ? "enhanced" : "simple";
}
161
/**
 * Build the evaluation prompt for the given mode.
 *
 * @param {object} context - Evaluation context passed through to the builder.
 * @param {string} mode - "lighthouse", "enhanced", or "simple"; any other
 *   value is treated as "simple".
 * @returns {string} The prompt text.
 */
function createUnifiedEvaluationPrompt(context, mode) {
  if (mode === "lighthouse") {
    return createLighthouseEvaluationPrompt(context);
  }
  if (mode === "enhanced") {
    return createEnhancedEvaluationPrompt(context);
  }
  // "simple" and every unrecognized mode share the basic prompt.
  return createSimpleEvaluationPrompt(context);
}
175
/**
 * Build the Lighthouse-style, domain-aware evaluation prompt.
 *
 * Fields left undefined on the context fall back to generic placeholders;
 * the conversation history is rendered through formatConversationHistory.
 *
 * @param {object} context - Evaluation context.
 * @returns {string} The prompt text.
 */
function createLighthouseEvaluationPrompt(context) {
  const {
    userQuery: query,
    aiResponse: answer,
    primaryDomain: domain = "general AI assistant",
    assistantRole: role = "AI assistant",
    toolContext: tools = "No specific tools used in this interaction",
    conversationHistory: turns = [],
  } = context;
  const historyText = formatConversationHistory(turns);
  const promptLines = [
    `You are an AI Response Evaluator with advanced domain awareness.`,
    ``,
    `**EVALUATION CONTEXT**:`,
    ``,
    `1. **Primary Assistant Domain**: "${domain}"`,
    `- This defines the AI assistant's core expertise area`,
    `- Responses should demonstrate competency within this domain`,
    `- Domain-specific terminology should be used accurately`,
    ``,
    `2. **Assistant Role**: "${role}"`,
    `- This defines the specific role the assistant should fulfill`,
    `- Responses should align with this role's responsibilities`,
    ``,
    `3. **Tool Usage Context**: "${tools}"`,
    `- Tools/MCPs are capabilities the assistant used to generate the response`,
    `- Evaluate how effectively these tools were utilized`,
    `- Consider if additional tools should have been used`,
    ``,
    `4. **Conversation History**:`,
    "```",
    `${historyText}`,
    "```",
    ``,
    `**CRITICAL DOMAIN FAILURE ASSESSMENT**:`,
    `Pay special attention to domain alignment. If the query is within the assistant's domain and sufficient context is available:`,
    `- Inability to answer ("I can't help", generic errors, evasions) = HIGH ALERT`,
    `- Incorrect domain-specific information = HIGH ALERT`,
    `- Misuse of domain terminology = MEDIUM-HIGH ALERT`,
    ``,
    `**EVALUATION CRITERIA**:`,
    `- **relevanceScore** (0-10): Direct query addressing + domain alignment`,
    `- **accuracyScore** (0-10): Factual correctness + terminology accuracy`,
    `- **completenessScore** (0-10): Full query addressing + appropriate depth`,
    `- **domainAlignment** (0-10): How well response fits the domain expertise`,
    `- **terminologyAccuracy** (0-10): Correct use of domain-specific terms`,
    `- **toolEffectiveness** (0-10): How well available tools were utilized`,
    `- **isOffTopic** (boolean): True if significantly deviates from domain/query`,
    `- **reasoning** (string): Brief explanation (max 150 words)`,
    `- **suggestedImprovements** (string): How to improve (max 100 words)`,
    `- **alertSeverity** ('low'|'medium'|'high'|'none'): Based on domain failure assessment`,
    ``,
    `**Current User Query**:`,
    `"${query}"`,
    ``,
    `**AI Assistant Response**:`,
    `"${answer}"`,
    ``,
    `Provide your assessment in the specified format.`,
  ];
  return promptLines.join("\n");
}
230
/**
 * Build the "enhanced" evaluation prompt.
 *
 * When `context.context` is truthy it is appended, pretty-printed as JSON,
 * directly after the response line.
 *
 * @param {object} context - Evaluation context (`userQuery`, `aiResponse`, `context`).
 * @returns {string} The prompt text.
 */
function createEnhancedEvaluationPrompt(context) {
  const { userQuery: query, aiResponse: answer, context: extra } = context;
  let contextSuffix = "";
  if (extra) {
    contextSuffix = `\nContext: ${JSON.stringify(extra, null, 2)}`;
  }
  const promptLines = [
    `Evaluate this AI response with enhanced criteria:`,
    ``,
    `Query: "${query}"`,
    `Response: "${answer}"${contextSuffix}`,
    ``,
    `Provide scores for:`,
    `- relevanceScore (0-10): How well the response addresses the query`,
    `- accuracyScore (0-10): Factual correctness and reliability`,
    `- completenessScore (0-10): Whether the response fully answers the question`,
    `- isOffTopic (boolean): Whether response deviates from query`,
    `- reasoning (string): Brief explanation of scores`,
    `- alertSeverity ('low'|'medium'|'high'|'none'): Overall quality assessment`,
    ``,
    `Respond in the specified format.`,
  ];
  return promptLines.join("\n");
}
253
/**
 * Build the minimal evaluation prompt: the question, the answer, and the
 * list of fields the evaluator should return.
 *
 * @param {object} context - Evaluation context (`userQuery`, `aiResponse`).
 * @returns {string} The prompt text.
 */
function createSimpleEvaluationPrompt(context) {
  const question = context.userQuery;
  const answer = context.aiResponse;
  return [
    `Rate this AI response:`,
    ``,
    `Q: "${question}"`,
    `A: "${answer}"`,
    ``,
    `Provide:`,
    `- relevanceScore (0-10)`,
    `- accuracyScore (0-10)`,
    `- completenessScore (0-10)`,
    `- reasoning (brief explanation)`,
    ``,
    `Respond in the specified format.`,
  ].join("\n");
}
271
/**
 * Build the system prompt for the evaluator model.
 *
 * @param {string} mode - "lighthouse", "enhanced", or "simple"; any other
 *   value gets the "simple" wording.
 * @returns {string} The shared base instruction followed by a mode-specific sentence.
 */
function createUnifiedSystemPrompt(mode) {
  const base = "You are an expert AI Response Evaluator. Respond with valid structured output only.";
  let focus;
  if (mode === "lighthouse") {
    focus = "Use advanced domain awareness and sophisticated context analysis for comprehensive evaluation.";
  } else if (mode === "enhanced") {
    focus = "Consider all provided context and metadata for thorough evaluation.";
  } else {
    focus = "Focus on core quality metrics: relevance, accuracy, and completeness.";
  }
  return `${base} ${focus}`;
}
286
/**
 * Normalize a raw evaluation object into the unified evaluation result.
 *
 * The raw object may come from schema-validated structured output or from
 * unvalidated JSON parsed out of a text response, so every score is coerced
 * to a finite number and clamped to 0-10 BEFORE the overall average is
 * computed. Averaging raw values let numeric strings be concatenated
 * (scores "8", "7", "9" produced an overall of 10) and let out-of-range
 * values skew the result.
 *
 * @param {object} result - Raw evaluation fields (scores, reasoning, ...).
 * @param {object} modelConfig - Evaluator info; `providerName` and `modelName` are read.
 * @param {number} evaluationTime - Elapsed time, passed through to the result.
 * @param {object} context - Evaluation context used for the metadata sections.
 * @param {number} attempt - 1-based attempt number that produced this result.
 * @returns {object} The normalized evaluation result.
 */
function processStructuredEvaluationResult(result, modelConfig, evaluationTime, context, attempt) {
  // Finite number clamped to [0, 10], or undefined when the value is not numeric.
  const toScore = (value) => {
    const numeric = typeof value === "string" && value.trim() !== "" ? Number(value) : value;
    if (typeof numeric !== "number" || !Number.isFinite(numeric)) {
      return undefined;
    }
    return Math.max(0, Math.min(10, numeric));
  };
  // Optional scores count as "not provided" when missing, non-numeric, or not
  // above zero (the same exclusion the overall average has always applied).
  const toOptionalScore = (value) => {
    const score = toScore(value);
    return score !== undefined && score > 0 ? score : undefined;
  };
  const relevance = toScore(result.relevanceScore) ?? 0;
  const accuracy = toScore(result.accuracyScore) ?? 0;
  const completeness = toScore(result.completenessScore) ?? 0;
  const domainAlignment = toOptionalScore(result.domainAlignment);
  const terminologyAccuracy = toOptionalScore(result.terminologyAccuracy);
  const toolEffectiveness = toOptionalScore(result.toolEffectiveness);
  // Calculate overall score: mean of the core scores plus any provided optional scores.
  const allScores = [
    relevance,
    accuracy,
    completeness,
    ...[domainAlignment, terminologyAccuracy, toolEffectiveness].filter((score) => score !== undefined),
  ];
  const overall = Math.round(allScores.reduce((sum, score) => sum + score, 0) / allScores.length);
  const roundOptional = (score) => (score === undefined ? undefined : Math.round(score));
  return {
    // Core scores
    relevanceScore: Math.round(relevance),
    accuracyScore: Math.round(accuracy),
    completenessScore: Math.round(completeness),
    overall,
    // Enhanced insights
    isOffTopic: result.isOffTopic || false,
    alertSeverity: result.alertSeverity || "none",
    reasoning: result.reasoning || "Evaluation completed successfully.",
    suggestedImprovements: result.suggestedImprovements,
    // Domain-specific scores (if available)
    domainAlignment: roundOptional(domainAlignment),
    terminologyAccuracy: roundOptional(terminologyAccuracy),
    toolEffectiveness: roundOptional(toolEffectiveness),
    // Context analysis
    contextUtilization: {
      conversationUsed: (context.conversationHistory?.length || 0) > 0,
      toolsUsed: (context.toolsUsed?.length || 0) > 0,
      domainKnowledgeUsed: !!context.primaryDomain,
    },
    // Enhanced metadata
    evaluationContext: {
      domain: context.primaryDomain || "general",
      toolsEvaluated: context.toolsUsed || [],
      conversationTurns: context.conversationHistory?.length || 0,
    },
    // Standard metadata
    evaluationModel: `${modelConfig.providerName}/${modelConfig.modelName}`,
    evaluationTime,
    evaluationProvider: modelConfig.providerName,
    evaluationAttempt: attempt,
    evaluationConfig: {
      mode: context.mode || "auto",
      // True when this result came from a retry rather than the first attempt.
      fallbackUsed: attempt > 1,
      costEstimate: 0,
    },
  };
}
348
/**
 * Parse an evaluation out of a free-text model response.
 *
 * Strategy:
 *  1. Strip Markdown code fences and parse the span from the first "{" to
 *     the last "}" as JSON. (A non-greedy match that stops at the first "}"
 *     truncates responses containing nested objects or braces in strings.)
 *  2. If no JSON object can be parsed, fall back to regex extraction of the
 *     three core scores instead of giving up.
 *  3. If anything throws unexpectedly, return the default "parse-error" evaluation.
 *
 * @param {string} evaluationText - Raw text returned by the evaluator model.
 * @param {object} modelConfig - Evaluator info; `providerName` and `modelName` are read.
 * @param {number} evaluationTime - Elapsed time, passed through to the result.
 * @param {object} context - Evaluation context.
 * @param {number} attempt - 1-based attempt number.
 * @returns {object} The evaluation result.
 */
function parseUnifiedEvaluationResult(evaluationText, modelConfig, evaluationTime, context, attempt) {
  try {
    // Clean and parse JSON
    const cleanText = evaluationText.trim().replace(/```json\s*|```\s*/g, "");
    const firstBrace = cleanText.indexOf("{");
    const lastBrace = cleanText.lastIndexOf("}");
    if (firstBrace !== -1 && lastBrace > firstBrace) {
      let parsed;
      try {
        parsed = JSON.parse(cleanText.slice(firstBrace, lastBrace + 1));
      } catch (jsonError) {
        // Malformed JSON is expected from some models; continue to the regex fallback.
        logger.debug("Evaluation response was not valid JSON, using regex fallback", {
          error: jsonError instanceof Error ? jsonError.message : String(jsonError),
        });
      }
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
        return processStructuredEvaluationResult(parsed, modelConfig, evaluationTime, context, attempt);
      }
    }
    // Fallback to regex parsing with improved patterns
    const relevanceMatch = evaluationText.match(/(?:relevance[Score"\s]*:?["\s]*(\d+)|Relevance["\s]*:?["\s]*(\d+)|relevance.*?(\d+))/i);
    const accuracyMatch = evaluationText.match(/(?:accuracy[Score"\s]*:?["\s]*(\d+)|Accuracy["\s]*:?["\s]*(\d+)|accuracy.*?(\d+))/i);
    const completenessMatch = evaluationText.match(/(?:completeness[Score"\s]*:?["\s]*(\d+)|Completeness["\s]*:?["\s]*(\d+)|completeness.*?(\d+))/i);
    // Extract a score clamped to 0-10.
    // NOTE(review): a metric that cannot be found defaults to 8, which reports a
    // fairly good score for an unparseable response. Kept for compatibility;
    // confirm this is the intended default.
    const extractScore = (match) => {
      const raw = match ? Number.parseInt(match[1] || match[2] || match[3], 10) : 8;
      return Math.max(0, Math.min(10, raw));
    };
    const relevance = extractScore(relevanceMatch);
    const accuracy = extractScore(accuracyMatch);
    const completeness = extractScore(completenessMatch);
    return {
      relevanceScore: relevance,
      accuracyScore: accuracy,
      completenessScore: completeness,
      // Averaged from the clamped values so `overall` also stays within 0-10.
      overall: Math.round((relevance + accuracy + completeness) / 3),
      isOffTopic: false,
      alertSeverity: "none",
      reasoning: "Parsed using regex fallback - response was not in expected JSON format.",
      evaluationModel: `${modelConfig.providerName}/${modelConfig.modelName}`,
      evaluationTime,
      evaluationProvider: modelConfig.providerName,
      evaluationAttempt: attempt,
      evaluationConfig: {
        mode: "fallback",
        fallbackUsed: true,
        costEstimate: 0,
      },
    };
  } catch (error) {
    logger.error("Failed to parse unified evaluation result", { error });
    return getDefaultUnifiedEvaluation("parse-error", evaluationTime, context);
  }
}
398
/**
 * Build the placeholder evaluation returned when no real evaluation could
 * be produced: all scores are zero and the alert severity is "high".
 *
 * @param {string} reason - Short failure reason embedded in `reasoning`.
 * @param {number} evaluationTime - Elapsed time, passed through to the result.
 * @param {object} context - Evaluation context used for the metadata sections.
 * @returns {object} The default evaluation result.
 */
function getDefaultUnifiedEvaluation(reason, evaluationTime, context) {
  const turnCount = context.conversationHistory?.length || 0;
  const toolCount = context.toolsUsed?.length || 0;
  const domain = context.primaryDomain;
  const zeroedScores = {
    relevanceScore: 0,
    accuracyScore: 0,
    completenessScore: 0,
    overall: 0,
  };
  return {
    ...zeroedScores,
    isOffTopic: false,
    alertSeverity: "high",
    reasoning: `Evaluation unavailable (${reason}). This may be due to missing API keys, network issues, or service unavailability.`,
    suggestedImprovements: "Check evaluation system configuration, API credentials, and network connectivity.",
    evaluationModel: "unavailable",
    evaluationTime,
    evaluationProvider: "none",
    evaluationAttempt: 0,
    evaluationConfig: {
      mode: "default",
      fallbackUsed: true,
      costEstimate: 0,
    },
    contextUtilization: {
      conversationUsed: turnCount > 0,
      toolsUsed: toolCount > 0,
      domainKnowledgeUsed: !!domain,
    },
    evaluationContext: {
      domain: domain || "unknown",
      toolsEvaluated: context.toolsUsed || [],
      conversationTurns: turnCount,
    },
  };
}
432
/**
 * Select an evaluator provider and model.
 *
 * Walks the provider fallback order from the evaluation config and returns
 * the first provider that can be created. When the configured model is
 * unset or "auto", the model is taken from the provider's `models` table
 * for the configured mode, falling back to its `fast` model.
 *
 * @returns {Promise<{provider: object, config: object}|null>} The provider
 *   with its resolved configuration, or null when no provider is usable.
 * @throws Rethrows a provider creation error when `fallbackEnabled` is
 *   falsy in the evaluation config.
 */
export async function getEvaluationModel() {
  const { parseEvaluationConfig, getProviderFallbackOrder } = await import("./evaluation-config.js");
  const { getProviderConfig } = await import("./evaluation-providers.js");
  const config = parseEvaluationConfig();
  const fallbackOrder = getProviderFallbackOrder(config);
  for (const providerName of fallbackOrder) {
    try {
      const providerConfig = getProviderConfig(providerName);
      if (!providerConfig) {
        continue;
      }
      let modelName = config.model;
      if (!modelName || modelName === "auto") {
        modelName = providerConfig.models[config.mode] || providerConfig.models.fast;
      }
      const provider = await AIProviderFactory.createProvider(providerName, modelName);
      if (provider) {
        return {
          provider,
          config: {
            providerName,
            modelName,
            providerConfig,
            evaluationConfig: config,
          },
        };
      }
    } catch (error) {
      if (!config.fallbackEnabled) {
        throw error;
      }
      // Best-effort fallback: record why this provider was skipped rather
      // than swallowing the error silently, then try the next one.
      logger.debug(`[getEvaluationModel] Provider "${providerName}" unavailable, trying next`, {
        error: error instanceof Error ? error.message : String(error),
      });
    }
  }
  return null;
}
473
/**
 * Format the most recent conversation turns for inclusion in a prompt.
 *
 * Keeps the last three messages, numbers them from 1, upper-cases the role,
 * and truncates each message to 200 characters followed by "...".
 * Tolerates malformed messages: a missing role is shown as "UNKNOWN", a
 * missing content as empty text, and non-string content (for example
 * structured content parts) is JSON-encoded instead of throwing.
 *
 * @param {Array<{role?: string, content?: unknown}>} [history] - Conversation messages.
 * @returns {string} One line per message, or a placeholder sentence when
 *   there is no history.
 */
function formatConversationHistory(history) {
  if (!Array.isArray(history) || history.length === 0) {
    return "No prior conversation context.";
  }
  const maxPreviewLength = 200;
  const toText = (content) => {
    if (typeof content === "string") {
      return content;
    }
    if (content == null) {
      return "";
    }
    try {
      // JSON.stringify returns undefined for functions/symbols, hence the ?? fallback.
      return JSON.stringify(content) ?? String(content);
    } catch {
      // Circular structures or BigInt values cannot be JSON-encoded.
      return String(content);
    }
  };
  return history
    .slice(-3) // Last 3 turns
    .map((msg, i) => {
      const role = String(msg?.role ?? "unknown").toUpperCase();
      const text = toText(msg?.content);
      const preview = text.length > maxPreviewLength
        ? `${text.substring(0, maxPreviewLength)}...`
        : text;
      return `${i + 1}. ${role}: ${preview}`;
    })
    .join("\n");
}
485
/**
 * Wrap the legacy (prompt, response, context) arguments in a "simple" mode
 * evaluation context (backward compatibility).
 *
 * @param {string} prompt - The user's query.
 * @param {string} response - The AI response to evaluate.
 * @param {object} [context] - Optional extra context, stored as-is.
 * @returns {object} The evaluation context.
 */
export function createSimpleEvaluationContext(prompt, response, context) {
  const evaluationContext = { userQuery: prompt, aiResponse: response };
  evaluationContext.context = context;
  evaluationContext.mode = "simple";
  return evaluationContext;
}
496
/**
 * Build a "lighthouse" mode evaluation context from a query, a response,
 * and optional domain, role, tool, and history details.
 *
 * @param {string} userQuery - The user's query.
 * @param {string} aiResponse - The AI response to evaluate.
 * @param {object} [options] - Optional `domain`, `role`, `toolsUsed`,
 *   `conversationHistory`, `sessionId`, and `context`.
 * @returns {object} The evaluation context; `toolContext` is a comma-separated
 *   summary of `toolsUsed`, or undefined when no tools were listed.
 */
export function createEnhancedEvaluationContext(userQuery, aiResponse, options = {}) {
  const { domain, role, toolsUsed, conversationHistory, sessionId, context } = options;
  let toolSummary;
  if (toolsUsed?.length) {
    toolSummary = `Tools used: ${toolsUsed.join(", ")}`;
  }
  return {
    userQuery,
    aiResponse,
    primaryDomain: domain,
    assistantRole: role,
    toolsUsed,
    toolContext: toolSummary,
    conversationHistory,
    sessionId,
    context,
    mode: "lighthouse",
  };
}
515
/**
 * Legacy compatibility wrapper for the old positional-argument signature.
 *
 * Maps the positional arguments onto a unified evaluation context and
 * delegates to performUnifiedEvaluation. The mode is "lighthouse" when an
 * evaluation domain is given, otherwise "simple".
 *
 * @param {string} prompt - The user's query.
 * @param {string} response - The AI response to evaluate.
 * @param {object} [context] - Optional extra context.
 * @param {string} [evaluationDomain] - Primary domain of the assistant.
 * @param {string} [toolUsageContext] - Description of tools used.
 * @param {Array} [conversationHistory] - Prior conversation messages.
 * @returns {Promise<object>} The evaluation result.
 */
export async function evaluateResponse(prompt, response, context, evaluationDomain, toolUsageContext, conversationHistory) {
  const mode = evaluationDomain ? "lighthouse" : "simple";
  return performUnifiedEvaluation({
    userQuery: prompt,
    aiResponse: response,
    context,
    primaryDomain: evaluationDomain,
    toolContext: toolUsageContext,
    conversationHistory,
    mode,
  });
}
@@ -81,31 +81,39 @@ export class AIProviderFactory {
81
81
  // error: dynamicError instanceof Error ? dynamicError.message : String(dynamicError),
82
82
  // });
83
83
  // }
84
- // Resolve dynamic model if available
85
- let resolvedModelName = modelName;
86
- if (!modelName || modelName === "default") {
87
- try {
88
- const normalizedProvider = this.normalizeProviderName(providerName);
89
- const dynamicModel = dynamicModelProvider.resolveModel(normalizedProvider, modelName || undefined);
90
- if (dynamicModel) {
91
- resolvedModelName = dynamicModel.id;
92
- logger.debug(`[${functionTag}] Resolved dynamic model`, {
93
- provider: normalizedProvider,
94
- requestedModel: modelName || "default",
95
- resolvedModel: resolvedModelName,
96
- displayName: dynamicModel.displayName,
97
- pricing: dynamicModel.pricing.input,
98
- });
99
- }
100
- }
101
- catch (resolveError) {
102
- logger.debug(`[${functionTag}] Dynamic model resolution failed, using fallback`, {
103
- error: resolveError instanceof Error
104
- ? resolveError.message
105
- : String(resolveError),
106
- });
107
- }
108
- }
84
+ // COMPREHENSIVE FIX: Disable dynamic model resolution completely until provider is fixed
85
+ // This prevents stale gemini-1.5-pro-latest from overriding correct gemini-2.5-pro defaults
86
+ const resolvedModelName = modelName;
87
+ // COMMENTED OUT: Dynamic model resolution causing 1.5 vs 2.5 Pro issues
88
+ // if (!modelName || modelName === "default") {
89
+ // try {
90
+ // const normalizedProvider = this.normalizeProviderName(providerName);
91
+ // const dynamicModel = dynamicModelProvider.resolveModel(
92
+ // normalizedProvider,
93
+ // modelName || undefined,
94
+ // );
95
+ // if (dynamicModel) {
96
+ // resolvedModelName = dynamicModel.id;
97
+ // logger.debug(`[${functionTag}] Resolved dynamic model`, {
98
+ // provider: normalizedProvider,
99
+ // requestedModel: modelName || "default",
100
+ // resolvedModel: resolvedModelName,
101
+ // displayName: dynamicModel.displayName,
102
+ // pricing: dynamicModel.pricing.input,
103
+ // });
104
+ // }
105
+ // } catch (resolveError) {
106
+ // logger.debug(
107
+ // `[${functionTag}] Dynamic model resolution failed, using fallback`,
108
+ // {
109
+ // error:
110
+ // resolveError instanceof Error
111
+ // ? resolveError.message
112
+ // : String(resolveError),
113
+ // },
114
+ // );
115
+ // }
116
+ // }
109
117
  let provider;
110
118
  switch (providerName.toLowerCase()) {
111
119
  case "vertex":