@juspay/neurolink 5.0.0 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/CHANGELOG.md +20 -7
  2. package/README.md +160 -172
  3. package/dist/agent/direct-tools.d.ts +6 -6
  4. package/dist/chat/sse-handler.js +5 -4
  5. package/dist/chat/websocket-chat-handler.js +9 -9
  6. package/dist/cli/commands/config.d.ts +3 -3
  7. package/dist/cli/commands/mcp.js +9 -8
  8. package/dist/cli/commands/ollama.js +3 -3
  9. package/dist/cli/factories/command-factory.d.ts +18 -0
  10. package/dist/cli/factories/command-factory.js +183 -0
  11. package/dist/cli/index.js +105 -157
  12. package/dist/cli/utils/interactive-setup.js +2 -2
  13. package/dist/core/base-provider.d.ts +423 -0
  14. package/dist/core/base-provider.js +365 -0
  15. package/dist/core/constants.d.ts +1 -1
  16. package/dist/core/constants.js +1 -1
  17. package/dist/core/dynamic-models.d.ts +6 -6
  18. package/dist/core/evaluation.d.ts +19 -80
  19. package/dist/core/evaluation.js +185 -484
  20. package/dist/core/factory.d.ts +3 -3
  21. package/dist/core/factory.js +31 -91
  22. package/dist/core/service-registry.d.ts +47 -0
  23. package/dist/core/service-registry.js +112 -0
  24. package/dist/core/types.d.ts +49 -49
  25. package/dist/core/types.js +1 -0
  26. package/dist/factories/compatibility-factory.d.ts +20 -0
  27. package/dist/factories/compatibility-factory.js +69 -0
  28. package/dist/factories/provider-factory.d.ts +72 -0
  29. package/dist/factories/provider-factory.js +144 -0
  30. package/dist/factories/provider-generate-factory.d.ts +20 -0
  31. package/dist/factories/provider-generate-factory.js +87 -0
  32. package/dist/factories/provider-registry.d.ts +38 -0
  33. package/dist/factories/provider-registry.js +107 -0
  34. package/dist/index.d.ts +8 -5
  35. package/dist/index.js +5 -5
  36. package/dist/lib/agent/direct-tools.d.ts +6 -6
  37. package/dist/lib/chat/sse-handler.js +5 -4
  38. package/dist/lib/chat/websocket-chat-handler.js +9 -9
  39. package/dist/lib/core/base-provider.d.ts +423 -0
  40. package/dist/lib/core/base-provider.js +365 -0
  41. package/dist/lib/core/constants.d.ts +1 -1
  42. package/dist/lib/core/constants.js +1 -1
  43. package/dist/lib/core/dynamic-models.d.ts +6 -6
  44. package/dist/lib/core/evaluation.d.ts +19 -80
  45. package/dist/lib/core/evaluation.js +185 -484
  46. package/dist/lib/core/factory.d.ts +3 -3
  47. package/dist/lib/core/factory.js +30 -91
  48. package/dist/lib/core/service-registry.d.ts +47 -0
  49. package/dist/lib/core/service-registry.js +112 -0
  50. package/dist/lib/core/types.d.ts +49 -49
  51. package/dist/lib/core/types.js +1 -0
  52. package/dist/lib/factories/compatibility-factory.d.ts +20 -0
  53. package/dist/lib/factories/compatibility-factory.js +69 -0
  54. package/dist/lib/factories/provider-factory.d.ts +72 -0
  55. package/dist/lib/factories/provider-factory.js +144 -0
  56. package/dist/lib/factories/provider-generate-factory.d.ts +20 -0
  57. package/dist/lib/factories/provider-generate-factory.js +87 -0
  58. package/dist/lib/factories/provider-registry.d.ts +38 -0
  59. package/dist/lib/factories/provider-registry.js +107 -0
  60. package/dist/lib/index.d.ts +8 -5
  61. package/dist/lib/index.js +5 -5
  62. package/dist/lib/mcp/client.js +5 -5
  63. package/dist/lib/mcp/config.js +28 -3
  64. package/dist/lib/mcp/dynamic-orchestrator.js +8 -8
  65. package/dist/lib/mcp/external-client.js +2 -2
  66. package/dist/lib/mcp/factory.d.ts +1 -1
  67. package/dist/lib/mcp/factory.js +1 -1
  68. package/dist/lib/mcp/function-calling.js +1 -1
  69. package/dist/lib/mcp/initialize-tools.d.ts +1 -1
  70. package/dist/lib/mcp/initialize-tools.js +45 -1
  71. package/dist/lib/mcp/initialize.js +16 -6
  72. package/dist/lib/mcp/neurolink-mcp-client.js +10 -10
  73. package/dist/lib/mcp/orchestrator.js +4 -4
  74. package/dist/lib/mcp/servers/agent/direct-tools-server.d.ts +8 -0
  75. package/dist/lib/mcp/servers/agent/direct-tools-server.js +109 -0
  76. package/dist/lib/mcp/servers/ai-providers/ai-analysis-tools.js +10 -10
  77. package/dist/lib/mcp/servers/ai-providers/ai-core-server.js +8 -6
  78. package/dist/lib/mcp/servers/ai-providers/ai-workflow-tools.d.ts +2 -2
  79. package/dist/lib/mcp/servers/ai-providers/ai-workflow-tools.js +16 -16
  80. package/dist/lib/mcp/unified-registry.d.ts +4 -0
  81. package/dist/lib/mcp/unified-registry.js +42 -9
  82. package/dist/lib/neurolink.d.ts +161 -174
  83. package/dist/lib/neurolink.js +723 -397
  84. package/dist/lib/providers/amazon-bedrock.d.ts +32 -0
  85. package/dist/lib/providers/amazon-bedrock.js +143 -0
  86. package/dist/lib/providers/analytics-helper.js +7 -4
  87. package/dist/lib/providers/anthropic-baseprovider.d.ts +23 -0
  88. package/dist/lib/providers/anthropic-baseprovider.js +114 -0
  89. package/dist/lib/providers/anthropic.d.ts +19 -39
  90. package/dist/lib/providers/anthropic.js +84 -378
  91. package/dist/lib/providers/azure-openai.d.ts +20 -0
  92. package/dist/lib/providers/azure-openai.js +89 -0
  93. package/dist/lib/providers/function-calling-provider.d.ts +14 -12
  94. package/dist/lib/providers/function-calling-provider.js +114 -64
  95. package/dist/lib/providers/google-ai-studio.d.ts +23 -0
  96. package/dist/lib/providers/google-ai-studio.js +107 -0
  97. package/dist/lib/providers/google-vertex.d.ts +47 -0
  98. package/dist/lib/providers/google-vertex.js +205 -0
  99. package/dist/lib/providers/huggingFace.d.ts +33 -27
  100. package/dist/lib/providers/huggingFace.js +103 -400
  101. package/dist/lib/providers/index.d.ts +9 -9
  102. package/dist/lib/providers/index.js +9 -9
  103. package/dist/lib/providers/mcp-provider.d.ts +13 -8
  104. package/dist/lib/providers/mcp-provider.js +63 -18
  105. package/dist/lib/providers/mistral.d.ts +42 -0
  106. package/dist/lib/providers/mistral.js +160 -0
  107. package/dist/lib/providers/ollama.d.ts +52 -35
  108. package/dist/lib/providers/ollama.js +297 -477
  109. package/dist/lib/providers/openAI.d.ts +21 -21
  110. package/dist/lib/providers/openAI.js +81 -245
  111. package/dist/lib/sdk/tool-extension.d.ts +181 -0
  112. package/dist/lib/sdk/tool-extension.js +283 -0
  113. package/dist/lib/sdk/tool-registration.d.ts +95 -0
  114. package/dist/lib/sdk/tool-registration.js +167 -0
  115. package/dist/lib/types/generate-types.d.ts +80 -0
  116. package/dist/lib/types/generate-types.js +1 -0
  117. package/dist/lib/types/mcp-types.d.ts +116 -0
  118. package/dist/lib/types/mcp-types.js +5 -0
  119. package/dist/lib/types/stream-types.d.ts +95 -0
  120. package/dist/lib/types/stream-types.js +1 -0
  121. package/dist/lib/types/universal-provider-options.d.ts +87 -0
  122. package/dist/lib/types/universal-provider-options.js +53 -0
  123. package/dist/lib/utils/providerUtils-fixed.js +1 -1
  124. package/dist/lib/utils/streaming-utils.d.ts +14 -2
  125. package/dist/lib/utils/streaming-utils.js +0 -3
  126. package/dist/mcp/client.js +5 -5
  127. package/dist/mcp/config.js +28 -3
  128. package/dist/mcp/dynamic-orchestrator.js +8 -8
  129. package/dist/mcp/external-client.js +2 -2
  130. package/dist/mcp/factory.d.ts +1 -1
  131. package/dist/mcp/factory.js +1 -1
  132. package/dist/mcp/function-calling.js +1 -1
  133. package/dist/mcp/initialize-tools.d.ts +1 -1
  134. package/dist/mcp/initialize-tools.js +45 -1
  135. package/dist/mcp/initialize.js +16 -6
  136. package/dist/mcp/neurolink-mcp-client.js +10 -10
  137. package/dist/mcp/orchestrator.js +4 -4
  138. package/dist/mcp/servers/agent/direct-tools-server.d.ts +8 -0
  139. package/dist/mcp/servers/agent/direct-tools-server.js +109 -0
  140. package/dist/mcp/servers/ai-providers/ai-analysis-tools.js +10 -10
  141. package/dist/mcp/servers/ai-providers/ai-core-server.js +8 -6
  142. package/dist/mcp/servers/ai-providers/ai-workflow-tools.d.ts +2 -2
  143. package/dist/mcp/servers/ai-providers/ai-workflow-tools.js +16 -16
  144. package/dist/mcp/unified-registry.d.ts +4 -0
  145. package/dist/mcp/unified-registry.js +42 -9
  146. package/dist/neurolink.d.ts +161 -174
  147. package/dist/neurolink.js +723 -397
  148. package/dist/providers/amazon-bedrock.d.ts +32 -0
  149. package/dist/providers/amazon-bedrock.js +143 -0
  150. package/dist/providers/analytics-helper.js +7 -4
  151. package/dist/providers/anthropic-baseprovider.d.ts +23 -0
  152. package/dist/providers/anthropic-baseprovider.js +114 -0
  153. package/dist/providers/anthropic.d.ts +19 -39
  154. package/dist/providers/anthropic.js +83 -377
  155. package/dist/providers/azure-openai.d.ts +20 -0
  156. package/dist/providers/azure-openai.js +89 -0
  157. package/dist/providers/function-calling-provider.d.ts +14 -12
  158. package/dist/providers/function-calling-provider.js +114 -64
  159. package/dist/providers/google-ai-studio.d.ts +23 -0
  160. package/dist/providers/google-ai-studio.js +108 -0
  161. package/dist/providers/google-vertex.d.ts +47 -0
  162. package/dist/providers/google-vertex.js +205 -0
  163. package/dist/providers/huggingFace.d.ts +33 -27
  164. package/dist/providers/huggingFace.js +102 -399
  165. package/dist/providers/index.d.ts +9 -9
  166. package/dist/providers/index.js +9 -9
  167. package/dist/providers/mcp-provider.d.ts +13 -8
  168. package/dist/providers/mcp-provider.js +63 -18
  169. package/dist/providers/mistral.d.ts +42 -0
  170. package/dist/providers/mistral.js +160 -0
  171. package/dist/providers/ollama.d.ts +52 -35
  172. package/dist/providers/ollama.js +297 -476
  173. package/dist/providers/openAI.d.ts +21 -21
  174. package/dist/providers/openAI.js +81 -246
  175. package/dist/sdk/tool-extension.d.ts +181 -0
  176. package/dist/sdk/tool-extension.js +283 -0
  177. package/dist/sdk/tool-registration.d.ts +95 -0
  178. package/dist/sdk/tool-registration.js +168 -0
  179. package/dist/types/generate-types.d.ts +80 -0
  180. package/dist/types/generate-types.js +1 -0
  181. package/dist/types/mcp-types.d.ts +116 -0
  182. package/dist/types/mcp-types.js +5 -0
  183. package/dist/types/stream-types.d.ts +95 -0
  184. package/dist/types/stream-types.js +1 -0
  185. package/dist/types/universal-provider-options.d.ts +87 -0
  186. package/dist/types/universal-provider-options.js +53 -0
  187. package/dist/utils/providerUtils-fixed.js +1 -1
  188. package/dist/utils/streaming-utils.d.ts +14 -2
  189. package/dist/utils/streaming-utils.js +0 -3
  190. package/package.json +15 -10
  191. package/dist/lib/providers/agent-enhanced-provider.d.ts +0 -89
  192. package/dist/lib/providers/agent-enhanced-provider.js +0 -614
  193. package/dist/lib/providers/amazonBedrock.d.ts +0 -19
  194. package/dist/lib/providers/amazonBedrock.js +0 -334
  195. package/dist/lib/providers/azureOpenAI.d.ts +0 -39
  196. package/dist/lib/providers/azureOpenAI.js +0 -436
  197. package/dist/lib/providers/googleAIStudio.d.ts +0 -49
  198. package/dist/lib/providers/googleAIStudio.js +0 -333
  199. package/dist/lib/providers/googleVertexAI.d.ts +0 -38
  200. package/dist/lib/providers/googleVertexAI.js +0 -519
  201. package/dist/lib/providers/mistralAI.d.ts +0 -34
  202. package/dist/lib/providers/mistralAI.js +0 -294
  203. package/dist/providers/agent-enhanced-provider.d.ts +0 -89
  204. package/dist/providers/agent-enhanced-provider.js +0 -614
  205. package/dist/providers/amazonBedrock.d.ts +0 -19
  206. package/dist/providers/amazonBedrock.js +0 -334
  207. package/dist/providers/azureOpenAI.d.ts +0 -39
  208. package/dist/providers/azureOpenAI.js +0 -437
  209. package/dist/providers/googleAIStudio.d.ts +0 -49
  210. package/dist/providers/googleAIStudio.js +0 -333
  211. package/dist/providers/googleVertexAI.d.ts +0 -38
  212. package/dist/providers/googleVertexAI.js +0 -519
  213. package/dist/providers/mistralAI.d.ts +0 -34
  214. package/dist/providers/mistralAI.js +0 -294
@@ -1,528 +1,229 @@
1
1
  /**
2
2
  * NeuroLink Unified Evaluation System
3
- *
4
- * Combines Universal Evaluation with Lighthouse-Enhanced capabilities
5
- * - Domain-aware evaluation with sophisticated context handling
6
- * - Multi-provider support with fallback strategies
7
- * - Structured output with Zod schema validation
8
- * - Tool usage and conversation history analysis
9
- * - Enterprise-grade reliability and performance
10
3
  */
11
4
  import { logger } from "../utils/logger.js";
12
5
  import { AIProviderFactory } from "./factory.js";
13
6
  import { z } from "zod";
14
- /**
15
- * Unified Evaluation Schema (Lighthouse-compatible with extensions)
16
- */
17
- export const unifiedEvaluationSchema = z.object({
18
- // Core evaluation scores
19
- relevanceScore: z
20
- .number()
21
- .min(0)
22
- .max(10)
23
- .describe("Score (0-10) for how well the response addresses query intent and aligns with domain/role. 10 is most relevant."),
24
- accuracyScore: z
25
- .number()
26
- .min(0)
27
- .max(10)
28
- .describe("Score (0-10) for factual correctness against data, tool outputs, and domain knowledge. 10 is most accurate."),
29
- completenessScore: z
30
- .number()
31
- .min(0)
32
- .max(10)
33
- .describe("Score (0-10) for how completely the response addresses the query. 10 is most complete."),
34
- // Enhanced domain scores (optional)
35
- domainAlignment: z
36
- .number()
37
- .min(0)
38
- .max(10)
39
- .optional()
40
- .describe("Score (0-10) for how well response aligns with specified domain expertise."),
41
- terminologyAccuracy: z
42
- .number()
43
- .min(0)
44
- .max(10)
45
- .optional()
46
- .describe("Score (0-10) for correct usage of domain-specific terminology."),
47
- toolEffectiveness: z
48
- .number()
49
- .min(0)
50
- .max(10)
51
- .optional()
52
- .describe("Score (0-10) for how effectively available tools/MCPs were utilized."),
53
- // Qualitative assessment
54
- isOffTopic: z
55
- .boolean()
56
- .describe("True if the response significantly deviates from query/domain."),
57
- reasoning: z
58
- .string()
59
- .describe("Brief justification for scores, especially if low or off-topic. Max 150 words."),
60
- suggestedImprovements: z
61
- .string()
62
- .optional()
63
- .describe("Optional: Suggestions for improving the original response. Max 100 words."),
64
- alertSeverity: z
65
- .enum(["low", "medium", "high", "none"])
66
- .describe("Suggested alert severity considering all scores and domain context."),
7
+ import { ProviderRegistry } from "../factories/provider-registry.js";
8
+ // Zod schema for validation
9
+ const UnifiedEvaluationSchema = z.object({
10
+ relevance: z.number().min(1).max(10),
11
+ accuracy: z.number().min(1).max(10),
12
+ completeness: z.number().min(1).max(10),
13
+ overall: z.number().min(1).max(10),
14
+ domainAlignment: z.number().min(1).max(10).optional(),
15
+ terminologyAccuracy: z.number().min(1).max(10).optional(),
16
+ toolEffectiveness: z.number().min(1).max(10).optional(),
67
17
  });
68
18
  /**
69
- * Main unified evaluation function
19
+ * Get default evaluation when evaluation fails
70
20
  */
71
- export async function performUnifiedEvaluation(context) {
72
- const functionTag = "performUnifiedEvaluation";
73
- const startTime = Date.now();
74
- // Determine evaluation mode
75
- const mode = context.mode || detectEvaluationMode(context);
76
- logger.debug(`[${functionTag}] Starting unified evaluation`, {
77
- mode,
78
- domain: context.primaryDomain,
79
- toolsUsed: context.toolsUsed?.length || 0,
80
- conversationTurns: context.conversationHistory?.length || 0,
81
- queryLength: context.userQuery.length,
82
- responseLength: context.aiResponse.length,
21
+ function getDefaultUnifiedEvaluation(reason, evaluationTime, context) {
22
+ const functionTag = "getDefaultUnifiedEvaluation";
23
+ logger.debug(`[${functionTag}] Creating default evaluation`, {
24
+ reason,
25
+ evaluationTime,
26
+ hasContext: !!context,
83
27
  });
84
- const { parseEvaluationConfig } = await import("./evaluation-config.js");
85
- const config = parseEvaluationConfig();
86
- let lastError = null;
87
- for (let attempt = 0; attempt <= config.retryAttempts; attempt++) {
88
- try {
89
- // Get evaluation model
90
- const evaluationModelResult = await getEvaluationModel();
91
- if (!evaluationModelResult) {
92
- logger.debug(`[${functionTag}] No evaluation model available, returning defaults`);
93
- return getDefaultUnifiedEvaluation("unavailable", Date.now() - startTime, context);
94
- }
95
- const { provider: evaluationModel, config: modelConfig } = evaluationModelResult;
96
- // Create evaluation prompt based on mode
97
- const evaluationPrompt = createUnifiedEvaluationPrompt(context, mode);
98
- logger.debug(`[${functionTag}] Using ${mode} evaluation mode`, {
99
- provider: modelConfig.providerName,
100
- model: modelConfig.modelName,
101
- attempt: attempt + 1,
102
- });
103
- // Try structured evaluation first (preferred)
104
- try {
105
- const structuredResult = await evaluationModel.generateObject({
106
- schema: unifiedEvaluationSchema,
107
- prompt: evaluationPrompt,
108
- temperature: 0.1,
109
- maxTokens: 1000,
110
- system: createUnifiedSystemPrompt(mode),
111
- });
112
- return processStructuredEvaluationResult(structuredResult.object, modelConfig, Date.now() - startTime, context, attempt + 1);
113
- }
114
- catch (structuredError) {
115
- logger.warn(`[${functionTag}] Structured evaluation failed, using fallback`, { structuredError });
116
- // Fallback to legacy generateText
117
- const result = await evaluationModel.generateText({
118
- prompt: evaluationPrompt + "\n\nRespond with valid JSON only.",
119
- temperature: 0.1,
120
- maxTokens: 1000,
121
- systemPrompt: createUnifiedSystemPrompt(mode),
122
- });
123
- const responseText = result?.text || result?.content;
124
- if (!responseText) {
125
- throw new Error("No evaluation text received from fallback");
126
- }
127
- return parseUnifiedEvaluationResult(responseText, modelConfig, Date.now() - startTime, context, attempt + 1);
128
- }
129
- }
130
- catch (error) {
131
- lastError = error instanceof Error ? error : new Error(String(error));
132
- logger.warn(`[${functionTag}] Evaluation attempt ${attempt + 1} failed:`, lastError.message);
133
- if (attempt === config.retryAttempts) {
134
- break;
135
- }
136
- // Exponential backoff
137
- await new Promise((resolve) => setTimeout(resolve, Math.pow(2, attempt) * 1000));
138
- }
139
- }
140
- // All attempts failed
141
- logger.error(`[${functionTag}] All evaluation attempts failed:`, lastError?.message);
142
- return getDefaultUnifiedEvaluation(lastError?.message || "unknown-error", Date.now() - startTime, context);
143
- }
144
- /**
145
- * Detect appropriate evaluation mode based on context
146
- */
147
- function detectEvaluationMode(context) {
148
- // Lighthouse mode: Has domain awareness, tool context, or conversation history
149
- if (context.primaryDomain ||
150
- context.toolsUsed?.length ||
151
- context.conversationHistory?.length) {
152
- return "lighthouse";
153
- }
154
- // Enhanced mode: Has rich context
155
- if (context.context && Object.keys(context.context).length > 0) {
156
- return "enhanced";
157
- }
158
- // Simple mode: Basic evaluation
159
- return "simple";
160
- }
161
- /**
162
- * Create unified evaluation prompt based on mode
163
- */
164
- function createUnifiedEvaluationPrompt(context, mode) {
165
- switch (mode) {
166
- case "lighthouse":
167
- return createLighthouseEvaluationPrompt(context);
168
- case "enhanced":
169
- return createEnhancedEvaluationPrompt(context);
170
- case "simple":
171
- default:
172
- return createSimpleEvaluationPrompt(context);
173
- }
174
- }
175
- /**
176
- * Create Lighthouse-style domain-aware evaluation prompt
177
- */
178
- function createLighthouseEvaluationPrompt(context) {
179
- const { userQuery, aiResponse, primaryDomain = "general AI assistant", assistantRole = "AI assistant", toolContext = "No specific tools used in this interaction", conversationHistory = [], } = context;
180
- const formattedHistory = formatConversationHistory(conversationHistory);
181
- return `You are an AI Response Evaluator with advanced domain awareness.
182
-
183
- **EVALUATION CONTEXT**:
184
-
185
- 1. **Primary Assistant Domain**: "${primaryDomain}"
186
- - This defines the AI assistant's core expertise area
187
- - Responses should demonstrate competency within this domain
188
- - Domain-specific terminology should be used accurately
189
-
190
- 2. **Assistant Role**: "${assistantRole}"
191
- - This defines the specific role the assistant should fulfill
192
- - Responses should align with this role's responsibilities
193
-
194
- 3. **Tool Usage Context**: "${toolContext}"
195
- - Tools/MCPs are capabilities the assistant used to generate the response
196
- - Evaluate how effectively these tools were utilized
197
- - Consider if additional tools should have been used
198
-
199
- 4. **Conversation History**:
200
- \`\`\`
201
- ${formattedHistory}
202
- \`\`\`
203
-
204
- **CRITICAL DOMAIN FAILURE ASSESSMENT**:
205
- Pay special attention to domain alignment. If the query is within the assistant's domain and sufficient context is available:
206
- - Inability to answer ("I can't help", generic errors, evasions) = HIGH ALERT
207
- - Incorrect domain-specific information = HIGH ALERT
208
- - Misuse of domain terminology = MEDIUM-HIGH ALERT
209
-
210
- **EVALUATION CRITERIA**:
211
- - **relevanceScore** (0-10): Direct query addressing + domain alignment
212
- - **accuracyScore** (0-10): Factual correctness + terminology accuracy
213
- - **completenessScore** (0-10): Full query addressing + appropriate depth
214
- - **domainAlignment** (0-10): How well response fits the domain expertise
215
- - **terminologyAccuracy** (0-10): Correct use of domain-specific terms
216
- - **toolEffectiveness** (0-10): How well available tools were utilized
217
- - **isOffTopic** (boolean): True if significantly deviates from domain/query
218
- - **reasoning** (string): Brief explanation (max 150 words)
219
- - **suggestedImprovements** (string): How to improve (max 100 words)
220
- - **alertSeverity** ('low'|'medium'|'high'|'none'): Based on domain failure assessment
221
-
222
- **Current User Query**:
223
- "${userQuery}"
224
-
225
- **AI Assistant Response**:
226
- "${aiResponse}"
227
-
228
- Provide your assessment in the specified format.`;
229
- }
230
- /**
231
- * Create enhanced evaluation prompt
232
- */
233
- function createEnhancedEvaluationPrompt(context) {
234
- const { userQuery, aiResponse, context: additionalContext } = context;
235
- const contextInfo = additionalContext
236
- ? `\nContext: ${JSON.stringify(additionalContext, null, 2)}`
237
- : "";
238
- return `Evaluate this AI response with enhanced criteria:
239
-
240
- Query: "${userQuery}"
241
- Response: "${aiResponse}"${contextInfo}
242
-
243
- Provide scores for:
244
- - relevanceScore (0-10): How well the response addresses the query
245
- - accuracyScore (0-10): Factual correctness and reliability
246
- - completenessScore (0-10): Whether the response fully answers the question
247
- - isOffTopic (boolean): Whether response deviates from query
248
- - reasoning (string): Brief explanation of scores
249
- - alertSeverity ('low'|'medium'|'high'|'none'): Overall quality assessment
250
-
251
- Respond in the specified format.`;
252
- }
253
- /**
254
- * Create simple evaluation prompt
255
- */
256
- function createSimpleEvaluationPrompt(context) {
257
- const { userQuery, aiResponse } = context;
258
- return `Rate this AI response:
259
-
260
- Q: "${userQuery}"
261
- A: "${aiResponse}"
262
-
263
- Provide:
264
- - relevanceScore (0-10)
265
- - accuracyScore (0-10)
266
- - completenessScore (0-10)
267
- - reasoning (brief explanation)
268
-
269
- Respond in the specified format.`;
270
- }
271
- /**
272
- * Create unified system prompt based on mode
273
- */
274
- function createUnifiedSystemPrompt(mode) {
275
- const basePrompt = "You are an expert AI Response Evaluator. Respond with valid structured output only.";
276
- switch (mode) {
277
- case "lighthouse":
278
- return `${basePrompt} Use advanced domain awareness and sophisticated context analysis for comprehensive evaluation.`;
279
- case "enhanced":
280
- return `${basePrompt} Consider all provided context and metadata for thorough evaluation.`;
281
- case "simple":
282
- default:
283
- return `${basePrompt} Focus on core quality metrics: relevance, accuracy, and completeness.`;
284
- }
285
- }
286
- /**
287
- * Process structured evaluation result
288
- */
289
- function processStructuredEvaluationResult(result, modelConfig, evaluationTime, context, attempt) {
290
- // Calculate overall score
291
- const coreScores = [
292
- result.relevanceScore || 0,
293
- result.accuracyScore || 0,
294
- result.completenessScore || 0,
295
- ];
296
- const enhancedScores = [
297
- result.domainAlignment,
298
- result.terminologyAccuracy,
299
- result.toolEffectiveness,
300
- ].filter((score) => typeof score === "number" && score > 0);
301
- const allScores = [...coreScores, ...enhancedScores];
302
- const overall = Math.round(allScores.reduce((sum, score) => sum + score, 0) / allScores.length);
303
28
  return {
304
- // Core scores
305
- relevanceScore: Math.max(0, Math.min(10, Math.round(result.relevanceScore || 0))),
306
- accuracyScore: Math.max(0, Math.min(10, Math.round(result.accuracyScore || 0))),
307
- completenessScore: Math.max(0, Math.min(10, Math.round(result.completenessScore || 0))),
308
- overall: Math.max(0, Math.min(10, overall)),
309
- // Enhanced insights
310
- isOffTopic: result.isOffTopic || false,
311
- alertSeverity: result.alertSeverity || "none",
312
- reasoning: result.reasoning || "Evaluation completed successfully.",
313
- suggestedImprovements: result.suggestedImprovements,
314
- // Domain-specific scores (if available)
315
- domainAlignment: result.domainAlignment
316
- ? Math.max(0, Math.min(10, Math.round(result.domainAlignment)))
317
- : undefined,
318
- terminologyAccuracy: result.terminologyAccuracy
319
- ? Math.max(0, Math.min(10, Math.round(result.terminologyAccuracy)))
320
- : undefined,
321
- toolEffectiveness: result.toolEffectiveness
322
- ? Math.max(0, Math.min(10, Math.round(result.toolEffectiveness)))
323
- : undefined,
324
- // Context analysis
29
+ relevance: 1,
30
+ accuracy: 1,
31
+ completeness: 1,
32
+ overall: 1,
33
+ domainAlignment: 1,
34
+ terminologyAccuracy: 1,
35
+ toolEffectiveness: 1,
36
+ isOffTopic: false,
37
+ alertSeverity: "low",
38
+ reasoning: `Default evaluation used due to: ${reason}`,
325
39
  contextUtilization: {
326
- conversationUsed: (context.conversationHistory?.length || 0) > 0,
327
- toolsUsed: (context.toolsUsed?.length || 0) > 0,
328
- domainKnowledgeUsed: !!context.primaryDomain,
40
+ conversationUsed: false,
41
+ toolsUsed: false,
42
+ domainKnowledgeUsed: false,
329
43
  },
330
- // Enhanced metadata
331
44
  evaluationContext: {
332
45
  domain: context.primaryDomain || "general",
333
- toolsEvaluated: context.toolsUsed || [],
334
- conversationTurns: context.conversationHistory?.length || 0,
46
+ toolsEvaluated: [],
47
+ conversationTurns: 0,
335
48
  },
336
- // Standard metadata
337
- evaluationModel: `${modelConfig.providerName}/${modelConfig.modelName}`,
49
+ evaluationModel: "default",
338
50
  evaluationTime,
339
- evaluationProvider: modelConfig.providerName,
340
- evaluationAttempt: attempt,
51
+ evaluationProvider: "default",
52
+ evaluationAttempt: 1,
341
53
  evaluationConfig: {
342
- mode: context.mode || "auto",
343
- fallbackUsed: attempt > 1,
54
+ mode: "fallback",
55
+ fallbackUsed: true,
344
56
  costEstimate: 0,
345
57
  },
346
58
  };
347
59
  }
348
60
  /**
349
- * Parse evaluation result from text response
61
+ * Parse unified evaluation result from text response
350
62
  */
351
- function parseUnifiedEvaluationResult(evaluationText, modelConfig, evaluationTime, context, attempt) {
63
+ function parseUnifiedEvaluationResult(response, context) {
64
+ const functionTag = "parseUnifiedEvaluationResult";
352
65
  try {
353
- // Clean and parse JSON
354
- const cleanText = evaluationText.trim().replace(/```json\s*|```\s*/g, "");
355
- const jsonMatch = cleanText.match(/\{[^]*?\}/s);
66
+ logger.debug(`[${functionTag}] Parsing evaluation response`, {
67
+ responseLength: response.length,
68
+ });
69
+ // Try JSON parsing first
70
+ const jsonMatch = response.match(/\{[^}]*\}/s);
356
71
  if (jsonMatch) {
357
- const parsed = JSON.parse(jsonMatch[0]);
358
- return processStructuredEvaluationResult(parsed, modelConfig, evaluationTime, context, attempt);
72
+ try {
73
+ const parsed = JSON.parse(jsonMatch[0]);
74
+ return parsed;
75
+ }
76
+ catch (e) {
77
+ logger.debug(`[${functionTag}] JSON parsing failed, trying regex`);
78
+ }
79
+ }
80
+ // Fallback to regex parsing
81
+ const result = {};
82
+ const patterns = {
83
+ relevance: /relevance[:\s]*([0-9]+(?:\.[0-9]+)?)/i,
84
+ accuracy: /accuracy[:\s]*([0-9]+(?:\.[0-9]+)?)/i,
85
+ completeness: /completeness[:\s]*([0-9]+(?:\.[0-9]+)?)/i,
86
+ overall: /overall[:\s]*([0-9]+(?:\.[0-9]+)?)/i,
87
+ };
88
+ for (const [key, pattern] of Object.entries(patterns)) {
89
+ const match = response.match(pattern);
90
+ if (match) {
91
+ const value = parseFloat(match[1]);
92
+ if (value >= 1 && value <= 10) {
93
+ result[key] = Math.round(value);
94
+ }
95
+ }
359
96
  }
360
- // Fallback to regex parsing with improved patterns
361
- const relevanceMatch = evaluationText.match(/(?:relevance[Score"\s]*:?["\s]*(\d+)|Relevance["\s]*:?["\s]*(\d+)|relevance.*?(\d+))/i);
362
- const accuracyMatch = evaluationText.match(/(?:accuracy[Score"\s]*:?["\s]*(\d+)|Accuracy["\s]*:?["\s]*(\d+)|accuracy.*?(\d+))/i);
363
- const completenessMatch = evaluationText.match(/(?:completeness[Score"\s]*:?["\s]*(\d+)|Completeness["\s]*:?["\s]*(\d+)|completeness.*?(\d+))/i);
364
- // Extract scores with fallback to default values
365
- const relevance = relevanceMatch
366
- ? parseInt(relevanceMatch[1] || relevanceMatch[2] || relevanceMatch[3], 10)
367
- : 8; // Default fallback score
368
- const accuracy = accuracyMatch
369
- ? parseInt(accuracyMatch[1] || accuracyMatch[2] || accuracyMatch[3], 10)
370
- : 8; // Default fallback score
371
- const completeness = completenessMatch
372
- ? parseInt(completenessMatch[1] || completenessMatch[2] || completenessMatch[3], 10)
373
- : 8; // Default fallback score
97
+ // Ensure minimum valid scores
374
98
  return {
375
- relevanceScore: Math.max(0, Math.min(10, relevance)),
376
- accuracyScore: Math.max(0, Math.min(10, accuracy)),
377
- completenessScore: Math.max(0, Math.min(10, completeness)),
378
- overall: Math.round((relevance + accuracy + completeness) / 3),
379
- isOffTopic: false,
380
- alertSeverity: "none",
381
- reasoning: "Parsed using regex fallback - response was not in expected JSON format.",
382
- evaluationModel: `${modelConfig.providerName}/${modelConfig.modelName}`,
383
- evaluationTime,
384
- evaluationProvider: modelConfig.providerName,
385
- evaluationAttempt: attempt,
386
- evaluationConfig: {
387
- mode: "fallback",
388
- fallbackUsed: true,
389
- costEstimate: 0,
390
- },
99
+ relevance: result.relevance || 1,
100
+ accuracy: result.accuracy || 1,
101
+ completeness: result.completeness || 1,
102
+ overall: result.overall || 1,
391
103
  };
392
104
  }
393
105
  catch (error) {
394
- logger.error("Failed to parse unified evaluation result", { error });
395
- return getDefaultUnifiedEvaluation("parse-error", evaluationTime, context);
106
+ logger.error(`[${functionTag}] Failed to parse evaluation result`, {
107
+ error,
108
+ });
109
+ return {
110
+ relevance: 1,
111
+ accuracy: 1,
112
+ completeness: 1,
113
+ overall: 1,
114
+ };
396
115
  }
397
116
  }
398
117
  /**
399
- * Get default evaluation when evaluation fails
400
- */
401
- function getDefaultUnifiedEvaluation(reason, evaluationTime, context) {
402
- return {
403
- relevanceScore: 0,
404
- accuracyScore: 0,
405
- completenessScore: 0,
406
- overall: 0,
407
- isOffTopic: false,
408
- alertSeverity: "high",
409
- reasoning: `Evaluation unavailable (${reason}). This may be due to missing API keys, network issues, or service unavailability.`,
410
- suggestedImprovements: "Check evaluation system configuration, API credentials, and network connectivity.",
411
- evaluationModel: "unavailable",
412
- evaluationTime,
413
- evaluationProvider: "none",
414
- evaluationAttempt: 0,
415
- evaluationConfig: {
416
- mode: "default",
417
- fallbackUsed: true,
418
- costEstimate: 0,
419
- },
420
- contextUtilization: {
421
- conversationUsed: (context.conversationHistory?.length || 0) > 0,
422
- toolsUsed: (context.toolsUsed?.length || 0) > 0,
423
- domainKnowledgeUsed: !!context.primaryDomain,
424
- },
425
- evaluationContext: {
426
- domain: context.primaryDomain || "unknown",
427
- toolsEvaluated: context.toolsUsed || [],
428
- conversationTurns: context.conversationHistory?.length || 0,
429
- },
430
- };
431
- }
432
- /**
433
- * Enhanced evaluation model selection
118
+ * Main unified evaluation function
434
119
  */
435
- export async function getEvaluationModel() {
436
- const { parseEvaluationConfig, getProviderFallbackOrder } = await import("./evaluation-config.js");
437
- const { getProviderConfig } = await import("./evaluation-providers.js");
438
- const config = parseEvaluationConfig();
439
- const fallbackOrder = getProviderFallbackOrder(config);
440
- for (const providerName of fallbackOrder) {
441
- try {
442
- const providerConfig = getProviderConfig(providerName);
443
- if (!providerConfig) {
444
- continue;
445
- }
446
- let modelName = config.model;
447
- if (modelName === "auto" || !config.model) {
448
- modelName =
449
- providerConfig.models[config.mode] || providerConfig.models.fast;
450
- }
451
- const provider = await AIProviderFactory.createProvider(providerName, modelName);
452
- if (provider) {
453
- return {
454
- provider,
455
- config: {
456
- providerName,
457
- modelName,
458
- providerConfig,
459
- evaluationConfig: config,
460
- },
461
- };
462
- }
120
+ export async function generateUnifiedEvaluation(context) {
121
+ const functionTag = "generateUnifiedEvaluation";
122
+ const startTime = Date.now();
123
+ logger.debug(`[${functionTag}] Starting evaluation`, {
124
+ hasUserQuery: !!context.userQuery,
125
+ hasAiResponse: !!context.aiResponse,
126
+ domain: context.primaryDomain,
127
+ });
128
+ try {
129
+ // Ensure providers are registered
130
+ await ProviderRegistry.registerAllProviders();
131
+ // Get evaluation provider
132
+ const evaluationProvider = process.env.NEUROLINK_EVALUATION_PROVIDER || "google-ai";
133
+ const evaluationModel = process.env.NEUROLINK_EVALUATION_MODEL || "gemini-2.5-flash";
134
+ logger.debug(`[${functionTag}] Using provider: ${evaluationProvider}, model: ${evaluationModel}`);
135
+ const provider = await AIProviderFactory.createProvider(evaluationProvider, evaluationModel);
136
+ if (!provider) {
137
+ logger.debug(`[${functionTag}] No evaluation provider available, returning defaults`);
138
+ return getDefaultUnifiedEvaluation("no-provider", Date.now() - startTime, context);
463
139
  }
464
- catch (error) {
465
- if (!config.fallbackEnabled) {
466
- throw error;
467
- }
468
- continue;
140
+ // Create evaluation prompt
141
+ const prompt = `
142
+ Evaluate this AI response on a scale of 1-10 for each criterion:
143
+
144
+ User Query: ${context.userQuery}
145
+ AI Response: ${context.aiResponse}
146
+
147
+ Rate on these criteria (1-10 scale):
148
+ - Relevance: How well does the response address the user's question?
149
+ - Accuracy: How factually correct and precise is the information?
150
+ - Completeness: How thoroughly does it cover the topic?
151
+ - Overall: General quality assessment
152
+
153
+ Respond in this exact format:
154
+ Relevance: [score]
155
+ Accuracy: [score]
156
+ Completeness: [score]
157
+ Overall: [score]
158
+ `;
159
+ // Generate evaluation
160
+ const result = await provider.generate(prompt);
161
+ if (!result) {
162
+ logger.debug(`[${functionTag}] No response from provider`);
163
+ return getDefaultUnifiedEvaluation("no-response", Date.now() - startTime, context);
469
164
  }
165
+ // Extract text from result
166
+ const response = typeof result === "string"
167
+ ? result
168
+ : result.text || String(result);
169
+ // Parse evaluation result
170
+ const parsed = parseUnifiedEvaluationResult(response, context);
171
+ // Validate and enhance result
172
+ const validatedResult = {
173
+ ...parsed,
174
+ evaluationModel: `${evaluationProvider}/${evaluationModel}`,
175
+ evaluationTime: Date.now() - startTime,
176
+ evaluationProvider,
177
+ evaluationAttempt: 1,
178
+ evaluationConfig: {
179
+ mode: "standard",
180
+ fallbackUsed: false,
181
+ costEstimate: 0.001, // Rough estimate
182
+ },
183
+ };
184
+ logger.debug(`[${functionTag}] Evaluation completed`, {
185
+ relevance: validatedResult.relevance,
186
+ accuracy: validatedResult.accuracy,
187
+ completeness: validatedResult.completeness,
188
+ overall: validatedResult.overall,
189
+ evaluationTime: validatedResult.evaluationTime,
190
+ });
191
+ return validatedResult;
470
192
  }
471
- return null;
472
- }
473
- /**
474
- * Format conversation history for evaluation
475
- */
476
- function formatConversationHistory(history) {
477
- if (!history?.length) {
478
- return "No prior conversation context.";
193
+ catch (error) {
194
+ logger.error(`[${functionTag}] Evaluation failed`, {
195
+ error: error instanceof Error ? error.message : String(error),
196
+ });
197
+ return getDefaultUnifiedEvaluation(error instanceof Error ? error.message : "unknown-error", Date.now() - startTime, context);
479
198
  }
480
- return history
481
- .slice(-3) // Last 3 turns
482
- .map((msg, i) => `${i + 1}. ${msg.role.toUpperCase()}: ${msg.content.substring(0, 200)}${msg.content.length > 200 ? "..." : ""}`)
483
- .join("\n");
484
- }
485
- /**
486
- * Create simple evaluation context (backward compatibility)
487
- */
488
- export function createSimpleEvaluationContext(prompt, response, context) {
489
- return {
490
- userQuery: prompt,
491
- aiResponse: response,
492
- context,
493
- mode: "simple",
494
- };
495
199
  }
496
- /**
497
- * Create enhanced evaluation context
498
- */
499
- export function createEnhancedEvaluationContext(userQuery, aiResponse, options = {}) {
500
- return {
501
- userQuery,
200
+ // Legacy compatibility function with flexible arguments
201
+ export async function evaluateResponse(responseOrContext, contextOrUserQuery, userQuery, providedContexts, options, additionalArgs) {
202
+ // Handle different call patterns for backward compatibility
203
+ let aiResponse;
204
+ let context;
205
+ if (typeof responseOrContext === "string") {
206
+ // Normal call: evaluateResponse(response, context, ...)
207
+ aiResponse = responseOrContext;
208
+ context = contextOrUserQuery;
209
+ }
210
+ else {
211
+ // Provider call pattern: evaluateResponse(contextObject, userQuery, ...)
212
+ context = responseOrContext;
213
+ aiResponse =
214
+ context?.aiResponse ||
215
+ context?.response ||
216
+ String(contextOrUserQuery || "");
217
+ }
218
+ const evalContext = {
219
+ userQuery: userQuery ||
220
+ context?.userQuery ||
221
+ contextOrUserQuery ||
222
+ "Generated response",
502
223
  aiResponse,
503
- primaryDomain: options.domain,
504
- assistantRole: options.role,
505
- toolsUsed: options.toolsUsed,
506
- toolContext: options.toolsUsed?.length
507
- ? `Tools used: ${options.toolsUsed.join(", ")}`
508
- : undefined,
509
- conversationHistory: options.conversationHistory,
510
- sessionId: options.sessionId,
511
- context: options.context,
512
- mode: "lighthouse",
513
- };
514
- }
515
- // Legacy compatibility wrapper for old function signature
516
- export async function evaluateResponse(prompt, response, context, evaluationDomain, toolUsageContext, conversationHistory) {
517
- // Convert old arguments to new context format
518
- const unifiedContext = {
519
- userQuery: prompt,
520
- aiResponse: response,
521
224
  context,
522
- primaryDomain: evaluationDomain,
523
- toolContext: toolUsageContext,
524
- conversationHistory: conversationHistory,
525
- mode: evaluationDomain ? "lighthouse" : "simple",
526
225
  };
527
- return performUnifiedEvaluation(unifiedContext);
226
+ return generateUnifiedEvaluation(evalContext);
528
227
  }
228
+ // Export additional utilities
229
+ export { getDefaultUnifiedEvaluation, parseUnifiedEvaluationResult };