@juspay/neurolink 9.3.0 → 9.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +8 -8
- package/dist/cli/commands/config.d.ts +3 -3
- package/dist/cli/index.js +1 -0
- package/dist/index.d.ts +35 -0
- package/dist/index.js +17 -0
- package/dist/lib/agent/directTools.d.ts +5 -5
- package/dist/lib/index.d.ts +35 -0
- package/dist/lib/index.js +17 -0
- package/dist/lib/neurolink.d.ts +12 -1
- package/dist/lib/neurolink.js +265 -4
- package/dist/lib/server/utils/validation.d.ts +8 -8
- package/dist/lib/types/generateTypes.d.ts +28 -0
- package/dist/lib/types/index.d.ts +6 -0
- package/dist/lib/types/index.js +12 -0
- package/dist/lib/types/modelTypes.d.ts +2 -2
- package/dist/lib/types/streamTypes.d.ts +35 -0
- package/dist/lib/types/workflowTypes.d.ts +558 -0
- package/dist/lib/types/workflowTypes.js +32 -0
- package/dist/lib/workflow/LAYER-EXAMPLES.d.ts +13 -0
- package/dist/lib/workflow/LAYER-EXAMPLES.js +312 -0
- package/dist/lib/workflow/PROMPT-EXAMPLES.d.ts +117 -0
- package/dist/lib/workflow/PROMPT-EXAMPLES.js +246 -0
- package/dist/lib/workflow/config.d.ts +1569 -0
- package/dist/lib/workflow/config.js +399 -0
- package/dist/lib/workflow/core/ensembleExecutor.d.ts +56 -0
- package/dist/lib/workflow/core/ensembleExecutor.js +398 -0
- package/dist/lib/workflow/core/judgeScorer.d.ts +26 -0
- package/dist/lib/workflow/core/judgeScorer.js +527 -0
- package/dist/lib/workflow/core/responseConditioner.d.ts +22 -0
- package/dist/lib/workflow/core/responseConditioner.js +226 -0
- package/dist/lib/workflow/core/types/conditionerTypes.d.ts +7 -0
- package/dist/lib/workflow/core/types/conditionerTypes.js +8 -0
- package/dist/lib/workflow/core/types/ensembleTypes.d.ts +7 -0
- package/dist/lib/workflow/core/types/ensembleTypes.js +8 -0
- package/dist/lib/workflow/core/types/index.d.ts +7 -0
- package/dist/lib/workflow/core/types/index.js +8 -0
- package/dist/lib/workflow/core/types/judgeTypes.d.ts +7 -0
- package/dist/lib/workflow/core/types/judgeTypes.js +8 -0
- package/dist/lib/workflow/core/types/layerTypes.d.ts +7 -0
- package/dist/lib/workflow/core/types/layerTypes.js +8 -0
- package/dist/lib/workflow/core/types/registryTypes.d.ts +7 -0
- package/dist/lib/workflow/core/types/registryTypes.js +8 -0
- package/dist/lib/workflow/core/workflowRegistry.d.ts +73 -0
- package/dist/lib/workflow/core/workflowRegistry.js +305 -0
- package/dist/lib/workflow/core/workflowRunner.d.ts +115 -0
- package/dist/lib/workflow/core/workflowRunner.js +554 -0
- package/dist/lib/workflow/index.d.ts +36 -0
- package/dist/lib/workflow/index.js +51 -0
- package/dist/lib/workflow/types.d.ts +19 -0
- package/dist/lib/workflow/types.js +10 -0
- package/dist/lib/workflow/utils/types/index.d.ts +7 -0
- package/dist/lib/workflow/utils/types/index.js +8 -0
- package/dist/lib/workflow/utils/types/metricsTypes.d.ts +7 -0
- package/dist/lib/workflow/utils/types/metricsTypes.js +8 -0
- package/dist/lib/workflow/utils/types/validationTypes.d.ts +7 -0
- package/dist/lib/workflow/utils/types/validationTypes.js +8 -0
- package/dist/lib/workflow/utils/workflowMetrics.d.ts +76 -0
- package/dist/lib/workflow/utils/workflowMetrics.js +312 -0
- package/dist/lib/workflow/utils/workflowValidation.d.ts +29 -0
- package/dist/lib/workflow/utils/workflowValidation.js +421 -0
- package/dist/lib/workflow/workflows/adaptiveWorkflow.d.ts +72 -0
- package/dist/lib/workflow/workflows/adaptiveWorkflow.js +367 -0
- package/dist/lib/workflow/workflows/consensusWorkflow.d.ts +69 -0
- package/dist/lib/workflow/workflows/consensusWorkflow.js +193 -0
- package/dist/lib/workflow/workflows/fallbackWorkflow.d.ts +49 -0
- package/dist/lib/workflow/workflows/fallbackWorkflow.js +226 -0
- package/dist/lib/workflow/workflows/multiJudgeWorkflow.d.ts +70 -0
- package/dist/lib/workflow/workflows/multiJudgeWorkflow.js +352 -0
- package/dist/neurolink.d.ts +12 -1
- package/dist/neurolink.js +265 -4
- package/dist/types/generateTypes.d.ts +28 -0
- package/dist/types/index.d.ts +6 -0
- package/dist/types/index.js +12 -0
- package/dist/types/streamTypes.d.ts +35 -0
- package/dist/types/workflowTypes.d.ts +558 -0
- package/dist/types/workflowTypes.js +31 -0
- package/dist/workflow/LAYER-EXAMPLES.d.ts +13 -0
- package/dist/workflow/LAYER-EXAMPLES.js +311 -0
- package/dist/workflow/PROMPT-EXAMPLES.d.ts +117 -0
- package/dist/workflow/PROMPT-EXAMPLES.js +245 -0
- package/dist/workflow/config.d.ts +1569 -0
- package/dist/workflow/config.js +398 -0
- package/dist/workflow/core/ensembleExecutor.d.ts +56 -0
- package/dist/workflow/core/ensembleExecutor.js +397 -0
- package/dist/workflow/core/judgeScorer.d.ts +26 -0
- package/dist/workflow/core/judgeScorer.js +526 -0
- package/dist/workflow/core/responseConditioner.d.ts +22 -0
- package/dist/workflow/core/responseConditioner.js +225 -0
- package/dist/workflow/core/types/conditionerTypes.d.ts +7 -0
- package/dist/workflow/core/types/conditionerTypes.js +7 -0
- package/dist/workflow/core/types/ensembleTypes.d.ts +7 -0
- package/dist/workflow/core/types/ensembleTypes.js +7 -0
- package/dist/workflow/core/types/index.d.ts +7 -0
- package/dist/workflow/core/types/index.js +7 -0
- package/dist/workflow/core/types/judgeTypes.d.ts +7 -0
- package/dist/workflow/core/types/judgeTypes.js +7 -0
- package/dist/workflow/core/types/layerTypes.d.ts +7 -0
- package/dist/workflow/core/types/layerTypes.js +7 -0
- package/dist/workflow/core/types/registryTypes.d.ts +7 -0
- package/dist/workflow/core/types/registryTypes.js +7 -0
- package/dist/workflow/core/workflowRegistry.d.ts +73 -0
- package/dist/workflow/core/workflowRegistry.js +304 -0
- package/dist/workflow/core/workflowRunner.d.ts +115 -0
- package/dist/workflow/core/workflowRunner.js +553 -0
- package/dist/workflow/index.d.ts +36 -0
- package/dist/workflow/index.js +50 -0
- package/dist/workflow/types.d.ts +19 -0
- package/dist/workflow/types.js +9 -0
- package/dist/workflow/utils/types/index.d.ts +7 -0
- package/dist/workflow/utils/types/index.js +7 -0
- package/dist/workflow/utils/types/metricsTypes.d.ts +7 -0
- package/dist/workflow/utils/types/metricsTypes.js +7 -0
- package/dist/workflow/utils/types/validationTypes.d.ts +7 -0
- package/dist/workflow/utils/types/validationTypes.js +7 -0
- package/dist/workflow/utils/workflowMetrics.d.ts +76 -0
- package/dist/workflow/utils/workflowMetrics.js +311 -0
- package/dist/workflow/utils/workflowValidation.d.ts +29 -0
- package/dist/workflow/utils/workflowValidation.js +420 -0
- package/dist/workflow/workflows/adaptiveWorkflow.d.ts +72 -0
- package/dist/workflow/workflows/adaptiveWorkflow.js +366 -0
- package/dist/workflow/workflows/consensusWorkflow.d.ts +69 -0
- package/dist/workflow/workflows/consensusWorkflow.js +192 -0
- package/dist/workflow/workflows/fallbackWorkflow.d.ts +49 -0
- package/dist/workflow/workflows/fallbackWorkflow.js +225 -0
- package/dist/workflow/workflows/multiJudgeWorkflow.d.ts +70 -0
- package/dist/workflow/workflows/multiJudgeWorkflow.js +351 -0
- package/package.json +3 -2
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* workflow/utils/types/metricsTypes.ts
|
|
3
|
+
* Type definitions for workflow metrics utilities
|
|
4
|
+
*
|
|
5
|
+
* Re-exports from the central types folder for backward compatibility
|
|
6
|
+
*/
|
|
7
|
+
export type { WorkflowExecutionMetrics, SummaryStats, WorkflowComparison, } from "../../../types/workflowTypes.js";
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* workflow/utils/workflowMetrics.ts
|
|
3
|
+
* Metrics tracking and collection for workflow execution
|
|
4
|
+
*/
|
|
5
|
+
import type { JsonValue } from "../../types/common.js";
|
|
6
|
+
import type { EnsembleResponse, WorkflowResult } from "../types.js";
|
|
7
|
+
import type { SummaryStats, WorkflowComparison, WorkflowExecutionMetrics } from "./types/index.js";
|
|
8
|
+
/**
|
|
9
|
+
* Workflow metrics tracker
|
|
10
|
+
*/
|
|
11
|
+
// Ambient declaration of the metrics tracker implemented in workflowMetrics.js.
// The implementation stores everything in a single module-level Map, so all
// instances of this class read and write the same in-memory data.
export declare class WorkflowMetrics {
|
|
12
|
+
/**
|
|
13
|
+
* Record a workflow execution (counted as a success; running averages and total cost are updated)
|
|
14
|
+
*/
|
|
15
|
+
recordExecution(workflowId: string, result: WorkflowResult): void;
|
|
16
|
+
/**
|
|
17
|
+
* Record a workflow failure (increments the execution and failure counts; the error message is logged)
|
|
18
|
+
*/
|
|
19
|
+
recordFailure(workflowId: string, error: Error): void;
|
|
20
|
+
/**
|
|
21
|
+
* Get metrics for a specific workflow (undefined when nothing is stored for the id)
|
|
22
|
+
*/
|
|
23
|
+
getMetrics(workflowId: string): WorkflowExecutionMetrics | undefined;
|
|
24
|
+
/**
|
|
25
|
+
* Get all workflow metrics
|
|
26
|
+
*/
|
|
27
|
+
getAllMetrics(): WorkflowExecutionMetrics[];
|
|
28
|
+
/**
|
|
29
|
+
* Clear metrics for a workflow
|
|
30
|
+
*/
|
|
31
|
+
clearMetrics(workflowId: string): void;
|
|
32
|
+
/**
|
|
33
|
+
* Clear all metrics
|
|
34
|
+
*/
|
|
35
|
+
clearAllMetrics(): void;
|
|
36
|
+
/**
|
|
37
|
+
* Export all stored metrics as a JSON array string (pretty-printed with 2-space indentation)
|
|
38
|
+
*/
|
|
39
|
+
exportMetrics(): string;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Calculate model-specific metrics from ensemble responses
|
|
43
|
+
*/
|
|
44
|
+
// Result keys are `${provider}/${model}` strings. For each key, successRate is
// successful / total responses and avgResponseTime is the mean responseTime
// over all responses recorded for that key.
export declare function calculateModelMetrics(responses: EnsembleResponse[]): Record<string, {
|
|
45
|
+
successRate: number;
|
|
46
|
+
avgResponseTime: number;
|
|
47
|
+
}>;
|
|
48
|
+
/**
|
|
49
|
+
* Calculate consensus level between responses
|
|
50
|
+
* NOTE: Placeholder implementation - uses response length similarity
|
|
51
|
+
* TODO: Implement semantic similarity in Phase 2
|
|
52
|
+
*/
|
|
53
|
+
// Result is clamped to [0, 1]. Returns 1.0 when fewer than two responses have
// status "success", and 0 when the successful responses are all empty.
export declare function calculateConsensus(responses: EnsembleResponse[]): number;
|
|
54
|
+
/**
|
|
55
|
+
* Calculate confidence score from judge results and ensemble data
|
|
56
|
+
*/
|
|
57
|
+
// Precedence: a supplied judgeConfidence (clamped to [0, 1]) wins; otherwise a
// non-empty scores record is combined as 60% max + 40% average after dividing
// by 100; otherwise the fraction of responses with status "success" is used
// (0 for an empty list).
export declare function calculateConfidence(ensembleResponses: EnsembleResponse[], judgeConfidence?: number, scores?: Record<string, number>): number;
|
|
58
|
+
/**
|
|
59
|
+
* Format metrics for logging
|
|
60
|
+
* @param result - Workflow result to format
|
|
61
|
+
* @returns Formatted metrics as JSON-compatible record
|
|
62
|
+
*/
|
|
63
|
+
// Optional inputs that are absent (workflowType, judgeTime, consensus,
// selectedModel, totalTokens, estimatedCost) are emitted as null.
export declare function formatMetricsForLogging(result: WorkflowResult): Record<string, JsonValue>;
|
|
64
|
+
/**
|
|
65
|
+
* Generate summary statistics for multiple executions
|
|
66
|
+
* @param results - Array of workflow results to analyze
|
|
67
|
+
* @returns Summary statistics including averages and success rate
|
|
68
|
+
*/
|
|
69
|
+
// A result counts toward successRate when its score is greater than 0.
// An empty input yields all-zero statistics.
export declare function generateSummaryStats(results: WorkflowResult[]): SummaryStats;
|
|
70
|
+
/**
|
|
71
|
+
* Compare two workflows based on metrics
|
|
72
|
+
* @param workflow1Results - Results from first workflow
|
|
73
|
+
* @param workflow2Results - Results from second workflow
|
|
74
|
+
* @returns Comparison with stats for both workflows and winner determination
|
|
75
|
+
*/
|
|
76
|
+
// winner is "tie" when the two composite scores differ by less than 5,
// otherwise "workflow1" or "workflow2" depending on which composite is higher.
export declare function compareWorkflows(workflow1Results: WorkflowResult[], workflow2Results: WorkflowResult[]): WorkflowComparison;
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* workflow/utils/workflowMetrics.ts
|
|
3
|
+
* Metrics tracking and collection for workflow execution
|
|
4
|
+
*/
|
|
5
|
+
import { logger } from "../../utils/logger.js";
|
|
6
|
+
const functionTag = "WorkflowMetrics";
|
|
7
|
+
/**
|
|
8
|
+
* In-memory metrics storage (can be replaced with persistent storage)
|
|
9
|
+
*/
|
|
10
|
+
const metricsStore = new Map();
|
|
11
|
+
// ============================================================================
|
|
12
|
+
// METRICS COLLECTION
|
|
13
|
+
// ============================================================================
|
|
14
|
+
/**
 * Workflow metrics tracker.
 *
 * All methods operate on the module-level `metricsStore` map, so every
 * instance of this class reads and writes the same in-memory data.
 */
export class WorkflowMetrics {
    /**
     * Record a successful workflow execution.
     *
     * The running averages (execution time, score, confidence) are weighted by
     * the number of successful executions. `recordFailure` increments
     * `executionCount` without contributing a sample, so weighting by
     * `executionCount` would skew the averages as soon as a failure is stored.
     *
     * @param workflowId - Key under which the metrics are stored
     * @param result - Workflow result; `totalTime`, `score`, `confidence`,
     *   `cost` and `timestamp` are read from it
     */
    recordExecution(workflowId, result) {
        const existing = metricsStore.get(workflowId);
        if (!existing) {
            // First record for this workflow: each average is the sample itself
            metricsStore.set(workflowId, {
                workflowId,
                executionCount: 1,
                successCount: 1,
                failureCount: 0,
                averageExecutionTime: result.totalTime,
                averageScore: result.score,
                averageConfidence: result.confidence,
                totalCost: result.cost || 0,
                lastExecutionTime: result.timestamp,
            });
        }
        else {
            const priorSuccesses = existing.successCount;
            const successes = priorSuccesses + 1;
            // Incremental mean over successful executions only. When the entry
            // was created by a failure, priorSuccesses is 0 and the placeholder
            // average of 0 drops out of the result.
            const rollingMean = (previousMean, sample) => (previousMean * priorSuccesses + sample) / successes;
            metricsStore.set(workflowId, {
                ...existing,
                executionCount: existing.executionCount + 1,
                successCount: successes,
                averageExecutionTime: rollingMean(existing.averageExecutionTime, result.totalTime),
                averageScore: rollingMean(existing.averageScore, result.score),
                averageConfidence: rollingMean(existing.averageConfidence, result.confidence),
                totalCost: existing.totalCost + (result.cost || 0),
                lastExecutionTime: result.timestamp,
            });
        }
        logger.debug(`[${functionTag}] Recorded workflow execution`, {
            workflowId,
            totalExecutions: metricsStore.get(workflowId)?.executionCount,
        });
    }
    /**
     * Record a workflow failure.
     *
     * Increments the execution and failure counts and stamps the current time.
     * The averages and total cost are left untouched because a failure carries
     * no result to sample.
     *
     * @param workflowId - Key under which the metrics are stored
     * @param error - Failure cause; only its `message` is logged
     */
    recordFailure(workflowId, error) {
        const existing = metricsStore.get(workflowId);
        if (!existing) {
            // First record for this workflow is a failure: no samples yet
            metricsStore.set(workflowId, {
                workflowId,
                executionCount: 1,
                successCount: 0,
                failureCount: 1,
                averageExecutionTime: 0,
                averageScore: 0,
                averageConfidence: 0,
                totalCost: 0,
                lastExecutionTime: new Date().toISOString(),
            });
        }
        else {
            metricsStore.set(workflowId, {
                ...existing,
                executionCount: existing.executionCount + 1,
                failureCount: existing.failureCount + 1,
                lastExecutionTime: new Date().toISOString(),
            });
        }
        logger.warn(`[${functionTag}] Recorded workflow failure`, {
            workflowId,
            error: error.message,
            totalFailures: metricsStore.get(workflowId)?.failureCount,
        });
    }
    /**
     * Get metrics for a specific workflow.
     *
     * @param workflowId - Key to look up
     * @returns The stored metrics, or `undefined` when none exist for the id
     */
    getMetrics(workflowId) {
        return metricsStore.get(workflowId);
    }
    /**
     * Get all workflow metrics.
     *
     * @returns A new array holding every stored metrics entry
     */
    getAllMetrics() {
        return Array.from(metricsStore.values());
    }
    /**
     * Clear metrics for a workflow.
     *
     * @param workflowId - Key whose entry is removed
     */
    clearMetrics(workflowId) {
        metricsStore.delete(workflowId);
        logger.debug(`[${functionTag}] Cleared metrics`, { workflowId });
    }
    /**
     * Clear all metrics.
     */
    clearAllMetrics() {
        metricsStore.clear();
        logger.debug(`[${functionTag}] Cleared all metrics`);
    }
    /**
     * Export metrics as JSON.
     *
     * @returns All stored metrics serialized as a JSON array with 2-space
     *   indentation
     */
    exportMetrics() {
        const metrics = this.getAllMetrics();
        return JSON.stringify(metrics, null, 2);
    }
}
|
|
129
|
+
// ============================================================================
|
|
130
|
+
// ANALYTICS HELPERS
|
|
131
|
+
// ============================================================================
|
|
132
|
+
/**
 * Aggregate ensemble responses into per-model success rate and mean latency.
 *
 * Responses are grouped under the key `${provider}/${model}`.
 *
 * @param responses - Ensemble responses; `provider`, `model`, `status` and
 *   `responseTime` are read from each one
 * @returns Record keyed by `provider/model` with `successRate`
 *   (successful / total) and `avgResponseTime` (total time / total)
 */
export function calculateModelMetrics(responses) {
    // First pass: accumulate raw counters per provider/model pair
    const tallies = responses.reduce((byModel, response) => {
        const modelKey = `${response.provider}/${response.model}`;
        const prior = byModel.get(modelKey) || { total: 0, successful: 0, totalTime: 0 };
        const successIncrement = response.status === "success" ? 1 : 0;
        byModel.set(modelKey, {
            total: prior.total + 1,
            successful: prior.successful + successIncrement,
            totalTime: prior.totalTime + response.responseTime,
        });
        return byModel;
    }, new Map());
    // Second pass: turn the counters into rates
    const summary = {};
    for (const [modelKey, { total, successful, totalTime }] of tallies) {
        summary[modelKey] = {
            successRate: successful / total,
            avgResponseTime: totalTime / total,
        };
    }
    return summary;
}
|
|
159
|
+
/**
 * Estimate how strongly the successful responses agree with each other.
 *
 * NOTE: Placeholder implementation - agreement is approximated by how similar
 * the response lengths are (1 minus the coefficient of variation).
 * TODO: Implement semantic similarity in Phase 2
 *
 * @param responses - Ensemble responses; only those with status "success"
 *   are considered
 * @returns Consensus in [0, 1]; 1.0 when fewer than two responses succeeded,
 *   0 when every successful response is empty
 */
export function calculateConsensus(responses) {
    const succeeded = responses.filter((response) => response.status === "success");
    // Nothing to compare against: treated as perfect consensus
    if (succeeded.length < 2) {
        return 1.0;
    }
    const contentLengths = succeeded.map((response) => response.content.length);
    const sampleCount = contentLengths.length;
    let lengthSum = 0;
    for (const length of contentLengths) {
        lengthSum += length;
    }
    const meanLength = lengthSum / sampleCount;
    if (meanLength === 0) {
        logger.warn("[WorkflowMetrics] All responses have zero length - semantic similarity needed for accurate consensus");
        return 0;
    }
    let squaredDeviationSum = 0;
    for (const length of contentLengths) {
        squaredDeviationSum += (length - meanLength) ** 2;
    }
    const spread = Math.sqrt(squaredDeviationSum / sampleCount);
    // Lower relative spread means higher consensus; clamp into [0, 1]
    return Math.min(1, Math.max(0, 1 - spread / meanLength));
}
|
|
183
|
+
/**
 * Calculate a confidence score in [0, 1] from judge results and ensemble data.
 *
 * Sources are tried in order:
 * 1. `judgeConfidence`, when it is a non-NaN number (clamped to [0, 1]).
 *    Anything else, including NaN, is treated as "not provided".
 * 2. `scores`, when it has at least one entry: scores are read as 0-100
 *    values and combined as 60% of the maximum plus 40% of the average. The
 *    result is clamped so that scores outside 0-100 cannot push the
 *    confidence outside [0, 1].
 * 3. The fraction of ensemble responses with status "success"
 *    (0 when there are no responses).
 *
 * @param ensembleResponses - Ensemble responses used for the fallback
 * @param judgeConfidence - Optional confidence reported by the judge
 * @param scores - Optional judge scores keyed by an identifier
 * @returns Confidence between 0 and 1
 */
export function calculateConfidence(ensembleResponses, judgeConfidence, scores) {
    const clampToUnit = (value) => Math.min(1, Math.max(0, value));
    // If the judge provided a usable confidence, use it
    if (typeof judgeConfidence === "number" && !Number.isNaN(judgeConfidence)) {
        return clampToUnit(judgeConfidence);
    }
    // Calculate from judge scores
    const scoreValues = scores ? Object.values(scores) : [];
    if (scoreValues.length > 0) {
        const maxScore = Math.max(...scoreValues);
        const avgScore = scoreValues.reduce((sum, score) => sum + score, 0) / scoreValues.length;
        // Normalize 0-100 scores to 0-1
        const maxNormalized = maxScore / 100;
        const avgNormalized = avgScore / 100;
        // Combine max and average (weighted 60/40), keeping the result in range
        return clampToUnit(maxNormalized * 0.6 + avgNormalized * 0.4);
    }
    // Fallback: based on success rate
    if (ensembleResponses.length === 0) {
        return 0;
    }
    const successCount = ensembleResponses.filter((r) => r.status === "success").length;
    return successCount / ensembleResponses.length;
}
|
|
210
|
+
/**
 * Flatten a workflow result into a JSON-compatible record for log output.
 *
 * Optional inputs that are missing are reported as `null` so the record
 * always carries the same set of keys.
 *
 * @param result - Workflow result to format
 * @returns Flat record with timing, scoring, model and usage fields
 */
export function formatMetricsForLogging(result) {
    const { ensembleResponses, selectedResponse } = result;
    const succeeded = ensembleResponses.filter((response) => response.status === "success");
    // "provider/model" of the chosen response, when one was selected
    let selectedModel = null;
    if (selectedResponse) {
        selectedModel = `${selectedResponse.provider}/${selectedResponse.model}`;
    }
    return {
        workflowId: result.workflow,
        workflowType: result.analytics?.workflowType ?? null,
        totalTime: result.totalTime,
        ensembleTime: result.ensembleTime,
        judgeTime: result.judgeTime ?? null,
        score: result.score,
        reasoning: result.reasoning,
        confidence: result.confidence,
        consensus: result.consensus ?? null,
        modelsExecuted: ensembleResponses.length,
        modelsSuccessful: succeeded.length,
        selectedModel,
        totalTokens: result.usage?.totalTokens ?? null,
        estimatedCost: result.cost ?? null,
        timestamp: result.timestamp,
    };
}
|
|
236
|
+
/**
 * Summarize a batch of workflow results.
 *
 * A result is counted as successful when its score is greater than 0, and a
 * missing (falsy) cost contributes 0 to the total.
 *
 * @param results - Array of workflow results to analyze
 * @returns Execution count, mean score / confidence / execution time,
 *   success rate and summed cost; all zeros for an empty input
 */
export function generateSummaryStats(results) {
    const executions = results.length;
    if (executions === 0) {
        return {
            totalExecutions: 0,
            averageScore: 0,
            averageConfidence: 0,
            averageExecutionTime: 0,
            successRate: 0,
            totalCost: 0,
        };
    }
    // Accumulate every total in a single walk over the results
    let scoreSum = 0;
    let confidenceSum = 0;
    let timeSum = 0;
    let costSum = 0;
    let positiveScoreCount = 0;
    results.forEach((run) => {
        scoreSum += run.score;
        confidenceSum += run.confidence;
        timeSum += run.totalTime;
        costSum += run.cost || 0;
        if (run.score > 0) {
            positiveScoreCount += 1;
        }
    });
    return {
        totalExecutions: executions,
        averageScore: scoreSum / executions,
        averageConfidence: confidenceSum / executions,
        averageExecutionTime: timeSum / executions,
        successRate: positiveScoreCount / executions,
        totalCost: costSum,
    };
}
|
|
266
|
+
/**
 * Compare two workflows using a composite of their summary statistics.
 *
 * Each workflow's results are summarized with `generateSummaryStats` and
 * reduced to one composite number. Composites closer than 5 points are
 * reported as a tie.
 *
 * @param workflow1Results - Results from first workflow
 * @param workflow2Results - Results from second workflow
 * @returns Summary stats for both workflows, the winner ("workflow1",
 *   "workflow2" or "tie") and a short explanation
 */
export function compareWorkflows(workflow1Results, workflow2Results) {
    // Simple scoring: 40% quality (score), 30% confidence, 20% speed, 10% cost
    const compositeOf = (stats) => {
        let speedComponent = 0;
        if (stats.averageExecutionTime > 0) {
            speedComponent = (1 / stats.averageExecutionTime) * 10000 * 0.2;
        }
        return (stats.averageScore * 0.4 +
            stats.averageConfidence * 100 * 0.3 +
            speedComponent +
            (1 / (stats.totalCost + 1)) * 100 * 0.1);
    };
    const firstStats = generateSummaryStats(workflow1Results);
    const secondStats = generateSummaryStats(workflow2Results);
    const firstComposite = compositeOf(firstStats);
    const secondComposite = compositeOf(secondStats);
    const verdict = (winner, reasoning) => ({
        workflow1: firstStats,
        workflow2: secondStats,
        winner,
        reasoning,
    });
    if (Math.abs(firstComposite - secondComposite) < 5) {
        return verdict("tie", "Workflows perform similarly overall");
    }
    if (firstComposite > secondComposite) {
        return verdict("workflow1", `Workflow 1 scores higher (${firstComposite.toFixed(2)} vs ${secondComposite.toFixed(2)})`);
    }
    return verdict("workflow2", `Workflow 2 scores higher (${secondComposite.toFixed(2)} vs ${firstComposite.toFixed(2)})`);
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* workflow/utils/workflowValidation.ts
|
|
3
|
+
* Validation utilities for workflow configurations and execution
|
|
4
|
+
*/
|
|
5
|
+
import type { WorkflowConfig, WorkflowValidationResult } from "../types.js";
|
|
6
|
+
/**
|
|
7
|
+
* Comprehensive workflow validation
|
|
8
|
+
* @param config - Workflow configuration to validate
|
|
9
|
+
* @returns Validation result with errors and warnings
|
|
10
|
+
*/
|
|
11
|
+
// Declaration only: the implementation is in workflowValidation.js, which is
// not shown here. NOTE(review): which checks run, and whether this can throw
// instead of reporting through the result, should be confirmed there.
export declare function validateWorkflow(config: WorkflowConfig): WorkflowValidationResult;
|
|
12
|
+
/**
|
|
13
|
+
* Log validation results
|
|
14
|
+
* @param workflowId - ID of the workflow being validated
|
|
15
|
+
* @param result - Validation result to log
|
|
16
|
+
*/
|
|
17
|
+
// Declaration only: returns nothing, so its effect is presumably limited to
// log output — confirm against workflowValidation.js (not shown here).
export declare function logValidationResults(workflowId: string, result: WorkflowValidationResult): void;
|
|
18
|
+
/**
|
|
19
|
+
* Validate workflow at registration time
|
|
20
|
+
* @param config - Workflow configuration to validate for registration
|
|
21
|
+
* @returns Validation result with registration-specific checks
|
|
22
|
+
*/
|
|
23
|
+
// Declaration only. NOTE(review): how the registration-time checks differ from
// validateWorkflow is not visible here — confirm in workflowValidation.js.
export declare function validateForRegistration(config: WorkflowConfig): WorkflowValidationResult;
|
|
24
|
+
/**
|
|
25
|
+
* Validate workflow at execution time
|
|
26
|
+
* @param config - Workflow configuration to validate for execution
|
|
27
|
+
* @returns Validation result for execution-time checks
|
|
28
|
+
*/
|
|
29
|
+
// Declaration only. NOTE(review): how the execution-time checks differ from
// validateForRegistration is not visible here — confirm in workflowValidation.js.
export declare function validateForExecution(config: WorkflowConfig): WorkflowValidationResult;
|