@vfarcic/dot-ai 0.115.0 → 0.117.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -18
- package/dist/core/ai-provider-factory.d.ts +4 -2
- package/dist/core/ai-provider-factory.d.ts.map +1 -1
- package/dist/core/ai-provider-factory.js +17 -6
- package/dist/core/capability-operations.js +1 -1
- package/dist/core/generic-session-manager.d.ts +67 -0
- package/dist/core/generic-session-manager.d.ts.map +1 -0
- package/dist/core/generic-session-manager.js +192 -0
- package/dist/core/pattern-operations.js +1 -1
- package/dist/core/providers/noop-provider.d.ts +47 -0
- package/dist/core/providers/noop-provider.d.ts.map +1 -0
- package/dist/core/providers/noop-provider.js +63 -0
- package/dist/core/schema.d.ts.map +1 -1
- package/dist/core/schema.js +13 -13
- package/dist/core/session-utils.d.ts +3 -6
- package/dist/core/session-utils.d.ts.map +1 -1
- package/dist/core/session-utils.js +5 -13
- package/dist/core/shared-prompt-loader.d.ts +15 -3
- package/dist/core/shared-prompt-loader.d.ts.map +1 -1
- package/dist/core/shared-prompt-loader.js +67 -14
- package/dist/core/unified-creation-session.d.ts +3 -10
- package/dist/core/unified-creation-session.d.ts.map +1 -1
- package/dist/core/unified-creation-session.js +34 -75
- package/dist/core/unified-creation-types.d.ts +31 -22
- package/dist/core/unified-creation-types.d.ts.map +1 -1
- package/dist/evaluation/eval-runner.js +12 -3
- package/dist/evaluation/evaluators/base-comparative.d.ts +2 -0
- package/dist/evaluation/evaluators/base-comparative.d.ts.map +1 -1
- package/dist/evaluation/evaluators/base-comparative.js +13 -1
- package/dist/evaluation/graph-generator.d.ts +56 -0
- package/dist/evaluation/graph-generator.d.ts.map +1 -0
- package/dist/evaluation/graph-generator.js +694 -0
- package/dist/evaluation/metadata-loader.d.ts +39 -0
- package/dist/evaluation/metadata-loader.d.ts.map +1 -0
- package/dist/evaluation/metadata-loader.js +74 -0
- package/dist/evaluation/platform-synthesizer.d.ts +5 -1
- package/dist/evaluation/platform-synthesizer.d.ts.map +1 -1
- package/dist/evaluation/platform-synthesizer.js +65 -23
- package/dist/evaluation/run-platform-synthesis.js +22 -5
- package/dist/interfaces/mcp.d.ts.map +1 -1
- package/dist/interfaces/mcp.js +9 -34
- package/dist/tools/answer-question.d.ts.map +1 -1
- package/dist/tools/answer-question.js +12 -12
- package/dist/tools/choose-solution.js +1 -1
- package/dist/tools/generate-manifests.d.ts.map +1 -1
- package/dist/tools/generate-manifests.js +9 -10
- package/dist/tools/index.d.ts +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +6 -6
- package/dist/tools/organizational-data.js +12 -12
- package/dist/tools/project-setup/discovery.d.ts +15 -0
- package/dist/tools/project-setup/discovery.d.ts.map +1 -0
- package/dist/tools/project-setup/discovery.js +104 -0
- package/dist/tools/project-setup/generate-scope.d.ts +15 -0
- package/dist/tools/project-setup/generate-scope.d.ts.map +1 -0
- package/dist/tools/project-setup/generate-scope.js +237 -0
- package/dist/tools/project-setup/report-scan.d.ts +15 -0
- package/dist/tools/project-setup/report-scan.d.ts.map +1 -0
- package/dist/tools/project-setup/report-scan.js +156 -0
- package/dist/tools/project-setup/types.d.ts +111 -0
- package/dist/tools/project-setup/types.d.ts.map +1 -0
- package/dist/tools/project-setup/types.js +8 -0
- package/dist/tools/project-setup.d.ts +28 -0
- package/dist/tools/project-setup.d.ts.map +1 -0
- package/dist/tools/project-setup.js +134 -0
- package/dist/tools/recommend.js +1 -1
- package/dist/tools/remediate.js +1 -1
- package/dist/tools/version.d.ts +0 -7
- package/dist/tools/version.d.ts.map +1 -1
- package/dist/tools/version.js +5 -34
- package/package.json +4 -2
- package/prompts/capability-inference.md +2 -2
- package/prompts/infrastructure-trigger-expansion.md +2 -2
- package/prompts/intent-analysis.md +2 -2
- package/prompts/kyverno-generation.md +14 -14
- package/prompts/manifest-generation.md +5 -5
- package/prompts/map-intent-to-operation.md +2 -2
- package/prompts/pattern-complete-error.md +1 -1
- package/prompts/pattern-complete-success.md +4 -4
- package/prompts/pattern-rationale.md +1 -1
- package/prompts/pattern-resources.md +1 -1
- package/prompts/pattern-review.md +5 -5
- package/prompts/policy-complete-apply.md +4 -4
- package/prompts/policy-complete-discard.md +1 -1
- package/prompts/policy-complete-error.md +1 -1
- package/prompts/policy-complete-save.md +4 -4
- package/prompts/policy-complete-success.md +4 -4
- package/prompts/policy-namespace-scope.md +1 -1
- package/prompts/question-generation.md +5 -5
- package/prompts/resource-analysis.md +3 -3
- package/prompts/resource-selection.md +3 -3
- package/prompts/solution-enhancement.md +4 -4
- package/scripts/anthropic.nu +9 -13
- package/scripts/common.nu +31 -33
- package/scripts/ingress.nu +5 -4
- package/scripts/kubernetes.nu +38 -53
- package/dist/core/doc-discovery.d.ts +0 -38
- package/dist/core/doc-discovery.d.ts.map +0 -1
- package/dist/core/doc-discovery.js +0 -231
- package/dist/core/doc-testing-session.d.ts +0 -109
- package/dist/core/doc-testing-session.d.ts.map +0 -1
- package/dist/core/doc-testing-session.js +0 -696
- package/dist/core/doc-testing-types.d.ts +0 -127
- package/dist/core/doc-testing-types.d.ts.map +0 -1
- package/dist/core/doc-testing-types.js +0 -53
- package/dist/core/nushell-runtime.d.ts +0 -39
- package/dist/core/nushell-runtime.d.ts.map +0 -1
- package/dist/core/nushell-runtime.js +0 -103
- package/dist/core/platform-operations.d.ts +0 -70
- package/dist/core/platform-operations.d.ts.map +0 -1
- package/dist/core/platform-operations.js +0 -294
- package/dist/tools/build-platform.d.ts +0 -25
- package/dist/tools/build-platform.d.ts.map +0 -1
- package/dist/tools/build-platform.js +0 -277
- package/dist/tools/test-docs.d.ts +0 -22
- package/dist/tools/test-docs.d.ts.map +0 -1
- package/dist/tools/test-docs.js +0 -351
- package/prompts/doc-testing-done.md +0 -51
- package/prompts/doc-testing-fix.md +0 -120
- package/prompts/doc-testing-scan.md +0 -140
- package/prompts/doc-testing-test-section.md +0 -169
- package/prompts/platform-operations-parse-script-help.md +0 -68
- package/scripts/ack.nu +0 -195
- package/scripts/argo-workflows.nu +0 -47
- package/scripts/argocd.nu +0 -85
- package/scripts/aso.nu +0 -74
- package/scripts/backstage.nu +0 -349
- package/scripts/cert-manager.nu +0 -13
- package/scripts/cnpg.nu +0 -14
- package/scripts/dot.nu +0 -32
- package/scripts/external-secrets.nu +0 -110
- package/scripts/gatekeeper.nu +0 -19
- package/scripts/github.nu +0 -42
- package/scripts/image.nu +0 -67
- package/scripts/kro.nu +0 -11
- package/scripts/kubevela.nu +0 -22
- package/scripts/port.nu +0 -71
- package/scripts/prometheus.nu +0 -21
- package/scripts/registry.nu +0 -55
- package/scripts/storage.nu +0 -210
- package/scripts/tests.nu +0 -12
- package/scripts/velero.nu +0 -45
- package/shared-prompts/validate-docs.md +0 -22
|
@@ -0,0 +1,694 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Module-interop helper (appears to be the standard TypeScript-emitted `__createBinding`;
// confirm before hand-editing). Exposes property `k` of module `m` on object `o` under
// the name `k2`. An implementation already present on `this` takes precedence.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    // The target name defaults to the source property name.
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    // Substitute an enumerable getter that reads m[k] on every access when `m` has no own
    // descriptor for `k`, when the descriptor is an accessor and `m` is not flagged
    // __esModule, or when it is a data property that is writable or configurable.
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    // Variant selected when Object.create is falsy: a plain one-time value copy.
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
|
|
13
|
+
// Module-interop helper: stores `v` as the "default" property of `o`.
// An implementation already present on `this` takes precedence.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    // Defined via a descriptor so the property is enumerable; writable/configurable are
    // not specified here and therefore take defineProperty's defaults.
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    // Variant selected when Object.create is falsy: plain assignment.
    o["default"] = v;
});
|
|
18
|
+
// Module-interop helper: wraps the result of `require()` in a namespace-style object.
// An implementation already present on `this` takes precedence.
var __importStar = (this && this.__importStar) || (function () {
    // `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or with a
    // for-in/hasOwnProperty fallback when that function is unavailable.
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        // Modules flagged __esModule are returned unchanged.
        if (mod && mod.__esModule) return mod;
        var result = {};
        // Bind every own property except "default" onto the wrapper object...
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        // ...and expose the original module value itself as `default`.
        __setModuleDefault(result, mod);
        return result;
    };
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.GraphGenerator = void 0;
|
|
37
|
+
const fs = __importStar(require("fs"));
|
|
38
|
+
const path = __importStar(require("path"));
|
|
39
|
+
const https = __importStar(require("https"));
|
|
40
|
+
/** Font colour applied to every text element of the generated charts. */
const CHART_TEXT_COLOR = '#FFFFFF';

/** Palette cycled through by GraphGenerator#getToolColor. */
const SERIES_COLORS = Object.freeze([
    'rgba(255, 99, 132, 0.8)', // Red
    'rgba(54, 162, 235, 0.8)', // Blue
    'rgba(255, 206, 86, 0.8)', // Yellow
    'rgba(75, 192, 192, 0.8)', // Green
    'rgba(153, 102, 255, 0.8)', // Purple
    'rgba(255, 159, 64, 0.8)', // Orange
    'rgba(199, 199, 199, 0.8)', // Grey
    'rgba(83, 102, 255, 0.8)', // Indigo
    'rgba(255, 99, 255, 0.8)', // Pink
    'rgba(99, 255, 132, 0.8)' // Light Green
]);

/** Token prefix used to mark function-valued options while a chart config is serialized. */
const FUNCTION_PLACEHOLDER = '__GRAPH_GENERATOR_FUNCTION_';

/** Builds the title options shared by every chart. */
function titleOptions(text) {
    return { display: true, text, fontSize: 18, fontColor: CHART_TEXT_COLOR, fontStyle: 'bold' };
}

/** Builds an axis caption (`scaleLabel`) with the shared styling. */
function axisLabelOptions(labelString) {
    return { display: true, labelString, fontColor: CHART_TEXT_COLOR, fontSize: 14 };
}

/** Builds axis tick options: axis-specific settings first, shared font styling last. */
function tickOptions(settings = {}, fontSize = 12) {
    return { ...settings, fontColor: CHART_TEXT_COLOR, fontSize };
}

/** Grid lines for a value axis (includes the zero-line colour). */
function primaryGridLines() {
    return { color: 'rgba(255, 255, 255, 0.2)', zeroLineColor: 'rgba(255, 255, 255, 0.4)' };
}

/** Fainter grid lines used on category axes. */
function secondaryGridLines() {
    return { color: 'rgba(255, 255, 255, 0.1)' };
}

/** Builds a visible legend; `labelExtras` are appended to the label styling. */
function legendOptions(position, fontSize, labelExtras = {}) {
    return {
        display: true,
        position,
        labels: { fontColor: CHART_TEXT_COLOR, fontSize, ...labelExtras }
    };
}

/** Plugin options that switch the datalabels plugin off. */
function hiddenDataLabels() {
    return { datalabels: { display: false } };
}

/**
 * Returns a NEW array ordered by `metric` descending.
 * The input is copied first because Array.prototype.sort works in place and the
 * performance list belongs to the caller (and is shared by all graphs).
 */
function rankDescending(models, metric) {
    return [...models].sort((a, b) => metric(b) - metric(a));
}

/**
 * GraphGenerator creates data visualizations for platform synthesis reports.
 * Uses QuickChart.io API to generate chart images without requiring native dependencies.
 *
 * Every `generate*Graph` method resolves to `{ success: true, graphPath }` or
 * `{ success: false, error }`; failures are reported in the result, not thrown.
 */
class GraphGenerator {
    outputDir;
    quickchartBaseUrl = 'https://quickchart.io/chart';

    /**
     * @param {string} outputDir Directory the PNG files are written to.
     */
    constructor(outputDir = './eval/analysis/platform/graphs') {
        this.outputDir = outputDir;
    }

    /**
     * Generates all or specific graphs for the platform report
     * @param modelPerformances Model performance data (never modified)
     * @param graphNames Optional array of specific graph names to generate. If not provided
     *                   (or empty), generates all graphs.
     *                   Valid names: 'performance-tiers', 'cost-vs-quality', 'reliability-comparison',
     *                   'tool-performance-heatmap', 'context-window-correlation'
     * @returns Object mapping each generated graph name to its result. Unknown names are
     *          reported with a warning and omitted.
     */
    async generateAllGraphs(modelPerformances, graphNames) {
        // Ensure output directory exists
        if (!fs.existsSync(this.outputDir)) {
            fs.mkdirSync(this.outputDir, { recursive: true });
        }
        // A Map (not a plain object) so names such as "toString" or "constructor" cannot
        // resolve to inherited Object.prototype members and be invoked as generators.
        const generators = new Map([
            ['performance-tiers', () => this.generatePerformanceTiersGraph(modelPerformances)],
            ['cost-vs-quality', () => this.generateCostVsQualityGraph(modelPerformances)],
            ['reliability-comparison', () => this.generateReliabilityComparisonGraph(modelPerformances)],
            ['tool-performance-heatmap', () => this.generateToolPerformanceHeatmap(modelPerformances)],
            ['context-window-correlation', () => this.generateContextWindowCorrelationGraph(modelPerformances)]
        ]);
        const graphsToGenerate = graphNames && graphNames.length > 0
            ? graphNames
            : [...generators.keys()];
        const results = {};
        // Graphs are rendered one at a time; each one is a separate request to QuickChart.
        for (const graphName of graphsToGenerate) {
            const generate = generators.get(graphName);
            if (generate) {
                results[graphName] = await generate();
            }
            else {
                console.warn(`⚠️ Unknown graph name: ${graphName}`);
            }
        }
        return results;
    }

    /**
     * Graph 1: Performance Tiers - Grouped bar chart showing score, reliability, and consistency
     * for the 10 models with the highest average score.
     */
    async generatePerformanceTiersGraph(modelPerformances) {
        return this.saveGraph('performance tiers graph', 'performance-tiers.png', () => {
            const topModels = rankDescending(modelPerformances, (m) => m.averageScore).slice(0, 10);
            const barSeries = (label, data, rgb) => ({
                label,
                data,
                backgroundColor: `rgba(${rgb}, 0.9)`,
                borderColor: `rgba(${rgb}, 1)`,
                borderWidth: 1
            });
            return {
                type: 'bar',
                data: {
                    // Clean model names (remove "vercel_" prefix)
                    labels: topModels.map((m) => this.cleanModelName(m.modelId)),
                    datasets: [
                        barSeries('Overall Score', topModels.map((m) => m.averageScore), '54, 162, 235'),
                        barSeries('Reliability', topModels.map((m) => m.reliabilityScore), '75, 192, 192'),
                        barSeries('Consistency', topModels.map((m) => m.consistencyAcrossTools), '153, 102, 255')
                    ]
                },
                options: {
                    plugins: hiddenDataLabels(),
                    title: titleOptions('Model Performance Tiers: Score, Reliability, and Consistency'),
                    scales: {
                        yAxes: [{
                                ticks: tickOptions({ beginAtZero: true, max: 1.0, stepSize: 0.1 }),
                                scaleLabel: axisLabelOptions('Score (0-1)'),
                                gridLines: primaryGridLines()
                            }],
                        xAxes: [{
                                ticks: tickOptions({ autoSkip: false, maxRotation: 45, minRotation: 45 }, 11),
                                gridLines: secondaryGridLines()
                            }]
                    },
                    legend: legendOptions('top', 13)
                }
            };
        });
    }

    /**
     * Graph 2: Cost vs Quality - Line chart showing input/output cost range per model.
     * Models whose input and output cost are both zero (no pricing data) are left out.
     */
    async generateCostVsQualityGraph(modelPerformances) {
        return this.saveGraph('cost vs quality graph', 'cost-vs-quality.png', () => {
            const pricedModels = modelPerformances.filter((m) => m.pricing.input_cost_per_million_tokens > 0 ||
                m.pricing.output_cost_per_million_tokens > 0);
            // One dataset per model: a horizontal segment from input cost to output cost
            // drawn at the model's quality score.
            const datasets = rankDescending(pricedModels, (m) => m.averageScore).map((m, idx) => {
                const color = this.getToolColor(idx);
                return {
                    label: this.cleanModelName(m.modelId),
                    data: [
                        { x: m.pricing.input_cost_per_million_tokens, y: m.averageScore },
                        { x: m.pricing.output_cost_per_million_tokens, y: m.averageScore }
                    ],
                    borderColor: color,
                    backgroundColor: color,
                    borderWidth: 3,
                    pointRadius: 5,
                    pointHoverRadius: 7,
                    fill: false,
                    showLine: true,
                    tension: 0
                };
            });
            return {
                type: 'line',
                data: { datasets },
                options: {
                    plugins: hiddenDataLabels(),
                    title: titleOptions('Cost vs Quality Analysis (line shows input → output cost range)'),
                    scales: {
                        xAxes: [{
                                type: 'linear',
                                scaleLabel: axisLabelOptions('Cost per 1M Tokens in $ (Input ← → Output)'),
                                // Must stay self-contained: it is evaluated by QuickChart, not here.
                                ticks: tickOptions({ callback: (value) => '$' + value }),
                                gridLines: primaryGridLines()
                            }],
                        yAxes: [{
                                scaleLabel: axisLabelOptions('Overall Score'),
                                ticks: tickOptions({ beginAtZero: false, min: 0.3, max: 1.0 }),
                                gridLines: primaryGridLines()
                            }]
                    },
                    legend: legendOptions('right', 10, { boxWidth: 15, usePointStyle: true })
                }
            };
        });
    }

    /**
     * Graph 3: Reliability Comparison - Horizontal bar chart with reliability scores,
     * colour-coded into high (>= 0.9), medium (0.7 - 0.9) and low (< 0.7) bands.
     */
    async generateReliabilityComparisonGraph(modelPerformances) {
        return this.saveGraph('reliability comparison graph', 'reliability-comparison.png', () => {
            const sortedModels = rankDescending(modelPerformances, (m) => m.reliabilityScore);
            const reliabilityScores = sortedModels.map((m) => m.reliabilityScore);
            // One dataset per band so each band gets a legend entry; scores outside the
            // band are null, and the stacked axes make the three datasets share one bar row.
            const band = (label, belongs, backgroundColor) => ({
                label,
                data: reliabilityScores.map((score) => (belongs(score) ? score : null)),
                backgroundColor,
                borderWidth: 1
            });
            return {
                type: 'horizontalBar',
                data: {
                    labels: sortedModels.map((m) => this.cleanModelName(m.modelId)),
                    datasets: [
                        band('High Reliability (≥0.9)', (score) => score >= 0.9, 'rgba(75, 192, 192, 0.8)'),
                        band('Medium Reliability (0.7-0.9)', (score) => score >= 0.7 && score < 0.9, 'rgba(255, 206, 86, 0.8)'),
                        band('Low Reliability (<0.7)', (score) => score < 0.7, 'rgba(255, 99, 132, 0.8)')
                    ]
                },
                options: {
                    plugins: hiddenDataLabels(),
                    title: titleOptions('Model Reliability Comparison'),
                    scales: {
                        xAxes: [{
                                stacked: true,
                                ticks: tickOptions({ beginAtZero: true, max: 1.0, stepSize: 0.1 }),
                                scaleLabel: axisLabelOptions('Reliability Score (0-1)'),
                                gridLines: primaryGridLines()
                            }],
                        yAxes: [{
                                stacked: true,
                                ticks: tickOptions({}, 11),
                                gridLines: secondaryGridLines()
                            }]
                    },
                    legend: legendOptions('top', 12)
                }
            };
        });
    }

    /**
     * Graph 4: Tool Performance Heatmap - Shows model scores per tool for the 10 models
     * with the highest average score (rendered as grouped horizontal bars).
     */
    async generateToolPerformanceHeatmap(modelPerformances) {
        return this.saveGraph('tool performance heatmap', 'tool-performance-heatmap.png', () => {
            // Tool names are collected from ALL models, not only the ten that are plotted.
            const toolNames = new Set();
            modelPerformances.forEach((m) => {
                Object.keys(m.toolScores).forEach((tool) => toolNames.add(tool));
            });
            const tools = Array.from(toolNames).sort();
            const sortedModels = rankDescending(modelPerformances, (m) => m.averageScore).slice(0, 10);
            const datasets = tools.map((tool, idx) => ({
                label: tool.charAt(0).toUpperCase() + tool.slice(1),
                // A model without a score for this tool is plotted as 0.
                data: sortedModels.map((m) => m.toolScores[tool] || 0),
                backgroundColor: this.getToolColor(idx),
                borderWidth: 1
            }));
            return {
                type: 'horizontalBar',
                data: {
                    labels: sortedModels.map((m) => this.cleanModelName(m.modelId)),
                    datasets
                },
                options: {
                    plugins: hiddenDataLabels(),
                    title: titleOptions('Tool-Specific Performance Patterns'),
                    scales: {
                        xAxes: [{
                                stacked: false,
                                ticks: tickOptions({ beginAtZero: true, max: 1.0, stepSize: 0.2 }),
                                scaleLabel: axisLabelOptions('Tool Score'),
                                gridLines: primaryGridLines()
                            }],
                        yAxes: [{
                                stacked: false,
                                ticks: tickOptions({}, 11),
                                gridLines: secondaryGridLines()
                            }]
                    },
                    legend: legendOptions('right', 12)
                }
            };
        });
    }

    /**
     * Graph 5: Context Window Correlation - Scatter plot showing context window vs performance.
     * Rendered at 1400x700 with extra right padding so point labels have room.
     */
    async generateContextWindowCorrelationGraph(modelPerformances) {
        return this.saveGraph('context window correlation graph', 'context-window-correlation.png', () => {
            const scatterData = modelPerformances.map((m) => ({
                x: m.capabilities.context_window / 1000, // Convert to thousands for readability
                y: m.averageScore,
                r: 8,
                label: this.cleanModelName(m.modelId)
            }));
            return {
                type: 'scatter',
                data: {
                    datasets: [{
                            label: 'Models',
                            data: scatterData,
                            backgroundColor: 'rgba(153, 102, 255, 0.7)',
                            borderColor: 'rgba(153, 102, 255, 1)',
                            borderWidth: 2,
                            pointRadius: 10
                        }]
                },
                options: {
                    layout: {
                        padding: { right: 300, left: 20, top: 20, bottom: 20 }
                    },
                    plugins: {
                        datalabels: {
                            display: true,
                            align: 'right',
                            offset: 12,
                            color: CHART_TEXT_COLOR,
                            font: { size: 20 },
                            formatter: (value) => value.label
                        }
                    },
                    title: titleOptions('Context Window Size vs Performance'),
                    scales: {
                        xAxes: [{
                                type: 'linear',
                                scaleLabel: axisLabelOptions('Context Window Size (K tokens)'),
                                // Must stay self-contained: it is evaluated by QuickChart, not here.
                                ticks: tickOptions({ callback: (value) => value + 'K' }),
                                gridLines: primaryGridLines()
                            }],
                        yAxes: [{
                                scaleLabel: axisLabelOptions('Overall Score'),
                                ticks: tickOptions({ beginAtZero: false, min: 0.3, max: 1.0 }),
                                gridLines: primaryGridLines()
                            }]
                    },
                    legend: { display: false },
                    tooltips: {
                        backgroundColor: 'rgba(0, 0, 0, 0.8)',
                        titleFontColor: CHART_TEXT_COLOR,
                        bodyFontColor: CHART_TEXT_COLOR,
                        callbacks: {
                            label: (tooltipItem, data) => {
                                const dataset = data.datasets[tooltipItem.datasetIndex];
                                const point = dataset.data[tooltipItem.index];
                                return `${point.label}: ${point.y.toFixed(3)} (${Math.round(point.x)}K tokens)`;
                            }
                        }
                    }
                }
            };
        }, 1400, 700);
    }

    /**
     * Builds a chart config, renders it to `<outputDir>/<fileName>` and converts the
     * outcome into the result object returned by every `generate*Graph` method.
     *
     * @param {string} description Human-readable graph name used in the failure message.
     * @param {string} fileName PNG file name inside the output directory.
     * @param {() => object} buildConfig Produces the Chart.js config; it runs inside the
     *        try block so malformed performance data becomes a failure result, not a throw.
     * @param {number} [width] Image width; omitted means downloadChart's default.
     * @param {number} [height] Image height; omitted means downloadChart's default.
     * @returns {Promise<{success: boolean, graphPath?: string, error?: string}>}
     */
    async saveGraph(description, fileName, buildConfig, width, height) {
        try {
            const chartConfig = buildConfig();
            const outputPath = path.join(this.outputDir, fileName);
            await this.downloadChart(chartConfig, outputPath, width, height);
            return {
                success: true,
                graphPath: outputPath
            };
        }
        catch (error) {
            return {
                success: false,
                error: `Failed to generate ${description}: ${error}`
            };
        }
    }

    /**
     * Serializes a chart config for QuickChart's `c` query parameter.
     *
     * Plain JSON.stringify silently drops function-valued options (tick callbacks,
     * formatters), so each function is emitted as its source text instead, producing a
     * JavaScript object literal. A config without functions serializes exactly as
     * JSON.stringify would.
     *
     * NOTE: only function expressions and arrow functions produce valid output;
     * method-shorthand members (`label(x) {}`) do not stringify to a valid property value.
     *
     * @param {object} chartConfig Chart.js configuration.
     * @returns {string} JSON text, with function sources inlined where functions occurred.
     */
    serializeChartConfig(chartConfig) {
        const functionSources = [];
        const json = JSON.stringify(chartConfig, (_key, value) => {
            if (typeof value !== 'function') {
                return value;
            }
            functionSources.push(String(value));
            return `${FUNCTION_PLACEHOLDER}${functionSources.length - 1}__`;
        });
        if (functionSources.length === 0) {
            return json;
        }
        const placeholderPattern = new RegExp(`"${FUNCTION_PLACEHOLDER}(\\d+)__"`, 'g');
        // A replacer FUNCTION is required: function sources may contain "$" sequences that
        // a replacement string would interpret as substitution patterns.
        return json.replace(placeholderPattern, (match, index) => {
            const position = Number(index);
            return position < functionSources.length ? functionSources[position] : match;
        });
    }

    /**
     * Downloads a chart from QuickChart.io API and saves it as PNG
     *
     * @param {object} chartConfig Chart.js configuration to render.
     * @param {string} outputPath Destination file path.
     * @param {number} [width=1000] Image width in pixels.
     * @param {number} [height=600] Image height in pixels.
     * @returns {Promise<void>} Resolves once the file is fully written; rejects on a
     *          non-200 response, a network error or a file-system error.
     */
    async downloadChart(chartConfig, outputPath, width = 1000, height = 600) {
        const chartSource = this.serializeChartConfig(chartConfig);
        const url = `${this.quickchartBaseUrl}?c=${encodeURIComponent(chartSource)}&width=${width}&height=${height}&format=png&backgroundColor=black`;
        return new Promise((resolve, reject) => {
            https.get(url, (response) => {
                if (response.statusCode !== 200) {
                    // Discard the body so the underlying socket is released.
                    response.resume();
                    reject(new Error(`QuickChart API returned status ${response.statusCode}`));
                    return;
                }
                const fileStream = fs.createWriteStream(outputPath);
                const abort = (err) => {
                    response.destroy();
                    fileStream.destroy();
                    fs.unlink(outputPath, () => { }); // Clean up partial file
                    reject(err);
                };
                // pipe() does not forward source errors, so an interrupted download must
                // be handled explicitly or the promise would never settle.
                response.on('error', abort);
                fileStream.on('error', abort);
                fileStream.on('finish', () => {
                    console.log(`✅ Graph saved: ${outputPath}`);
                    resolve();
                });
                response.pipe(fileStream);
            }).on('error', reject);
        });
    }

    /**
     * Cleans model names for display: removes a leading "vercel_" provider prefix and a
     * trailing "_YYYY-MM-DD" date suffix, then turns remaining underscores into hyphens.
     */
    cleanModelName(modelId) {
        return modelId
            .replace(/^vercel_/, '')
            .replace(/_\d{4}-\d{2}-\d{2}$/, '')
            .replace(/_/g, '-');
    }

    /**
     * Returns a consistent color for each series index. The palette holds 10 colors and
     * wraps around, so index 10 reuses the color of index 0.
     */
    getToolColor(index) {
        return SERIES_COLORS[index % SERIES_COLORS.length];
    }
}
|
|
694
|
+
exports.GraphGenerator = GraphGenerator;
|