@vfarcic/dot-ai 0.115.0 → 0.117.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/README.md +21 -18
  2. package/dist/core/ai-provider-factory.d.ts +4 -2
  3. package/dist/core/ai-provider-factory.d.ts.map +1 -1
  4. package/dist/core/ai-provider-factory.js +17 -6
  5. package/dist/core/capability-operations.js +1 -1
  6. package/dist/core/generic-session-manager.d.ts +67 -0
  7. package/dist/core/generic-session-manager.d.ts.map +1 -0
  8. package/dist/core/generic-session-manager.js +192 -0
  9. package/dist/core/pattern-operations.js +1 -1
  10. package/dist/core/providers/noop-provider.d.ts +47 -0
  11. package/dist/core/providers/noop-provider.d.ts.map +1 -0
  12. package/dist/core/providers/noop-provider.js +63 -0
  13. package/dist/core/schema.d.ts.map +1 -1
  14. package/dist/core/schema.js +13 -13
  15. package/dist/core/session-utils.d.ts +3 -6
  16. package/dist/core/session-utils.d.ts.map +1 -1
  17. package/dist/core/session-utils.js +5 -13
  18. package/dist/core/shared-prompt-loader.d.ts +15 -3
  19. package/dist/core/shared-prompt-loader.d.ts.map +1 -1
  20. package/dist/core/shared-prompt-loader.js +67 -14
  21. package/dist/core/unified-creation-session.d.ts +3 -10
  22. package/dist/core/unified-creation-session.d.ts.map +1 -1
  23. package/dist/core/unified-creation-session.js +34 -75
  24. package/dist/core/unified-creation-types.d.ts +31 -22
  25. package/dist/core/unified-creation-types.d.ts.map +1 -1
  26. package/dist/evaluation/eval-runner.js +12 -3
  27. package/dist/evaluation/evaluators/base-comparative.d.ts +2 -0
  28. package/dist/evaluation/evaluators/base-comparative.d.ts.map +1 -1
  29. package/dist/evaluation/evaluators/base-comparative.js +13 -1
  30. package/dist/evaluation/graph-generator.d.ts +56 -0
  31. package/dist/evaluation/graph-generator.d.ts.map +1 -0
  32. package/dist/evaluation/graph-generator.js +694 -0
  33. package/dist/evaluation/metadata-loader.d.ts +39 -0
  34. package/dist/evaluation/metadata-loader.d.ts.map +1 -0
  35. package/dist/evaluation/metadata-loader.js +74 -0
  36. package/dist/evaluation/platform-synthesizer.d.ts +5 -1
  37. package/dist/evaluation/platform-synthesizer.d.ts.map +1 -1
  38. package/dist/evaluation/platform-synthesizer.js +65 -23
  39. package/dist/evaluation/run-platform-synthesis.js +22 -5
  40. package/dist/interfaces/mcp.d.ts.map +1 -1
  41. package/dist/interfaces/mcp.js +9 -34
  42. package/dist/tools/answer-question.d.ts.map +1 -1
  43. package/dist/tools/answer-question.js +12 -12
  44. package/dist/tools/choose-solution.js +1 -1
  45. package/dist/tools/generate-manifests.d.ts.map +1 -1
  46. package/dist/tools/generate-manifests.js +9 -10
  47. package/dist/tools/index.d.ts +1 -1
  48. package/dist/tools/index.d.ts.map +1 -1
  49. package/dist/tools/index.js +6 -6
  50. package/dist/tools/organizational-data.js +12 -12
  51. package/dist/tools/project-setup/discovery.d.ts +15 -0
  52. package/dist/tools/project-setup/discovery.d.ts.map +1 -0
  53. package/dist/tools/project-setup/discovery.js +104 -0
  54. package/dist/tools/project-setup/generate-scope.d.ts +15 -0
  55. package/dist/tools/project-setup/generate-scope.d.ts.map +1 -0
  56. package/dist/tools/project-setup/generate-scope.js +237 -0
  57. package/dist/tools/project-setup/report-scan.d.ts +15 -0
  58. package/dist/tools/project-setup/report-scan.d.ts.map +1 -0
  59. package/dist/tools/project-setup/report-scan.js +156 -0
  60. package/dist/tools/project-setup/types.d.ts +111 -0
  61. package/dist/tools/project-setup/types.d.ts.map +1 -0
  62. package/dist/tools/project-setup/types.js +8 -0
  63. package/dist/tools/project-setup.d.ts +28 -0
  64. package/dist/tools/project-setup.d.ts.map +1 -0
  65. package/dist/tools/project-setup.js +134 -0
  66. package/dist/tools/recommend.js +1 -1
  67. package/dist/tools/remediate.js +1 -1
  68. package/dist/tools/version.d.ts +0 -7
  69. package/dist/tools/version.d.ts.map +1 -1
  70. package/dist/tools/version.js +5 -34
  71. package/package.json +4 -2
  72. package/prompts/capability-inference.md +2 -2
  73. package/prompts/infrastructure-trigger-expansion.md +2 -2
  74. package/prompts/intent-analysis.md +2 -2
  75. package/prompts/kyverno-generation.md +14 -14
  76. package/prompts/manifest-generation.md +5 -5
  77. package/prompts/map-intent-to-operation.md +2 -2
  78. package/prompts/pattern-complete-error.md +1 -1
  79. package/prompts/pattern-complete-success.md +4 -4
  80. package/prompts/pattern-rationale.md +1 -1
  81. package/prompts/pattern-resources.md +1 -1
  82. package/prompts/pattern-review.md +5 -5
  83. package/prompts/policy-complete-apply.md +4 -4
  84. package/prompts/policy-complete-discard.md +1 -1
  85. package/prompts/policy-complete-error.md +1 -1
  86. package/prompts/policy-complete-save.md +4 -4
  87. package/prompts/policy-complete-success.md +4 -4
  88. package/prompts/policy-namespace-scope.md +1 -1
  89. package/prompts/question-generation.md +5 -5
  90. package/prompts/resource-analysis.md +3 -3
  91. package/prompts/resource-selection.md +3 -3
  92. package/prompts/solution-enhancement.md +4 -4
  93. package/scripts/anthropic.nu +9 -13
  94. package/scripts/common.nu +31 -33
  95. package/scripts/ingress.nu +5 -4
  96. package/scripts/kubernetes.nu +38 -53
  97. package/dist/core/doc-discovery.d.ts +0 -38
  98. package/dist/core/doc-discovery.d.ts.map +0 -1
  99. package/dist/core/doc-discovery.js +0 -231
  100. package/dist/core/doc-testing-session.d.ts +0 -109
  101. package/dist/core/doc-testing-session.d.ts.map +0 -1
  102. package/dist/core/doc-testing-session.js +0 -696
  103. package/dist/core/doc-testing-types.d.ts +0 -127
  104. package/dist/core/doc-testing-types.d.ts.map +0 -1
  105. package/dist/core/doc-testing-types.js +0 -53
  106. package/dist/core/nushell-runtime.d.ts +0 -39
  107. package/dist/core/nushell-runtime.d.ts.map +0 -1
  108. package/dist/core/nushell-runtime.js +0 -103
  109. package/dist/core/platform-operations.d.ts +0 -70
  110. package/dist/core/platform-operations.d.ts.map +0 -1
  111. package/dist/core/platform-operations.js +0 -294
  112. package/dist/tools/build-platform.d.ts +0 -25
  113. package/dist/tools/build-platform.d.ts.map +0 -1
  114. package/dist/tools/build-platform.js +0 -277
  115. package/dist/tools/test-docs.d.ts +0 -22
  116. package/dist/tools/test-docs.d.ts.map +0 -1
  117. package/dist/tools/test-docs.js +0 -351
  118. package/prompts/doc-testing-done.md +0 -51
  119. package/prompts/doc-testing-fix.md +0 -120
  120. package/prompts/doc-testing-scan.md +0 -140
  121. package/prompts/doc-testing-test-section.md +0 -169
  122. package/prompts/platform-operations-parse-script-help.md +0 -68
  123. package/scripts/ack.nu +0 -195
  124. package/scripts/argo-workflows.nu +0 -47
  125. package/scripts/argocd.nu +0 -85
  126. package/scripts/aso.nu +0 -74
  127. package/scripts/backstage.nu +0 -349
  128. package/scripts/cert-manager.nu +0 -13
  129. package/scripts/cnpg.nu +0 -14
  130. package/scripts/dot.nu +0 -32
  131. package/scripts/external-secrets.nu +0 -110
  132. package/scripts/gatekeeper.nu +0 -19
  133. package/scripts/github.nu +0 -42
  134. package/scripts/image.nu +0 -67
  135. package/scripts/kro.nu +0 -11
  136. package/scripts/kubevela.nu +0 -22
  137. package/scripts/port.nu +0 -71
  138. package/scripts/prometheus.nu +0 -21
  139. package/scripts/registry.nu +0 -55
  140. package/scripts/storage.nu +0 -210
  141. package/scripts/tests.nu +0 -12
  142. package/scripts/velero.nu +0 -45
  143. package/shared-prompts/validate-docs.md +0 -22
@@ -0,0 +1,694 @@
1
+ "use strict";
2
// Module-interop helper (appears to be compiler-emitted; an existing this.__createBinding is reused when present).
// Exposes property k of module m on object o under the name k2 (k2 defaults to k).
// When Object.create exists the property is defined via a descriptor; otherwise it is a plain assignment.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
// Fall back to an enumerable getter that reads m[k] on every access when the property has no own
// descriptor, is an accessor on a non-__esModule object, or is a writable/configurable data property.
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// Module-interop helper (an existing this.__setModuleDefault is reused when present).
// Stores v as the "default" property of o: as an enumerable property defined via
// Object.defineProperty when Object.create exists, otherwise by plain assignment.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// Module-interop helper (an existing this.__importStar is reused when present).
// Returns mod unchanged when mod.__esModule is truthy. Otherwise builds a new object that
// re-exposes every own property of mod except "default" (via __createBinding) and sets
// "default" to mod itself (via __setModuleDefault).
+ var __importStar = (this && this.__importStar) || (function () {
19
// ownKeys replaces itself on first call with Object.getOwnPropertyNames, or with a
// for-in/hasOwnProperty fallback when that function is unavailable.
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.GraphGenerator = void 0;
37
+ const fs = __importStar(require("fs"));
38
+ const path = __importStar(require("path"));
39
+ const https = __importStar(require("https"));
40
/**
 * GraphGenerator creates data visualizations for platform synthesis reports.
 * Uses the QuickChart.io HTTP API to render chart configurations to PNG files,
 * so no native charting dependencies are required.
 *
 * Every generate*Graph method resolves to `{ success: true, graphPath }` or
 * `{ success: false, error }`; rendering failures are reported, never thrown.
 * None of the methods reorder or modify the `modelPerformances` array they
 * are given.
 */
class GraphGenerator {
    outputDir;
    quickchartBaseUrl = 'https://quickchart.io/chart';

    /**
     * @param {string} outputDir Directory the PNG files are written to.
     */
    constructor(outputDir = './eval/analysis/platform/graphs') {
        this.outputDir = outputDir;
    }

    /**
     * Generates all or specific graphs for the platform report.
     * @param modelPerformances Model performance data
     * @param graphNames Optional array of specific graph names to generate. If not provided
     *                   (or empty), generates all graphs.
     *                   Valid names: 'performance-tiers', 'cost-vs-quality', 'reliability-comparison',
     *                   'tool-performance-heatmap', 'context-window-correlation'
     * @returns Object mapping each generated graph name to its result. Unknown names are
     *          skipped with a console warning.
     */
    async generateAllGraphs(modelPerformances, graphNames) {
        // Ensure output directory exists
        if (!fs.existsSync(this.outputDir)) {
            fs.mkdirSync(this.outputDir, { recursive: true });
        }
        const generators = {
            'performance-tiers': () => this.generatePerformanceTiersGraph(modelPerformances),
            'cost-vs-quality': () => this.generateCostVsQualityGraph(modelPerformances),
            'reliability-comparison': () => this.generateReliabilityComparisonGraph(modelPerformances),
            'tool-performance-heatmap': () => this.generateToolPerformanceHeatmap(modelPerformances),
            'context-window-correlation': () => this.generateContextWindowCorrelationGraph(modelPerformances)
        };
        const requested = graphNames && graphNames.length > 0
            ? graphNames
            : Object.keys(generators);
        const results = {};
        // Graphs are rendered one at a time, in the requested order.
        for (const graphName of requested) {
            // Own-property check: a plain truthiness test would also accept inherited
            // names such as 'toString' or 'constructor' and try to call them.
            if (!Object.prototype.hasOwnProperty.call(generators, graphName)) {
                console.warn(`⚠️ Unknown graph name: ${graphName}`);
                continue;
            }
            results[graphName] = await generators[graphName]();
        }
        return results;
    }

    /**
     * Graph 1: Performance Tiers - Grouped bar chart showing score, reliability, and
     * consistency for the 10 models with the highest average score.
     */
    async generatePerformanceTiersGraph(modelPerformances) {
        return this.renderGraph('performance-tiers.png', 'performance tiers graph', () => {
            // Sort a copy: Array.prototype.sort works in place and would reorder the caller's array.
            const topModels = [...modelPerformances]
                .sort((a, b) => b.averageScore - a.averageScore)
                .slice(0, 10);
            return {
                type: 'bar',
                data: {
                    labels: topModels.map((m) => this.cleanModelName(m.modelId)),
                    datasets: [
                        {
                            label: 'Overall Score',
                            data: topModels.map((m) => m.averageScore),
                            backgroundColor: 'rgba(54, 162, 235, 0.9)',
                            borderColor: 'rgba(54, 162, 235, 1)',
                            borderWidth: 1
                        },
                        {
                            label: 'Reliability',
                            data: topModels.map((m) => m.reliabilityScore),
                            backgroundColor: 'rgba(75, 192, 192, 0.9)',
                            borderColor: 'rgba(75, 192, 192, 1)',
                            borderWidth: 1
                        },
                        {
                            label: 'Consistency',
                            data: topModels.map((m) => m.consistencyAcrossTools),
                            backgroundColor: 'rgba(153, 102, 255, 0.9)',
                            borderColor: 'rgba(153, 102, 255, 1)',
                            borderWidth: 1
                        }
                    ]
                },
                options: {
                    plugins: {
                        datalabels: {
                            display: false
                        }
                    },
                    title: this.titleOptions('Model Performance Tiers: Score, Reliability, and Consistency'),
                    scales: {
                        yAxes: [{
                                ticks: {
                                    beginAtZero: true,
                                    max: 1.0,
                                    stepSize: 0.1,
                                    fontColor: '#FFFFFF',
                                    fontSize: 12
                                },
                                scaleLabel: this.axisLabel('Score (0-1)'),
                                gridLines: this.majorGridLines()
                            }],
                        xAxes: [{
                                ticks: {
                                    autoSkip: false,
                                    maxRotation: 45,
                                    minRotation: 45,
                                    fontColor: '#FFFFFF',
                                    fontSize: 11
                                },
                                gridLines: this.minorGridLines()
                            }]
                    },
                    legend: {
                        display: true,
                        position: 'top',
                        labels: {
                            fontColor: '#FFFFFF',
                            fontSize: 13
                        }
                    }
                }
            };
        });
    }

    /**
     * Graph 2: Cost vs Quality - Line chart with one horizontal segment per model, running
     * from its input cost to its output cost at the height of its average score.
     * Models whose input and output costs are both not greater than zero are left out.
     */
    async generateCostVsQualityGraph(modelPerformances) {
        return this.renderGraph('cost-vs-quality.png', 'cost vs quality graph', () => {
            // filter() already returns a new array, so sorting it leaves the input untouched.
            const pricedModels = modelPerformances
                .filter((m) => m.pricing.input_cost_per_million_tokens > 0 || m.pricing.output_cost_per_million_tokens > 0)
                .sort((a, b) => b.averageScore - a.averageScore);
            const datasets = pricedModels.map((m, idx) => {
                const color = this.getToolColor(idx);
                return {
                    label: this.cleanModelName(m.modelId),
                    data: [
                        { x: m.pricing.input_cost_per_million_tokens, y: m.averageScore },
                        { x: m.pricing.output_cost_per_million_tokens, y: m.averageScore }
                    ],
                    borderColor: color,
                    backgroundColor: color,
                    borderWidth: 3,
                    pointRadius: 5,
                    pointHoverRadius: 7,
                    fill: false,
                    showLine: true,
                    tension: 0
                };
            });
            return {
                type: 'line',
                data: { datasets },
                options: {
                    plugins: {
                        datalabels: {
                            display: false
                        }
                    },
                    title: this.titleOptions('Cost vs Quality Analysis (line shows input → output cost range)'),
                    scales: {
                        xAxes: [{
                                type: 'linear',
                                scaleLabel: this.axisLabel('Cost per 1M Tokens in $ (Input ← → Output)'),
                                ticks: {
                                    // Sent to QuickChart as source text, so it must not use outside variables.
                                    callback: (value) => '$' + value,
                                    fontColor: '#FFFFFF',
                                    fontSize: 12
                                },
                                gridLines: this.majorGridLines()
                            }],
                        yAxes: [{
                                scaleLabel: this.axisLabel('Overall Score'),
                                ticks: {
                                    beginAtZero: false,
                                    min: 0.3,
                                    max: 1.0,
                                    fontColor: '#FFFFFF',
                                    fontSize: 12
                                },
                                gridLines: this.majorGridLines()
                            }]
                    },
                    legend: {
                        display: true,
                        position: 'right',
                        labels: {
                            fontColor: '#FFFFFF',
                            fontSize: 10,
                            boxWidth: 15,
                            usePointStyle: true
                        }
                    }
                }
            };
        });
    }

    /**
     * Graph 3: Reliability Comparison - Horizontal bar chart of reliability scores for all
     * models, most reliable first, coloured by reliability band.
     */
    async generateReliabilityComparisonGraph(modelPerformances) {
        return this.renderGraph('reliability-comparison.png', 'reliability comparison graph', () => {
            // Sort a copy so the caller's array keeps its order.
            const sortedModels = [...modelPerformances]
                .sort((a, b) => b.reliabilityScore - a.reliabilityScore);
            const reliabilityScores = sortedModels.map((m) => m.reliabilityScore);
            // One dataset per band; each holds the score only for models in that band and
            // null elsewhere, which gives every band its own colour and legend entry.
            const datasets = [
                {
                    label: 'High Reliability (≥0.9)',
                    data: reliabilityScores.map((score) => (score >= 0.9 ? score : null)),
                    backgroundColor: 'rgba(75, 192, 192, 0.8)',
                    borderWidth: 1
                },
                {
                    label: 'Medium Reliability (0.7-0.9)',
                    data: reliabilityScores.map((score) => (score >= 0.7 && score < 0.9 ? score : null)),
                    backgroundColor: 'rgba(255, 206, 86, 0.8)',
                    borderWidth: 1
                },
                {
                    label: 'Low Reliability (<0.7)',
                    data: reliabilityScores.map((score) => (score < 0.7 ? score : null)),
                    backgroundColor: 'rgba(255, 99, 132, 0.8)',
                    borderWidth: 1
                }
            ];
            return {
                type: 'horizontalBar',
                data: {
                    labels: sortedModels.map((m) => this.cleanModelName(m.modelId)),
                    datasets
                },
                options: {
                    plugins: {
                        datalabels: {
                            display: false
                        }
                    },
                    title: this.titleOptions('Model Reliability Comparison'),
                    scales: {
                        xAxes: [{
                                stacked: true,
                                ticks: {
                                    beginAtZero: true,
                                    max: 1.0,
                                    stepSize: 0.1,
                                    fontColor: '#FFFFFF',
                                    fontSize: 12
                                },
                                scaleLabel: this.axisLabel('Reliability Score (0-1)'),
                                gridLines: this.majorGridLines()
                            }],
                        yAxes: [{
                                stacked: true,
                                ticks: {
                                    fontColor: '#FFFFFF',
                                    fontSize: 11
                                },
                                gridLines: this.minorGridLines()
                            }]
                    },
                    legend: {
                        display: true,
                        position: 'top',
                        labels: {
                            fontColor: '#FFFFFF',
                            fontSize: 12
                        }
                    }
                }
            };
        });
    }

    /**
     * Graph 4: Tool Performance "Heatmap" - Despite the name this is a grouped horizontal
     * bar chart: one bar per tool for each of the 10 models with the highest average score.
     * A model with no (or a falsy) score for a tool is plotted as 0.
     */
    async generateToolPerformanceHeatmap(modelPerformances) {
        return this.renderGraph('tool-performance-heatmap.png', 'tool performance heatmap', () => {
            // Collect the tool names of every model (not just the top 10), alphabetically.
            const toolNames = new Set();
            for (const m of modelPerformances) {
                for (const tool of Object.keys(m.toolScores)) {
                    toolNames.add(tool);
                }
            }
            const tools = Array.from(toolNames).sort();
            // Sort a copy so the caller's array keeps its order.
            const topModels = [...modelPerformances]
                .sort((a, b) => b.averageScore - a.averageScore)
                .slice(0, 10);
            const datasets = tools.map((tool, idx) => ({
                label: tool.charAt(0).toUpperCase() + tool.slice(1),
                data: topModels.map((m) => m.toolScores[tool] || 0),
                backgroundColor: this.getToolColor(idx),
                borderWidth: 1
            }));
            return {
                type: 'horizontalBar',
                data: {
                    labels: topModels.map((m) => this.cleanModelName(m.modelId)),
                    datasets
                },
                options: {
                    plugins: {
                        datalabels: {
                            display: false
                        }
                    },
                    title: this.titleOptions('Tool-Specific Performance Patterns'),
                    scales: {
                        xAxes: [{
                                stacked: false,
                                ticks: {
                                    beginAtZero: true,
                                    max: 1.0,
                                    stepSize: 0.2,
                                    fontColor: '#FFFFFF',
                                    fontSize: 12
                                },
                                scaleLabel: this.axisLabel('Tool Score'),
                                gridLines: this.majorGridLines()
                            }],
                        yAxes: [{
                                stacked: false,
                                ticks: {
                                    fontColor: '#FFFFFF',
                                    fontSize: 11
                                },
                                gridLines: this.minorGridLines()
                            }]
                    },
                    legend: {
                        display: true,
                        position: 'right',
                        labels: {
                            fontColor: '#FFFFFF',
                            fontSize: 12
                        }
                    }
                }
            };
        });
    }

    /**
     * Graph 5: Context Window Correlation - Scatter plot of context window size (in
     * thousands of tokens) against average score, with each point labelled by model name.
     * Rendered at 1400x700 with extra right padding to leave room for the labels.
     */
    async generateContextWindowCorrelationGraph(modelPerformances) {
        return this.renderGraph('context-window-correlation.png', 'context window correlation graph', () => {
            const scatterData = modelPerformances.map((m) => ({
                x: m.capabilities.context_window / 1000, // Convert to thousands for readability
                y: m.averageScore,
                r: 8,
                label: this.cleanModelName(m.modelId)
            }));
            return {
                type: 'scatter',
                data: {
                    datasets: [{
                            label: 'Models',
                            data: scatterData,
                            backgroundColor: 'rgba(153, 102, 255, 0.7)',
                            borderColor: 'rgba(153, 102, 255, 1)',
                            borderWidth: 2,
                            pointRadius: 10
                        }]
                },
                options: {
                    layout: {
                        padding: {
                            right: 300,
                            left: 20,
                            top: 20,
                            bottom: 20
                        }
                    },
                    plugins: {
                        datalabels: {
                            display: true,
                            align: 'right',
                            offset: 12,
                            color: '#FFFFFF',
                            font: {
                                size: 20
                            },
                            formatter: (value) => value.label
                        }
                    },
                    title: this.titleOptions('Context Window Size vs Performance'),
                    scales: {
                        xAxes: [{
                                type: 'linear',
                                scaleLabel: this.axisLabel('Context Window Size (K tokens)'),
                                ticks: {
                                    callback: (value) => value + 'K',
                                    fontColor: '#FFFFFF',
                                    fontSize: 12
                                },
                                gridLines: this.majorGridLines()
                            }],
                        yAxes: [{
                                scaleLabel: this.axisLabel('Overall Score'),
                                ticks: {
                                    beginAtZero: false,
                                    min: 0.3,
                                    max: 1.0,
                                    fontColor: '#FFFFFF',
                                    fontSize: 12
                                },
                                gridLines: this.majorGridLines()
                            }]
                    },
                    legend: {
                        display: false
                    },
                    tooltips: {
                        backgroundColor: 'rgba(0, 0, 0, 0.8)',
                        titleFontColor: '#FFFFFF',
                        bodyFontColor: '#FFFFFF',
                        callbacks: {
                            label: (tooltipItem, data) => {
                                const dataset = data.datasets[tooltipItem.datasetIndex];
                                const point = dataset.data[tooltipItem.index];
                                return `${point.label}: ${point.y.toFixed(3)} (${Math.round(point.x)}K tokens)`;
                            }
                        }
                    }
                }
            };
        }, 1400, 700);
    }

    /**
     * Shared pipeline for every graph: build the chart configuration, download the
     * rendered PNG into the output directory and convert any failure into a result object.
     * @param {string} fileName File name of the PNG inside the output directory
     * @param {string} description Human-readable graph name used in the failure message
     * @param {Function} buildConfig Zero-argument function returning the chart configuration
     * @param {number} [width] Image width; downloadChart's default applies when omitted
     * @param {number} [height] Image height; downloadChart's default applies when omitted
     * @returns `{ success: true, graphPath }` or `{ success: false, error }`
     */
    async renderGraph(fileName, description, buildConfig, width, height) {
        try {
            const chartConfig = buildConfig();
            const outputPath = path.join(this.outputDir, fileName);
            await this.downloadChart(chartConfig, outputPath, width, height);
            return {
                success: true,
                graphPath: outputPath
            };
        }
        catch (error) {
            return {
                success: false,
                error: `Failed to generate ${description}: ${error}`
            };
        }
    }

    /** Title block shared by all graphs (white, bold, 18px). */
    titleOptions(text) {
        return {
            display: true,
            text,
            fontSize: 18,
            fontColor: '#FFFFFF',
            fontStyle: 'bold'
        };
    }

    /** Axis caption shared by all graphs (white, 14px). */
    axisLabel(labelString) {
        return {
            display: true,
            labelString,
            fontColor: '#FFFFFF',
            fontSize: 14
        };
    }

    /** Grid line colours used on value axes. */
    majorGridLines() {
        return {
            color: 'rgba(255, 255, 255, 0.2)',
            zeroLineColor: 'rgba(255, 255, 255, 0.4)'
        };
    }

    /** Fainter grid line colour used on category (model name) axes. */
    minorGridLines() {
        return {
            color: 'rgba(255, 255, 255, 0.1)'
        };
    }

    /**
     * Serializes a chart configuration for QuickChart while keeping function-valued
     * options. Plain JSON.stringify silently drops functions, which would discard tick
     * callbacks and label formatters. Here each function is swapped for a placeholder
     * string during stringification and the placeholder is then replaced by the
     * function's source text, producing a JavaScript object literal.
     *
     * Functions must be arrow functions or function expressions (method-shorthand source
     * is not a valid expression) and must not rely on variables from their closure,
     * because only their source text is transmitted.
     * @param chartConfig Chart configuration object
     * @returns {string} JSON text, or a JavaScript object literal when functions are present
     */
    serializeChartConfig(chartConfig) {
        const functionSources = [];
        const json = JSON.stringify(chartConfig, (_key, value) => {
            if (typeof value !== 'function') {
                return value;
            }
            functionSources.push(value.toString());
            return `__quickchart_fn_${functionSources.length - 1}__`;
        });
        // A replacer function (not a replacement string) is used so that '$' inside the
        // function source is never interpreted as a replacement pattern.
        return json.replace(/"__quickchart_fn_(\d+)__"/g, (match, index) => {
            const source = functionSources[Number(index)];
            return source === undefined ? match : source;
        });
    }

    /**
     * Downloads a chart from QuickChart.io API and saves it as PNG.
     * @param chartConfig Chart configuration object
     * @param {string} outputPath Destination file path
     * @param {number} width Image width in pixels
     * @param {number} height Image height in pixels
     * @returns {Promise<void>} Resolves once the file has been written; rejects on a
     *          non-200 status, a network error or a file write error.
     */
    async downloadChart(chartConfig, outputPath, width = 1000, height = 600) {
        const chartSource = this.serializeChartConfig(chartConfig);
        const url = `${this.quickchartBaseUrl}?c=${encodeURIComponent(chartSource)}&width=${width}&height=${height}&format=png&backgroundColor=black`;
        return new Promise((resolve, reject) => {
            const request = https.get(url, (response) => {
                if (response.statusCode !== 200) {
                    // Discard the body so the underlying socket is released.
                    response.resume();
                    reject(new Error(`QuickChart API returned status ${response.statusCode}`));
                    return;
                }
                const fileStream = fs.createWriteStream(outputPath);
                const fail = (err) => {
                    fileStream.destroy();
                    fs.unlink(outputPath, () => { }); // Clean up partial file
                    reject(err);
                };
                response.on('error', fail);
                fileStream.on('error', fail);
                fileStream.on('finish', () => {
                    fileStream.close();
                    console.log(`✅ Graph saved: ${outputPath}`);
                    resolve();
                });
                response.pipe(fileStream);
            });
            request.on('error', (err) => {
                reject(err);
            });
        });
    }

    /**
     * Cleans model names for display: removes a leading "vercel_" prefix and a trailing
     * "_YYYY-MM-DD" date suffix, then replaces the remaining underscores with hyphens.
     */
    cleanModelName(modelId) {
        return modelId
            .replace(/^vercel_/, '')
            .replace(/_\d{4}-\d{2}-\d{2}$/, '')
            .replace(/_/g, '-');
    }

    /**
     * Returns a consistent color for an index. The palette has 10 entries and wraps
     * around, so index 10 gets the same color as index 0.
     */
    getToolColor(index) {
        const colors = [
            'rgba(255, 99, 132, 0.8)', // Red
            'rgba(54, 162, 235, 0.8)', // Blue
            'rgba(255, 206, 86, 0.8)', // Yellow
            'rgba(75, 192, 192, 0.8)', // Green
            'rgba(153, 102, 255, 0.8)', // Purple
            'rgba(255, 159, 64, 0.8)', // Orange
            'rgba(199, 199, 199, 0.8)', // Grey
            'rgba(83, 102, 255, 0.8)', // Indigo
            'rgba(255, 99, 255, 0.8)', // Pink
            'rgba(99, 255, 132, 0.8)' // Light Green
        ];
        return colors[index % colors.length];
    }
}
694
+ exports.GraphGenerator = GraphGenerator;