@vfarcic/dot-ai 0.112.0 → 0.114.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/ai-provider-factory.d.ts +0 -15
- package/dist/core/ai-provider-factory.d.ts.map +1 -1
- package/dist/core/ai-provider-factory.js +12 -33
- package/dist/core/ai-provider.interface.d.ts +12 -0
- package/dist/core/ai-provider.interface.d.ts.map +1 -1
- package/dist/core/embedding-service.d.ts +35 -2
- package/dist/core/embedding-service.d.ts.map +1 -1
- package/dist/core/embedding-service.js +228 -15
- package/dist/core/model-config.d.ts +6 -0
- package/dist/core/model-config.d.ts.map +1 -1
- package/dist/core/model-config.js +7 -1
- package/dist/core/platform-utils.d.ts +10 -0
- package/dist/core/platform-utils.d.ts.map +1 -1
- package/dist/core/platform-utils.js +56 -0
- package/dist/core/providers/anthropic-provider.d.ts +2 -0
- package/dist/core/providers/anthropic-provider.d.ts.map +1 -1
- package/dist/core/providers/anthropic-provider.js +10 -0
- package/dist/core/providers/provider-debug-utils.d.ts +5 -1
- package/dist/core/providers/provider-debug-utils.d.ts.map +1 -1
- package/dist/core/providers/provider-debug-utils.js +13 -24
- package/dist/core/providers/vercel-provider.d.ts +2 -0
- package/dist/core/providers/vercel-provider.d.ts.map +1 -1
- package/dist/core/providers/vercel-provider.js +154 -63
- package/dist/core/schema.d.ts +0 -96
- package/dist/core/schema.d.ts.map +1 -1
- package/dist/core/schema.js +4 -112
- package/dist/core/unified-creation-session.d.ts.map +1 -1
- package/dist/core/unified-creation-session.js +3 -1
- package/dist/evaluation/eval-runner.js +185 -41
- package/dist/evaluation/evaluators/base-comparative.d.ts +4 -1
- package/dist/evaluation/evaluators/base-comparative.d.ts.map +1 -1
- package/dist/evaluation/evaluators/base-comparative.js +36 -1
- package/dist/evaluation/platform-synthesizer.d.ts +54 -0
- package/dist/evaluation/platform-synthesizer.d.ts.map +1 -0
- package/dist/evaluation/platform-synthesizer.js +368 -0
- package/dist/evaluation/run-platform-synthesis.d.ts +9 -0
- package/dist/evaluation/run-platform-synthesis.d.ts.map +1 -0
- package/dist/evaluation/run-platform-synthesis.js +45 -0
- package/dist/interfaces/mcp.d.ts.map +1 -1
- package/dist/interfaces/mcp.js +23 -29
- package/dist/tools/generate-manifests.d.ts.map +1 -1
- package/dist/tools/generate-manifests.js +3 -8
- package/dist/tools/recommend.d.ts.map +1 -1
- package/dist/tools/recommend.js +3 -16
- package/dist/tools/remediate.d.ts.map +1 -1
- package/dist/tools/remediate.js +10 -2
- package/dist/tools/version.d.ts +1 -0
- package/dist/tools/version.d.ts.map +1 -1
- package/dist/tools/version.js +11 -4
- package/package.json +15 -1
package/dist/core/schema.js
CHANGED
|
@@ -352,10 +352,8 @@ class ResourceRecommender {
|
|
|
352
352
|
const capabilityFilteredResources = relevantCapabilities.map(cap => ({
|
|
353
353
|
kind: this.extractKindFromResourceName(cap.data.resourceName),
|
|
354
354
|
group: this.extractGroupFromResourceName(cap.data.resourceName),
|
|
355
|
-
apiVersion: this.constructApiVersionFromResourceName(cap.data.resourceName),
|
|
356
355
|
resourceName: cap.data.resourceName,
|
|
357
|
-
|
|
358
|
-
capabilities: cap.data // Include capability data for AI decision-making
|
|
356
|
+
capabilities: cap.data // Include capability data for AI decision-making (includes namespaced, etc.)
|
|
359
357
|
}));
|
|
360
358
|
// Phase 1: Add missing pattern-suggested resources to available resources list
|
|
361
359
|
const enhancedResources = await this.addMissingPatternResources(capabilityFilteredResources, relevantPatterns);
|
|
@@ -433,9 +431,8 @@ class ResourceRecommender {
|
|
|
433
431
|
const template = (0, shared_prompt_loader_1.loadPrompt)('resource-selection');
|
|
434
432
|
// Format resources for the prompt with capability information
|
|
435
433
|
const resourcesText = resources.map((resource, index) => {
|
|
436
|
-
return `${index}: ${resource.kind.toUpperCase()}
|
|
434
|
+
return `${index}: ${resource.kind.toUpperCase()}
|
|
437
435
|
Group: ${resource.group || 'core'}
|
|
438
|
-
Namespaced: ${resource.namespaced}
|
|
439
436
|
Resource Name: ${resource.resourceName}
|
|
440
437
|
Capabilities: ${Array.isArray(resource.capabilities.capabilities) ? resource.capabilities.capabilities.join(', ') : 'Not specified'}
|
|
441
438
|
Providers: ${Array.isArray(resource.capabilities.providers) ? resource.capabilities.providers.join(', ') : resource.capabilities.providers || 'kubernetes'}
|
|
@@ -484,14 +481,10 @@ class ResourceRecommender {
|
|
|
484
481
|
const parts = suggestedResource.split('.');
|
|
485
482
|
const kind = parts[0]; // Use resource name as-is: resourcegroups, servicemonitors, etc.
|
|
486
483
|
const group = parts.length > 1 ? parts.slice(1).join('.') : '';
|
|
487
|
-
const version = 'v1beta1'; // Default version for CRDs, could be enhanced
|
|
488
|
-
const apiVersion = group ? `${group}/${version}` : version;
|
|
489
484
|
missingPatternResources.push({
|
|
490
485
|
kind,
|
|
491
486
|
group,
|
|
492
|
-
apiVersion,
|
|
493
487
|
resourceName,
|
|
494
|
-
namespaced: true, // Default assumption for pattern resources
|
|
495
488
|
capabilities: {
|
|
496
489
|
resourceName,
|
|
497
490
|
description: `Resource suggested by organizational pattern: ${pattern.description}`,
|
|
@@ -551,17 +544,8 @@ class ResourceRecommender {
|
|
|
551
544
|
// Return everything after the first dot
|
|
552
545
|
return resourceName.substring(resourceName.indexOf('.') + 1);
|
|
553
546
|
}
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
*/
|
|
557
|
-
constructApiVersionFromResourceName(resourceName) {
|
|
558
|
-
if (!resourceName.includes('.')) {
|
|
559
|
-
return 'v1'; // Core resources typically use v1
|
|
560
|
-
}
|
|
561
|
-
// For CRDs, construct group/version format
|
|
562
|
-
const group = this.extractGroupFromResourceName(resourceName);
|
|
563
|
-
return `${group}/v1beta1`; // Default to v1beta1 for CRDs
|
|
564
|
-
}
|
|
547
|
+
// Note: constructApiVersionFromResourceName method removed - no longer needed
|
|
548
|
+
// API versions are extracted from kubectl explain schema content during manifest generation
|
|
565
549
|
/**
|
|
566
550
|
* Phase 0: Search for relevant organizational patterns using multi-concept approach
|
|
567
551
|
* Returns empty array if Vector DB is not available - this is completely optional
|
|
@@ -585,98 +569,6 @@ class ResourceRecommender {
|
|
|
585
569
|
}
|
|
586
570
|
// REMOVED: selectResourceCandidates - replaced by single-phase assembleAndRankSolutions
|
|
587
571
|
// REMOVED: fetchDetailedSchemas - no longer needed in single-phase architecture
|
|
588
|
-
/**
|
|
589
|
-
const basic = `${index}: ${resource.kind} (${resource.apiVersion})
|
|
590
|
-
Group: ${resource.group || 'core'}
|
|
591
|
-
Namespaced: ${resource.namespaced}`;
|
|
592
|
-
|
|
593
|
-
// Include rich capability context if available (from capability-based pre-filtering)
|
|
594
|
-
if (resource.capabilities) {
|
|
595
|
-
const cap = resource.capabilities;
|
|
596
|
-
return `${basic}
|
|
597
|
-
Resource Name: ${resource.resourceName || 'Not specified'}
|
|
598
|
-
Capabilities: ${cap.capabilities?.join(', ') || 'Not specified'}
|
|
599
|
-
Providers: ${cap.providers?.join(', ') || 'Not specified'}
|
|
600
|
-
Complexity: ${cap.complexity || 'Not specified'}
|
|
601
|
-
Use Case: ${cap.useCase || 'Not specified'}
|
|
602
|
-
Description: ${cap.description || 'Not specified'}
|
|
603
|
-
Confidence: ${cap.confidence || 'N/A'}`;
|
|
604
|
-
}
|
|
605
|
-
|
|
606
|
-
return basic;
|
|
607
|
-
}).join('\n\n');
|
|
608
|
-
|
|
609
|
-
// Format organizational patterns for AI context
|
|
610
|
-
const patternsContext = patterns.length > 0
|
|
611
|
-
? patterns.map(pattern =>
|
|
612
|
-
`- ID: ${pattern.id}
|
|
613
|
-
Description: ${pattern.description}
|
|
614
|
-
Suggested Resources: ${pattern.suggestedResources?.join(', ') || 'Not specified'}
|
|
615
|
-
Rationale: ${pattern.rationale}
|
|
616
|
-
Triggers: ${pattern.triggers?.join(', ') || 'None'}`
|
|
617
|
-
).join('\n')
|
|
618
|
-
: 'No organizational patterns found for this request.';
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
const template = loadPrompt('resource-selection');
|
|
622
|
-
|
|
623
|
-
const selectionPrompt = template
|
|
624
|
-
.replace('{intent}', intent)
|
|
625
|
-
.replace('{resources}', resourceSummary)
|
|
626
|
-
.replace('{patterns}', patternsContext);
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
const response = await this.aiProvider.sendMessage(selectionPrompt, 'resource-selection');
|
|
630
|
-
|
|
631
|
-
try {
|
|
632
|
-
// Extract JSON from response with robust parsing
|
|
633
|
-
let jsonContent = response.content;
|
|
634
|
-
|
|
635
|
-
// First try to find JSON array wrapped in code blocks
|
|
636
|
-
const codeBlockMatch = response.content.match(/```(?:json)?\s*(\[[\s\S]*?\])\s*```/);
|
|
637
|
-
if (codeBlockMatch) {
|
|
638
|
-
jsonContent = codeBlockMatch[1];
|
|
639
|
-
} else {
|
|
640
|
-
// Try to find JSON array that starts with [ and find the matching closing ]
|
|
641
|
-
const startIndex = response.content.indexOf('[');
|
|
642
|
-
if (startIndex !== -1) {
|
|
643
|
-
let bracketCount = 0;
|
|
644
|
-
let endIndex = startIndex;
|
|
645
|
-
|
|
646
|
-
for (let i = startIndex; i < response.content.length; i++) {
|
|
647
|
-
if (response.content[i] === '[') bracketCount++;
|
|
648
|
-
if (response.content[i] === ']') bracketCount--;
|
|
649
|
-
if (bracketCount === 0) {
|
|
650
|
-
endIndex = i;
|
|
651
|
-
break;
|
|
652
|
-
}
|
|
653
|
-
}
|
|
654
|
-
|
|
655
|
-
if (bracketCount === 0) {
|
|
656
|
-
jsonContent = response.content.substring(startIndex, endIndex + 1);
|
|
657
|
-
}
|
|
658
|
-
}
|
|
659
|
-
}
|
|
660
|
-
|
|
661
|
-
const selectedResources = JSON.parse(jsonContent.trim());
|
|
662
|
-
|
|
663
|
-
if (!Array.isArray(selectedResources)) {
|
|
664
|
-
throw new Error('AI response is not an array');
|
|
665
|
-
}
|
|
666
|
-
|
|
667
|
-
// Validate that each resource has required fields
|
|
668
|
-
for (const resource of selectedResources) {
|
|
669
|
-
if (!resource.kind || !resource.apiVersion) {
|
|
670
|
-
throw new Error(`AI selected invalid resource: ${JSON.stringify(resource)}`);
|
|
671
|
-
}
|
|
672
|
-
}
|
|
673
|
-
|
|
674
|
-
return selectedResources;
|
|
675
|
-
} catch (error) {
|
|
676
|
-
throw new Error(`AI failed to select resources in valid JSON format. Error: ${(error as Error).message}. AI response: "${response.content.substring(0, 200)}..."`);
|
|
677
|
-
}
|
|
678
|
-
}
|
|
679
|
-
|
|
680
572
|
/**
|
|
681
573
|
* Phase 2: Fetch detailed schemas for selected candidates
|
|
682
574
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"unified-creation-session.d.ts","sourceRoot":"","sources":["../../src/core/unified-creation-session.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAQH,OAAO,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"unified-creation-session.d.ts","sourceRoot":"","sources":["../../src/core/unified-creation-session.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAQH,OAAO,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AAKlD,OAAO,EACL,sBAAsB,EACtB,2BAA2B,EAC3B,iCAAiC,EACjC,UAAU,EAIX,MAAM,0BAA0B,CAAC;AAKlC,qBAAa,6BAA6B;IACxC,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,SAAS,CAAsB;gBAE3B,UAAU,EAAE,UAAU,EAAE,SAAS,CAAC,EAAE,mBAAmB;IAKnE;;OAEG;IACH,aAAa,CAAC,IAAI,EAAE,GAAG,GAAG,sBAAsB;IAqBhD;;OAEG;IACH,WAAW,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,GAAG,sBAAsB,GAAG,IAAI;IAiBxE;;OAEG;IACH,eAAe,CAAC,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,GAAG,sBAAsB;IA2GvF;;OAEG;IACG,mBAAmB,CAAC,OAAO,EAAE,sBAAsB,EAAE,IAAI,CAAC,EAAE,GAAG,GAAG,OAAO,CAAC,2BAA2B,GAAG,iCAAiC,CAAC;IAoGhJ;;OAEG;YACW,4BAA4B;IA4C1C;;OAEG;YACW,gCAAgC;IAyC9C;;OAEG;IACH,OAAO,CAAC,kBAAkB;IA6E1B;;OAEG;YACW,gBAAgB;IAsF9B;;OAEG;YACW,4BAA4B;IAoH1C;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAY1B;;OAEG;YACW,qBAAqB;IAqCnC;;OAEG;YACW,mBAAmB;IAyLjC;;OAEG;YACW,uBAAuB;IAmFrC;;OAEG;IACH,OAAO,CAAC,oBAAoB;IAU5B;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAY9B;;OAEG;IACH,OAAO,CAAC,WAAW;IAgBnB;;OAEG;IACH,OAAO,CAAC,iBAAiB;CAG1B"}
|
|
@@ -50,6 +50,7 @@ const capability_vector_service_1 = require("./capability-vector-service");
|
|
|
50
50
|
const discovery_1 = require("./discovery");
|
|
51
51
|
const schema_1 = require("./schema");
|
|
52
52
|
const version_1 = require("../tools/version");
|
|
53
|
+
const platform_utils_1 = require("./platform-utils");
|
|
53
54
|
const yaml = __importStar(require("js-yaml"));
|
|
54
55
|
const unified_creation_types_1 = require("./unified-creation-types");
|
|
55
56
|
const pattern_operations_1 = require("./pattern-operations");
|
|
@@ -743,7 +744,8 @@ The policy intent has been stored in the database. The Kyverno policy was not ap
|
|
|
743
744
|
interaction_id: args?.interaction_id || 'kyverno_generation'
|
|
744
745
|
});
|
|
745
746
|
// Response should be clean YAML with analysis comments
|
|
746
|
-
|
|
747
|
+
// Extract YAML from code blocks if wrapped (using shared utility)
|
|
748
|
+
const kyvernoPolicy = (0, platform_utils_1.extractContentFromMarkdownCodeBlocks)(response.content, 'yaml');
|
|
747
749
|
// Save policy to file immediately after generation
|
|
748
750
|
const yamlPath = path.join(policySessionDir, `${session.sessionId}-kyverno.yaml`);
|
|
749
751
|
fs.writeFileSync(yamlPath, kyvernoPolicy, 'utf8');
|
|
@@ -46,6 +46,9 @@ const capability_comparative_js_1 = require("./evaluators/capability-comparative
|
|
|
46
46
|
const pattern_comparative_js_1 = require("./evaluators/pattern-comparative.js");
|
|
47
47
|
const policy_comparative_js_1 = require("./evaluators/policy-comparative.js");
|
|
48
48
|
const promises_1 = require("fs/promises");
|
|
49
|
+
const child_process_1 = require("child_process");
|
|
50
|
+
const util_1 = require("util");
|
|
51
|
+
const execAsync = (0, util_1.promisify)(child_process_1.exec);
|
|
49
52
|
const EVALUATOR_CONFIG = {
|
|
50
53
|
remediation: {
|
|
51
54
|
evaluator: remediation_comparative_js_1.RemediationComparativeEvaluator,
|
|
@@ -73,15 +76,13 @@ const EVALUATOR_CONFIG = {
|
|
|
73
76
|
title: 'Policy AI Model Comparison Report'
|
|
74
77
|
}
|
|
75
78
|
};
|
|
76
|
-
function generateMarkdownReport(results, stats, evaluationType) {
|
|
79
|
+
function generateMarkdownReport(results, stats, evaluationType, finalAssessment) {
|
|
77
80
|
const timestamp = new Date().toISOString();
|
|
78
|
-
//
|
|
79
|
-
const
|
|
81
|
+
// Use final assessment if provided
|
|
82
|
+
const overallAssessment = finalAssessment?.overall_assessment || null;
|
|
83
|
+
// Calculate basic statistics for reference
|
|
80
84
|
const modelScores = new Map();
|
|
81
85
|
results.forEach(result => {
|
|
82
|
-
const winner = result.bestModel;
|
|
83
|
-
modelWins.set(winner, (modelWins.get(winner) || 0) + 1);
|
|
84
|
-
// Collect all scores for each model
|
|
85
86
|
if (result.modelRankings) {
|
|
86
87
|
result.modelRankings.forEach((ranking) => {
|
|
87
88
|
if (!modelScores.has(ranking.model)) {
|
|
@@ -91,8 +92,7 @@ function generateMarkdownReport(results, stats, evaluationType) {
|
|
|
91
92
|
});
|
|
92
93
|
}
|
|
93
94
|
});
|
|
94
|
-
|
|
95
|
-
// Calculate average scores
|
|
95
|
+
// Calculate average scores for supplementary information
|
|
96
96
|
const modelAverages = new Map();
|
|
97
97
|
modelScores.forEach((scores, model) => {
|
|
98
98
|
const avg = scores.reduce((a, b) => a + b, 0) / scores.length;
|
|
@@ -108,20 +108,39 @@ function generateMarkdownReport(results, stats, evaluationType) {
|
|
|
108
108
|
|
|
109
109
|
## Executive Summary
|
|
110
110
|
|
|
111
|
-
### š Overall
|
|
112
|
-
${
|
|
111
|
+
### š Overall Winner (AI Assessment)
|
|
112
|
+
${overallAssessment ? `
|
|
113
|
+
**${overallAssessment.winner}**
|
|
113
114
|
|
|
114
|
-
|
|
115
|
+
${overallAssessment.rationale}
|
|
116
|
+
` : 'Overall assessment not available'}
|
|
115
117
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
+
### š AI Reliability Rankings
|
|
119
|
+
|
|
120
|
+
${overallAssessment ? overallAssessment.reliability_ranking
|
|
121
|
+
.map((ranking, index) => `${index + 1}. **${ranking.model}** (${Math.round(ranking.reliability_score * 100)}%) - ${ranking.reliability_notes}`)
|
|
122
|
+
.join('\n') : 'Reliability rankings not available'}
|
|
123
|
+
|
|
124
|
+
### š Production Recommendations
|
|
125
|
+
|
|
126
|
+
${overallAssessment ? `
|
|
127
|
+
- **Primary Choice**: ${overallAssessment.production_recommendations.primary}
|
|
128
|
+
- **Secondary Option**: ${overallAssessment.production_recommendations.secondary}
|
|
129
|
+
- **Avoid for Production**: ${overallAssessment.production_recommendations.avoid.length > 0 ? overallAssessment.production_recommendations.avoid.join(', ') : 'None'}
|
|
130
|
+
${Object.keys(overallAssessment.production_recommendations.specialized_use).length > 0 ?
|
|
131
|
+
'\n**Specialized Use Cases:**\n' + Object.entries(overallAssessment.production_recommendations.specialized_use)
|
|
132
|
+
.map(([useCase, model]) => `- **${useCase}**: ${model}`)
|
|
133
|
+
.join('\n') : ''}
|
|
134
|
+
` : 'Production recommendations not available'}
|
|
135
|
+
|
|
136
|
+
### š Supplementary Statistics (Reference Only)
|
|
137
|
+
|
|
138
|
+
| Model | Avg Score | Notes |
|
|
139
|
+
|-------|-----------|-------|
|
|
118
140
|
${Array.from(modelAverages.entries())
|
|
119
141
|
.sort((a, b) => b[1] - a[1])
|
|
120
|
-
.map(([model, avgScore]) => {
|
|
121
|
-
|
|
122
|
-
const performance = wins > 0 ? 'š¢ Strong' : wins === 0 && avgScore > 0.8 ? 'š” Good' : 'š“ Weak';
|
|
123
|
-
return `| ${model} | ${avgScore} | ${wins} | ${performance} |`;
|
|
124
|
-
}).join('\n')}
|
|
142
|
+
.map(([model, avgScore]) => `| ${model} | ${avgScore} | See AI assessment above |`)
|
|
143
|
+
.join('\n')}
|
|
125
144
|
|
|
126
145
|
## Detailed Scenario Results
|
|
127
146
|
|
|
@@ -142,14 +161,21 @@ ${result.comment}
|
|
|
142
161
|
---`;
|
|
143
162
|
}).join('\n\n')}
|
|
144
163
|
|
|
145
|
-
## Model Selection Guide
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
164
|
+
## AI Model Selection Guide
|
|
165
|
+
|
|
166
|
+
${overallAssessment ? `
|
|
167
|
+
### Key Insights
|
|
168
|
+
${overallAssessment.key_insights}
|
|
169
|
+
|
|
170
|
+
### Recommended Selection Strategy
|
|
171
|
+
- **For Production Use**: Choose ${overallAssessment.production_recommendations.primary}
|
|
172
|
+
- **For Secondary Option**: Consider ${overallAssessment.production_recommendations.secondary}
|
|
173
|
+
${overallAssessment.production_recommendations.avoid.length > 0 ?
|
|
174
|
+
`- **Avoid**: ${overallAssessment.production_recommendations.avoid.join(', ')} (reliability concerns)` : ''}
|
|
175
|
+
|
|
176
|
+
### Decision Framework
|
|
177
|
+
The AI assessment prioritizes **reliability and consistency** over peak performance. Models that fail completely in any scenario are heavily penalized, ensuring production-ready recommendations.
|
|
178
|
+
` : 'AI model selection guide not available'}
|
|
153
179
|
|
|
154
180
|
---
|
|
155
181
|
|
|
@@ -158,12 +184,74 @@ ${sortedWins.slice(0, 3).map(([model], index) => {
|
|
|
158
184
|
Report generated by DevOps AI Toolkit Comparative Evaluation System
|
|
159
185
|
`;
|
|
160
186
|
}
|
|
161
|
-
|
|
187
|
+
function loadModelMetadata() {
|
|
188
|
+
try {
|
|
189
|
+
const fs = require('fs');
|
|
190
|
+
const path = require('path');
|
|
191
|
+
const metadataPath = path.join(__dirname, 'model-metadata.json');
|
|
192
|
+
if (!fs.existsSync(metadataPath)) {
|
|
193
|
+
console.error('ā Model metadata file not found');
|
|
194
|
+
console.error('š Pricing and capabilities data required for cost analysis');
|
|
195
|
+
console.error('');
|
|
196
|
+
console.error('š To create model metadata, run:');
|
|
197
|
+
console.error(' /update-model-metadata');
|
|
198
|
+
console.error('');
|
|
199
|
+
process.exit(1);
|
|
200
|
+
}
|
|
201
|
+
const metadata = JSON.parse(fs.readFileSync(metadataPath, 'utf8'));
|
|
202
|
+
// Check if metadata is older than 30 days
|
|
203
|
+
const metadataAge = Date.now() - new Date(metadata.lastUpdated).getTime();
|
|
204
|
+
const thirtyDays = 30 * 24 * 60 * 60 * 1000;
|
|
205
|
+
if (metadataAge > thirtyDays) {
|
|
206
|
+
console.error('ā Model metadata is over 30 days old (last updated: ' + metadata.lastUpdated + ')');
|
|
207
|
+
console.error('š Pricing and capabilities data may be outdated, affecting cost analysis accuracy');
|
|
208
|
+
console.error('');
|
|
209
|
+
console.error('š To update model metadata, run:');
|
|
210
|
+
console.error(' /update-model-metadata');
|
|
211
|
+
console.error('');
|
|
212
|
+
process.exit(1);
|
|
213
|
+
}
|
|
214
|
+
console.log('ā
Model metadata loaded (updated: ' + metadata.lastUpdated + ')');
|
|
215
|
+
return metadata;
|
|
216
|
+
}
|
|
217
|
+
catch (error) {
|
|
218
|
+
console.error('ā Failed to load model metadata:', error instanceof Error ? error.message : String(error));
|
|
219
|
+
console.error('š To create model metadata, run: /update-model-metadata');
|
|
220
|
+
process.exit(1);
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
function generateJsonReport(results, stats, evaluationType, modelMetadata, finalAssessment) {
|
|
224
|
+
const timestamp = new Date().toISOString();
|
|
225
|
+
// Use final assessment if provided
|
|
226
|
+
const overallAssessment = finalAssessment || null;
|
|
227
|
+
return {
|
|
228
|
+
metadata: {
|
|
229
|
+
reportType: 'comparative-evaluation',
|
|
230
|
+
evaluationType: evaluationType,
|
|
231
|
+
generated: timestamp,
|
|
232
|
+
scenariosAnalyzed: results.length,
|
|
233
|
+
modelsEvaluated: stats.availableModels.length,
|
|
234
|
+
totalDatasets: stats.totalDatasets,
|
|
235
|
+
tool: EVALUATOR_CONFIG[evaluationType].title
|
|
236
|
+
},
|
|
237
|
+
modelMetadata: modelMetadata.models,
|
|
238
|
+
overallAssessment: overallAssessment,
|
|
239
|
+
results: results,
|
|
240
|
+
summary: stats
|
|
241
|
+
};
|
|
242
|
+
}
|
|
243
|
+
async function detectAvailableDatasets(datasetsDir, filterType) {
|
|
162
244
|
try {
|
|
163
245
|
const files = await (0, promises_1.readdir)(datasetsDir);
|
|
164
246
|
const result = {};
|
|
165
247
|
for (const [type, config] of Object.entries(EVALUATOR_CONFIG)) {
|
|
166
|
-
|
|
248
|
+
// If filter specified, only check for that type
|
|
249
|
+
if (filterType && type !== filterType) {
|
|
250
|
+
result[type] = false;
|
|
251
|
+
}
|
|
252
|
+
else {
|
|
253
|
+
result[type] = files.some(file => file.startsWith(config.prefix));
|
|
254
|
+
}
|
|
167
255
|
}
|
|
168
256
|
return result;
|
|
169
257
|
}
|
|
@@ -176,7 +264,7 @@ async function detectAvailableDatasets(datasetsDir) {
|
|
|
176
264
|
return result;
|
|
177
265
|
}
|
|
178
266
|
}
|
|
179
|
-
async function runEvaluation(evaluatorType, datasetsDir) {
|
|
267
|
+
async function runEvaluation(evaluatorType, datasetsDir, modelMetadata) {
|
|
180
268
|
const EvaluatorClass = EVALUATOR_CONFIG[evaluatorType].evaluator;
|
|
181
269
|
const evaluator = new EvaluatorClass(datasetsDir);
|
|
182
270
|
console.log(`\nš¬ Starting ${evaluatorType.charAt(0).toUpperCase() + evaluatorType.slice(1)} Evaluation\n`);
|
|
@@ -201,18 +289,26 @@ async function runEvaluation(evaluatorType, datasetsDir) {
|
|
|
201
289
|
console.log('š Running Comparative Evaluation...\n');
|
|
202
290
|
const results = await evaluator.evaluateAllScenarios();
|
|
203
291
|
console.log(`ā
${evaluatorType.charAt(0).toUpperCase() + evaluatorType.slice(1)} Evaluation Complete! Analyzed ${results.length} scenarios\n`);
|
|
204
|
-
//
|
|
205
|
-
const
|
|
206
|
-
//
|
|
207
|
-
const
|
|
208
|
-
const
|
|
292
|
+
// Conduct final assessment across all scenarios
|
|
293
|
+
const finalAssessment = await evaluator.conductFinalAssessment(results);
|
|
294
|
+
// Generate dual-format reports using final assessment
|
|
295
|
+
const reportContent = generateMarkdownReport(results, stats, evaluatorType, finalAssessment);
|
|
296
|
+
const jsonResults = generateJsonReport(results, stats, evaluatorType, modelMetadata, finalAssessment);
|
|
297
|
+
// Save reports to files
|
|
298
|
+
const dateStamp = new Date().toISOString().split('T')[0];
|
|
299
|
+
const markdownPath = `./eval/analysis/individual/${evaluatorType}-evaluation-${dateStamp}.md`;
|
|
300
|
+
const jsonPath = `./eval/analysis/individual/${evaluatorType}-results-${dateStamp}.json`;
|
|
301
|
+
const reportDir = './eval/analysis/individual';
|
|
209
302
|
// Ensure report directory exists
|
|
210
303
|
const fs = await Promise.resolve().then(() => __importStar(require('fs')));
|
|
211
304
|
if (!fs.existsSync(reportDir)) {
|
|
212
305
|
fs.mkdirSync(reportDir, { recursive: true });
|
|
213
306
|
}
|
|
214
|
-
fs.writeFileSync(
|
|
215
|
-
|
|
307
|
+
fs.writeFileSync(markdownPath, reportContent);
|
|
308
|
+
fs.writeFileSync(jsonPath, JSON.stringify(jsonResults, null, 2));
|
|
309
|
+
console.log(`š ${evaluatorType.charAt(0).toUpperCase() + evaluatorType.slice(1)} reports generated:`);
|
|
310
|
+
console.log(` š Markdown: ${markdownPath}`);
|
|
311
|
+
console.log(` š JSON: ${jsonPath}`);
|
|
216
312
|
// Brief console summary
|
|
217
313
|
console.log(`š ${evaluatorType.charAt(0).toUpperCase() + evaluatorType.slice(1)} Results:`);
|
|
218
314
|
results.forEach((result, index) => {
|
|
@@ -222,24 +318,72 @@ async function runEvaluation(evaluatorType, datasetsDir) {
|
|
|
222
318
|
}
|
|
223
319
|
async function main() {
|
|
224
320
|
console.log('š¬ Starting Multi-Model Comparative Evaluation\n');
|
|
321
|
+
// Clean old debug files but preserve evaluation datasets
|
|
322
|
+
console.log('š§¹ Cleaning old debug files...');
|
|
323
|
+
try {
|
|
324
|
+
await execAsync('find ./tmp/debug-ai -type f ! -name \'*.jsonl\' -delete 2>/dev/null || true');
|
|
325
|
+
await execAsync('mkdir -p ./tmp/debug-ai');
|
|
326
|
+
console.log('ā
Debug files cleaned (datasets preserved)\n');
|
|
327
|
+
}
|
|
328
|
+
catch (error) {
|
|
329
|
+
console.warn('ā ļø Could not clean debug files:', error instanceof Error ? error.message : String(error));
|
|
330
|
+
}
|
|
331
|
+
// Check model metadata freshness before starting any evaluation work
|
|
332
|
+
const modelMetadata = loadModelMetadata();
|
|
225
333
|
const datasetsDir = './eval/datasets';
|
|
226
|
-
|
|
334
|
+
// Parse command line arguments for subset evaluation
|
|
335
|
+
const args = process.argv.slice(2);
|
|
336
|
+
let filterType = undefined;
|
|
337
|
+
if (args.length > 0) {
|
|
338
|
+
const requestedType = args[0];
|
|
339
|
+
if (requestedType in EVALUATOR_CONFIG) {
|
|
340
|
+
filterType = requestedType;
|
|
341
|
+
}
|
|
342
|
+
else {
|
|
343
|
+
console.error(`ā Invalid evaluation type: "${requestedType}"`);
|
|
344
|
+
console.error(`ā
Available types: ${Object.keys(EVALUATOR_CONFIG).join(', ')}`);
|
|
345
|
+
process.exit(1);
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
const availableDatasets = await detectAvailableDatasets(datasetsDir, filterType);
|
|
227
349
|
console.log('š Dataset Detection:');
|
|
228
350
|
for (const [type, available] of Object.entries(availableDatasets)) {
|
|
229
351
|
console.log(`- ${type.charAt(0).toUpperCase() + type.slice(1)} datasets: ${available ? 'ā
' : 'ā'}`);
|
|
230
352
|
}
|
|
353
|
+
if (filterType) {
|
|
354
|
+
console.log(`\nšÆ Running evaluation for: ${filterType}`);
|
|
355
|
+
}
|
|
231
356
|
const hasAnyDatasets = Object.values(availableDatasets).some(Boolean);
|
|
232
357
|
if (!hasAnyDatasets) {
|
|
233
|
-
|
|
358
|
+
if (filterType) {
|
|
359
|
+
console.error(`ā No datasets found for type: ${filterType}`);
|
|
360
|
+
}
|
|
361
|
+
else {
|
|
362
|
+
console.error('ā No evaluation datasets found. Please run integration tests first to generate datasets.');
|
|
363
|
+
}
|
|
234
364
|
process.exit(1);
|
|
235
365
|
}
|
|
236
366
|
try {
|
|
237
367
|
const allResults = [];
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
368
|
+
// If filterType is specified, only run that evaluation type
|
|
369
|
+
if (filterType) {
|
|
370
|
+
if (availableDatasets[filterType]) {
|
|
371
|
+
const results = await runEvaluation(filterType, datasetsDir, modelMetadata);
|
|
241
372
|
allResults.push(...results);
|
|
242
373
|
}
|
|
374
|
+
else {
|
|
375
|
+
console.error(`ā No datasets available for type: ${filterType}`);
|
|
376
|
+
process.exit(1);
|
|
377
|
+
}
|
|
378
|
+
}
|
|
379
|
+
else {
|
|
380
|
+
// Run all available evaluations
|
|
381
|
+
for (const [type, available] of Object.entries(availableDatasets)) {
|
|
382
|
+
if (available) {
|
|
383
|
+
const results = await runEvaluation(type, datasetsDir, modelMetadata);
|
|
384
|
+
allResults.push(...results);
|
|
385
|
+
}
|
|
386
|
+
}
|
|
243
387
|
}
|
|
244
388
|
console.log(`\nš All Evaluations Complete! Total scenarios analyzed: ${allResults.length}`);
|
|
245
389
|
console.log(`š Check ./eval/reports/ for detailed analysis reports\n`);
|
|
@@ -31,7 +31,6 @@ export interface ComparativeEvaluationResult {
|
|
|
31
31
|
reasoning?: string;
|
|
32
32
|
}>;
|
|
33
33
|
overall_insights?: string;
|
|
34
|
-
overall_assessment?: string;
|
|
35
34
|
}
|
|
36
35
|
export interface ComparativeEvaluationScore extends EvaluationScore {
|
|
37
36
|
modelRankings: Array<{
|
|
@@ -60,6 +59,10 @@ export declare abstract class BaseComparativeEvaluator {
|
|
|
60
59
|
* This method finds all scenarios with multiple model responses and evaluates them comparatively
|
|
61
60
|
*/
|
|
62
61
|
evaluateAllScenarios(): Promise<ComparativeEvaluationScore[]>;
|
|
62
|
+
/**
|
|
63
|
+
* Conduct final assessment across all scenarios to determine overall winner
|
|
64
|
+
*/
|
|
65
|
+
conductFinalAssessment(scenarioResults: ComparativeEvaluationScore[]): Promise<any>;
|
|
63
66
|
/**
|
|
64
67
|
* Evaluate a single scenario comparing all available models
|
|
65
68
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"base-comparative.d.ts","sourceRoot":"","sources":["../../../src/evaluation/evaluators/base-comparative.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAC5C,OAAO,EAAE,cAAc,EAAE,MAAM,sCAAsC,CAAC;AAKtE,OAAO,EAAE,eAAe,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AAE7E,MAAM,WAAW,2BAA2B;IAC1C,gBAAgB,EAAE,MAAM,CAAC;IACzB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,oBAAoB,EAAE,MAAM,CAAC,MAAM,EAAE;QACnC,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,iBAAiB,CAAC,EAAE,MAAM,CAAC;QAC3B,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,kBAAkB,CAAC,EAAE,MAAM,CAAC;QAC5B,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,iBAAiB,CAAC,EAAE,MAAM,CAAC;QAC3B,cAAc,EAAE,MAAM,CAAC;QACvB,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC,CAAC;IACH,OAAO,EAAE,KAAK,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC,CAAC;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"base-comparative.d.ts","sourceRoot":"","sources":["../../../src/evaluation/evaluators/base-comparative.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAC5C,OAAO,EAAE,cAAc,EAAE,MAAM,sCAAsC,CAAC;AAKtE,OAAO,EAAE,eAAe,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AAE7E,MAAM,WAAW,2BAA2B;IAC1C,gBAAgB,EAAE,MAAM,CAAC;IACzB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,oBAAoB,EAAE,MAAM,CAAC,MAAM,EAAE;QACnC,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,iBAAiB,CAAC,EAAE,MAAM,CAAC;QAC3B,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,kBAAkB,CAAC,EAAE,MAAM,CAAC;QAC5B,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,iBAAiB,CAAC,EAAE,MAAM,CAAC;QAC3B,cAAc,EAAE,MAAM,CAAC;QACvB,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC,CAAC;IACH,OAAO,EAAE,KAAK,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC,CAAC;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,0BAA2B,SAAQ,eAAe;IACjE,aAAa,EAAE,KAAK,CAAC;QACnB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;IACH,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,8BAAsB,wBAAwB;IAC5C,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IACtC,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IACnD,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAE7C,SAAS,CAAC,cAAc,EAAE,cAAc,CAAC;IACzC,SAAS,CAAC,eAAe,EAAE,eAAe,CAAC;IAC3C,SAAS,CAAC,cAAc,EAAE,MAAM,CAAC;gBAErB,UAAU,CAAC,EAAE,MAAM;IAe/B;;OAEG;IACH,SAAS,CAAC,gBAAgB;IAK1B;;;OAGG;IACG,oBAAoB,IAAI,OAAO,CAAC,0BAA0B,EAAE,CAAC;IAkBnE;;OAEG;IACG,sBAAsB,CAAC,eAAe,EAAE,0BAA0B,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;IA6CzF;;OAEG;IACG,gBAAgB,CAAC,QAAQ,EAAE,kBAAkB,GAAG,OAAO,CAAC,0BAA0B,CAAC;IAsFzF;;OAEG;IACH,SAAS,CAAC,qBAAqB,CAAC,QAAQ,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM;IAQpH;;OAEG;IACH,eAAe;;;;;;IAIf;;;OAGG;IACH,QAAQ,CAAC,mBAAmB,IAAI;QAC9B,KAAK,EAAE,MAAM,CAAC;QA
Cd,WAAW,EAAE,MAAM,CAAC;QACpB,eAAe,EAAE,MAAM,EAAE,CAAC;QAC1B,aAAa,EAAE,MAAM,CAAC;KACvB,EAAE;CACJ"}
|
|
@@ -55,6 +55,41 @@ class BaseComparativeEvaluator {
|
|
|
55
55
|
}
|
|
56
56
|
return results;
|
|
57
57
|
}
|
|
58
|
+
/**
|
|
59
|
+
* Conduct final assessment across all scenarios to determine overall winner
|
|
60
|
+
*/
|
|
61
|
+
async conductFinalAssessment(scenarioResults) {
|
|
62
|
+
if (scenarioResults.length === 0) {
|
|
63
|
+
throw new Error('No scenario results provided for final assessment');
|
|
64
|
+
}
|
|
65
|
+
// Load the overall winner assessment prompt
|
|
66
|
+
const promptPath = (0, path_1.join)(process.cwd(), 'src', 'evaluation', 'prompts', 'overall-winner-assessment.md');
|
|
67
|
+
const overallWinnerTemplate = (0, fs_1.readFileSync)(promptPath, 'utf8');
|
|
68
|
+
// Get all models that should have been tested (from first scenario)
|
|
69
|
+
const allModels = scenarioResults[0]?.modelRankings?.map(r => r.model) || [];
|
|
70
|
+
// Build the final assessment prompt with raw data
|
|
71
|
+
const finalPrompt = overallWinnerTemplate
|
|
72
|
+
.replace('{tool_type}', this.toolName)
|
|
73
|
+
.replace('{total_scenarios}', scenarioResults.length.toString())
|
|
74
|
+
.replace('{expected_models}', JSON.stringify(allModels))
|
|
75
|
+
.replace('{scenario_results}', JSON.stringify(scenarioResults, null, 2));
|
|
76
|
+
try {
|
|
77
|
+
console.log(`\nš Conducting final assessment across ${scenarioResults.length} scenarios for ${this.toolName}\n`);
|
|
78
|
+
const response = await this.evaluatorModel.sendMessage(finalPrompt, `${this.name}-final-assessment`, {
|
|
79
|
+
user_intent: `Final cross-scenario assessment for ${this.toolName}`,
|
|
80
|
+
interaction_id: 'final-assessment'
|
|
81
|
+
});
|
|
82
|
+
// Extract JSON from AI response
|
|
83
|
+
const finalAssessment = (0, platform_utils_1.extractJsonFromAIResponse)(response.content);
|
|
84
|
+
console.log(`✅ Final Assessment Complete for ${this.toolName}`);
|
|
85
|
+
console.log(`š Overall Winner: ${finalAssessment.overall_assessment?.winner || 'Unknown'}`);
|
|
86
|
+
return finalAssessment;
|
|
87
|
+
}
|
|
88
|
+
catch (error) {
|
|
89
|
+
console.error(`Final assessment failed for ${this.toolName}:`, error);
|
|
90
|
+
throw error;
|
|
91
|
+
}
|
|
92
|
+
}
|
|
58
93
|
/**
|
|
59
94
|
* Evaluate a single scenario comparing all available models
|
|
60
95
|
*/
|
|
@@ -108,7 +143,7 @@ ${modelResponse.response}
|
|
|
108
143
|
return {
|
|
109
144
|
key: `${this.name}_${scenario.interaction_id}`,
|
|
110
145
|
score: bestScore,
|
|
111
|
-
comment: evaluation.overall_insights ||
|
|
146
|
+
comment: evaluation.overall_insights || 'Comparative evaluation completed',
|
|
112
147
|
confidence: 0.9, // High confidence for comparative evaluation
|
|
113
148
|
modelRankings: rankings.map(r => ({
|
|
114
149
|
rank: r.rank,
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { VercelProvider } from '../core/providers/vercel-provider.js';
|
|
2
|
+
export interface ModelPerformance {
|
|
3
|
+
modelId: string;
|
|
4
|
+
provider: string;
|
|
5
|
+
toolScores: Record<string, number>;
|
|
6
|
+
averageScore: number;
|
|
7
|
+
participationRate: number;
|
|
8
|
+
reliabilityScore: number;
|
|
9
|
+
consistencyAcrossTools: number;
|
|
10
|
+
pricing: {
|
|
11
|
+
input_cost_per_million_tokens: number;
|
|
12
|
+
output_cost_per_million_tokens: number;
|
|
13
|
+
};
|
|
14
|
+
capabilities: {
|
|
15
|
+
context_window: number;
|
|
16
|
+
supports_function_calling: boolean;
|
|
17
|
+
};
|
|
18
|
+
}
|
|
19
|
+
export interface DecisionMatrix {
|
|
20
|
+
qualityLeaders: ModelPerformance[];
|
|
21
|
+
speedOptimized: ModelPerformance[];
|
|
22
|
+
costEffective: ModelPerformance[];
|
|
23
|
+
balanced: ModelPerformance[];
|
|
24
|
+
reliabilityFocused: ModelPerformance[];
|
|
25
|
+
}
|
|
26
|
+
export interface UsageRecommendation {
|
|
27
|
+
priority: 'quality-first' | 'speed-first' | 'cost-first' | 'balanced';
|
|
28
|
+
primaryModel: string;
|
|
29
|
+
fallbackModel: string;
|
|
30
|
+
reasoning: string;
|
|
31
|
+
costImplications: string;
|
|
32
|
+
useCases: string[];
|
|
33
|
+
}
|
|
34
|
+
export declare class PlatformSynthesizer {
|
|
35
|
+
private aiProvider;
|
|
36
|
+
private reportsDir;
|
|
37
|
+
constructor(aiProvider: VercelProvider, reportsDir?: string);
|
|
38
|
+
generatePlatformWideAnalysis(): Promise<string>;
|
|
39
|
+
private loadToolMetadata;
|
|
40
|
+
private loadAllReports;
|
|
41
|
+
private analyzeCrossToolPerformance;
|
|
42
|
+
private calculateModelPerformances;
|
|
43
|
+
private generateDecisionMatrices;
|
|
44
|
+
private generateUsageRecommendations;
|
|
45
|
+
private generatePlatformInsights;
|
|
46
|
+
private extractKeyFindings;
|
|
47
|
+
private categorizeModelTiers;
|
|
48
|
+
private identifyCrossToolPatterns;
|
|
49
|
+
private generateProductionRecommendations;
|
|
50
|
+
private calculateCostEstimate;
|
|
51
|
+
private extractBaseModelId;
|
|
52
|
+
saveSynthesisReport(markdownContent: string, outputPath?: string): Promise<void>;
|
|
53
|
+
}
|
|
54
|
+
//# sourceMappingURL=platform-synthesizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"platform-synthesizer.d.ts","sourceRoot":"","sources":["../../src/evaluation/platform-synthesizer.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,cAAc,EAAE,MAAM,sCAAsC,CAAC;AAEtE,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACnC,YAAY,EAAE,MAAM,CAAC;IACrB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,gBAAgB,EAAE,MAAM,CAAC;IACzB,sBAAsB,EAAE,MAAM,CAAC;IAC/B,OAAO,EAAE;QACP,6BAA6B,EAAE,MAAM,CAAC;QACtC,8BAA8B,EAAE,MAAM,CAAC;KACxC,CAAC;IACF,YAAY,EAAE;QACZ,cAAc,EAAE,MAAM,CAAC;QACvB,yBAAyB,EAAE,OAAO,CAAC;KACpC,CAAC;CACH;AAED,MAAM,WAAW,cAAc;IAC7B,cAAc,EAAE,gBAAgB,EAAE,CAAC;IACnC,cAAc,EAAE,gBAAgB,EAAE,CAAC;IACnC,aAAa,EAAE,gBAAgB,EAAE,CAAC;IAClC,QAAQ,EAAE,gBAAgB,EAAE,CAAC;IAC7B,kBAAkB,EAAE,gBAAgB,EAAE,CAAC;CACxC;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,EAAE,eAAe,GAAG,aAAa,GAAG,YAAY,GAAG,UAAU,CAAC;IACtE,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,qBAAa,mBAAmB;IAC9B,OAAO,CAAC,UAAU,CAAiB;IACnC,OAAO,CAAC,UAAU,CAAS;gBAEf,UAAU,EAAE,cAAc,EAAE,UAAU,SAA+B;IAK3E,4BAA4B,IAAI,OAAO,CAAC,MAAM,CAAC;IA8BrD,OAAO,CAAC,gBAAgB;YAaV,cAAc;YA0Bd,2BAA2B;IA8DzC,OAAO,CAAC,0BAA0B;IA2ElC,OAAO,CAAC,wBAAwB;IAiDhC,OAAO,CAAC,4BAA4B;YA0CtB,wBAAwB;IAoBtC,OAAO,CAAC,kBAAkB;IAe1B,OAAO,CAAC,oBAAoB;IAsB5B,OAAO,CAAC,yBAAyB;IAWjC,OAAO,CAAC,iCAAiC;IASzC,OAAO,CAAC,qBAAqB;IAQ7B,OAAO,CAAC,kBAAkB;IASpB,mBAAmB,CACvB,eAAe,EAAE,MAAM,EACvB,UAAU,SAAiD,GAC1D,OAAO,CAAC,IAAI,CAAC;CAWjB"}
|