@vfarcic/dot-ai 0.111.0 → 0.113.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/ai-provider-factory.d.ts +0 -10
- package/dist/core/ai-provider-factory.d.ts.map +1 -1
- package/dist/core/ai-provider-factory.js +14 -24
- package/dist/core/ai-provider.interface.d.ts +28 -1
- package/dist/core/ai-provider.interface.d.ts.map +1 -1
- package/dist/core/capabilities.d.ts +1 -1
- package/dist/core/capabilities.d.ts.map +1 -1
- package/dist/core/capabilities.js +7 -4
- package/dist/core/capability-scan-workflow.js +2 -2
- package/dist/core/embedding-service.d.ts +35 -2
- package/dist/core/embedding-service.d.ts.map +1 -1
- package/dist/core/embedding-service.js +228 -15
- package/dist/core/model-config.d.ts +23 -0
- package/dist/core/model-config.d.ts.map +1 -0
- package/dist/core/model-config.js +28 -0
- package/dist/core/platform-operations.d.ts.map +1 -1
- package/dist/core/platform-operations.js +3 -5
- package/dist/core/platform-utils.d.ts +13 -2
- package/dist/core/platform-utils.d.ts.map +1 -1
- package/dist/core/platform-utils.js +91 -9
- package/dist/core/providers/anthropic-provider.d.ts +6 -1
- package/dist/core/providers/anthropic-provider.d.ts.map +1 -1
- package/dist/core/providers/anthropic-provider.js +99 -27
- package/dist/core/providers/provider-debug-utils.d.ts +53 -20
- package/dist/core/providers/provider-debug-utils.d.ts.map +1 -1
- package/dist/core/providers/provider-debug-utils.js +106 -51
- package/dist/core/providers/vercel-provider.d.ts +6 -1
- package/dist/core/providers/vercel-provider.d.ts.map +1 -1
- package/dist/core/providers/vercel-provider.js +212 -130
- package/dist/core/schema.d.ts +1 -101
- package/dist/core/schema.d.ts.map +1 -1
- package/dist/core/schema.js +20 -154
- package/dist/core/unified-creation-session.d.ts.map +1 -1
- package/dist/core/unified-creation-session.js +15 -7
- package/dist/evaluation/dataset-analyzer.d.ts +118 -0
- package/dist/evaluation/dataset-analyzer.d.ts.map +1 -0
- package/dist/evaluation/dataset-analyzer.js +234 -0
- package/dist/evaluation/datasets/loader.d.ts +42 -0
- package/dist/evaluation/datasets/loader.d.ts.map +1 -0
- package/dist/evaluation/datasets/loader.js +104 -0
- package/dist/evaluation/eval-runner.d.ts +9 -0
- package/dist/evaluation/eval-runner.d.ts.map +1 -0
- package/dist/evaluation/eval-runner.js +399 -0
- package/dist/evaluation/evaluators/base-comparative.d.ts +94 -0
- package/dist/evaluation/evaluators/base-comparative.d.ts.map +1 -0
- package/dist/evaluation/evaluators/base-comparative.js +187 -0
- package/dist/evaluation/evaluators/base.d.ts +47 -0
- package/dist/evaluation/evaluators/base.d.ts.map +1 -0
- package/dist/evaluation/evaluators/base.js +10 -0
- package/dist/evaluation/evaluators/capability-comparative.d.ts +32 -0
- package/dist/evaluation/evaluators/capability-comparative.d.ts.map +1 -0
- package/dist/evaluation/evaluators/capability-comparative.js +104 -0
- package/dist/evaluation/evaluators/pattern-comparative.d.ts +31 -0
- package/dist/evaluation/evaluators/pattern-comparative.d.ts.map +1 -0
- package/dist/evaluation/evaluators/pattern-comparative.js +97 -0
- package/dist/evaluation/evaluators/policy-comparative.d.ts +31 -0
- package/dist/evaluation/evaluators/policy-comparative.d.ts.map +1 -0
- package/dist/evaluation/evaluators/policy-comparative.js +97 -0
- package/dist/evaluation/evaluators/recommendation-comparative.d.ts +25 -0
- package/dist/evaluation/evaluators/recommendation-comparative.d.ts.map +1 -0
- package/dist/evaluation/evaluators/recommendation-comparative.js +55 -0
- package/dist/evaluation/evaluators/remediation-comparative.d.ts +25 -0
- package/dist/evaluation/evaluators/remediation-comparative.d.ts.map +1 -0
- package/dist/evaluation/evaluators/remediation-comparative.js +54 -0
- package/dist/evaluation/platform-synthesizer.d.ts +54 -0
- package/dist/evaluation/platform-synthesizer.d.ts.map +1 -0
- package/dist/evaluation/platform-synthesizer.js +368 -0
- package/dist/evaluation/run-platform-synthesis.d.ts +9 -0
- package/dist/evaluation/run-platform-synthesis.d.ts.map +1 -0
- package/dist/evaluation/run-platform-synthesis.js +45 -0
- package/dist/interfaces/mcp.d.ts.map +1 -1
- package/dist/interfaces/mcp.js +23 -29
- package/dist/interfaces/rest-api.d.ts.map +1 -1
- package/dist/tools/answer-question.d.ts +2 -0
- package/dist/tools/answer-question.d.ts.map +1 -1
- package/dist/tools/answer-question.js +18 -11
- package/dist/tools/generate-manifests.d.ts +2 -0
- package/dist/tools/generate-manifests.d.ts.map +1 -1
- package/dist/tools/generate-manifests.js +11 -12
- package/dist/tools/organizational-data.d.ts +1 -0
- package/dist/tools/organizational-data.d.ts.map +1 -1
- package/dist/tools/organizational-data.js +2 -1
- package/dist/tools/recommend.d.ts +1 -0
- package/dist/tools/recommend.d.ts.map +1 -1
- package/dist/tools/recommend.js +13 -21
- package/dist/tools/remediate.d.ts +3 -0
- package/dist/tools/remediate.d.ts.map +1 -1
- package/dist/tools/remediate.js +35 -14
- package/dist/tools/test-docs.d.ts +1 -0
- package/dist/tools/test-docs.d.ts.map +1 -1
- package/dist/tools/test-docs.js +4 -2
- package/dist/tools/version.d.ts +5 -1
- package/dist/tools/version.d.ts.map +1 -1
- package/dist/tools/version.js +23 -8
- package/package.json +19 -1
|
@@ -41,7 +41,8 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
41
41
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
42
42
|
exports.ensureDebugDirectory = ensureDebugDirectory;
|
|
43
43
|
exports.generateDebugId = generateDebugId;
|
|
44
|
-
exports.
|
|
44
|
+
exports.shouldSkipDatasetGeneration = shouldSkipDatasetGeneration;
|
|
45
|
+
exports.logEvaluationDataset = logEvaluationDataset;
|
|
45
46
|
exports.createAndLogAgenticResult = createAndLogAgenticResult;
|
|
46
47
|
exports.debugLogInteraction = debugLogInteraction;
|
|
47
48
|
const fs = __importStar(require("fs"));
|
|
@@ -67,70 +68,92 @@ function generateDebugId(operation) {
|
|
|
67
68
|
return `${dateTime}_${randomHex}_${operation}`;
|
|
68
69
|
}
|
|
69
70
|
/**
|
|
70
|
-
*
|
|
71
|
-
*
|
|
72
|
-
* PRD #143 Decision 5: Extended metrics for model comparison analysis
|
|
71
|
+
* Determine if dataset generation should be skipped for specific operations
|
|
73
72
|
*/
|
|
74
|
-
function
|
|
73
|
+
function shouldSkipDatasetGeneration(operation) {
|
|
74
|
+
const skipDatasetOperations = ['version-connectivity-check', 'generic'];
|
|
75
|
+
return skipDatasetOperations.includes(operation);
|
|
76
|
+
}
|
|
77
|
+
/**
|
|
78
|
+
* Log unified evaluation metrics when DEBUG_DOT_AI=true
|
|
79
|
+
* Single function for all metrics and evaluation data capture
|
|
80
|
+
*/
|
|
81
|
+
/**
|
|
82
|
+
* Generate eval dataset entry in standard OpenAI Evals format
|
|
83
|
+
* Logs evaluation metrics to JSONL dataset files for AI quality assessment
|
|
84
|
+
*/
|
|
85
|
+
function logEvaluationDataset(metrics, debugMode = false) {
|
|
75
86
|
if (!debugMode)
|
|
76
87
|
return;
|
|
88
|
+
// Skip dataset generation for non-evaluable operations
|
|
89
|
+
if (shouldSkipDatasetGeneration(metrics.test_scenario))
|
|
90
|
+
return;
|
|
77
91
|
try {
|
|
78
|
-
|
|
79
|
-
const
|
|
80
|
-
const
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
entry.cacheCreationTokens = result.totalTokens.cacheCreation;
|
|
92
|
+
// Parse operation for tool name
|
|
93
|
+
const operationParts = metrics.operation.split('-');
|
|
94
|
+
const toolName = operationParts[0]; // e.g., "remediate"
|
|
95
|
+
// Check if this is a comparative evaluation
|
|
96
|
+
const isComparativeEvaluation = metrics.operation.includes('-comparative-');
|
|
97
|
+
// Use different directories for comparative evaluations vs raw test datasets
|
|
98
|
+
const baseDir = isComparativeEvaluation ?
|
|
99
|
+
path.join(process.cwd(), 'eval', 'results') : // Comparative evaluation results go here
|
|
100
|
+
path.join(process.cwd(), 'eval', 'datasets'); // Raw test datasets go here
|
|
101
|
+
// Ensure directory exists
|
|
102
|
+
if (!fs.existsSync(baseDir)) {
|
|
103
|
+
fs.mkdirSync(baseDir, { recursive: true });
|
|
91
104
|
}
|
|
92
|
-
|
|
93
|
-
|
|
105
|
+
let datasetFile;
|
|
106
|
+
const timestamp = new Date().toISOString().replace(/[:.]/g, '').split('T').join('_');
|
|
107
|
+
if (isComparativeEvaluation) {
|
|
108
|
+
// For comparative evaluations, save to results directory
|
|
109
|
+
datasetFile = path.join(baseDir, `${toolName}_comparative_evaluation_${timestamp}.jsonl`);
|
|
94
110
|
}
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
111
|
+
else {
|
|
112
|
+
// Use modelVersion directly for accurate model identification
|
|
113
|
+
const modelName = metrics.modelVersion || 'unknown';
|
|
114
|
+
// Create filename with interaction ID, SDK, model, and timestamp for single-model datasets
|
|
115
|
+
datasetFile = path.join(baseDir, `${toolName}_${metrics.interaction_id}_${metrics.sdk}_${modelName}_${timestamp}.jsonl`);
|
|
98
116
|
}
|
|
99
|
-
//
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
117
|
+
// Transform metrics into OpenAI Evals format (no ideal field - using model-graded evaluation)
|
|
118
|
+
const evalEntry = {
|
|
119
|
+
input: {
|
|
120
|
+
issue: metrics.user_intent || "Tool execution scenario"
|
|
121
|
+
},
|
|
122
|
+
output: metrics.ai_response_summary || "",
|
|
123
|
+
performance: {
|
|
124
|
+
duration_ms: metrics.durationMs,
|
|
125
|
+
input_tokens: metrics.inputTokens,
|
|
126
|
+
output_tokens: metrics.outputTokens,
|
|
127
|
+
total_tokens: metrics.inputTokens + metrics.outputTokens,
|
|
128
|
+
sdk: metrics.sdk,
|
|
129
|
+
model_version: metrics.modelVersion,
|
|
130
|
+
iterations: metrics.iterationCount,
|
|
131
|
+
tool_calls_executed: metrics.toolCallCount,
|
|
132
|
+
cache_read_tokens: metrics.cacheReadTokens || 0,
|
|
133
|
+
cache_creation_tokens: metrics.cacheCreationTokens || 0
|
|
134
|
+
},
|
|
135
|
+
metadata: {
|
|
136
|
+
timestamp: new Date().toISOString(),
|
|
137
|
+
complexity: "medium",
|
|
138
|
+
tags: ["troubleshooting"],
|
|
139
|
+
source: "integration_test",
|
|
140
|
+
tool: toolName,
|
|
141
|
+
test_scenario: metrics.test_scenario || `${toolName}_test`,
|
|
142
|
+
failure_analysis: metrics.failure_analysis || ""
|
|
143
|
+
}
|
|
144
|
+
};
|
|
145
|
+
fs.writeFileSync(datasetFile, JSON.stringify(evalEntry) + '\n');
|
|
146
|
+
console.log(`📊 Generated eval dataset: ${path.basename(datasetFile)} (${metrics.interaction_id}, ${metrics.durationMs}ms, ${metrics.inputTokens}+${metrics.outputTokens} tokens)`);
|
|
124
147
|
}
|
|
125
148
|
catch (error) {
|
|
126
|
-
console.
|
|
149
|
+
console.error(`❌ Failed to generate eval dataset for ${metrics.interaction_id} (${metrics.test_scenario}):`, error);
|
|
127
150
|
}
|
|
128
151
|
}
|
|
129
152
|
/**
|
|
130
153
|
* Create AgenticResult and log metrics in one step
|
|
131
154
|
* Reduces code duplication across providers
|
|
132
155
|
*
|
|
133
|
-
* PRD #
|
|
156
|
+
* PRD #154: Updated to use unified evaluation metrics
|
|
134
157
|
*/
|
|
135
158
|
function createAndLogAgenticResult(config) {
|
|
136
159
|
const result = {
|
|
@@ -144,7 +167,39 @@ function createAndLogAgenticResult(config) {
|
|
|
144
167
|
};
|
|
145
168
|
const durationMs = Date.now() - config.startTime;
|
|
146
169
|
if (config.debugMode) {
|
|
147
|
-
|
|
170
|
+
// PRD #154: Use unified evaluation metrics system
|
|
171
|
+
const evaluationMetrics = {
|
|
172
|
+
// Core execution data
|
|
173
|
+
operation: config.operation,
|
|
174
|
+
sdk: config.sdk,
|
|
175
|
+
inputTokens: config.totalTokens.input,
|
|
176
|
+
outputTokens: config.totalTokens.output,
|
|
177
|
+
durationMs,
|
|
178
|
+
// Required fields
|
|
179
|
+
iterationCount: config.iterations,
|
|
180
|
+
toolCallCount: config.toolCallsExecuted.length,
|
|
181
|
+
status: config.status,
|
|
182
|
+
completionReason: config.completionReason,
|
|
183
|
+
modelVersion: config.modelVersion,
|
|
184
|
+
// Required evaluation context - NO DEFAULTS, must be provided
|
|
185
|
+
test_scenario: config.operation,
|
|
186
|
+
ai_response_summary: config.finalMessage,
|
|
187
|
+
user_intent: config.evaluationContext?.user_intent || '', // Will be enhanced later by EvalDatasetEnhancer
|
|
188
|
+
interaction_id: config.interaction_id || '', // Will be enhanced later if missing
|
|
189
|
+
// Optional performance data
|
|
190
|
+
...(config.totalTokens.cacheCreation !== undefined && { cacheCreationTokens: config.totalTokens.cacheCreation }),
|
|
191
|
+
...(config.totalTokens.cacheRead !== undefined && { cacheReadTokens: config.totalTokens.cacheRead }),
|
|
192
|
+
...(config.toolCallsExecuted.length > 0 && {
|
|
193
|
+
uniqueToolsUsed: [...new Set(config.toolCallsExecuted.map(tc => tc.tool))]
|
|
194
|
+
}),
|
|
195
|
+
...(config.debugFiles && { debug_files: { full_prompt: config.debugFiles.promptFile, full_response: config.debugFiles.responseFile } }),
|
|
196
|
+
...(config.evaluationContext?.failure_analysis && { failure_analysis: config.evaluationContext.failure_analysis })
|
|
197
|
+
};
|
|
198
|
+
// Calculate cache hit rate if applicable
|
|
199
|
+
if (config.totalTokens.cacheRead !== undefined && config.totalTokens.input > 0) {
|
|
200
|
+
evaluationMetrics.cacheHitRate = Math.round((config.totalTokens.cacheRead / config.totalTokens.input) * 100);
|
|
201
|
+
}
|
|
202
|
+
logEvaluationDataset(evaluationMetrics, config.debugMode);
|
|
148
203
|
}
|
|
149
204
|
return result;
|
|
150
205
|
}
|
|
@@ -16,9 +16,14 @@ export declare class VercelProvider implements AIProvider {
|
|
|
16
16
|
private initializeModel;
|
|
17
17
|
getProviderType(): string;
|
|
18
18
|
getDefaultModel(): string;
|
|
19
|
+
getModelName(): string;
|
|
20
|
+
getSDKProvider(): string;
|
|
19
21
|
isInitialized(): boolean;
|
|
20
22
|
private logDebugIfEnabled;
|
|
21
|
-
sendMessage(message: string, operation?: string
|
|
23
|
+
sendMessage(message: string, operation?: string, evaluationContext?: {
|
|
24
|
+
user_intent?: string;
|
|
25
|
+
interaction_id?: string;
|
|
26
|
+
}): Promise<AIResponse>;
|
|
22
27
|
/**
|
|
23
28
|
* Agentic tool loop using Vercel AI SDK
|
|
24
29
|
*
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vercel-provider.d.ts","sourceRoot":"","sources":["../../../src/core/providers/vercel-provider.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;
|
|
1
|
+
{"version":3,"file":"vercel-provider.d.ts","sourceRoot":"","sources":["../../../src/core/providers/vercel-provider.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AASH,OAAO,EACL,UAAU,EACV,UAAU,EACV,gBAAgB,EAChB,cAAc,EACd,aAAa,EACd,MAAM,0BAA0B,CAAC;AASlC,qBAAa,cAAe,YAAW,UAAU;IAC/C,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAU;IAC3B,OAAO,CAAC,aAAa,CAAM;gBAEf,MAAM,EAAE,gBAAgB;IAUpC,OAAO,CAAC,qBAAqB;IAU7B,OAAO,CAAC,eAAe;IA4CvB,eAAe,IAAI,MAAM;IAIzB,eAAe,IAAI,MAAM;IAIzB,YAAY,IAAI,MAAM;IAItB,cAAc,IAAI,MAAM;IAIxB,aAAa,IAAI,OAAO;IAIxB,OAAO,CAAC,iBAAiB;IAiBnB,WAAW,CACf,OAAO,EAAE,MAAM,EACf,SAAS,GAAE,MAAkB,EAC7B,iBAAiB,CAAC,EAAE;QAClB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,GACA,OAAO,CAAC,UAAU,CAAC;IAoGtB;;;;;;;;;;;;OAYG;IACG,QAAQ,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,aAAa,CAAC;CA0T/D"}
|