@usewhisper/mcp-server 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +182 -154
- package/dist/autosubscribe-6EDKPBE2.js +4068 -4068
- package/dist/autosubscribe-GHO6YR5A.js +4068 -4068
- package/dist/autosubscribe-ISDETQIB.js +435 -435
- package/dist/chunk-3WGYBAYR.js +8387 -8387
- package/dist/chunk-52VJYCZ7.js +455 -455
- package/dist/chunk-5KBZQHDL.js +189 -189
- package/dist/chunk-5KIJNY6Z.js +370 -370
- package/dist/chunk-7SN3CKDK.js +1076 -1076
- package/dist/chunk-B3VWOHUA.js +271 -271
- package/dist/chunk-C57DHKTL.js +459 -459
- package/dist/chunk-EI5CE3EY.js +616 -616
- package/dist/chunk-FTWUJBAH.js +386 -386
- package/dist/chunk-H3HSKH2P.js +4841 -4841
- package/dist/chunk-JO3ORBZD.js +616 -616
- package/dist/chunk-L6DXSM2U.js +456 -456
- package/dist/chunk-LMEYV4JD.js +368 -368
- package/dist/chunk-MEFLJ4PV.js +8385 -8385
- package/dist/chunk-OBLI4FE4.js +275 -275
- package/dist/chunk-PPGYJJED.js +271 -271
- package/dist/chunk-QGM4M3NI.js +37 -37
- package/dist/chunk-T7KMSTWP.js +399 -399
- package/dist/chunk-TWEIYHI6.js +399 -399
- package/dist/chunk-UYWE7HSU.js +368 -368
- package/dist/chunk-X2DL2GWT.js +32 -32
- package/dist/chunk-X7HNNNJJ.js +1079 -1079
- package/dist/consolidation-2GCKI4RE.js +220 -220
- package/dist/consolidation-4JOPW6BG.js +220 -220
- package/dist/consolidation-FOVQTWNQ.js +222 -222
- package/dist/consolidation-IFQ52E44.js +209 -209
- package/dist/context-sharing-4ITCNKG4.js +307 -307
- package/dist/context-sharing-6CCFIAKL.js +275 -275
- package/dist/context-sharing-GYKLXHZA.js +307 -307
- package/dist/context-sharing-PH64JTXS.js +308 -308
- package/dist/context-sharing-Y6LTZZOF.js +307 -307
- package/dist/cost-optimization-6OIKRSBV.js +195 -195
- package/dist/cost-optimization-7DVSTL6R.js +307 -307
- package/dist/cost-optimization-BH5NAX33.js +286 -286
- package/dist/cost-optimization-F3L5BS5F.js +303 -303
- package/dist/ingest-2LPTWUUM.js +16 -16
- package/dist/ingest-7T5FAZNC.js +15 -15
- package/dist/ingest-EBNIE7XB.js +15 -15
- package/dist/ingest-FSHT5BCS.js +15 -15
- package/dist/ingest-QE2BTV72.js +14 -14
- package/dist/oracle-3RLQF3DP.js +259 -259
- package/dist/oracle-FKRTQUUG.js +282 -282
- package/dist/oracle-J47QCSEW.js +263 -263
- package/dist/oracle-MDP5MZRC.js +256 -256
- package/dist/search-BLVHWLWC.js +14 -14
- package/dist/search-CZ5NYL5B.js +12 -12
- package/dist/search-EG6TYWWW.js +13 -13
- package/dist/search-I22QQA7T.js +13 -13
- package/dist/search-T7H5G6DW.js +13 -13
- package/dist/server.d.ts +2 -2
- package/dist/server.js +1973 -169
- package/dist/server.js.map +1 -1
- package/package.json +51 -51
@@ -1,307 +1,307 @@
-import "./chunk-QGM4M3NI.js";
-
-// ../src/engine/cost-optimization.ts
-import OpenAI from "openai";
-var MODELS = {
-  haiku: {
-    model: "claude-haiku-4.5",
-    maxTokens: 4096,
-    temperature: 0,
-    costPerMillion: 0.25
-    // $0.25 per million input tokens
-  },
-  sonnet: {
-    model: "claude-sonnet-4.5",
-    maxTokens: 8192,
-    temperature: 0,
-    costPerMillion: 3
-    // $3.00 per million input tokens
-  },
-  opus: {
-    model: "claude-opus-4.5",
-    maxTokens: 16384,
-    temperature: 0,
-    costPerMillion: 15
-    // $15.00 per million input tokens
-  }
-};
-var TASK_MODEL_MAP = {
-  temporal_parsing: "haiku",
-  // Fast, simple parsing
-  simple_classification: "haiku",
-  // Fast classification
-  memory_extraction: "sonnet",
-  // Needs accuracy for disambiguation
-  relation_detection: "sonnet",
-  // Needs reasoning
-  consolidation: "sonnet",
-  // Needs to merge intelligently
-  summarization: "haiku",
-  // Fast summarization
-  complex_reasoning: "opus"
-  // Deep reasoning tasks
-};
-function getOptimalModel(taskType, options = {}) {
-  if (options.forceModel) {
-    return MODELS[options.forceModel];
-  }
-  let tier = TASK_MODEL_MAP[taskType];
-  if (options.minQuality && tier === "haiku") {
-    tier = "sonnet";
-  }
-  return MODELS[tier];
-}
-function estimateCost(params) {
-  const modelConfig = getOptimalModel(params.taskType, { forceModel: params.model });
-  const inputCost = params.inputTokens / 1e6 * modelConfig.costPerMillion;
-  const outputCostPerMillion = modelConfig.costPerMillion * 5;
-  const outputCost = params.outputTokens / 1e6 * outputCostPerMillion;
-  return {
-    model: modelConfig.model,
-    inputCost,
-    outputCost,
-    totalCost: inputCost + outputCost
-  };
-}
-async function smartLLMCall(params) {
-  const { taskType, prompt, systemPrompt, maxTokens, temperature, forceModel } = params;
-  const modelConfig = getOptimalModel(taskType, { forceModel });
-  const openai = new OpenAI({
-    apiKey: process.env.OPENAI_API_KEY || ""
-  });
-  const messages = [{ role: "user", content: prompt }];
-  if (systemPrompt) {
-    messages.unshift({ role: "system", content: systemPrompt });
-  }
-  const modelMap = {
-    "claude-haiku-4-5-20251001": "gpt-4o-mini",
-    "claude-sonnet-4-5-20250929": "gpt-4o",
-    "claude-opus-4-5-20251101": "gpt-4o"
-  };
-  const openaiModel = modelMap[modelConfig.model] || "gpt-4o";
-  const response = await openai.chat.completions.create({
-    model: openaiModel,
-    max_tokens: maxTokens || modelConfig.maxTokens,
-    temperature: temperature !== void 0 ? temperature : modelConfig.temperature,
-    messages
-  });
-  const responseText = response.choices[0]?.message?.content || "";
-  const tokensUsed = {
-    input: response.usage?.prompt_tokens || 0,
-    output: response.usage?.completion_tokens || 0
-  };
-  const cost = estimateCost({
-    taskType,
-    inputTokens: tokensUsed.input,
-    outputTokens: tokensUsed.output,
-    model: forceModel
-  });
-  return {
-    response: responseText,
-    model: modelConfig.model,
-    tokensUsed,
-    cost: cost.totalCost
-  };
-}
-async function batchOptimize(params) {
-  const { items, processFn, batchSize = 10, delayMs = 100 } = params;
-  const results = [];
-  for (let i = 0; i < items.length; i += batchSize) {
-    const batch = items.slice(i, i + batchSize);
-    const batchResults = await Promise.all(batch.map(processFn));
-    results.push(...batchResults);
-    if (i + batchSize < items.length) {
-      await new Promise((resolve) => setTimeout(resolve, delayMs));
-    }
-  }
-  return results;
-}
-var costRecords = [];
-function trackCost(record) {
-  costRecords.push({
-    ...record,
-    timestamp: /* @__PURE__ */ new Date()
-  });
-}
-async function getCostSummary(params) {
-  const { startDate, endDate } = params;
-  let filtered = [...costRecords];
-  if (startDate) {
-    filtered = filtered.filter((r) => r.timestamp >= startDate);
-  }
-  if (endDate) {
-    filtered = filtered.filter((r) => r.timestamp <= endDate);
-  }
-  const period = {
-    start: filtered.length > 0 ? filtered[0].timestamp : /* @__PURE__ */ new Date(),
-    end: filtered.length > 0 ? filtered[filtered.length - 1].timestamp : /* @__PURE__ */ new Date()
-  };
-  const totalCost = filtered.reduce((sum, r) => sum + r.cost, 0);
-  const totalRequests = filtered.length;
-  const costByModel = {};
-  const costByTask = {};
-  for (const record of filtered) {
-    costByModel[record.model] = (costByModel[record.model] || 0) + record.cost;
-    costByTask[record.taskType] = (costByTask[record.taskType] || 0) + record.cost;
-  }
-  const avgCostPerRequest = totalRequests > 0 ? totalCost / totalRequests : 0;
-  const daysDiff = period.end.getTime() - period.start.getTime();
-  const days = daysDiff > 0 ? daysDiff / (1e3 * 60 * 60 * 24) : 1;
-  const estimatedMonthlyCost = totalCost / days * 30;
-  return {
-    period,
-    totalCost,
-    totalRequests,
-    costByModel,
-    costByTask,
-    avgCostPerRequest,
-    estimatedMonthlyCost
-  };
-}
-function calculateSavings(params) {
-  const { since } = params;
-  const filtered = since ? costRecords.filter((r) => r.timestamp >= since) : costRecords;
-  const actualCost = filtered.reduce((sum, r) => sum + r.cost, 0);
-  const opusCost = filtered.reduce((sum, r) => {
-    const cost = estimateCost({
-      taskType: r.taskType,
-      inputTokens: r.inputTokens,
-      outputTokens: r.outputTokens,
-      model: "opus"
-    });
-    return sum + cost.totalCost;
-  }, 0);
-  const savings = opusCost - actualCost;
-  const savingsPercent = opusCost > 0 ? savings / opusCost * 100 : 0;
-  return {
-    actualCost,
-    opusCost,
-    savings,
-    savingsPercent
-  };
-}
-function recommendModelUpgrades(params) {
-  const { errorRates, threshold = 0.05 } = params;
-  const recommendations = [];
-  for (const [taskType, errorRate] of Object.entries(errorRates)) {
-    if (errorRate > threshold) {
-      const currentModel = TASK_MODEL_MAP[taskType];
-      let recommendedModel;
-      if (currentModel === "haiku") {
-        recommendedModel = "sonnet";
-      } else if (currentModel === "sonnet") {
-        recommendedModel = "opus";
-      } else {
-        continue;
-      }
-      recommendations.push({
-        taskType,
-        currentModel,
-        recommendedModel
-      });
-    }
-  }
-  return recommendations;
-}
-async function getCostBreakdown(params) {
-  const { groupBy, startDate, endDate } = params;
-  let filtered = [...costRecords];
-  if (startDate) {
-    filtered = filtered.filter((r) => r.timestamp >= startDate);
-  }
-  if (endDate) {
-    filtered = filtered.filter((r) => r.timestamp <= endDate);
-  }
-  const groups = {};
-  for (const record of filtered) {
-    let key;
-    switch (groupBy) {
-      case "model":
-        key = record.model;
-        break;
-      case "task":
-        key = record.taskType;
-        break;
-      case "day":
-        key = record.timestamp.toISOString().split("T")[0];
-        break;
-      case "hour":
-        key = record.timestamp.toISOString().slice(0, 13) + ":00";
-        break;
-      default:
-        key = record.taskType;
-    }
-    if (!groups[key]) {
-      groups[key] = { cost: 0, requests: 0 };
-    }
-    groups[key].cost += record.cost;
-    groups[key].requests += 1;
-  }
-  const totalCost = filtered.reduce((sum, r) => sum + r.cost, 0);
-  const totalRequests = filtered.length;
-  return { groups, totalCost, totalRequests };
-}
-async function getSavingsReport(params) {
-  const { startDate, endDate } = params;
-  let filtered = [...costRecords];
-  if (startDate) {
-    filtered = filtered.filter((r) => r.timestamp >= startDate);
-  }
-  if (endDate) {
-    filtered = filtered.filter((r) => r.timestamp <= endDate);
-  }
-  const period = {
-    start: filtered.length > 0 ? filtered[0].timestamp : /* @__PURE__ */ new Date(),
-    end: filtered.length > 0 ? filtered[filtered.length - 1].timestamp : /* @__PURE__ */ new Date()
-  };
-  const actualCost = filtered.reduce((sum, r) => sum + r.cost, 0);
-  let opusOnlyCost = 0;
-  const requests = { total: filtered.length, haiku: 0, sonnet: 0, opus: 0 };
-  for (const record of filtered) {
-    opusOnlyCost += estimateCost({
-      taskType: record.taskType,
-      inputTokens: record.inputTokens,
-      outputTokens: record.outputTokens,
-      model: "opus"
-    }).totalCost;
-    if (record.model.includes("haiku")) {
-      requests.haiku++;
-    } else if (record.model.includes("sonnet")) {
-      requests.sonnet++;
-    } else if (record.model.includes("opus")) {
-      requests.opus++;
-    }
-  }
-  const savings = opusOnlyCost - actualCost;
-  const savingsPercentage = opusOnlyCost > 0 ? savings / opusOnlyCost * 100 : 0;
-  let recommendation = "";
-  if (savingsPercentage > 50) {
-    recommendation = "Excellent! Your model selection is highly optimized.";
-  } else if (savingsPercentage > 30) {
-    recommendation = "Good savings. Consider using Haiku for simpler tasks.";
-  } else {
-    recommendation = "Consider reviewing task complexity to better match models.";
-  }
-  return {
-    period,
-    actualCost,
-    opusOnlyCost,
-    savings,
-    savingsPercentage,
-    requests,
-    recommendation
-  };
-}
-export {
-  MODELS,
-  batchOptimize,
-  calculateSavings,
-  estimateCost,
-  getCostBreakdown,
-  getCostSummary,
-  getOptimalModel,
-  getSavingsReport,
-  recommendModelUpgrades,
-  smartLLMCall,
-  trackCost
-};
+ [307 lines: the added block is byte-for-byte identical to the 307 removed lines above and is not repeated here]
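
For orientation, here is a minimal usage sketch of the API this chunk exports, written only against the function signatures visible in the diff above. It is not an official example from the package: the import path is hypothetical (the chunk filename hash varies per build, as the file list above shows), and the token counts are made-up numbers.

// Hypothetical import path: use whichever cost-optimization-*.js hash your
// installed build actually contains.
import {
  getOptimalModel,
  estimateCost,
  trackCost,
  getSavingsReport
} from "./dist/cost-optimization-7DVSTL6R.js";

// "summarization" maps to the haiku tier in TASK_MODEL_MAP;
// passing { minQuality: true } would bump it to sonnet.
const tier = getOptimalModel("summarization", {});
// tier.model === "claude-haiku-4.5", tier.costPerMillion === 0.25

// Cost model as implemented above: input billed at costPerMillion,
// output at a flat 5x multiplier. For haiku at $0.25/M input:
//   2000 in  -> 2000 / 1e6 * 0.25        = $0.0005
//    500 out ->  500 / 1e6 * (0.25 * 5)  = $0.000625
//   total    -> $0.001125
const cost = estimateCost({
  taskType: "summarization",
  inputTokens: 2000,
  outputTokens: 500
});

// trackCost appends to the module-level costRecords array and stamps a Date;
// later reports read back these fields, so record tokens alongside the cost.
trackCost({
  taskType: "summarization",
  model: cost.model,
  inputTokens: 2000,
  outputTokens: 500,
  cost: cost.totalCost
});

// getSavingsReport compares actual spend against an "everything on opus"
// baseline recomputed from the recorded token counts.
const report = await getSavingsReport({});
console.log(report.savingsPercentage.toFixed(1), report.recommendation);

Two details of the shipped code are worth noticing when reading the diff: despite the Claude-style names in MODELS, smartLLMCall dispatches to OpenAI's chat.completions endpoint, and its modelMap keys use dated identifiers ("claude-haiku-4-5-20251001") that never match the MODELS entries ("claude-haiku-4.5"), so every call falls through to the "gpt-4o" default. Cost tracking is also in-memory only, so summaries and savings reports reset whenever the server process restarts.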