@warmdrift/kgauto 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +229 -0
- package/dist/index.d.ts +229 -0
- package/dist/index.js +1131 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +1090 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +41 -0
- package/profiles.json +672 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,1090 @@
|
|
|
1
|
+
// src/tokenizer.ts
// Token estimation: a pluggable tokenizer (default: cheap chars/3.5 heuristic)
// plus helpers that estimate the token footprint of tool schemas and messages.
var AVG_TOKENS_PER_TOOL = 350;
var _tokenizer = (text) => Math.max(0, Math.ceil(text.length / 3.5));
/** Install a custom token-counting function in place of the default heuristic. */
function setTokenizer(fn) {
  _tokenizer = fn;
}
/** Count tokens in `text` with whichever tokenizer is currently installed. */
function countTokens(text) {
  return _tokenizer(text);
}
/**
 * Estimate the combined token cost of a list of tool definitions by JSON-serializing
 * each one. A tool that cannot be serialized or counted (e.g. circular refs, or a
 * value JSON.stringify turns into undefined) falls back to AVG_TOKENS_PER_TOOL.
 */
function estimateToolTokens(tools) {
  return tools.reduce((total, tool) => {
    try {
      return total + countTokens(JSON.stringify(tool));
    } catch {
      return total + AVG_TOKENS_PER_TOOL;
    }
  }, 0);
}
/**
 * Estimate the token cost of chat messages; each message pays a flat 4-token
 * framing overhead on top of its content.
 */
function estimateMessagesTokens(messages) {
  return messages.reduce((total, msg) => total + countTokens(msg.content) + 4, 0);
}
|
|
29
|
+
|
|
30
|
+
// profiles.json
// Bundled model capability profiles, keyed by model id. Each entry carries:
// provider id, lifecycle status ("current" | "legacy"), tool/context/output
// limits, parallel tool-call support, supported output modes, provider-specific
// prompt rules, documented failure modes, qualitative strengths/weaknesses,
// per-1M-token pricing (input/output), a default agent step limit, and notes.
// NOTE: the "\u2014" escapes below decode to em dashes at runtime.
var profiles_default = {
  "claude-opus-4-6": {
    provider: "anthropic",
    status: "current",
    max_tools: 20,
    max_context_tokens: 1e6,
    max_output_tokens: 128e3,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: [],
    known_failures: [
      "refusal returns stop_reason 'refusal' with schema-violating output",
      "rate limits pooled across all Opus 4.x versions"
    ],
    strengths: ["complex_reasoning", "judgment", "nuance", "reliability", "1m_context", "adaptive_thinking"],
    weaknesses: ["cost", "latency"],
    cost_input_per_1m: 5,
    cost_output_per_1m: 25,
    step_limit_default: 10,
    notes: "Current frontier. 1M context. Adaptive thinking. Fast mode available at 6x rates. Auditor model for KG."
  },
  "claude-sonnet-4-6": {
    provider: "anthropic",
    status: "current",
    max_tools: 20,
    max_context_tokens: 1e6,
    max_output_tokens: 64e3,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: [],
    known_failures: [
      "refusal returns stop_reason 'refusal' with schema-violating output",
      "rate limits pooled across all Sonnet 4.x versions",
      "pricing jumps to $6/$22.50 per 1M tokens above 200k context"
    ],
    strengths: ["structured_output", "tool_use", "instruction_following", "reliability", "1m_context", "adaptive_thinking"],
    weaknesses: ["cost_for_simple_tasks"],
    cost_input_per_1m: 3,
    cost_output_per_1m: 15,
    step_limit_default: 10,
    notes: "Current recommended workhorse. 1M context. generateObject works with complex schemas. Adaptive thinking."
  },
  "claude-haiku-4.5": {
    provider: "anthropic",
    status: "current",
    max_tools: 20,
    max_context_tokens: 2e5,
    max_output_tokens: 64e3,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: [],
    known_failures: [
      "refusal returns stop_reason 'refusal' with schema-violating output",
      "no adaptive thinking \u2014 only extended thinking"
    ],
    strengths: ["speed", "classification", "simple_routing", "cost", "structured_output"],
    weaknesses: ["complex_reasoning", "nuance", "200k_context_limit"],
    cost_input_per_1m: 1,
    cost_output_per_1m: 5,
    step_limit_default: 6,
    notes: "Fast and cheap. Good for classification, routing, simple tasks. 200k context (not 1M)."
  },
  "claude-sonnet-4": {
    provider: "anthropic",
    status: "legacy",
    max_tools: 20,
    max_context_tokens: 2e5,
    max_output_tokens: 64e3,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: [],
    known_failures: [
      "1M context beta retiring April 30 2026 \u2014 requests over 200k will error",
      "refusal returns stop_reason 'refusal' with schema-violating output"
    ],
    strengths: ["structured_output", "tool_use", "instruction_following", "reliability"],
    weaknesses: ["legacy", "cost_for_simple_tasks"],
    cost_input_per_1m: 3,
    cost_output_per_1m: 15,
    step_limit_default: 10,
    notes: "Legacy \u2014 use claude-sonnet-4-6 instead. 1M context beta expiring."
  },
  "claude-opus-4": {
    provider: "anthropic",
    status: "legacy",
    max_tools: 20,
    max_context_tokens: 2e5,
    max_output_tokens: 32e3,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: [],
    known_failures: [
      "refusal returns stop_reason 'refusal' with schema-violating output"
    ],
    strengths: ["complex_reasoning", "judgment"],
    weaknesses: ["legacy", "expensive_vs_4.6", "lower_output_limit"],
    cost_input_per_1m: 15,
    cost_output_per_1m: 75,
    step_limit_default: 10,
    notes: "Legacy \u2014 use claude-opus-4-6 instead. 3x more expensive for same tier."
  },
  "gpt-4.1": {
    provider: "openai",
    status: "current",
    max_tools: 128,
    max_context_tokens: 1047576,
    max_output_tokens: 32768,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: [],
    known_failures: [
      "structured output incompatible with parallel_tool_calls \u2014 must set parallel_tool_calls=false",
      "first request with new JSON schema has preprocessing delay up to 60s",
      "structured output truncates silently at max_tokens \u2014 unparseable JSON",
      "safety refusal returns 'refusal' field instead of schema-conforming object",
      "model alias points to latest snapshot \u2014 behavior can change without warning"
    ],
    strengths: ["all_rounder", "structured_output", "tool_use", "reliability", "1m_context", "coding"],
    weaknesses: [],
    cost_input_per_1m: 2,
    cost_output_per_1m: 8,
    step_limit_default: 10,
    notes: "OpenAI stable flagship. 1M context. Strong coding benchmarks. Natural shadow-test candidate for complex tasks vs Opus."
  },
  "gpt-4.1-mini": {
    provider: "openai",
    status: "current",
    max_tools: 128,
    max_context_tokens: 1047576,
    max_output_tokens: 32768,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: [],
    known_failures: [
      "structured output incompatible with parallel_tool_calls \u2014 must set parallel_tool_calls=false",
      "structured output truncates silently at max_tokens",
      "safety refusal returns 'refusal' field instead of schema-conforming object"
    ],
    strengths: ["speed", "cost", "structured_output", "1m_context"],
    weaknesses: ["complex_reasoning"],
    cost_input_per_1m: 0.4,
    cost_output_per_1m: 1.6,
    step_limit_default: 8,
    notes: "OpenAI mid-tier. 1M context. Good for standard tasks."
  },
  "gpt-4.1-nano": {
    provider: "openai",
    status: "current",
    max_tools: 128,
    max_context_tokens: 1047576,
    max_output_tokens: 32768,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: [],
    known_failures: [
      "structured output incompatible with parallel_tool_calls \u2014 must set parallel_tool_calls=false",
      "instruction drift in long conversations"
    ],
    strengths: ["speed", "cost", "1m_context"],
    weaknesses: ["complex_reasoning", "nuance"],
    cost_input_per_1m: 0.1,
    cost_output_per_1m: 0.4,
    step_limit_default: 6,
    notes: "OpenAI cheapest. 1M context. Simple tasks only."
  },
  "gpt-4o": {
    provider: "openai",
    status: "legacy",
    max_tools: 128,
    max_context_tokens: 128e3,
    max_output_tokens: 16384,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: [],
    known_failures: [
      "structured output incompatible with parallel_tool_calls \u2014 must set parallel_tool_calls=false",
      "structured output truncates silently at max_tokens",
      "safety refusal returns 'refusal' field instead of schema-conforming object",
      "hallucination rate higher than expected at temperature=0"
    ],
    strengths: ["all_rounder", "structured_output", "tool_use", "reliability"],
    weaknesses: ["legacy", "128k_context_limit"],
    cost_input_per_1m: 2.5,
    cost_output_per_1m: 10,
    step_limit_default: 10,
    notes: "Legacy \u2014 use gpt-4.1 instead. Better quality, cheaper, 1M context."
  },
  "gpt-4o-mini": {
    provider: "openai",
    status: "legacy",
    max_tools: 128,
    max_context_tokens: 128e3,
    max_output_tokens: 16384,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: [],
    known_failures: [
      "structured output incompatible with parallel_tool_calls \u2014 must set parallel_tool_calls=false",
      "instruction drift in long conversations \u2014 worse than gpt-4o"
    ],
    strengths: ["speed", "cost", "simple_tasks"],
    weaknesses: ["legacy", "complex_reasoning", "128k_context_limit"],
    cost_input_per_1m: 0.15,
    cost_output_per_1m: 0.6,
    step_limit_default: 6,
    notes: "Legacy \u2014 use gpt-4.1-mini or gpt-4.1-nano instead."
  },
  o3: {
    provider: "openai",
    status: "current",
    max_tools: 128,
    max_context_tokens: 2e5,
    max_output_tokens: 1e5,
    parallel_tool_calls: false,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: ["no_system_with_developer"],
    known_failures: [
      "reasoning tokens unpredictable and unbounded \u2014 single call cost variance 10-50x",
      "parallel_tool_calls=true errors or silently ignored \u2014 always set false",
      "reasoning summaries unreliable \u2014 omitted >90% of cases",
      "abandons hard tasks mid-work \u2014 non-deterministic behavioral pattern",
      "reasoning tokens discarded between turns \u2014 cannot reference prior reasoning chain",
      "hallucinates tool invocations for tools not in schema"
    ],
    strengths: ["reasoning", "complex_analysis", "code", "math"],
    weaknesses: ["cost_variance", "sequential_tools", "unpredictable_latency"],
    cost_input_per_1m: 2,
    cost_output_per_1m: 8,
    step_limit_default: 8,
    notes: "Frontier reasoning. Cost is unpredictable due to reasoning tokens. Budget with max_completion_tokens. reasoning_effort param controls cost/quality tradeoff."
  },
  "gemini-2.5-flash": {
    provider: "google",
    status: "current",
    max_tools: 128,
    max_context_tokens: 1048576,
    max_output_tokens: 65535,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: ["ban_cot_phrases", "hard_word_limit", "explicit_format", "disable_thinking_for_short_output"],
    known_failures: [
      "thinking tokens consume maxOutputTokens \u2014 empty response if budget exhausted by reasoning",
      "MALFORMED_FUNCTION_CALL maps to 'stop' in LiteLLM \u2014 silent failure in agentic loops",
      "empty response after tool call result submission \u2014 reproducible for specific inputs",
      "parallel tool call parsing broken in streaming \u2014 SDKs only parse first functionCall in parts array",
      "deeply nested or large schemas rejected at API level",
      "unsupported JSON Schema properties silently ignored \u2014 partial silent failure",
      "quality degrades significantly with large documents or high-context prompts",
      "10-20 tools recommended despite 128 hard limit \u2014 reliability drops above 20"
    ],
    strengths: ["speed", "volume", "classification", "parallel_tool_calls", "1m_context", "cost"],
    weaknesses: ["complex_schemas", "large_tool_sets_unreliable", "thinking_token_drain", "streaming_tool_parsing"],
    cost_input_per_1m: 0.3,
    cost_output_per_1m: 2.5,
    step_limit_default: 6,
    notes: "Fast and cheap with 1M context. Thinking ON by default \u2014 set thinkingBudget=0 for short outputs or max_tokens>=1024. Tool reliability drops above 10-20 tools despite 128 limit. The primary silent failure model in the pool."
  },
  "gemini-2.5-pro": {
    provider: "google",
    status: "current",
    max_tools: 128,
    max_context_tokens: 1048576,
    max_output_tokens: 65535,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: ["disable_thinking_for_short_output"],
    known_failures: [
      "thinking tokens consume maxOutputTokens",
      "MALFORMED_FUNCTION_CALL maps to 'stop' in LiteLLM",
      "pricing doubles above 200k context tokens"
    ],
    strengths: ["reasoning", "1m_context", "structured_output", "tool_use"],
    weaknesses: ["pricing_above_200k"],
    cost_input_per_1m: 1.25,
    cost_output_per_1m: 10,
    step_limit_default: 10,
    notes: "Google stable frontier. 1M context. Pricing doubles above 200k tokens \u2014 budget carefully for long-context use."
  },
  "deepseek-v4": {
    provider: "deepseek",
    status: "current",
    max_tools: 128,
    max_context_tokens: 1e6,
    max_output_tokens: 64e3,
    parallel_tool_calls: false,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: ["explicit_format"],
    known_failures: [
      "uptime is primary production risk \u2014 documented multi-day outages",
      "503 server overloaded without backpressure \u2014 no clean 429s, just silent slow responses",
      "tool call JSON occasionally malformed \u2014 parse errors must be handled explicitly"
    ],
    strengths: ["cost", "reasoning", "1m_context", "coding"],
    weaknesses: ["uptime", "sequential_tool_calls", "no_backpressure"],
    cost_input_per_1m: 0.3,
    cost_output_per_1m: 0.5,
    step_limit_default: 8,
    notes: "Extraordinary value \u2014 frontier quality at 10-50x cheaper than Western providers. 1M context. Uptime is the risk."
  },
  "deepseek-chat": {
    provider: "deepseek",
    status: "legacy",
    max_tools: 128,
    max_context_tokens: 128e3,
    max_output_tokens: 8e3,
    parallel_tool_calls: false,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: ["explicit_format"],
    known_failures: [
      "uptime is primary production risk \u2014 documented multi-day outages",
      "503 server overloaded errors \u2014 streaming stalls mid-response without error",
      "tool call JSON occasionally malformed",
      "7-8 sequential tool calls \u2014 5x latency vs parallel models for same task"
    ],
    strengths: ["cost", "general_reasoning"],
    weaknesses: ["sequential_tool_calls", "latency_with_tools", "uptime", "legacy"],
    cost_input_per_1m: 0.28,
    cost_output_per_1m: 0.42,
    step_limit_default: 6,
    notes: "Legacy V3.2. Use deepseek-v4 instead. 10x cheaper cache hits ($0.028/1M). Sequential tool calls only."
  },
  "deepseek-reasoner": {
    provider: "deepseek",
    status: "legacy",
    max_tools: 128,
    max_context_tokens: 128e3,
    max_output_tokens: 64e3,
    parallel_tool_calls: false,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: ["explicit_format"],
    known_failures: [
      "uptime is primary production risk",
      "tool calling added in V3.2 \u2014 older integrations may not support it",
      "the V3.2-Speciale variant does NOT support tool calling \u2014 do not confuse"
    ],
    strengths: ["reasoning", "cost_effective_reasoning"],
    weaknesses: ["sequential_tool_calls", "latency", "uptime", "legacy"],
    cost_input_per_1m: 0.28,
    cost_output_per_1m: 0.42,
    step_limit_default: 8,
    notes: "Legacy V3.2 reasoning mode. Same pricing as deepseek-chat in V3.2. Use deepseek-v4 instead."
  },
  "mistral-small": {
    provider: "mistral",
    status: "current",
    max_tools: 128,
    max_context_tokens: 256e3,
    max_output_tokens: 16e3,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: ["reinforce_json_in_prompt"],
    known_failures: [
      "json_object mode only 64% reliable on complex schemas \u2014 must use json_schema strict mode",
      "must explicitly instruct JSON output in prompt even when using JSON mode",
      "max output governed by context remainder \u2014 250k input leaves only 6k for output"
    ],
    strengths: ["speed", "cost", "european_hosting", "256k_context", "structured_output_strict"],
    weaknesses: ["complex_reasoning", "json_object_unreliable"],
    cost_input_per_1m: 0.2,
    cost_output_per_1m: 0.6,
    step_limit_default: 6,
    notes: "Mistral Small 4 (2603). 256k context. European hosting. Always use json_schema strict mode, never json_object."
  },
  "mistral-large": {
    provider: "mistral",
    status: "current",
    max_tools: 128,
    max_context_tokens: 256e3,
    max_output_tokens: 16e3,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: ["reinforce_json_in_prompt"],
    known_failures: [
      "json_object mode only 64% reliable \u2014 must use json_schema strict mode",
      "must explicitly instruct JSON output in prompt even when using JSON mode"
    ],
    strengths: ["reasoning", "european_hosting", "structured_output", "256k_context"],
    weaknesses: ["cost_vs_gpt41"],
    cost_input_per_1m: 2,
    cost_output_per_1m: 6,
    step_limit_default: 10,
    notes: "Mistral Large 3 (2512). 256k context. European hosting. Strong reasoning."
  },
  "grok-3": {
    provider: "xai",
    status: "current",
    max_tools: 128,
    max_context_tokens: 131072,
    max_output_tokens: 16e3,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: [],
    known_failures: [
      "function call tags leak into thinking blocks \u2014 breaks tool-call parsers",
      "enters repetitive tool-call generation loops \u2014 add hard ceiling on iterations",
      "reasoning_effort param NOT supported \u2014 only on grok-3-mini",
      "100k TPM cap per customer \u2014 lower than comparable models"
    ],
    strengths: ["reasoning", "speed"],
    weaknesses: ["tool_call_tag_leaks", "tool_loop_risk", "tpm_cap"],
    cost_input_per_1m: 3,
    cost_output_per_1m: 15,
    step_limit_default: 10,
    notes: "Strong reasoning. Young API with thinner operational track record. Watch for tool-call parsing issues."
  },
  "grok-3-mini": {
    provider: "xai",
    status: "current",
    max_tools: 128,
    max_context_tokens: 131072,
    max_output_tokens: 16e3,
    parallel_tool_calls: true,
    output_modes: ["generateText", "generateObject"],
    prompt_rules: [],
    known_failures: [
      "function call tags leak into thinking blocks",
      "enters repetitive tool-call generation loops",
      "100k TPM cap per customer"
    ],
    strengths: ["speed", "cost", "medium_reasoning", "reasoning_effort_control"],
    weaknesses: ["tool_call_tag_leaks", "tool_loop_risk", "tpm_cap"],
    cost_input_per_1m: 0.3,
    cost_output_per_1m: 0.5,
    step_limit_default: 8,
    notes: "Fast, cheap. reasoning_effort param (low/high) controls reasoning token budget. Full CoT exposed in every response."
  }
};
|
|
703
|
+
|
|
704
|
+
// src/profiles.ts
// Normalize the raw JSON profile data into the runtime table: inject the
// entry's id and default the optional list/notes/step-limit fields.
var _profiles = Object.fromEntries(
  Object.entries(profiles_default).map(([id, raw]) => [
    id,
    {
      id,
      provider: raw.provider,
      status: raw.status,
      max_tools: raw.max_tools,
      max_context_tokens: raw.max_context_tokens,
      max_output_tokens: raw.max_output_tokens,
      parallel_tool_calls: raw.parallel_tool_calls,
      output_modes: raw.output_modes,
      prompt_rules: raw.prompt_rules ?? [],
      known_failures: raw.known_failures ?? [],
      strengths: raw.strengths ?? [],
      weaknesses: raw.weaknesses ?? [],
      cost_input_per_1m: raw.cost_input_per_1m,
      cost_output_per_1m: raw.cost_output_per_1m,
      step_limit_default: raw.step_limit_default ?? 10,
      notes: raw.notes ?? ""
    }
  ])
);
var PROFILES = _profiles;
|
|
727
|
+
/**
 * Look up a model profile by id.
 * Throws an Error listing all known model ids when `modelId` is not registered.
 */
function getProfile(modelId) {
  const profile = _profiles[modelId];
  if (profile) return profile;
  const available = Object.keys(_profiles).sort().join(", ");
  throw new Error(`Unknown model '${modelId}'. Available: ${available}`);
}
|
|
735
|
+
/** Return only the profiles whose lifecycle status is "current". */
function getCurrentProfiles() {
  const current = {};
  for (const [id, profile] of Object.entries(_profiles)) {
    if (profile.status === "current") current[id] = profile;
  }
  return current;
}
/** Return only the profiles served by the given provider id. */
function getProfilesByProvider(provider) {
  const matched = {};
  for (const [id, profile] of Object.entries(_profiles)) {
    if (profile.provider === provider) matched[id] = profile;
  }
  return matched;
}
|
|
745
|
+
|
|
746
|
+
// src/policies.ts
// Tools that must be called serially, at most once per response. Values are
// human-readable reasons surfaced in policies and warnings.
var SERIAL_TOOLS = {
  update_dashboard: "UI state mutation \u2014 parallel calls cause stuttering and duplicate filter applications",
  suggest_next_moves: "Epilogue tool \u2014 max 1 per response, generates suggestion pills",
  start_hunt: "Session-level action \u2014 max 1 per response, creates a hunt mission",
  propose_strategy: "Advisory tool \u2014 max 1 per response, generates strategy cards",
  start_wave_run: "Orchestration action \u2014 max 1 per response, launches autonomous wave"
};
/**
 * Build the serial-call policy list for the given tools: every tool listed in
 * SERIAL_TOOLS gets a { name, parallelSafe: false, maxPerResponse: 1, reason }
 * record; other tools produce no policy.
 */
function computeToolPolicies(tools) {
  const policies = [];
  for (const tool of tools) {
    // Own-property check: a plain `SERIAL_TOOLS[tool.name]` truthiness test
    // would also match prototype-chain names like "constructor"/"toString".
    if (Object.prototype.hasOwnProperty.call(SERIAL_TOOLS, tool.name)) {
      policies.push({
        name: tool.name,
        parallelSafe: false,
        maxPerResponse: 1,
        reason: SERIAL_TOOLS[tool.name]
      });
    }
  }
  return policies;
}
/**
 * Given the tool names actually called in one response, return a warning
 * string for every serial-only tool that was invoked more than once.
 */
function detectParallelismWarnings(toolsCalled) {
  if (!toolsCalled.length) return [];
  // Null-prototype map so arbitrary tool names (e.g. "constructor") cannot
  // collide with Object.prototype members while counting.
  const counts = Object.create(null);
  for (const name of toolsCalled) {
    counts[name] = (counts[name] || 0) + 1;
  }
  const warnings = [];
  for (const [name, count] of Object.entries(counts)) {
    // `name in SERIAL_TOOLS` would walk the prototype chain; use an own check.
    if (Object.prototype.hasOwnProperty.call(SERIAL_TOOLS, name) && count > 1) {
      warnings.push(`${name} called ${count}x (should be max 1): ${SERIAL_TOOLS[name]}`);
    }
  }
  return warnings;
}
|
|
783
|
+
|
|
784
|
+
// src/adapter.ts
// Only spend 90% of a model's context window; token counts here are estimates,
// so the remaining 10% is safety headroom.
var CONTEXT_SAFETY_FACTOR = 0.9;
// Inserted into message content wherever history is dropped or cut to fit.
var TRUNCATION_MARKER = "\n[...earlier messages truncated to fit context window]";
|
|
787
|
+
/**
 * Pick a subset of `tools` that fits both the profile's max_tools cap and the
 * given token budget.
 *
 * Ordering: when constraints.relevanceHints is provided, tools are ranked by
 * hint score (descending); otherwise a deterministic pseudo-shuffle seeded by
 * the tool-name list is used, so the same tool set always yields the same
 * selection without favoring declaration order.
 */
function selectTools(tools, tokenBudget, profile, constraints) {
  if (!tools.length) return tools;
  const maxTools = profile.max_tools;
  let candidates;
  if (constraints.relevanceHints) {
    const hints = constraints.relevanceHints;
    const scored = tools.map((t) => ({
      // Own-property check: a bare `hints[t.name]` would pick up
      // prototype-chain members for names like "constructor", producing a
      // non-numeric score and NaN comparisons in the sort below.
      score: Object.prototype.hasOwnProperty.call(hints, t.name) ? hints[t.name] ?? 0 : 0,
      tool: t
    }));
    scored.sort((a, b) => b.score - a.score);
    candidates = scored.map((s) => s.tool);
  } else {
    // Fisher-Yates shuffle driven by a simple string hash of the tool names;
    // deterministic for a given tool set.
    const seed = tools.map((t) => t.name).join(",");
    candidates = [...tools];
    let hash = 0;
    for (let i = 0; i < seed.length; i++) {
      hash = (hash << 5) - hash + seed.charCodeAt(i) | 0;
    }
    for (let i = candidates.length - 1; i > 0; i--) {
      hash = (hash << 5) - hash + i | 0;
      const j = Math.abs(hash) % (i + 1);
      [candidates[i], candidates[j]] = [candidates[j], candidates[i]];
    }
  }
  // Greedily take candidates until either the count cap or the token budget
  // is hit.
  const selected = [];
  let tokensUsed = 0;
  for (const tool of candidates) {
    if (selected.length >= maxTools) break;
    let toolTokens;
    try {
      toolTokens = countTokens(JSON.stringify(tool));
    } catch {
      // Same flat fallback as estimateToolTokens (was a hard-coded 350).
      toolTokens = AVG_TOKENS_PER_TOOL;
    }
    if (tokensUsed + toolTokens > tokenBudget) break;
    selected.push(tool);
    tokensUsed += toolTokens;
  }
  return selected;
}
|
|
827
|
+
/**
 * Trim a message list to fit `tokenBudget` (as estimated by countTokens plus
 * a 4-token-per-message overhead), dropping the oldest messages first and
 * marking any cut with TRUNCATION_MARKER. The newest message is always kept,
 * hard-truncating its content if it alone exceeds the budget.
 */
function truncateMessages(messages, tokenBudget) {
  if (!messages.length) return messages;
  const last = messages[messages.length - 1];
  const lastTokens = countTokens(last.content) + 4;
  if (lastTokens >= tokenBudget) {
    // Even the newest message alone is over budget: cut its content down.
    const markerTokens = countTokens(TRUNCATION_MARKER);
    const maxContentTokens = tokenBudget - 4 - markerTokens;
    if (maxContentTokens > 0) {
      // Convert the token allowance to characters with the same 3.5
      // chars/token ratio the default tokenizer uses. (The previous `* 4`
      // factor produced slices that re-estimated to ceil(4n/3.5) > n tokens,
      // i.e. the "truncated" message could still exceed tokenBudget.)
      // NOTE: still heuristic if a custom tokenizer was installed via setTokenizer.
      const maxChars = Math.floor(maxContentTokens * 3.5);
      const truncated = last.content.slice(0, maxChars) + TRUNCATION_MARKER;
      return [{ role: last.role, content: truncated }];
    }
    // Budget too small even for the marker: return the message untrimmed and
    // let the caller's safety headroom absorb it.
    return [last];
  }
  // Walk backwards from the newest message, keeping as many as fit. At least
  // one message is always kept (result.length > 0 guard).
  const result = [];
  let remaining = tokenBudget;
  for (let i = messages.length - 1; i >= 0; i--) {
    const msgTokens = countTokens(messages[i].content) + 4;
    if (remaining - msgTokens < 0 && result.length > 0) break;
    result.unshift(messages[i]);
    remaining -= msgTokens;
  }
  if (result.length < messages.length) {
    // Flag the cut on the oldest surviving message. The marker's own tokens
    // are not re-budgeted here; the slight overshoot is covered by
    // CONTEXT_SAFETY_FACTOR headroom upstream.
    result[0] = {
      role: result[0].role,
      content: TRUNCATION_MARKER + "\n" + result[0].content
    };
  }
  return result;
}
|
|
857
|
+
/**
 * Fit system prompt + tools + messages into the model's usable context window.
 *
 * The usable budget is max_context_tokens scaled by CONTEXT_SAFETY_FACTOR.
 * The system prompt is non-negotiable (throws when it alone exceeds the
 * budget); tools are trimmed next via selectTools, and whatever context
 * remains is given to the conversation via truncateMessages.
 *
 * @param {string} systemPrompt
 * @param {Array<{role: string, content: string}>} messages
 * @param {Array<object>} tools
 * @param {object} profile - model profile (id, max_context_tokens, ...)
 * @param {object} constraints - caller constraints forwarded to selectTools
 * @returns {{selectedTools: Array<object>, trimmedMessages: Array<object>, tokensEstimated: number}}
 * @throws {Error} when the system prompt alone exceeds the context budget
 */
function budgetTokens(systemPrompt, messages, tools, profile, constraints) {
  const budget = Math.floor(profile.max_context_tokens * CONTEXT_SAFETY_FACTOR);

  const systemTokens = countTokens(systemPrompt);
  if (systemTokens > budget) {
    throw new Error(
      `System prompt (${systemTokens} tokens) exceeds ${profile.id}'s context budget (${budget} tokens at ${Math.round(CONTEXT_SAFETY_FACTOR * 100)}% of ${profile.max_context_tokens}). Reduce the system prompt or use a model with a larger context window.`
    );
  }

  const toolTokens = estimateToolTokens(tools);
  const messageTokens = estimateMessagesTokens(messages);

  // Everything already fits: pass inputs through untouched.
  if (systemTokens + toolTokens + messageTokens <= budget) {
    return {
      selectedTools: tools,
      trimmedMessages: messages,
      tokensEstimated: systemTokens + toolTokens + messageTokens
    };
  }

  // Over budget — first consider shrinking the tool list.
  const afterSystem = budget - systemTokens;
  let keptTools = tools;
  let keptToolTokens = toolTokens;
  // NOTE(review): the left-hand side includes systemTokens, so tools get
  // trimmed whenever tools+system exceed half the post-system budget.
  // Possibly intended to compare toolTokens alone — confirm before changing.
  if (toolTokens > 0 && toolTokens + systemTokens > afterSystem * 0.5) {
    keptTools = selectTools(tools, Math.floor(afterSystem / 2), profile, constraints);
    keptToolTokens = estimateToolTokens(keptTools);
  }

  // Whatever context remains goes to the conversation.
  const messageBudget = budget - systemTokens - keptToolTokens;
  let keptMessages = messages;
  let keptMessageTokens = messageTokens;
  if (messageTokens > messageBudget) {
    keptMessages = truncateMessages(messages, messageBudget);
    keptMessageTokens = estimateMessagesTokens(keptMessages);
  }

  return {
    selectedTools: keptTools,
    trimmedMessages: keptMessages,
    tokensEstimated: systemTokens + keptToolTokens + keptMessageTokens
  };
}
|
|
891
|
+
// Prompt snippet appended by the "ban_cot_phrases" rule in applyPromptRules:
// suppresses narrated chain-of-thought phrasing in model output.
var COT_BAN = "\n\nCRITICAL: NEVER use phrases like 'I should', 'Let me', 'However', 'I need to', 'First, I will', 'Let me think'. Respond directly without narrating your thought process.";
// Appended by the "explicit_format" rule: demands bare JSON with no code
// fences or surrounding explanation when JSON output is expected.
var EXPLICIT_FORMAT = "\n\nIMPORTANT: If the expected output is JSON, respond with ONLY valid JSON. No markdown code fences. No explanation before or after. Just the JSON object.";
// Appended by the "no_markdown_headers" rule: bans #/##/### headers in responses.
var NO_MARKDOWN_HEADERS = "\n\nDo not use markdown headers (# ## ###) in your response.";
|
|
894
|
+
/**
 * Adapt a system prompt to a model profile's quirks.
 *
 * Iterates profile.prompt_rules in order. Text-appending rules concatenate
 * their snippet onto the prompt; marker-only rules (handled elsewhere in the
 * pipeline) are merely recorded. Unknown rule names are ignored.
 *
 * @param {string} systemPrompt - original prompt, never mutated
 * @param {object} profile - model profile carrying prompt_rules
 * @param {object} constraints - caller constraints (maxResponseWords, ...)
 * @returns {{adaptedPrompt: string, rulesApplied: string[]}}
 */
function applyPromptRules(systemPrompt, profile, constraints) {
  const rulesApplied = [];
  let prompt = systemPrompt;

  for (const rule of profile.prompt_rules) {
    if (rule === "ban_cot_phrases") {
      prompt += COT_BAN;
      rulesApplied.push("ban_cot_phrases");
    } else if (rule === "hard_word_limit") {
      // Only meaningful when the caller supplied a word cap.
      if (constraints.maxResponseWords) {
        prompt += `\n\nResponse MUST be under ${constraints.maxResponseWords} words. Be concise.`;
        rulesApplied.push(`hard_word_limit(${constraints.maxResponseWords})`);
      }
    } else if (rule === "explicit_format") {
      prompt += EXPLICIT_FORMAT;
      rulesApplied.push("explicit_format");
    } else if (rule === "no_markdown_headers") {
      prompt += NO_MARKDOWN_HEADERS;
      rulesApplied.push("no_markdown_headers");
    } else if (rule === "reinforce_json_in_prompt") {
      prompt += "\n\nYou MUST respond with valid JSON. Follow the exact schema specified. No additional text, no explanations, just the JSON object.";
      rulesApplied.push("reinforce_json_in_prompt");
    } else if (
      rule === "force_json_mode" ||
      rule === "disable_thinking_for_short_output" ||
      rule === "no_system_with_developer"
    ) {
      // Marker-only rules: recorded for diagnostics, no prompt text added.
      rulesApplied.push(rule);
    }
    // Any other rule name is silently skipped.
  }

  return { adaptedPrompt: prompt, rulesApplied };
}
|
|
938
|
+
/**
 * Choose how the model response should be produced.
 *
 * Uses "generateObject" only when the caller asked for structured output AND
 * the profile advertises support for it; otherwise falls back to plain text.
 *
 * @param {object} profile - model profile carrying output_modes
 * @param {object} constraints - caller constraints (structuredOutput flag)
 * @returns {"generateText" | "generateObject"}
 */
function selectOutputStrategy(profile, constraints) {
  const wantsStructured = Boolean(constraints.structuredOutput);
  const supportsObjects = profile.output_modes.includes("generateObject");
  return wantsStructured && supportsObjects ? "generateObject" : "generateText";
}
|
|
943
|
+
/**
 * Generate an RFC 4122 version-4 UUID for request tracing.
 *
 * Prefers the platform's crypto.randomUUID() — cryptographically strong and
 * available in Node >= 14.17 / all modern browsers. Falls back to the
 * Math.random()-based template only when the Web Crypto API is unavailable.
 *
 * @returns {string} lowercase UUID, e.g. "3f2b1c9a-8d4e-4f01-9a2b-0c1d2e3f4a5b"
 */
function uuid() {
  // Use the CSPRNG-backed native implementation when present.
  if (typeof crypto !== "undefined" && typeof crypto.randomUUID === "function") {
    return crypto.randomUUID();
  }
  // Fallback: not cryptographically secure, acceptable only because these
  // IDs are used for log correlation, not for security.
  return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (c) => {
    const r = Math.random() * 16 | 0;
    // "y" carries the RFC 4122 variant bits (10xx -> 8, 9, a, or b).
    const v = c === "x" ? r : r & 3 | 8;
    return v.toString(16);
  });
}
|
|
950
|
+
/**
 * Build a provider-ready request from raw caller input.
 *
 * Pipeline: resolve the model profile, fit tools + messages into the context
 * budget, adapt the system prompt to the profile's rules, then assemble a
 * fully-annotated request descriptor (with a fresh request id).
 *
 * @param {object} input - { model, systemPrompt, messages, tools?, constraints? }
 * @returns {object} prepared request with diagnostics fields
 */
function prepare(input) {
  const profile = getProfile(input.model);
  const constraints = input.constraints ?? {};
  const allTools = input.tools ?? [];

  // Fit system prompt, tools, and messages into the usable context window.
  const budgeted = budgetTokens(
    input.systemPrompt,
    input.messages,
    allTools,
    profile,
    constraints
  );

  // Adapt the system prompt to the profile's prompt rules.
  const adapted = applyPromptRules(input.systemPrompt, profile, constraints);

  // Account for the tokens that rule text appended to the prompt.
  let tokensEstimated = budgeted.tokensEstimated;
  if (adapted.rulesApplied.length > 0) {
    tokensEstimated += countTokens(adapted.adaptedPrompt) - countTokens(input.systemPrompt);
  }

  const outputStrategy = selectOutputStrategy(profile, constraints);
  const policies = computeToolPolicies(budgeted.selectedTools);

  return {
    requestId: uuid(),
    model: profile.id,
    provider: profile.provider,
    systemPrompt: adapted.adaptedPrompt,
    messages: budgeted.trimmedMessages,
    tools: budgeted.selectedTools,
    outputStrategy,
    promptRulesApplied: adapted.rulesApplied,
    tokensEstimated,
    contextBudget: profile.max_context_tokens,
    toolsOriginalCount: allTools.length,
    toolsSelectedCount: budgeted.selectedTools.length,
    toolPolicies: policies.length > 0 ? policies : null
  };
}
|
|
990
|
+
|
|
991
|
+
// src/logger.ts
|
|
992
|
+
/**
 * Classify the input/output token balance of a request.
 *
 * A very high share of input tokens suggests the prompt dwarfs the response
 * (wasted context): > 95% is "critical", > 85% is "warning", anything else
 * (including a zero-token request) is "healthy".
 *
 * @param {number} tokensIn
 * @param {number} tokensOut
 * @returns {"healthy" | "warning" | "critical"}
 */
function computeEfficiencyFlag(tokensIn, tokensOut) {
  const combined = tokensIn + tokensOut;
  if (combined === 0) return "healthy";
  const inputShare = tokensIn / combined;
  if (inputShare > 0.95) return "critical";
  return inputShare > 0.85 ? "warning" : "healthy";
}
|
|
1000
|
+
/**
 * Detect whether the model ignored its tools and answered with plain text.
 *
 * True when tools were offered, the response was non-empty, and either no
 * tool was used (toolsUsed of 0) or the toolsCalled list is explicitly empty.
 *
 * @param {object} input - log record (toolsOffered, toolsUsed, toolsCalled, emptyResponse)
 * @returns {boolean}
 */
function detectTextOnly(input) {
  const offered = input.toolsOffered ?? 0;
  const used = input.toolsUsed ?? 0;
  const empty = input.emptyResponse ?? false;

  // No tools offered, or nothing came back: not a "text-only" situation.
  if (!(offered > 0) || empty) return false;
  if (used === 0) return true;
  // Also flag records carrying an explicitly empty toolsCalled list.
  return input.toolsCalled?.length === 0;
}
|
|
1008
|
+
/**
 * Derive post-request diagnostics from a raw usage record.
 *
 * @param {object} input - { requestId, tokensIn, tokensOut, toolsCalled?, ... }
 * @returns {object} { logged, requestId, inputRatio, efficiencyFlag, textOnly, parallelismWarnings }
 */
function computeDiagnostics(input) {
  const combined = input.tokensIn + input.tokensOut;
  // Share of input tokens, rounded to 4 decimal places; 0 when no tokens at all.
  const inputRatio = combined > 0 ? Math.round(input.tokensIn / combined * 1e4) / 1e4 : 0;
  const efficiencyFlag = computeEfficiencyFlag(input.tokensIn, input.tokensOut);
  const textOnly = detectTextOnly(input);
  // Parallelism analysis only applies when tool calls were recorded.
  const warnings = input.toolsCalled ? detectParallelismWarnings(input.toolsCalled) : [];
  return {
    logged: true,
    requestId: input.requestId,
    inputRatio,
    efficiencyFlag,
    textOnly,
    parallelismWarnings: warnings.length > 0 ? warnings : null
  };
}
|
|
1023
|
+
/**
 * Log destination that prints a one-line summary per request to stdout.
 */
var ConsoleDestination = class {
  /**
   * @param {object} input - raw log record (model, token counts, latency, success)
   * @param {object} result - computed diagnostics for the record
   */
  write(input, result) {
    const flagParts = [result.efficiencyFlag ?? "unknown"];
    if (result.textOnly) {
      flagParts.push("text-only");
    }
    if (result.parallelismWarnings) {
      flagParts.push(`parallel(${result.parallelismWarnings.length})`);
    }
    const status = input.success ? "ok" : "FAIL";
    console.log(
      `[kg-auto] ${input.model} | ${input.tokensIn}\u2192${input.tokensOut} tokens | ${input.latencyMs}ms | ${status} | ${flagParts.join(", ")}`
    );
  }
};
|
|
1033
|
+
/**
 * Log destination that POSTs each record as JSON to a collector endpoint.
 *
 * Delivery is fire-and-forget: the fetch promise is not awaited and network
 * failures are deliberately swallowed so logging can never break the caller.
 */
var HttpDestination = class {
  /** @param {string} url - collector endpoint accepting JSON POST bodies */
  constructor(url) {
    this.url = url;
  }
  /**
   * @param {object} input - raw log record
   * @param {object} _result - computed diagnostics (unused; collector recomputes)
   */
  write(input, _result) {
    // snake_case keys match the collector's wire schema.
    const payload = {
      request_id: input.requestId,
      model: input.model,
      provider: input.provider,
      project: input.project ?? "default",
      intent: input.intent,
      tools_offered: input.toolsOffered ?? 0,
      tools_selected: input.toolsSelected ?? 0,
      tools_used: input.toolsUsed ?? 0,
      tokens_in: input.tokensIn,
      tokens_out: input.tokensOut,
      latency_ms: input.latencyMs,
      success: input.success,
      empty_response: input.emptyResponse ?? false,
      error_type: input.errorType,
      adapter_rules_applied: input.adapterRulesApplied,
      mode: input.mode,
      tools_called: input.toolsCalled
    };
    fetch(this.url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload)
    }).catch(() => {
      // Swallow delivery errors: logging must never throw.
    });
  }
};
|
|
1064
|
+
// Log destination that discards every record. Useful for disabling logging
// entirely without changing call sites that expect a destination object.
var SilentDestination = class {
  // Intentionally a no-op; matches the write(input, result) interface of the
  // other destinations by ignoring all arguments.
  write() {
  }
};
|
|
1068
|
+
/**
 * Compute diagnostics for a usage record and deliver both to a destination.
 *
 * @param {object} input - raw usage record (requestId, token counts, ...)
 * @param {{write: Function}} destination - sink receiving (input, diagnostics)
 * @returns {object} the computed diagnostics, for the caller's own use
 */
function log(input, destination) {
  const diagnostics = computeDiagnostics(input);
  destination.write(input, diagnostics);
  return diagnostics;
}
|
|
1073
|
+
// Public API of the bundle: model-profile lookup, request preparation,
// tokenizer configuration, and logging destinations/utilities.
export {
  ConsoleDestination,
  HttpDestination,
  PROFILES,
  SERIAL_TOOLS,
  SilentDestination,
  computeDiagnostics,
  computeToolPolicies,
  countTokens,
  detectParallelismWarnings,
  getCurrentProfiles,
  getProfile,
  getProfilesByProvider,
  log,
  prepare,
  setTokenizer
};
//# sourceMappingURL=index.mjs.map
|