promptpilot 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +227 -105
- package/dist/cli.d.ts +9 -0
- package/dist/cli.js +679 -35
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +35 -1
- package/dist/index.js +446 -31
- package/dist/index.js.map +1 -1
- package/package.json +4 -2
package/dist/cli.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/cli.ts
|
|
4
|
-
import { realpathSync } from "fs";
|
|
4
|
+
import { readFileSync, realpathSync } from "fs";
|
|
5
5
|
import { fileURLToPath } from "url";
|
|
6
6
|
|
|
7
7
|
// src/errors.ts
|
|
@@ -360,13 +360,13 @@ var modeGuidance = {
|
|
|
360
360
|
clarity: "Improve clarity, remove ambiguity, and keep the request easy for a downstream model to follow.",
|
|
361
361
|
concise: "Minimize token count while preserving user intent, constraints, and expected output.",
|
|
362
362
|
detailed: "Make the request explicit and complete, including structure and success criteria.",
|
|
363
|
-
structured: "Organize the request into
|
|
363
|
+
structured: "Organize the request into sections only when that improves clarity or token efficiency.",
|
|
364
364
|
persuasive: "Refine wording so the request is compelling and likely to elicit a thoughtful response.",
|
|
365
365
|
compress: "Aggressively compress redundant wording while preserving the meaning and critical constraints.",
|
|
366
366
|
claude_cli: "Optimize specifically for Claude CLI: compact sections, direct instructions, and minimal boilerplate."
|
|
367
367
|
};
|
|
368
368
|
var presetGuidance = {
|
|
369
|
-
code: "Favor precise technical requirements, edge cases,
|
|
369
|
+
code: "Favor precise technical requirements, edge cases, expected output format, and a compact inspect-plan-act-test-reflect loop for code tasks.",
|
|
370
370
|
email: "Preserve the sender's goal, tone, and audience; aim for a realistic and usable writing request.",
|
|
371
371
|
essay: "Preserve thesis, structure, and voice guidance while making the prompt clearer.",
|
|
372
372
|
support: "Favor concise issue context, user impact, and desired resolution details.",
|
|
@@ -384,6 +384,10 @@ function getOptimizationSystemPrompt(mode, preset) {
|
|
|
384
384
|
"- Preserve critical constraints and task goals.",
|
|
385
385
|
"- Improve clarity, structure, and downstream usefulness.",
|
|
386
386
|
"- Keep the result compact when the mode requests compression.",
|
|
387
|
+
"- Do not force sections when direct phrasing is shorter and equally clear.",
|
|
388
|
+
"- Remove redundancy aggressively when the source prompt repeats the same goal multiple ways.",
|
|
389
|
+
"- For code tasks, prefer a terse agent brief over narrative prose.",
|
|
390
|
+
"- For code tasks, structure the prompt around a Karpathy-style loop: inspect, plan, act, test, reflect, repeat.",
|
|
387
391
|
`Mode guidance: ${modeGuidance[mode]}`,
|
|
388
392
|
preset ? `Preset guidance: ${presetGuidance[preset]}` : "Preset guidance: none"
|
|
389
393
|
].join("\n");
|
|
@@ -712,7 +716,11 @@ function tokenize(value) {
|
|
|
712
716
|
);
|
|
713
717
|
}
|
|
714
718
|
/**
 * Pulls up to eight distinct constraint-like sentences out of a prompt.
 *
 * The text is split on newlines and then on sentence-ending punctuation;
 * leading list bullets are removed. A fragment is kept when it is 1-180
 * characters long and contains a constraint keyword as a whole word
 * (inflections such as "limits", "maximum", "keeping" still count).
 * Matching on whole words keeps sentences like "help me understand ..."
 * from being reported as constraints because they contain "under".
 *
 * @param {string} value - Raw prompt text.
 * @returns {string[]} Unique fragments in first-seen order, at most 8.
 */
function extractConstraints(value) {
  const constraintKeyword = /\b(?:must(?:n['’]t)?|should(?:n['’]t)?|avoid\w*|do not|don't|never|exactly|at most|under|limit\w*|max\w*|preserv\w*|keep\w*)\b/i;
  const fragments = value
    .split(/\n+/)
    .flatMap((line) => line.split(/(?<=[.!?])\s+/))
    .map((line) => line.trim().replace(/^[-*]\s*/, ""))
    .filter((line) => line.length > 0 && line.length <= 180)
    .filter((line) => constraintKeyword.test(line));
  // A Set removes repeats while keeping first-seen order.
  return Array.from(new Set(fragments)).slice(0, 8);
}
|
|
717
725
|
function extractEntities(value) {
|
|
718
726
|
return Array.from(
|
|
@@ -834,6 +842,9 @@ var DEFAULT_PROVIDER = "ollama";
|
|
|
834
842
|
var DEFAULT_MAX_INPUT_TOKENS = 1200;
|
|
835
843
|
var DEFAULT_MAX_CONTEXT_TOKENS = 800;
|
|
836
844
|
var DEFAULT_MAX_TOTAL_TOKENS = 2200;
|
|
845
|
+
// Routing priority used when the optimize input does not set `routingPriority`.
var DEFAULT_ROUTING_PRIORITY = "cheapest_adequate";
|
|
846
|
+
// Number of ranked targets kept when the optimize input does not set `routingTopK`.
var DEFAULT_ROUTING_TOP_K = 3;
|
|
847
|
+
// Workload bias passed to the router when the optimize input does not set `workloadBias`.
var DEFAULT_WORKLOAD_BIAS = "code_first";
|
|
837
848
|
var PromptOptimizer = class {
|
|
838
849
|
config;
|
|
839
850
|
logger;
|
|
@@ -871,6 +882,10 @@ var PromptOptimizer = class {
|
|
|
871
882
|
const maxInputTokens = input.maxInputTokens ?? this.config.maxInputTokens ?? DEFAULT_MAX_INPUT_TOKENS;
|
|
872
883
|
const maxContextTokens = input.maxContextTokens ?? this.config.maxContextTokens ?? DEFAULT_MAX_CONTEXT_TOKENS;
|
|
873
884
|
const maxTotalTokens = input.maxTotalTokens ?? this.config.maxTotalTokens ?? DEFAULT_MAX_TOTAL_TOKENS;
|
|
885
|
+
const routingEnabled = input.routingEnabled !== false;
|
|
886
|
+
const routingPriority = input.routingPriority ?? DEFAULT_ROUTING_PRIORITY;
|
|
887
|
+
const routingTopK = input.routingTopK ?? DEFAULT_ROUTING_TOP_K;
|
|
888
|
+
const workloadBias = input.workloadBias ?? DEFAULT_WORKLOAD_BIAS;
|
|
874
889
|
const warnings = [];
|
|
875
890
|
const changes = [];
|
|
876
891
|
const useContext = input.useContext !== false && Boolean(input.sessionId);
|
|
@@ -900,6 +915,7 @@ var PromptOptimizer = class {
|
|
|
900
915
|
);
|
|
901
916
|
let provider = input.bypassOptimization ? "heuristic" : this.config.provider ?? DEFAULT_PROVIDER;
|
|
902
917
|
let model = provider === "ollama" ? this.config.ollamaModel ?? "auto" : "heuristic";
|
|
918
|
+
let usedPreprocessedFallback = false;
|
|
903
919
|
let optimizedPrompt = originalPrompt;
|
|
904
920
|
let providerWarnings = [];
|
|
905
921
|
let providerChanges = [];
|
|
@@ -934,6 +950,11 @@ var PromptOptimizer = class {
|
|
|
934
950
|
optimizedPrompt = ollamaResult.optimizedPrompt;
|
|
935
951
|
providerWarnings = ollamaResult.warnings;
|
|
936
952
|
providerChanges = ollamaResult.changes;
|
|
953
|
+
if (ollamaResult.source === "preprocessed") {
|
|
954
|
+
provider = "heuristic";
|
|
955
|
+
model = "cheap-preprocess";
|
|
956
|
+
usedPreprocessedFallback = true;
|
|
957
|
+
}
|
|
937
958
|
} else if (provider === "ollama") {
|
|
938
959
|
provider = "heuristic";
|
|
939
960
|
model = "heuristic";
|
|
@@ -942,7 +963,7 @@ var PromptOptimizer = class {
|
|
|
942
963
|
];
|
|
943
964
|
}
|
|
944
965
|
}
|
|
945
|
-
if (provider === "heuristic") {
|
|
966
|
+
if (provider === "heuristic" && !usedPreprocessedFallback) {
|
|
946
967
|
const fallback = this.heuristicOptimize({
|
|
947
968
|
input: {
|
|
948
969
|
...input,
|
|
@@ -959,6 +980,22 @@ var PromptOptimizer = class {
|
|
|
959
980
|
}
|
|
960
981
|
warnings.push(...providerWarnings);
|
|
961
982
|
changes.push(...providerChanges);
|
|
983
|
+
const routingDecision = await this.routeDownstreamTargets({
|
|
984
|
+
input: {
|
|
985
|
+
...input,
|
|
986
|
+
prompt: originalPrompt,
|
|
987
|
+
mode,
|
|
988
|
+
preset,
|
|
989
|
+
routingPriority,
|
|
990
|
+
routingTopK,
|
|
991
|
+
workloadBias
|
|
992
|
+
},
|
|
993
|
+
routingEnabled,
|
|
994
|
+
routingPriority,
|
|
995
|
+
routingTopK,
|
|
996
|
+
workloadBias
|
|
997
|
+
});
|
|
998
|
+
warnings.push(...routingDecision.routingWarnings);
|
|
962
999
|
let finalPrompt = composeFinalPrompt({
|
|
963
1000
|
optimizedPrompt,
|
|
964
1001
|
input: {
|
|
@@ -967,7 +1004,8 @@ var PromptOptimizer = class {
|
|
|
967
1004
|
mode,
|
|
968
1005
|
preset
|
|
969
1006
|
},
|
|
970
|
-
context: relevantContext
|
|
1007
|
+
context: relevantContext,
|
|
1008
|
+
routingDecision
|
|
971
1009
|
});
|
|
972
1010
|
let estimatedTokensAfter = {
|
|
973
1011
|
prompt: this.estimator.estimateText(optimizedPrompt),
|
|
@@ -984,6 +1022,7 @@ var PromptOptimizer = class {
|
|
|
984
1022
|
},
|
|
985
1023
|
optimizedPrompt,
|
|
986
1024
|
context: relevantContext,
|
|
1025
|
+
routingDecision,
|
|
987
1026
|
maxTotalTokens
|
|
988
1027
|
});
|
|
989
1028
|
finalPrompt = reduced.finalPrompt;
|
|
@@ -1019,6 +1058,11 @@ var PromptOptimizer = class {
|
|
|
1019
1058
|
mode,
|
|
1020
1059
|
provider,
|
|
1021
1060
|
model,
|
|
1061
|
+
selectedTarget: routingDecision.selectedTarget,
|
|
1062
|
+
rankedTargets: routingDecision.rankedTargets,
|
|
1063
|
+
routingReason: routingDecision.routingReason,
|
|
1064
|
+
routingWarnings: routingDecision.routingWarnings,
|
|
1065
|
+
routingProvider: routingDecision.routingProvider,
|
|
1022
1066
|
warnings,
|
|
1023
1067
|
changes,
|
|
1024
1068
|
debugInfo: input.debug ? {
|
|
@@ -1027,7 +1071,8 @@ var PromptOptimizer = class {
|
|
|
1027
1071
|
estimatedTokensAfter,
|
|
1028
1072
|
extractedConstraints,
|
|
1029
1073
|
preset,
|
|
1030
|
-
selectedModel: model
|
|
1074
|
+
selectedModel: model,
|
|
1075
|
+
routingDecision
|
|
1031
1076
|
} : void 0
|
|
1032
1077
|
};
|
|
1033
1078
|
}
|
|
@@ -1050,29 +1095,88 @@ var PromptOptimizer = class {
|
|
|
1050
1095
|
});
|
|
1051
1096
|
}
|
|
1052
1097
|
async tryOllamaOptimization(options) {
|
|
1098
|
+
const preprocessedPrompt = cheapCompress(options.input.prompt);
|
|
1099
|
+
const preprocessedTokenCount = this.estimator.estimateText(preprocessedPrompt);
|
|
1100
|
+
const ultraMode = preprocessedTokenCount > 500;
|
|
1053
1101
|
try {
|
|
1054
1102
|
if (!await this.client.isAvailable()) {
|
|
1055
|
-
return
|
|
1103
|
+
return {
|
|
1104
|
+
optimizedPrompt: preprocessedPrompt,
|
|
1105
|
+
changes: ["Applied cheap local preprocessing because Ollama was unavailable."],
|
|
1106
|
+
warnings: ["Ollama was unavailable, so PromptPilot kept the cheap preprocessed prompt."],
|
|
1107
|
+
source: "preprocessed"
|
|
1108
|
+
};
|
|
1109
|
+
}
|
|
1110
|
+
const systemPrompt = ultraMode ? `${getOptimizationSystemPrompt(options.input.mode, options.input.preset)}
|
|
1111
|
+
Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemPrompt(options.input.mode, options.input.preset);
|
|
1112
|
+
const optimizationPrompt = buildOptimizationPrompt(
|
|
1113
|
+
{
|
|
1114
|
+
...options.input,
|
|
1115
|
+
prompt: preprocessedPrompt
|
|
1116
|
+
},
|
|
1117
|
+
options.relevantContext,
|
|
1118
|
+
options.extractedConstraints
|
|
1119
|
+
);
|
|
1120
|
+
const timeoutMs = options.input.timeoutMs ?? this.config.timeoutMs;
|
|
1121
|
+
let optimizedPrompt = "";
|
|
1122
|
+
let responseChanges = [];
|
|
1123
|
+
let responseWarnings = [];
|
|
1124
|
+
try {
|
|
1125
|
+
const response = await this.client.generateJson({
|
|
1126
|
+
systemPrompt,
|
|
1127
|
+
prompt: optimizationPrompt,
|
|
1128
|
+
timeoutMs,
|
|
1129
|
+
model: options.model,
|
|
1130
|
+
temperature: this.config.temperature,
|
|
1131
|
+
format: "json"
|
|
1132
|
+
});
|
|
1133
|
+
optimizedPrompt = normalizeWhitespace(response.optimizedPrompt ?? "");
|
|
1134
|
+
responseChanges = response.changes ?? [];
|
|
1135
|
+
responseWarnings = response.warnings ?? [];
|
|
1136
|
+
} catch {
|
|
1137
|
+
const raw = await this.client.generate({
|
|
1138
|
+
systemPrompt,
|
|
1139
|
+
prompt: optimizationPrompt,
|
|
1140
|
+
timeoutMs,
|
|
1141
|
+
model: options.model,
|
|
1142
|
+
temperature: this.config.temperature
|
|
1143
|
+
});
|
|
1144
|
+
optimizedPrompt = sanitizeTextOptimizationOutput(raw);
|
|
1145
|
+
responseChanges = [`Applied text-only Ollama optimization with ${options.model}.`];
|
|
1056
1146
|
}
|
|
1057
|
-
const response = await this.client.generateJson({
|
|
1058
|
-
systemPrompt: getOptimizationSystemPrompt(options.input.mode, options.input.preset),
|
|
1059
|
-
prompt: buildOptimizationPrompt(options.input, options.relevantContext, options.extractedConstraints),
|
|
1060
|
-
timeoutMs: options.input.timeoutMs ?? this.config.timeoutMs,
|
|
1061
|
-
model: options.model,
|
|
1062
|
-
temperature: this.config.temperature,
|
|
1063
|
-
format: "json"
|
|
1064
|
-
});
|
|
1065
|
-
const optimizedPrompt = normalizeWhitespace(response.optimizedPrompt ?? "");
|
|
1066
1147
|
if (!optimizedPrompt) {
|
|
1067
|
-
return
|
|
1148
|
+
return {
|
|
1149
|
+
optimizedPrompt: preprocessedPrompt,
|
|
1150
|
+
changes: ["Applied cheap local preprocessing because the model returned an empty optimization."],
|
|
1151
|
+
warnings: ["The local optimizer returned an empty result, so PromptPilot kept the preprocessed prompt."],
|
|
1152
|
+
source: "preprocessed"
|
|
1153
|
+
};
|
|
1154
|
+
}
|
|
1155
|
+
const optimizedTokenCount = this.estimator.estimateText(optimizedPrompt);
|
|
1156
|
+
if (isCompressionSensitiveMode(options.input.mode) && optimizedTokenCount >= preprocessedTokenCount) {
|
|
1157
|
+
return {
|
|
1158
|
+
optimizedPrompt: preprocessedPrompt,
|
|
1159
|
+
changes: [
|
|
1160
|
+
...responseChanges,
|
|
1161
|
+
"Kept the cheap preprocessed prompt because the model output was not smaller."
|
|
1162
|
+
],
|
|
1163
|
+
warnings: responseWarnings,
|
|
1164
|
+
source: "preprocessed"
|
|
1165
|
+
};
|
|
1068
1166
|
}
|
|
1069
1167
|
return {
|
|
1070
1168
|
optimizedPrompt,
|
|
1071
|
-
changes:
|
|
1072
|
-
warnings:
|
|
1169
|
+
changes: responseChanges.length > 0 ? responseChanges : [`Applied Ollama optimization with ${options.model}.`],
|
|
1170
|
+
warnings: responseWarnings,
|
|
1171
|
+
source: "ollama"
|
|
1073
1172
|
};
|
|
1074
1173
|
} catch {
|
|
1075
|
-
return
|
|
1174
|
+
return {
|
|
1175
|
+
optimizedPrompt: preprocessedPrompt,
|
|
1176
|
+
changes: ["Applied cheap local preprocessing because Ollama optimization failed."],
|
|
1177
|
+
warnings: ["Ollama optimization failed, so PromptPilot kept the preprocessed prompt."],
|
|
1178
|
+
source: "preprocessed"
|
|
1179
|
+
};
|
|
1076
1180
|
}
|
|
1077
1181
|
}
|
|
1078
1182
|
async resolveOllamaModel(options) {
|
|
@@ -1243,17 +1347,146 @@ var PromptOptimizer = class {
|
|
|
1243
1347
|
};
|
|
1244
1348
|
}
|
|
1245
1349
|
}
|
|
1350
|
+
async routeDownstreamTargets(options) {
|
|
1351
|
+
const availableTargets = normalizeAvailableTargets(options.input.availableTargets ?? []);
|
|
1352
|
+
if (!options.routingEnabled || availableTargets.length === 0) {
|
|
1353
|
+
return {
|
|
1354
|
+
selectedTarget: null,
|
|
1355
|
+
rankedTargets: [],
|
|
1356
|
+
routingReason: null,
|
|
1357
|
+
routingWarnings: [],
|
|
1358
|
+
routingProvider: null
|
|
1359
|
+
};
|
|
1360
|
+
}
|
|
1361
|
+
if (availableTargets.length === 1) {
|
|
1362
|
+
return {
|
|
1363
|
+
selectedTarget: stripInternalTargetFields(availableTargets[0]),
|
|
1364
|
+
rankedTargets: [
|
|
1365
|
+
{
|
|
1366
|
+
...stripInternalTargetFields(availableTargets[0]),
|
|
1367
|
+
rank: 1,
|
|
1368
|
+
reason: "Only one downstream target was supplied."
|
|
1369
|
+
}
|
|
1370
|
+
],
|
|
1371
|
+
routingReason: "Only one downstream target was supplied, so it was selected directly.",
|
|
1372
|
+
routingWarnings: [],
|
|
1373
|
+
routingProvider: "direct"
|
|
1374
|
+
};
|
|
1375
|
+
}
|
|
1376
|
+
if (!this.client.listModels) {
|
|
1377
|
+
return {
|
|
1378
|
+
selectedTarget: null,
|
|
1379
|
+
rankedTargets: [],
|
|
1380
|
+
routingReason: null,
|
|
1381
|
+
routingWarnings: [
|
|
1382
|
+
"Downstream target routing requires local Ollama model discovery so a Qwen router can run."
|
|
1383
|
+
],
|
|
1384
|
+
routingProvider: null
|
|
1385
|
+
};
|
|
1386
|
+
}
|
|
1387
|
+
try {
|
|
1388
|
+
const installedModels = await this.client.listModels();
|
|
1389
|
+
const routerModel = getQwenRouterModel(installedModels, this.config.routerModel);
|
|
1390
|
+
if (!routerModel) {
|
|
1391
|
+
return {
|
|
1392
|
+
selectedTarget: null,
|
|
1393
|
+
rankedTargets: [],
|
|
1394
|
+
routingReason: null,
|
|
1395
|
+
routingWarnings: [
|
|
1396
|
+
"Downstream target routing could not run because no suitable local Qwen router model is installed."
|
|
1397
|
+
],
|
|
1398
|
+
routingProvider: null
|
|
1399
|
+
};
|
|
1400
|
+
}
|
|
1401
|
+
const response = await this.client.generateJson({
|
|
1402
|
+
model: routerModel,
|
|
1403
|
+
timeoutMs: options.input.timeoutMs ?? this.config.timeoutMs,
|
|
1404
|
+
temperature: 0,
|
|
1405
|
+
format: "json",
|
|
1406
|
+
systemPrompt: buildDownstreamRoutingSystemPrompt(options.routingPriority, options.workloadBias),
|
|
1407
|
+
prompt: JSON.stringify(
|
|
1408
|
+
{
|
|
1409
|
+
objective: "Rank the caller-supplied downstream targets for this prompt and choose the best top target.",
|
|
1410
|
+
prompt: options.input.prompt,
|
|
1411
|
+
task: options.input.task ?? null,
|
|
1412
|
+
mode: options.input.mode,
|
|
1413
|
+
preset: options.input.preset,
|
|
1414
|
+
tone: options.input.tone ?? null,
|
|
1415
|
+
targetHints: options.input.targetHints ?? [],
|
|
1416
|
+
workloadBias: options.workloadBias,
|
|
1417
|
+
routingPriority: options.routingPriority,
|
|
1418
|
+
candidateTargets: availableTargets.map((target) => ({
|
|
1419
|
+
id: target.id,
|
|
1420
|
+
provider: target.provider,
|
|
1421
|
+
model: target.model,
|
|
1422
|
+
label: target.label ?? null,
|
|
1423
|
+
costRank: target.costRank,
|
|
1424
|
+
latencyRank: target.latencyRank,
|
|
1425
|
+
capabilities: target.capabilities,
|
|
1426
|
+
profile: describeDownstreamTarget(target)
|
|
1427
|
+
}))
|
|
1428
|
+
},
|
|
1429
|
+
null,
|
|
1430
|
+
2
|
|
1431
|
+
)
|
|
1432
|
+
});
|
|
1433
|
+
const rankedTargetIds = Array.from(
|
|
1434
|
+
new Set((response.rankedTargetIds ?? []).map((value) => value.trim()).filter(Boolean))
|
|
1435
|
+
).slice(0, Math.max(1, options.routingTopK));
|
|
1436
|
+
const rankedTargets = rankedTargetIds.map((id, index) => {
|
|
1437
|
+
const target = availableTargets.find((candidate) => candidate.id === id);
|
|
1438
|
+
if (!target) {
|
|
1439
|
+
return null;
|
|
1440
|
+
}
|
|
1441
|
+
return {
|
|
1442
|
+
...stripInternalTargetFields(target),
|
|
1443
|
+
rank: index + 1,
|
|
1444
|
+
reason: index === 0 ? response.reason?.trim() || "Selected by the local Qwen downstream router." : `Ranked #${index + 1} by the local Qwen downstream router.`
|
|
1445
|
+
};
|
|
1446
|
+
}).filter((value) => value !== null);
|
|
1447
|
+
const selectedTargetId = response.selectedTargetId?.trim();
|
|
1448
|
+
const selectedTargetCandidate = (selectedTargetId && availableTargets.find((candidate) => candidate.id === selectedTargetId)) ?? (rankedTargets[0] ? availableTargets.find(
|
|
1449
|
+
(candidate) => candidate.provider === rankedTargets[0].provider && candidate.model === rankedTargets[0].model && candidate.label === rankedTargets[0].label
|
|
1450
|
+
) ?? null : null);
|
|
1451
|
+
if (!selectedTargetCandidate || rankedTargets.length === 0) {
|
|
1452
|
+
return {
|
|
1453
|
+
selectedTarget: null,
|
|
1454
|
+
rankedTargets: [],
|
|
1455
|
+
routingReason: null,
|
|
1456
|
+
routingWarnings: [
|
|
1457
|
+
"Downstream target routing returned an invalid selection, so no downstream target was chosen."
|
|
1458
|
+
],
|
|
1459
|
+
routingProvider: routerModel
|
|
1460
|
+
};
|
|
1461
|
+
}
|
|
1462
|
+
return {
|
|
1463
|
+
selectedTarget: stripInternalTargetFields(selectedTargetCandidate),
|
|
1464
|
+
rankedTargets,
|
|
1465
|
+
routingReason: response.reason?.trim() || "Selected by the local Qwen downstream router.",
|
|
1466
|
+
routingWarnings: [],
|
|
1467
|
+
routingProvider: routerModel
|
|
1468
|
+
};
|
|
1469
|
+
} catch {
|
|
1470
|
+
return {
|
|
1471
|
+
selectedTarget: null,
|
|
1472
|
+
rankedTargets: [],
|
|
1473
|
+
routingReason: null,
|
|
1474
|
+
routingWarnings: [
|
|
1475
|
+
"Downstream target routing could not complete, so no downstream target was selected."
|
|
1476
|
+
],
|
|
1477
|
+
routingProvider: null
|
|
1478
|
+
};
|
|
1479
|
+
}
|
|
1480
|
+
}
|
|
1246
1481
|
heuristicOptimize(options) {
|
|
1247
|
-
const
|
|
1248
|
-
|
|
1249
|
-
options.input.task ? `Task type: ${options.input.task}` : "",
|
|
1250
|
-
options.input.tone ? `Tone: ${options.input.tone}` : "",
|
|
1251
|
-
options.input.outputFormat ? `Output format: ${options.input.outputFormat}` : "",
|
|
1252
|
-
options.input.maxLength ? `Maximum length: ${options.input.maxLength}` : "",
|
|
1253
|
-
options.constraints.length ? `Critical constraints: ${options.constraints.join("; ")}` : ""
|
|
1254
|
-
].filter(Boolean);
|
|
1482
|
+
const isCodeRequest = isCodeFirstRequest(options.input);
|
|
1483
|
+
const lines = isCodeRequest ? buildCodeFirstHeuristicPrompt(options.input, options.constraints) : buildGeneralHeuristicPrompt(options.input, options.constraints);
|
|
1255
1484
|
const optimizedPrompt = lines.join("\n");
|
|
1256
|
-
const changes =
|
|
1485
|
+
const changes = isCodeRequest ? [
|
|
1486
|
+
"Compressed the prompt into a code-agent brief.",
|
|
1487
|
+
"Removed redundant narrative phrasing.",
|
|
1488
|
+
"Applied a Karpathy-style inspect-plan-act-test-reflect loop."
|
|
1489
|
+
] : ["Normalized prompt structure for downstream model consumption."];
|
|
1257
1490
|
if (options.input.mode === "compress" || options.input.mode === "concise") {
|
|
1258
1491
|
changes.push("Applied concise formatting to reduce token usage.");
|
|
1259
1492
|
}
|
|
@@ -1274,7 +1507,8 @@ var PromptOptimizer = class {
|
|
|
1274
1507
|
const finalPrompt = composeFinalPrompt({
|
|
1275
1508
|
optimizedPrompt: this.estimator.truncateToBudget(options.optimizedPrompt, Math.floor(options.maxTotalTokens * 0.5)),
|
|
1276
1509
|
input: options.input,
|
|
1277
|
-
context: compactContext
|
|
1510
|
+
context: compactContext,
|
|
1511
|
+
routingDecision: options.routingDecision
|
|
1278
1512
|
});
|
|
1279
1513
|
return {
|
|
1280
1514
|
finalPrompt,
|
|
@@ -1333,9 +1567,17 @@ ${contextBlock}`);
|
|
|
1333
1567
|
if (constraints.length > 0) {
|
|
1334
1568
|
sections.push(`Constraints:
|
|
1335
1569
|
- ${constraints.join("\n- ")}`);
|
|
1570
|
+
}
|
|
1571
|
+
if (isCodeFirstRequest(input.input)) {
|
|
1572
|
+
sections.push(`Execution loop:
|
|
1573
|
+
- Inspect the relevant files and current behavior.
|
|
1574
|
+
- Plan the smallest safe next step.
|
|
1575
|
+
- Act with minimal, reversible changes.
|
|
1576
|
+
- Test or validate the result.
|
|
1577
|
+
- Reflect on gaps or risks, then repeat.`);
|
|
1336
1578
|
}
|
|
1337
1579
|
const desiredOutput = [
|
|
1338
|
-
input.input.targetModel ? `Target model: ${input.input.targetModel}` : "Target model: claude",
|
|
1580
|
+
input.routingDecision.selectedTarget ? `Selected target: ${formatTargetLabel(input.routingDecision.selectedTarget)}` : input.input.targetModel ? `Target model: ${input.input.targetModel}` : "Target model: claude",
|
|
1339
1581
|
`Mode: ${input.input.mode}`,
|
|
1340
1582
|
`Preset: ${input.input.preset}`
|
|
1341
1583
|
];
|
|
@@ -1361,16 +1603,337 @@ function emptyRelevantContext() {
|
|
|
1361
1603
|
debugInfo: {}
|
|
1362
1604
|
};
|
|
1363
1605
|
}
|
|
1606
|
+
/**
 * Fills in the optional fields of caller-supplied targets and gives each one
 * an internal `id` of the form `provider:model:index`.
 *
 * Missing `label` becomes `provider:model`, missing `capabilities` are
 * inferred with `inferCapabilities`, and missing cost/latency ranks default
 * to the target's 1-based position in the list.
 *
 * @param {object[]} targets - Targets as supplied by the caller.
 * @returns {object[]} New objects; the inputs are not modified.
 */
function normalizeAvailableTargets(targets) {
  return targets.map((target, index) => {
    const { provider, model } = target;
    const position = index + 1;
    return {
      ...target,
      id: `${provider}:${model}:${index}`,
      label: target.label ?? `${provider}:${model}`,
      capabilities: target.capabilities ?? inferCapabilities(target),
      costRank: target.costRank ?? position,
      latencyRank: target.latencyRank ?? position
    };
  });
}
|
|
1616
|
+
/**
 * Returns the public view of a normalized target: only provider, model,
 * label, capabilities, costRank and latencyRank are copied, so the internal
 * `id` and any other extra fields are left out.
 *
 * @param {object} target - A normalized target.
 * @returns {object} A new object with exactly the six public fields.
 */
function stripInternalTargetFields(target) {
  const { provider, model, label, capabilities, costRank, latencyRank } = target;
  return { provider, model, label, capabilities, costRank, latencyRank };
}
|
|
1626
|
+
/**
 * Builds the system prompt for the downstream router model: the required
 * JSON response shape, the caller's routing priority and workload bias, and
 * the selection policy, as newline-separated instructions.
 *
 * @param {string} priority - Routing priority interpolated into the prompt.
 * @param {string} workloadBias - Workload bias interpolated into the prompt.
 * @returns {string} The ten-line system prompt.
 */
function buildDownstreamRoutingSystemPrompt(priority, workloadBias) {
  const responseContract = [
    "You are a downstream model router for PromptPilot.",
    "Return strict JSON only with this shape:",
    '{"selectedTargetId":"string","rankedTargetIds":["string"],"reason":"string"}',
    "Choose only from the supplied candidate target IDs.",
    "Rank up to the requested top targets in best-first order."
  ];
  const callerSettings = [
    `Routing priority: ${priority}.`,
    `Workload bias: ${workloadBias}.`
  ];
  const selectionPolicy = [
    "Code-first means ambiguous prompts should default toward coding-capable or agentic-capable targets.",
    "Explicit email, support, chat, and lightweight writing prompts may prefer cheaper lighter targets.",
    "Do not invent targets. Do not output prose outside JSON."
  ];
  return [...responseContract, ...callerSettings, ...selectionPolicy].join("\n");
}
|
|
1640
|
+
/**
 * Guesses capability tags for a target from keywords in its provider, model
 * and label (case-insensitive).
 *
 * Most keywords match as substrings on purpose (e.g. "code" inside
 * "codellama"). The two short keywords are anchored because they also occur
 * inside unrelated names: "ci" must not touch a letter on either side
 * ("precision" no longer implies debugging) and "mini" must not follow a
 * letter ("gemini" no longer implies writing; "gpt-4o-mini" still does).
 *
 * @param {{provider: string, model: string, label?: string}} target
 * @returns {string[]} Distinct tags in a fixed order: coding, agentic,
 *   tool_use, refactor, debugging, writing, email.
 */
function inferCapabilities(target) {
  const lower = `${target.provider} ${target.model} ${target.label ?? ""}`.toLowerCase();
  // Each rule: keyword pattern -> tags it grants. Rule order fixes output order.
  const rules = [
    [/code|codex|coder|agent|tool/, ["coding"]],
    [/agent|tool/, ["agentic", "tool_use"]],
    [/refactor|coder|codex/, ["refactor"]],
    [/debug|fix|(?<![a-z])ci(?![a-z])/, ["debugging"]],
    [/write|email|chat|sonnet|(?<![a-z])mini/, ["writing"]],
    [/email/, ["email"]]
  ];
  const capabilities = new Set();
  for (const [pattern, tags] of rules) {
    if (pattern.test(lower)) {
      for (const tag of tags) {
        capabilities.add(tag);
      }
    }
  }
  return Array.from(capabilities);
}
|
|
1664
|
+
/**
 * Renders a target as a single `key=value; key=value` line for the router
 * prompt. Capabilities are comma-joined; a missing or empty list is written
 * as "none".
 *
 * @param {object} target - A normalized target.
 * @returns {string} The one-line profile.
 */
function describeDownstreamTarget(target) {
  const capabilityList = target.capabilities?.join(",") || "none";
  const fields = [
    ["provider", target.provider],
    ["model", target.model],
    ["label", target.label],
    ["costRank", target.costRank],
    ["latencyRank", target.latencyRank],
    ["capabilities", capabilityList]
  ];
  return fields.map(([key, value]) => `${key}=${value}`).join("; ");
}
|
|
1674
|
+
/**
 * Returns the display name of a target: its `label` when one is set
 * (including an empty string), otherwise `provider:model`.
 *
 * @param {{label?: string|null, provider: string, model: string}} target
 * @returns {string}
 */
function formatTargetLabel(target) {
  const { label, provider, model } = target;
  if (label !== null && label !== undefined) {
    return label;
  }
  return `${provider}:${model}`;
}
|
|
1677
|
+
/**
 * Tells whether a mode is one of "compress", "concise" or "claude_cli".
 *
 * @param {string} mode - Optimization mode.
 * @returns {boolean}
 */
function isCompressionSensitiveMode(mode) {
  switch (mode) {
    case "compress":
    case "concise":
    case "claude_cli":
      return true;
    default:
      return false;
  }
}
|
|
1680
|
+
/**
 * Cheap, model-free prompt compression: normalizes whitespace and removes
 * politeness filler ("please", "kindly", "just") and first-person lead-ins
 * ("I want", "I need", "I would like", each with an optional "to").
 *
 * The trailing "to" is removed together with its lead-in so that
 * "I want to refactor X" becomes "refactor X" rather than "to refactor X".
 * Punctuation orphaned at the very start (e.g. after dropping "Please,") is
 * also removed.
 *
 * @param {string} text - Raw prompt.
 * @returns {string} The compressed prompt.
 */
function cheapCompress(text) {
  return normalizeWhitespace(text)
    .replace(/\b(?:please|kindly|just)\b/gi, "")
    .replace(/\bI\s+(?:want|need|would\s+like)(?:\s+to)?\b/gi, "")
    // Pull punctuation back onto the preceding word after the removals.
    .replace(/\s+([,.;:!?])/g, "$1")
    // Drop separators left at the very start of the text.
    .replace(/^[\s,;:]+/, "")
    .replace(/\s{2,}/g, " ")
    .trim();
}
|
|
1683
|
+
/**
 * Cleans up raw text returned by the text-only model call.
 *
 * Blank output yields "". Output without reasoning markers is returned whole,
 * minus wrapping quotes. When reasoning markers are present, the raw text is
 * split on blank lines and the LAST paragraph that is not reasoning, not a
 * scaffold heading, and not a bullet or numbered item is returned; if no such
 * paragraph exists, the whole quote-stripped text is returned.
 *
 * @param {string} raw - Raw model output.
 * @returns {string} The extracted prompt text, possibly empty.
 */
function sanitizeTextOptimizationOutput(raw) {
  const flattened = normalizeWhitespace(raw);
  if (!flattened) {
    return "";
  }
  if (!containsReasoningLeak(flattened)) {
    return stripWrappingQuotes(flattened);
  }
  const scaffoldHeading = /^(role|task|guidelines|thinking|thinking process|attempt|critique|final decision|analysis)\b/i;
  let lastCleanParagraph;
  for (const paragraph of raw.split(/\n{2,}/)) {
    const chunk = stripWrappingQuotes(normalizeWhitespace(paragraph));
    const rejected = !chunk
      || containsReasoningLeak(chunk)
      || scaffoldHeading.test(chunk)
      || /^[-*]\s/.test(chunk)
      || /^\d+\.\s/.test(chunk);
    if (!rejected) {
      // Later paragraphs win: keep overwriting.
      lastCleanParagraph = chunk;
    }
  }
  return lastCleanParagraph ?? stripWrappingQuotes(flattened);
}
|
|
1694
|
+
/**
 * Detects phrases that indicate the model printed its reasoning instead of
 * (or alongside) the answer. Matching is case-insensitive and unanchored.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} True when any marker phrase occurs in `text`.
 */
function containsReasoningLeak(text) {
  const markers = [
    /thinking process/i,
    /analyze the request/i,
    /drafting the optimized prompt/i,
    /critique \d/i,
    /attempt \d/i,
    /final decision/i
  ];
  return markers.some((marker) => marker.test(text));
}
|
|
1697
|
+
/**
 * Removes any run of quote characters (", ' or `) at the very start and at
 * the very end of the text, then trims surrounding whitespace. Quotes inside
 * the text are left alone.
 *
 * @param {string} text
 * @returns {string}
 */
function stripWrappingQuotes(text) {
  const withoutOpening = text.replace(/^["'`]+/, "");
  const withoutClosing = withoutOpening.replace(/["'`]+$/, "");
  return withoutClosing.trim();
}
|
|
1700
|
+
/**
 * Decides whether a request should be handled as a coding task.
 *
 * True when the task or preset is "code", when any target hint is a
 * code-oriented capability, or when the prompt contains a coding keyword as
 * a whole word (case-insensitive).
 *
 * @param {{task?: string, preset?: string, targetHints?: string[], prompt: string}} input
 * @returns {boolean}
 */
function isCodeFirstRequest(input) {
  const declaredAsCode = input.task === "code" || input.preset === "code";
  if (declaredAsCode) {
    return true;
  }
  const codeOrientedHints = new Set(["coding", "agentic", "refactor", "debugging", "tool_use", "architecture"]);
  const hints = input.targetHints ?? [];
  if (hints.some((hint) => codeOrientedHints.has(hint))) {
    return true;
  }
  const codingKeyword = /\b(code|coding|repo|repository|refactor|patch|debug|bug|ci|test|typescript|javascript|agent|tool)\b/i;
  return codingKeyword.test(input.prompt);
}
|
|
1711
|
+
/**
 * Builds the lines of the heuristic prompt for non-code requests: a
 * summarized "Request:" line (capped at 320 characters by summarizePrompt)
 * followed by one line per optional setting that is present.
 *
 * @param {object} input - Optimizer input (reads prompt, task, tone, outputFormat, maxLength).
 * @param {string[]} constraints - Extracted constraints; joined with "; ".
 * @returns {string[]} Prompt lines, in order.
 */
function buildGeneralHeuristicPrompt(input, constraints) {
  const lines = [`Request: ${summarizePrompt(input.prompt, 320)}`];
  if (input.task) {
    lines.push(`Task type: ${input.task}`);
  }
  if (input.tone) {
    lines.push(`Tone: ${input.tone}`);
  }
  if (input.outputFormat) {
    lines.push(`Output format: ${input.outputFormat}`);
  }
  if (input.maxLength) {
    lines.push(`Maximum length: ${input.maxLength}`);
  }
  if (constraints.length) {
    lines.push(`Critical constraints: ${constraints.join("; ")}`);
  }
  return lines;
}
|
|
1721
|
+
/**
 * Builds the lines of the heuristic prompt for coding requests: a goal, an
 * optional tone, a bulleted deliverables list, optional constraints, and a
 * closing instruction to follow the inspect/plan/act/test/reflect loop.
 *
 * @param {object} input - Optimizer input (reads prompt and tone).
 * @param {string[]} constraints - Extracted constraints; joined with "; ".
 * @returns {string[]} Prompt lines, in order.
 */
function buildCodeFirstHeuristicPrompt(input, constraints) {
  const deliverables = inferCodeDeliverables(input.prompt);
  const lines = [`Goal: ${summarizeCodeGoal(input.prompt)}`];
  if (input.tone) {
    lines.push(`Tone: ${input.tone}`);
  }
  if (deliverables.length) {
    lines.push(`Deliverables:
- ${deliverables.join("\n- ")}`);
  }
  if (constraints.length) {
    lines.push(`Constraints: ${constraints.join("; ")}`);
  }
  lines.push("Use a Karpathy loop: inspect, plan, act, test, reflect, repeat.");
  return lines;
}
|
|
1732
|
+
/**
 * Returns the whitespace-normalized prompt, shortened to `maxLength`
 * characters when it is longer. A shortened result keeps the first
 * `maxLength - 1` characters (trimmed) and ends with an ellipsis character.
 *
 * @param {string} prompt - Raw prompt.
 * @param {number} maxLength - Maximum length of the result.
 * @returns {string}
 */
function summarizePrompt(prompt, maxLength) {
  const text = normalizeWhitespace(prompt);
  const fits = text.length <= maxLength;
  if (fits) {
    return text;
  }
  const head = text.slice(0, maxLength - 1).trim();
  return head + "\u2026";
}
|
|
1739
|
+
/**
 * Produces the "Goal:" text for a coding prompt.
 *
 * Prompts about authentication, about CI/debugging/tests, or about
 * refactoring are replaced by a canned goal for that theme (checked in that
 * order). Any other prompt is summarized to at most 220 characters.
 *
 * Theme keywords are matched as whole words (with common inflections), so
 * unrelated words that merely contain one of them, such as "author",
 * "specific" or "latest", no longer replace the user's goal with a canned one.
 *
 * @param {string} prompt - Raw prompt.
 * @returns {string} The goal sentence.
 */
function summarizeCodeGoal(prompt) {
  const lowered = prompt.toLowerCase();
  const authTheme = /\b(?:o?auth(?:\d+|n|z|entic\w*|ori[sz]\w*)?|log-?ins?|tokens?)\b/;
  const debugTheme = /\b(?:ci|debug\w*|failing|failures?|(?:py|unit)?test(?:s|ing|ed)?)\b/;
  if (authTheme.test(lowered)) {
    return "Inspect the codebase, understand the authentication flow, and produce a safe incremental refactor plan.";
  }
  if (debugTheme.test(lowered)) {
    return "Inspect the codebase and failing signals, identify root causes, and produce a practical debugging plan.";
  }
  if (/refactor/.test(lowered)) {
    return "Inspect the codebase and produce a phased refactor plan with minimal-risk execution steps.";
  }
  // Only summarize when no themed goal applies.
  return summarizePrompt(prompt, 220);
}
|
|
1753
|
+
/**
 * Derives up to six deliverable bullet points for a coding prompt from the
 * themes it mentions. When no theme is found, a single generic deliverable is
 * returned.
 *
 * Theme keywords are matched as whole words (with common inflections), so
 * words that merely contain a keyword, such as "report" or "reproduce"
 * (repo), "latest" (test) and "asterisk" (risk), do not add unrelated
 * deliverables.
 *
 * @param {string} prompt - Raw prompt.
 * @returns {string[]} One to six deliverables, in a fixed theme order.
 */
function inferCodeDeliverables(prompt) {
  const lowered = prompt.toLowerCase();
  // Each rule: theme pattern -> deliverable. Rule order fixes output order.
  const rules = [
    [
      /\b(?:inspect\w*|codebase|repos?|repositor(?:y|ies))\b/,
      "Summarize the relevant modules, ownership boundaries, and current behavior."
    ],
    [
      /\b(?:shared abstractions?|duplicat\w*)\b/,
      "Identify duplicated logic and the best shared abstractions to extract."
    ],
    [
      /\b(?:incremental\w*|phase[sd]?|rollouts?|steps?)\b/,
      "Propose an incremental plan with small, reversible steps."
    ],
    [
      /\b(?:risk\w*|migrations?|compatibility|backwards?)\b/,
      "Call out migration risks, compatibility concerns, and rollback points."
    ],
    [
      /\b(?:py|unit)?test(?:s|ing|ed)?\b/,
      "List the tests or validation needed before and after each phase."
    ],
    [
      /\b(?:avoid hand-wavy|practical\w*|concrete\w*)\b/,
      "Keep the recommendations concrete, implementation-oriented, and free of vague architecture advice."
    ]
  ];
  const deliverables = rules
    .filter(([pattern]) => pattern.test(lowered))
    .map(([, deliverable]) => deliverable);
  if (deliverables.length === 0) {
    deliverables.push("Produce a compact, execution-ready plan for the coding task.");
  }
  return deliverables.slice(0, 6);
}
|
|
1364
1779
|
|
|
1365
1780
|
// src/index.ts
|
|
1366
1781
|
function createOptimizer(config = {}) {
|
|
1367
1782
|
return new PromptOptimizer(config);
|
|
1368
1783
|
}
|
|
1369
1784
|
|
|
1785
|
+
// src/cliWelcome.ts
|
|
1786
|
+
import { basename } from "path";
|
|
1787
|
+
// Terminal width (in columns) at or above which the two-column layout is used.
var MIN_WIDE_COLUMNS = 84;

/**
 * Render the welcome screen, choosing the wide or compact layout by width.
 *
 * Defaults applied before rendering: `columns` falls back to 100 and is
 * floored at 60, `color` falls back to false, and a missing or blank `user`
 * becomes "pilot".
 *
 * @param {object} options - Welcome-screen inputs (columns, color, user, plus
 *   whatever the layout renderers read, which is passed through unchanged).
 * @returns {string} The rendered screen.
 */
function renderWelcomeScreen(options) {
  const resolved = {
    ...options,
    columns: Math.max(60, options.columns ?? 100),
    color: options.color ?? false,
    user: options.user?.trim() || "pilot"
  };
  const render = resolved.columns >= MIN_WIDE_COLUMNS ? renderWideWelcome : renderCompactWelcome;
  return render(resolved);
}
|
|
1794
|
+
/**
 * Render the two-column welcome screen: a header line, a boxed body with a
 * greeting/sprite column on the left and usage tips on the right, and a footer.
 *
 * The box width is `columns - 5`, clamped to the range 82..109. The left
 * column is fixed at 28 cells; the right column takes the remaining space.
 *
 * @param {object} options - Reads `columns`, `color`, `user`, `cwd`, `version`.
 * @returns {string} The rendered screen, lines joined with "\n".
 */
function renderWideWelcome(options) {
  const { color, columns, cwd, user, version } = options;
  const accent = (text) => style(text, "accent", color);
  const bold = (text) => style(text, "bold", color);
  const dim = (text) => style(text, "dim", color);

  const leftWidth = 28;
  const innerWidth = clamp(columns - 5, 82, 109) - 2;
  // 5 cells are taken by the spaces and the divider between the two columns.
  const rightWidth = innerWidth - leftWidth - 5;

  const leftColumn = [
    bold(`Welcome back, ${user}`),
    "",
    ...paintSprite(color),
    "",
    dim(`${user} \u2022 ${basename(cwd)}`),
    dim(cwd)
  ];
  const rightColumn = [
    accent("Launchpad"),
    `Run ${bold('promptpilot optimize "fix this CI failure" --task code --plain')}`,
    `Pipe directly into Claude with ${bold("| claude")}`,
    "",
    accent("Custom local model"),
    `Use ${bold("--model promptpilot-compressor")} for text-only local compression`,
    "",
    accent("Commands"),
    "optimize optimize, compress, and route prompts",
    "--help show the full CLI reference"
  ];

  const bar = accent("\u2502");
  const rule = accent("\u2500".repeat(innerWidth));
  const height = Math.max(leftColumn.length, rightColumn.length);
  // The shorter column is padded out with blank cells.
  const rows = Array.from({ length: height }, (_, row) => {
    const leftCell = padVisible(leftColumn[row] ?? "", leftWidth);
    const rightCell = padVisible(rightColumn[row] ?? "", rightWidth);
    return `${bar} ${leftCell} ${bar} ${rightCell} ${bar}`;
  });

  return [
    `${accent(" PromptPilot ")} ${dim(`v${version}`)}`,
    `${accent("\u250C")}${rule}${accent("\u2510")}`,
    ...rows,
    `${accent("\u2514")}${rule}${accent("\u2518")}`,
    "",
    dim("Ready when you are."),
    `Run ${bold("promptpilot --help")} for the full option list.`
  ].join("\n");
}
|
|
1835
|
+
/**
 * Render the single-column welcome screen inside a box.
 *
 * The box width is `columns - 2`, clamped to the range 58..78.
 *
 * @param {object} options - Reads `columns`, `color`, `user`, `cwd`, `version`.
 * @returns {string} The rendered box, lines joined with "\n".
 */
function renderCompactWelcome(options) {
  const { color, columns, cwd, user, version } = options;
  const accent = (text) => style(text, "accent", color);
  const dim = (text) => style(text, "dim", color);

  const innerWidth = clamp(columns - 2, 58, 78) - 2;
  const content = [
    `${accent("PromptPilot")} ${dim(`v${version}`)}`,
    style(`Welcome back, ${user}.`, "bold", color),
    ...paintSprite(color),
    dim(cwd),
    "",
    accent("Quick start"),
    'promptpilot optimize "fix this CI failure" --task code --plain',
    'promptpilot optimize "..." --model promptpilot-compressor',
    "",
    accent("Help"),
    "promptpilot --help"
  ];

  const bar = accent("\u2502");
  const rule = accent("\u2500".repeat(innerWidth));
  // One cell of the inner width is used by the space after the left border.
  const rows = content.map((line) => `${bar} ${padVisible(line, innerWidth - 1)}${bar}`);

  return [
    `${accent("\u250C")}${rule}${accent("\u2510")}`,
    ...rows,
    `${accent("\u2514")}${rule}${accent("\u2518")}`
  ].join("\n");
}
|
|
1857
|
+
/**
 * Build the ASCII-art mascot, one string per row.
 *
 * @param {boolean} color - When truthy, each row is wrapped in a 256-colour
 *   foreground escape (colour 215) and a trailing reset.
 * @returns {string[]} The eight sprite rows.
 */
function paintSprite(color) {
  const [ink, reset] = color ? ["\x1B[38;5;215m", "\x1B[0m"] : ["", ""];
  const rows = [
    " .-''''-.",
    " .' .--. '.",
    " / / oo \\ \\",
    " | \\_==_/ |",
    " | .-.__.-. |",
    " \\ \\_/ \\_/ /",
    " '._/|__|\\_.'",
    " /_/ \\_\\"
  ];
  return rows.map((row) => `${ink}${row}${reset}`);
}
|
|
1871
|
+
/**
 * Wrap text in the ANSI SGR sequence for a named tone.
 *
 * @param {string} text - Text to style.
 * @param {string} tone - One of "accent", "bold", or "dim".
 * @param {boolean} color - When falsy, the text is returned unchanged.
 * @returns {string} The styled text followed by a reset, or the original
 *   text when colour is disabled or the tone is not recognised.
 */
function style(text, tone, color) {
  if (!color) {
    return text;
  }
  switch (tone) {
    case "accent":
      return `\x1B[38;5;215m${text}\x1B[0m`;
    case "bold":
      return `\x1B[1m${text}\x1B[0m`;
    case "dim":
      return `\x1B[38;5;245m${text}\x1B[0m`;
    default:
      // An unknown tone previously fell through and returned undefined,
      // which would be printed as the literal string "undefined".
      return text;
  }
}
|
|
1884
|
+
/**
 * Fit text to a visible width: truncate when too long, then right-pad with
 * spaces when too short. Width is measured by `visibleWidth`, so ANSI colour
 * sequences do not count towards it.
 *
 * @param {string} text - Text that may contain ANSI SGR sequences.
 * @param {number} targetWidth - Desired visible width.
 * @returns {string} The fitted text.
 */
function padVisible(text, targetWidth) {
  const fitted = truncateVisible(text, targetWidth);
  const shortfall = targetWidth - visibleWidth(fitted);
  return shortfall > 0 ? fitted + " ".repeat(shortfall) : fitted;
}
|
|
1889
|
+
/**
 * Shorten text so its visible width does not exceed `targetWidth`, ending
 * the shortened text with an ellipsis. ANSI SGR sequences (ESC ... "m") are
 * copied through without counting towards the width.
 *
 * Differences from a naive cut:
 * - A width of 1 yields just the ellipsis and a width of 0 or less yields an
 *   empty string, so the result never exceeds the requested width.
 * - If any escape sequence was copied before the cut, a reset (ESC[0m) is
 *   appended so styling cannot leak into whatever is printed next.
 *
 * @param {string} text - Text that may contain ANSI SGR sequences.
 * @param {number} targetWidth - Maximum visible width of the result.
 * @returns {string} `text` unchanged when it already fits, otherwise the
 *   truncated text.
 */
function truncateVisible(text, targetWidth) {
  if (visibleWidth(text) <= targetWidth) {
    return text;
  }
  if (targetWidth <= 0) {
    return "";
  }
  // Reserve one cell for the ellipsis.
  const budget = targetWidth - 1;
  let visible = 0;
  let result = "";
  let inEscape = false;
  let sawEscape = false;
  for (const char of text) {
    if (char === "\x1B") {
      inEscape = true;
      sawEscape = true;
      result += char;
      continue;
    }
    if (inEscape) {
      result += char;
      // SGR sequences end with "m".
      if (char === "m") {
        inEscape = false;
      }
      continue;
    }
    // Check before appending so a budget of 0 keeps no visible characters.
    if (visible >= budget) {
      break;
    }
    result += char;
    visible += 1;
  }
  return sawEscape ? `${result}\u2026\x1B[0m` : `${result}\u2026`;
}
|
|
1915
|
+
/**
 * Measure text length with ANSI SGR sequences (ESC [ ... m) removed.
 *
 * @param {string} text - Text that may contain ANSI SGR sequences.
 * @returns {number} The `.length` of the text once those sequences are stripped.
 */
function visibleWidth(text) {
  const withoutSgr = text.replace(/\u001b\[[0-9;]*m/g, "");
  return withoutSgr.length;
}
|
|
1918
|
+
/**
 * Restrict a number to the range [min, max].
 *
 * @param {number} value - Number to restrict.
 * @param {number} min - Lower bound (wins if the bounds are inverted).
 * @param {number} max - Upper bound.
 * @returns {number} The clamped value.
 */
function clamp(value, min, max) {
  const capped = Math.min(max, value);
  return Math.max(min, capped);
}
|
|
1921
|
+
|
|
1370
1922
|
// src/cli.ts
|
|
1371
|
-
async function runCli(argv, io = { stdout: process.stdout, stderr: process.stderr, stdin: process.stdin }, dependencies = { createOptimizer, readStdin }) {
|
|
1923
|
+
async function runCli(argv, io = { stdout: process.stdout, stderr: process.stderr, stdin: process.stdin }, dependencies = { createOptimizer, readStdin, getCliInfo }) {
|
|
1372
1924
|
const [command, ...rest] = argv;
|
|
1373
|
-
if (!command
|
|
1925
|
+
if (!command) {
|
|
1926
|
+
const info = (dependencies.getCliInfo ?? getCliInfo)(io.stdout);
|
|
1927
|
+
if (io.stdout.isTTY) {
|
|
1928
|
+
io.stdout.write(`${renderWelcomeScreen(info)}
|
|
1929
|
+
`);
|
|
1930
|
+
return 0;
|
|
1931
|
+
}
|
|
1932
|
+
io.stdout.write(`${getHelpText()}
|
|
1933
|
+
`);
|
|
1934
|
+
return 0;
|
|
1935
|
+
}
|
|
1936
|
+
if (command === "--help" || command === "-h" || command === "help") {
|
|
1374
1937
|
io.stdout.write(`${getHelpText()}
|
|
1375
1938
|
`);
|
|
1376
1939
|
return 0;
|
|
@@ -1433,6 +1996,12 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
|
|
|
1433
1996
|
maxLength: parsed.maxLength,
|
|
1434
1997
|
tags: parsed.tags,
|
|
1435
1998
|
pinnedConstraints: parsed.pinnedConstraints,
|
|
1999
|
+
availableTargets: parsed.targets,
|
|
2000
|
+
routingEnabled: parsed.routingEnabled,
|
|
2001
|
+
routingPriority: parsed.routingPriority,
|
|
2002
|
+
routingTopK: parsed.routingTopK,
|
|
2003
|
+
targetHints: parsed.targetHints,
|
|
2004
|
+
workloadBias: parsed.workloadBias,
|
|
1436
2005
|
debug: parsed.debug,
|
|
1437
2006
|
plainOutput: parsed.plain,
|
|
1438
2007
|
maxTotalTokens: parsed.maxTotalTokens,
|
|
@@ -1456,6 +2025,10 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
|
|
|
1456
2025
|
`);
|
|
1457
2026
|
io.stdout.write(`provider=${result.provider} model=${result.model} tokens=${result.estimatedTokensAfter.total} savings=${result.tokenSavings}
|
|
1458
2027
|
`);
|
|
2028
|
+
if (result.selectedTarget) {
|
|
2029
|
+
io.stdout.write(`selected_target=${formatTarget(result.selectedTarget)}
|
|
2030
|
+
`);
|
|
2031
|
+
}
|
|
1459
2032
|
if (result.warnings.length > 0) {
|
|
1460
2033
|
io.stdout.write(`warnings=${result.warnings.join(" | ")}
|
|
1461
2034
|
`);
|
|
@@ -1478,7 +2051,10 @@ function parseOptimizeArgs(args) {
|
|
|
1478
2051
|
bypassOptimization: false,
|
|
1479
2052
|
help: false,
|
|
1480
2053
|
tags: [],
|
|
1481
|
-
pinnedConstraints: []
|
|
2054
|
+
pinnedConstraints: [],
|
|
2055
|
+
targets: [],
|
|
2056
|
+
targetHints: [],
|
|
2057
|
+
routingEnabled: true
|
|
1482
2058
|
};
|
|
1483
2059
|
const positionals = [];
|
|
1484
2060
|
for (let index = 0; index < args.length; index += 1) {
|
|
@@ -1517,6 +2093,24 @@ function parseOptimizeArgs(args) {
|
|
|
1517
2093
|
case "--pin-constraint":
|
|
1518
2094
|
parsed.pinnedConstraints.push(args[++index]);
|
|
1519
2095
|
break;
|
|
2096
|
+
case "--target":
|
|
2097
|
+
parsed.targets.push(parseTargetCandidate(args[++index], parsed.targets.length));
|
|
2098
|
+
break;
|
|
2099
|
+
case "--target-hint":
|
|
2100
|
+
parsed.targetHints.push(args[++index]);
|
|
2101
|
+
break;
|
|
2102
|
+
case "--routing-priority":
|
|
2103
|
+
parsed.routingPriority = args[++index];
|
|
2104
|
+
break;
|
|
2105
|
+
case "--routing-top-k":
|
|
2106
|
+
parsed.routingTopK = Number(args[++index]);
|
|
2107
|
+
break;
|
|
2108
|
+
case "--workload-bias":
|
|
2109
|
+
parsed.workloadBias = args[++index];
|
|
2110
|
+
break;
|
|
2111
|
+
case "--no-routing":
|
|
2112
|
+
parsed.routingEnabled = false;
|
|
2113
|
+
break;
|
|
1520
2114
|
case "--host":
|
|
1521
2115
|
parsed.host = args[++index];
|
|
1522
2116
|
break;
|
|
@@ -1589,6 +2183,12 @@ function getHelpText() {
|
|
|
1589
2183
|
" --max-length <n>",
|
|
1590
2184
|
" --tag <value> Repeatable",
|
|
1591
2185
|
" --pin-constraint <text> Repeatable",
|
|
2186
|
+
" --target <provider:model> Repeatable",
|
|
2187
|
+
" --target-hint <value> Repeatable",
|
|
2188
|
+
" --routing-priority <value>",
|
|
2189
|
+
" --routing-top-k <n>",
|
|
2190
|
+
" --workload-bias <code_first>",
|
|
2191
|
+
" --no-routing",
|
|
1592
2192
|
" --host <url>",
|
|
1593
2193
|
" --store <local|sqlite>",
|
|
1594
2194
|
" --storage-dir <path>",
|
|
@@ -1606,6 +2206,20 @@ function getHelpText() {
|
|
|
1606
2206
|
" --bypass-optimization"
|
|
1607
2207
|
].join("\n");
|
|
1608
2208
|
}
|
|
2209
|
+
/**
 * Parse a `--target` value of the form `provider:model` into a target
 * candidate. Only the first ":" separates provider from model, so model
 * names may themselves contain colons.
 *
 * @param {string} raw - The raw flag value, e.g. "ollama:qwen2.5:7b".
 * @param {number} index - Zero-based position of this target among the
 *   `--target` flags; used as the default cost and latency rank (index + 1).
 * @returns {{provider: string, model: string, label: string, costRank: number, latencyRank: number}}
 * @throws {TypeError} When `raw` is not a string, e.g. `--target` was the
 *   last argument and has no value.
 */
function parseTargetCandidate(raw, index) {
  if (typeof raw !== "string") {
    // Previously this surfaced as an opaque "cannot read properties of
    // undefined" TypeError from raw.split; name the flag instead.
    throw new TypeError("--target requires a <provider:model> value");
  }
  const [provider, ...modelParts] = raw.split(":");
  const model = modelParts.join(":").trim();
  return {
    provider: provider.trim(),
    model,
    label: raw,
    costRank: index + 1,
    latencyRank: index + 1
  };
}
|
|
2220
|
+
/**
 * Produce the display string for a routing target.
 *
 * @param {{label?: string, provider: string, model: string}} target
 * @returns {string} The target's label when it is neither null nor
 *   undefined, otherwise "provider:model".
 */
function formatTarget(target) {
  const label = target.label;
  if (label === undefined || label === null) {
    return `${target.provider}:${target.model}`;
  }
  return label;
}
|
|
1609
2223
|
async function readStdin(stdin = process.stdin) {
|
|
1610
2224
|
if (!stdin || stdin.isTTY) {
|
|
1611
2225
|
return "";
|
|
@@ -1620,6 +2234,36 @@ async function readStdin(stdin = process.stdin) {
|
|
|
1620
2234
|
stdin.on("error", reject);
|
|
1621
2235
|
});
|
|
1622
2236
|
}
|
|
2237
|
+
/**
 * Collect the environment details shown on the welcome screen.
 *
 * @param {{isTTY?: boolean, columns?: number}} stdout - Output stream; its
 *   `columns` is reported as-is and it is passed to `shouldUseColor`.
 * @returns {{cwd: string, version: string, color: boolean, columns: (number|undefined), user: (string|undefined)}}
 *   `user` comes from the USER environment variable, falling back to USERNAME.
 */
function getCliInfo(stdout) {
  const { USER, USERNAME } = process.env;
  const info = {
    cwd: process.cwd(),
    version: readPackageVersion(),
    color: shouldUseColor(stdout),
    columns: stdout.columns,
    user: USER ?? USERNAME
  };
  return info;
}
|
|
2246
|
+
/**
 * Decide whether ANSI colour should be emitted.
 *
 * Colour is used only when the stream is a TTY, the NO_COLOR environment
 * variable is unset or empty, and TERM is not "dumb".
 *
 * @param {{isTTY?: boolean}} stdout - Output stream to inspect.
 * @returns {boolean} True when colour output is appropriate.
 */
function shouldUseColor(stdout) {
  const { NO_COLOR, TERM } = process.env;
  return Boolean(stdout.isTTY) && !NO_COLOR && TERM !== "dumb";
}
|
|
2258
|
+
/**
 * Read the `version` field from the package.json one directory above this
 * module.
 *
 * This is best-effort: any failure to read or parse the file, or a missing
 * version field, yields "dev" rather than an error.
 *
 * @returns {string} The package version, or "dev" when it cannot be determined.
 */
function readPackageVersion() {
  const fallback = "dev";
  try {
    const manifestUrl = new URL("../package.json", import.meta.url);
    const { version } = JSON.parse(readFileSync(manifestUrl, "utf8"));
    return version ?? fallback;
  } catch {
    return fallback;
  }
}
|
|
1623
2267
|
if (isMainModule()) {
|
|
1624
2268
|
runCli(process.argv.slice(2)).then(
|
|
1625
2269
|
(code) => {
|