promptpilot 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +227 -105
- package/dist/cli.d.ts +9 -0
- package/dist/cli.js +679 -35
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +35 -1
- package/dist/index.js +446 -31
- package/dist/index.js.map +1 -1
- package/package.json +4 -2
package/dist/index.d.ts
CHANGED
|
@@ -2,6 +2,28 @@ type ProviderType = "ollama" | "heuristic";
|
|
|
2
2
|
type OptimizationMode = "clarity" | "concise" | "detailed" | "structured" | "persuasive" | "compress" | "claude_cli";
|
|
3
3
|
type PromptPreset = "code" | "email" | "essay" | "support" | "summarization" | "chat";
|
|
4
4
|
type ModelRoutingStrategy = "qwen";
|
|
5
|
+
type RoutingPriority = "cheapest_adequate" | "best_quality" | "fastest_adequate";
|
|
6
|
+
type WorkloadBias = "code_first";
|
|
7
|
+
type TargetCapability = "coding" | "agentic" | "tool_use" | "refactor" | "debugging" | "architecture" | "writing" | "email" | "support" | "chat" | "summarization";
|
|
8
|
+
/**
 * A downstream model that the caller offers as a possible routing destination.
 */
interface TargetModelCandidate {
    /** Provider identifier supplied by the caller. */
    provider: string;
    /** Model identifier supplied by the caller. */
    model: string;
    /** Optional human-readable name for the target. */
    label?: string;
    /** Optional capability tags describing what the target is suited for. */
    capabilities?: readonly TargetCapability[];
    /** Optional cost ranking. NOTE(review): presumably lower means cheaper; confirm. */
    costRank?: number;
    /** Optional latency ranking. NOTE(review): presumably lower means faster; confirm. */
    latencyRank?: number;
}
|
|
16
|
+
/**
 * A candidate target annotated with its position in a routing result.
 */
interface RankedTargetCandidate extends TargetModelCandidate {
    /** Position of this target in the ranking. */
    rank: number;
    /** Explanation of why the target received this position. */
    reason: string;
}
|
|
20
|
+
/**
 * Outcome of downstream target routing for one optimization request.
 */
interface RoutingDecision {
    /** The chosen target, or `null` when no target was selected. */
    selectedTarget: TargetModelCandidate | null;
    /** Ranked candidates; empty when no target was selected. */
    rankedTargets: RankedTargetCandidate[];
    /** Explanation for the selection, or `null` when there is none. */
    routingReason: string | null;
    /** Human-readable problems encountered while routing. */
    routingWarnings: string[];
    /** Identifier of whatever produced the decision, or `null` when none did. */
    routingProvider: string | null;
}
|
|
5
27
|
interface ContextEntry {
|
|
6
28
|
id: string;
|
|
7
29
|
sessionId: string;
|
|
@@ -60,6 +82,12 @@ interface OptimizePromptInput {
|
|
|
60
82
|
timeoutMs?: number;
|
|
61
83
|
tags?: string[];
|
|
62
84
|
pinnedConstraints?: string[];
|
|
85
|
+
availableTargets?: TargetModelCandidate[];
|
|
86
|
+
routingEnabled?: boolean;
|
|
87
|
+
routingPriority?: RoutingPriority;
|
|
88
|
+
routingTopK?: number;
|
|
89
|
+
targetHints?: TargetCapability[];
|
|
90
|
+
workloadBias?: WorkloadBias;
|
|
63
91
|
}
|
|
64
92
|
interface OptimizePromptResult {
|
|
65
93
|
originalPrompt: string;
|
|
@@ -73,6 +101,11 @@ interface OptimizePromptResult {
|
|
|
73
101
|
mode: OptimizationMode;
|
|
74
102
|
provider: ProviderType;
|
|
75
103
|
model: string;
|
|
104
|
+
selectedTarget: TargetModelCandidate | null;
|
|
105
|
+
rankedTargets: RankedTargetCandidate[];
|
|
106
|
+
routingReason: string | null;
|
|
107
|
+
routingWarnings: string[];
|
|
108
|
+
routingProvider: string | null;
|
|
76
109
|
warnings: string[];
|
|
77
110
|
changes: string[];
|
|
78
111
|
debugInfo?: Record<string, unknown>;
|
|
@@ -158,6 +191,7 @@ declare class PromptOptimizer {
|
|
|
158
191
|
private tryOllamaOptimization;
|
|
159
192
|
private resolveOllamaModel;
|
|
160
193
|
private tryQwenModelRouting;
|
|
194
|
+
private routeDownstreamTargets;
|
|
161
195
|
private heuristicOptimize;
|
|
162
196
|
private reduceToBudget;
|
|
163
197
|
}
|
|
@@ -289,4 +323,4 @@ declare class SQLiteSessionStore implements SessionStore {
|
|
|
289
323
|
declare function createOptimizer(config?: OptimizerConfig): PromptOptimizer;
|
|
290
324
|
declare function optimizePrompt(input: OptimizePromptInput, config?: OptimizerConfig): Promise<OptimizePromptResult>;
|
|
291
325
|
|
|
292
|
-
export { ContextCompressor, type ContextEntry, ContextManager, ContextStoreError, type ContextSummary, FileSessionStore, InvalidPromptError, type Logger, type ModelRoutingStrategy, OllamaClient, type OllamaClientLike, type OllamaGenerateOptions, type OllamaModelInfo, OllamaUnavailableError, type OptimizationMode, type OptimizePromptInput, type OptimizePromptResult, type OptimizerConfig, PromptOptimizer, type PromptPreset, type ProviderType, type RelevantContextResult, SQLiteSessionStore, type SessionData, type SessionStore, TokenBudgetExceededError, TokenEstimator, type TokenUsageEstimate, createOptimizer, getDefaultPreferredModels, optimizePrompt, selectOllamaModel };
|
|
326
|
+
export { ContextCompressor, type ContextEntry, ContextManager, ContextStoreError, type ContextSummary, FileSessionStore, InvalidPromptError, type Logger, type ModelRoutingStrategy, OllamaClient, type OllamaClientLike, type OllamaGenerateOptions, type OllamaModelInfo, OllamaUnavailableError, type OptimizationMode, type OptimizePromptInput, type OptimizePromptResult, type OptimizerConfig, PromptOptimizer, type PromptPreset, type ProviderType, type RankedTargetCandidate, type RelevantContextResult, type RoutingDecision, type RoutingPriority, SQLiteSessionStore, type SessionData, type SessionStore, type TargetCapability, type TargetModelCandidate, TokenBudgetExceededError, TokenEstimator, type TokenUsageEstimate, type WorkloadBias, createOptimizer, getDefaultPreferredModels, optimizePrompt, selectOllamaModel };
|
package/dist/index.js
CHANGED
|
@@ -327,13 +327,13 @@ var modeGuidance = {
|
|
|
327
327
|
clarity: "Improve clarity, remove ambiguity, and keep the request easy for a downstream model to follow.",
|
|
328
328
|
concise: "Minimize token count while preserving user intent, constraints, and expected output.",
|
|
329
329
|
detailed: "Make the request explicit and complete, including structure and success criteria.",
|
|
330
|
-
structured: "Organize the request into
|
|
330
|
+
structured: "Organize the request into sections only when that improves clarity or token efficiency.",
|
|
331
331
|
persuasive: "Refine wording so the request is compelling and likely to elicit a thoughtful response.",
|
|
332
332
|
compress: "Aggressively compress redundant wording while preserving the meaning and critical constraints.",
|
|
333
333
|
claude_cli: "Optimize specifically for Claude CLI: compact sections, direct instructions, and minimal boilerplate."
|
|
334
334
|
};
|
|
335
335
|
var presetGuidance = {
|
|
336
|
-
code: "Favor precise technical requirements, edge cases,
|
|
336
|
+
code: "Favor precise technical requirements, edge cases, expected output format, and a compact inspect-plan-act-test-reflect loop for code tasks.",
|
|
337
337
|
email: "Preserve the sender's goal, tone, and audience; aim for a realistic and usable writing request.",
|
|
338
338
|
essay: "Preserve thesis, structure, and voice guidance while making the prompt clearer.",
|
|
339
339
|
support: "Favor concise issue context, user impact, and desired resolution details.",
|
|
@@ -351,6 +351,10 @@ function getOptimizationSystemPrompt(mode, preset) {
|
|
|
351
351
|
"- Preserve critical constraints and task goals.",
|
|
352
352
|
"- Improve clarity, structure, and downstream usefulness.",
|
|
353
353
|
"- Keep the result compact when the mode requests compression.",
|
|
354
|
+
"- Do not force sections when direct phrasing is shorter and equally clear.",
|
|
355
|
+
"- Remove redundancy aggressively when the source prompt repeats the same goal multiple ways.",
|
|
356
|
+
"- For code tasks, prefer a terse agent brief over narrative prose.",
|
|
357
|
+
"- For code tasks, structure the prompt around a Karpathy-style loop: inspect, plan, act, test, reflect, repeat.",
|
|
354
358
|
`Mode guidance: ${modeGuidance[mode]}`,
|
|
355
359
|
preset ? `Preset guidance: ${presetGuidance[preset]}` : "Preset guidance: none"
|
|
356
360
|
].join("\n");
|
|
@@ -679,7 +683,11 @@ function tokenize(value) {
|
|
|
679
683
|
);
|
|
680
684
|
}
|
|
681
685
|
function extractConstraints(value) {
|
|
682
|
-
return
|
|
686
|
+
return Array.from(
|
|
687
|
+
new Set(
|
|
688
|
+
value.split(/\n+/).flatMap((line) => line.split(/(?<=[.!?])\s+/)).map((line) => line.trim().replace(/^[-*]\s*/, "")).filter((line) => line.length > 0 && line.length <= 180).filter((line) => /(must|should|avoid|do not|don't|never|exactly|at most|under|limit|max|preserve|keep)/i.test(line))
|
|
689
|
+
)
|
|
690
|
+
).slice(0, 8);
|
|
683
691
|
}
|
|
684
692
|
function extractEntities(value) {
|
|
685
693
|
return Array.from(
|
|
@@ -801,6 +809,9 @@ var DEFAULT_PROVIDER = "ollama";
|
|
|
801
809
|
var DEFAULT_MAX_INPUT_TOKENS = 1200;
|
|
802
810
|
var DEFAULT_MAX_CONTEXT_TOKENS = 800;
|
|
803
811
|
var DEFAULT_MAX_TOTAL_TOKENS = 2200;
|
|
812
|
+
var DEFAULT_ROUTING_PRIORITY = "cheapest_adequate";
|
|
813
|
+
var DEFAULT_ROUTING_TOP_K = 3;
|
|
814
|
+
var DEFAULT_WORKLOAD_BIAS = "code_first";
|
|
804
815
|
var PromptOptimizer = class {
|
|
805
816
|
config;
|
|
806
817
|
logger;
|
|
@@ -838,6 +849,10 @@ var PromptOptimizer = class {
|
|
|
838
849
|
const maxInputTokens = input.maxInputTokens ?? this.config.maxInputTokens ?? DEFAULT_MAX_INPUT_TOKENS;
|
|
839
850
|
const maxContextTokens = input.maxContextTokens ?? this.config.maxContextTokens ?? DEFAULT_MAX_CONTEXT_TOKENS;
|
|
840
851
|
const maxTotalTokens = input.maxTotalTokens ?? this.config.maxTotalTokens ?? DEFAULT_MAX_TOTAL_TOKENS;
|
|
852
|
+
const routingEnabled = input.routingEnabled !== false;
|
|
853
|
+
const routingPriority = input.routingPriority ?? DEFAULT_ROUTING_PRIORITY;
|
|
854
|
+
const routingTopK = input.routingTopK ?? DEFAULT_ROUTING_TOP_K;
|
|
855
|
+
const workloadBias = input.workloadBias ?? DEFAULT_WORKLOAD_BIAS;
|
|
841
856
|
const warnings = [];
|
|
842
857
|
const changes = [];
|
|
843
858
|
const useContext = input.useContext !== false && Boolean(input.sessionId);
|
|
@@ -867,6 +882,7 @@ var PromptOptimizer = class {
|
|
|
867
882
|
);
|
|
868
883
|
let provider = input.bypassOptimization ? "heuristic" : this.config.provider ?? DEFAULT_PROVIDER;
|
|
869
884
|
let model = provider === "ollama" ? this.config.ollamaModel ?? "auto" : "heuristic";
|
|
885
|
+
let usedPreprocessedFallback = false;
|
|
870
886
|
let optimizedPrompt = originalPrompt;
|
|
871
887
|
let providerWarnings = [];
|
|
872
888
|
let providerChanges = [];
|
|
@@ -901,6 +917,11 @@ var PromptOptimizer = class {
|
|
|
901
917
|
optimizedPrompt = ollamaResult.optimizedPrompt;
|
|
902
918
|
providerWarnings = ollamaResult.warnings;
|
|
903
919
|
providerChanges = ollamaResult.changes;
|
|
920
|
+
if (ollamaResult.source === "preprocessed") {
|
|
921
|
+
provider = "heuristic";
|
|
922
|
+
model = "cheap-preprocess";
|
|
923
|
+
usedPreprocessedFallback = true;
|
|
924
|
+
}
|
|
904
925
|
} else if (provider === "ollama") {
|
|
905
926
|
provider = "heuristic";
|
|
906
927
|
model = "heuristic";
|
|
@@ -909,7 +930,7 @@ var PromptOptimizer = class {
|
|
|
909
930
|
];
|
|
910
931
|
}
|
|
911
932
|
}
|
|
912
|
-
if (provider === "heuristic") {
|
|
933
|
+
if (provider === "heuristic" && !usedPreprocessedFallback) {
|
|
913
934
|
const fallback = this.heuristicOptimize({
|
|
914
935
|
input: {
|
|
915
936
|
...input,
|
|
@@ -926,6 +947,22 @@ var PromptOptimizer = class {
|
|
|
926
947
|
}
|
|
927
948
|
warnings.push(...providerWarnings);
|
|
928
949
|
changes.push(...providerChanges);
|
|
950
|
+
const routingDecision = await this.routeDownstreamTargets({
|
|
951
|
+
input: {
|
|
952
|
+
...input,
|
|
953
|
+
prompt: originalPrompt,
|
|
954
|
+
mode,
|
|
955
|
+
preset,
|
|
956
|
+
routingPriority,
|
|
957
|
+
routingTopK,
|
|
958
|
+
workloadBias
|
|
959
|
+
},
|
|
960
|
+
routingEnabled,
|
|
961
|
+
routingPriority,
|
|
962
|
+
routingTopK,
|
|
963
|
+
workloadBias
|
|
964
|
+
});
|
|
965
|
+
warnings.push(...routingDecision.routingWarnings);
|
|
929
966
|
let finalPrompt = composeFinalPrompt({
|
|
930
967
|
optimizedPrompt,
|
|
931
968
|
input: {
|
|
@@ -934,7 +971,8 @@ var PromptOptimizer = class {
|
|
|
934
971
|
mode,
|
|
935
972
|
preset
|
|
936
973
|
},
|
|
937
|
-
context: relevantContext
|
|
974
|
+
context: relevantContext,
|
|
975
|
+
routingDecision
|
|
938
976
|
});
|
|
939
977
|
let estimatedTokensAfter = {
|
|
940
978
|
prompt: this.estimator.estimateText(optimizedPrompt),
|
|
@@ -951,6 +989,7 @@ var PromptOptimizer = class {
|
|
|
951
989
|
},
|
|
952
990
|
optimizedPrompt,
|
|
953
991
|
context: relevantContext,
|
|
992
|
+
routingDecision,
|
|
954
993
|
maxTotalTokens
|
|
955
994
|
});
|
|
956
995
|
finalPrompt = reduced.finalPrompt;
|
|
@@ -986,6 +1025,11 @@ var PromptOptimizer = class {
|
|
|
986
1025
|
mode,
|
|
987
1026
|
provider,
|
|
988
1027
|
model,
|
|
1028
|
+
selectedTarget: routingDecision.selectedTarget,
|
|
1029
|
+
rankedTargets: routingDecision.rankedTargets,
|
|
1030
|
+
routingReason: routingDecision.routingReason,
|
|
1031
|
+
routingWarnings: routingDecision.routingWarnings,
|
|
1032
|
+
routingProvider: routingDecision.routingProvider,
|
|
989
1033
|
warnings,
|
|
990
1034
|
changes,
|
|
991
1035
|
debugInfo: input.debug ? {
|
|
@@ -994,7 +1038,8 @@ var PromptOptimizer = class {
|
|
|
994
1038
|
estimatedTokensAfter,
|
|
995
1039
|
extractedConstraints,
|
|
996
1040
|
preset,
|
|
997
|
-
selectedModel: model
|
|
1041
|
+
selectedModel: model,
|
|
1042
|
+
routingDecision
|
|
998
1043
|
} : void 0
|
|
999
1044
|
};
|
|
1000
1045
|
}
|
|
@@ -1017,29 +1062,88 @@ var PromptOptimizer = class {
|
|
|
1017
1062
|
});
|
|
1018
1063
|
}
|
|
1019
1064
|
async tryOllamaOptimization(options) {
|
|
1065
|
+
const preprocessedPrompt = cheapCompress(options.input.prompt);
|
|
1066
|
+
const preprocessedTokenCount = this.estimator.estimateText(preprocessedPrompt);
|
|
1067
|
+
const ultraMode = preprocessedTokenCount > 500;
|
|
1020
1068
|
try {
|
|
1021
1069
|
if (!await this.client.isAvailable()) {
|
|
1022
|
-
return
|
|
1070
|
+
return {
|
|
1071
|
+
optimizedPrompt: preprocessedPrompt,
|
|
1072
|
+
changes: ["Applied cheap local preprocessing because Ollama was unavailable."],
|
|
1073
|
+
warnings: ["Ollama was unavailable, so PromptPilot kept the cheap preprocessed prompt."],
|
|
1074
|
+
source: "preprocessed"
|
|
1075
|
+
};
|
|
1076
|
+
}
|
|
1077
|
+
const systemPrompt = ultraMode ? `${getOptimizationSystemPrompt(options.input.mode, options.input.preset)}
|
|
1078
|
+
Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemPrompt(options.input.mode, options.input.preset);
|
|
1079
|
+
const optimizationPrompt = buildOptimizationPrompt(
|
|
1080
|
+
{
|
|
1081
|
+
...options.input,
|
|
1082
|
+
prompt: preprocessedPrompt
|
|
1083
|
+
},
|
|
1084
|
+
options.relevantContext,
|
|
1085
|
+
options.extractedConstraints
|
|
1086
|
+
);
|
|
1087
|
+
const timeoutMs = options.input.timeoutMs ?? this.config.timeoutMs;
|
|
1088
|
+
let optimizedPrompt = "";
|
|
1089
|
+
let responseChanges = [];
|
|
1090
|
+
let responseWarnings = [];
|
|
1091
|
+
try {
|
|
1092
|
+
const response = await this.client.generateJson({
|
|
1093
|
+
systemPrompt,
|
|
1094
|
+
prompt: optimizationPrompt,
|
|
1095
|
+
timeoutMs,
|
|
1096
|
+
model: options.model,
|
|
1097
|
+
temperature: this.config.temperature,
|
|
1098
|
+
format: "json"
|
|
1099
|
+
});
|
|
1100
|
+
optimizedPrompt = normalizeWhitespace(response.optimizedPrompt ?? "");
|
|
1101
|
+
responseChanges = response.changes ?? [];
|
|
1102
|
+
responseWarnings = response.warnings ?? [];
|
|
1103
|
+
} catch {
|
|
1104
|
+
const raw = await this.client.generate({
|
|
1105
|
+
systemPrompt,
|
|
1106
|
+
prompt: optimizationPrompt,
|
|
1107
|
+
timeoutMs,
|
|
1108
|
+
model: options.model,
|
|
1109
|
+
temperature: this.config.temperature
|
|
1110
|
+
});
|
|
1111
|
+
optimizedPrompt = sanitizeTextOptimizationOutput(raw);
|
|
1112
|
+
responseChanges = [`Applied text-only Ollama optimization with ${options.model}.`];
|
|
1023
1113
|
}
|
|
1024
|
-
const response = await this.client.generateJson({
|
|
1025
|
-
systemPrompt: getOptimizationSystemPrompt(options.input.mode, options.input.preset),
|
|
1026
|
-
prompt: buildOptimizationPrompt(options.input, options.relevantContext, options.extractedConstraints),
|
|
1027
|
-
timeoutMs: options.input.timeoutMs ?? this.config.timeoutMs,
|
|
1028
|
-
model: options.model,
|
|
1029
|
-
temperature: this.config.temperature,
|
|
1030
|
-
format: "json"
|
|
1031
|
-
});
|
|
1032
|
-
const optimizedPrompt = normalizeWhitespace(response.optimizedPrompt ?? "");
|
|
1033
1114
|
if (!optimizedPrompt) {
|
|
1034
|
-
return
|
|
1115
|
+
return {
|
|
1116
|
+
optimizedPrompt: preprocessedPrompt,
|
|
1117
|
+
changes: ["Applied cheap local preprocessing because the model returned an empty optimization."],
|
|
1118
|
+
warnings: ["The local optimizer returned an empty result, so PromptPilot kept the preprocessed prompt."],
|
|
1119
|
+
source: "preprocessed"
|
|
1120
|
+
};
|
|
1121
|
+
}
|
|
1122
|
+
const optimizedTokenCount = this.estimator.estimateText(optimizedPrompt);
|
|
1123
|
+
if (isCompressionSensitiveMode(options.input.mode) && optimizedTokenCount >= preprocessedTokenCount) {
|
|
1124
|
+
return {
|
|
1125
|
+
optimizedPrompt: preprocessedPrompt,
|
|
1126
|
+
changes: [
|
|
1127
|
+
...responseChanges,
|
|
1128
|
+
"Kept the cheap preprocessed prompt because the model output was not smaller."
|
|
1129
|
+
],
|
|
1130
|
+
warnings: responseWarnings,
|
|
1131
|
+
source: "preprocessed"
|
|
1132
|
+
};
|
|
1035
1133
|
}
|
|
1036
1134
|
return {
|
|
1037
1135
|
optimizedPrompt,
|
|
1038
|
-
changes:
|
|
1039
|
-
warnings:
|
|
1136
|
+
changes: responseChanges.length > 0 ? responseChanges : [`Applied Ollama optimization with ${options.model}.`],
|
|
1137
|
+
warnings: responseWarnings,
|
|
1138
|
+
source: "ollama"
|
|
1040
1139
|
};
|
|
1041
1140
|
} catch {
|
|
1042
|
-
return
|
|
1141
|
+
return {
|
|
1142
|
+
optimizedPrompt: preprocessedPrompt,
|
|
1143
|
+
changes: ["Applied cheap local preprocessing because Ollama optimization failed."],
|
|
1144
|
+
warnings: ["Ollama optimization failed, so PromptPilot kept the preprocessed prompt."],
|
|
1145
|
+
source: "preprocessed"
|
|
1146
|
+
};
|
|
1043
1147
|
}
|
|
1044
1148
|
}
|
|
1045
1149
|
async resolveOllamaModel(options) {
|
|
@@ -1210,17 +1314,146 @@ var PromptOptimizer = class {
|
|
|
1210
1314
|
};
|
|
1211
1315
|
}
|
|
1212
1316
|
}
|
|
1317
|
+
async routeDownstreamTargets(options) {
|
|
1318
|
+
const availableTargets = normalizeAvailableTargets(options.input.availableTargets ?? []);
|
|
1319
|
+
if (!options.routingEnabled || availableTargets.length === 0) {
|
|
1320
|
+
return {
|
|
1321
|
+
selectedTarget: null,
|
|
1322
|
+
rankedTargets: [],
|
|
1323
|
+
routingReason: null,
|
|
1324
|
+
routingWarnings: [],
|
|
1325
|
+
routingProvider: null
|
|
1326
|
+
};
|
|
1327
|
+
}
|
|
1328
|
+
if (availableTargets.length === 1) {
|
|
1329
|
+
return {
|
|
1330
|
+
selectedTarget: stripInternalTargetFields(availableTargets[0]),
|
|
1331
|
+
rankedTargets: [
|
|
1332
|
+
{
|
|
1333
|
+
...stripInternalTargetFields(availableTargets[0]),
|
|
1334
|
+
rank: 1,
|
|
1335
|
+
reason: "Only one downstream target was supplied."
|
|
1336
|
+
}
|
|
1337
|
+
],
|
|
1338
|
+
routingReason: "Only one downstream target was supplied, so it was selected directly.",
|
|
1339
|
+
routingWarnings: [],
|
|
1340
|
+
routingProvider: "direct"
|
|
1341
|
+
};
|
|
1342
|
+
}
|
|
1343
|
+
if (!this.client.listModels) {
|
|
1344
|
+
return {
|
|
1345
|
+
selectedTarget: null,
|
|
1346
|
+
rankedTargets: [],
|
|
1347
|
+
routingReason: null,
|
|
1348
|
+
routingWarnings: [
|
|
1349
|
+
"Downstream target routing requires local Ollama model discovery so a Qwen router can run."
|
|
1350
|
+
],
|
|
1351
|
+
routingProvider: null
|
|
1352
|
+
};
|
|
1353
|
+
}
|
|
1354
|
+
try {
|
|
1355
|
+
const installedModels = await this.client.listModels();
|
|
1356
|
+
const routerModel = getQwenRouterModel(installedModels, this.config.routerModel);
|
|
1357
|
+
if (!routerModel) {
|
|
1358
|
+
return {
|
|
1359
|
+
selectedTarget: null,
|
|
1360
|
+
rankedTargets: [],
|
|
1361
|
+
routingReason: null,
|
|
1362
|
+
routingWarnings: [
|
|
1363
|
+
"Downstream target routing could not run because no suitable local Qwen router model is installed."
|
|
1364
|
+
],
|
|
1365
|
+
routingProvider: null
|
|
1366
|
+
};
|
|
1367
|
+
}
|
|
1368
|
+
const response = await this.client.generateJson({
|
|
1369
|
+
model: routerModel,
|
|
1370
|
+
timeoutMs: options.input.timeoutMs ?? this.config.timeoutMs,
|
|
1371
|
+
temperature: 0,
|
|
1372
|
+
format: "json",
|
|
1373
|
+
systemPrompt: buildDownstreamRoutingSystemPrompt(options.routingPriority, options.workloadBias),
|
|
1374
|
+
prompt: JSON.stringify(
|
|
1375
|
+
{
|
|
1376
|
+
objective: "Rank the caller-supplied downstream targets for this prompt and choose the best top target.",
|
|
1377
|
+
prompt: options.input.prompt,
|
|
1378
|
+
task: options.input.task ?? null,
|
|
1379
|
+
mode: options.input.mode,
|
|
1380
|
+
preset: options.input.preset,
|
|
1381
|
+
tone: options.input.tone ?? null,
|
|
1382
|
+
targetHints: options.input.targetHints ?? [],
|
|
1383
|
+
workloadBias: options.workloadBias,
|
|
1384
|
+
routingPriority: options.routingPriority,
|
|
1385
|
+
candidateTargets: availableTargets.map((target) => ({
|
|
1386
|
+
id: target.id,
|
|
1387
|
+
provider: target.provider,
|
|
1388
|
+
model: target.model,
|
|
1389
|
+
label: target.label ?? null,
|
|
1390
|
+
costRank: target.costRank,
|
|
1391
|
+
latencyRank: target.latencyRank,
|
|
1392
|
+
capabilities: target.capabilities,
|
|
1393
|
+
profile: describeDownstreamTarget(target)
|
|
1394
|
+
}))
|
|
1395
|
+
},
|
|
1396
|
+
null,
|
|
1397
|
+
2
|
|
1398
|
+
)
|
|
1399
|
+
});
|
|
1400
|
+
const rankedTargetIds = Array.from(
|
|
1401
|
+
new Set((response.rankedTargetIds ?? []).map((value) => value.trim()).filter(Boolean))
|
|
1402
|
+
).slice(0, Math.max(1, options.routingTopK));
|
|
1403
|
+
const rankedTargets = rankedTargetIds.map((id, index) => {
|
|
1404
|
+
const target = availableTargets.find((candidate) => candidate.id === id);
|
|
1405
|
+
if (!target) {
|
|
1406
|
+
return null;
|
|
1407
|
+
}
|
|
1408
|
+
return {
|
|
1409
|
+
...stripInternalTargetFields(target),
|
|
1410
|
+
rank: index + 1,
|
|
1411
|
+
reason: index === 0 ? response.reason?.trim() || "Selected by the local Qwen downstream router." : `Ranked #${index + 1} by the local Qwen downstream router.`
|
|
1412
|
+
};
|
|
1413
|
+
}).filter((value) => value !== null);
|
|
1414
|
+
const selectedTargetId = response.selectedTargetId?.trim();
|
|
1415
|
+
const selectedTargetCandidate = (selectedTargetId && availableTargets.find((candidate) => candidate.id === selectedTargetId)) ?? (rankedTargets[0] ? availableTargets.find(
|
|
1416
|
+
(candidate) => candidate.provider === rankedTargets[0].provider && candidate.model === rankedTargets[0].model && candidate.label === rankedTargets[0].label
|
|
1417
|
+
) ?? null : null);
|
|
1418
|
+
if (!selectedTargetCandidate || rankedTargets.length === 0) {
|
|
1419
|
+
return {
|
|
1420
|
+
selectedTarget: null,
|
|
1421
|
+
rankedTargets: [],
|
|
1422
|
+
routingReason: null,
|
|
1423
|
+
routingWarnings: [
|
|
1424
|
+
"Downstream target routing returned an invalid selection, so no downstream target was chosen."
|
|
1425
|
+
],
|
|
1426
|
+
routingProvider: routerModel
|
|
1427
|
+
};
|
|
1428
|
+
}
|
|
1429
|
+
return {
|
|
1430
|
+
selectedTarget: stripInternalTargetFields(selectedTargetCandidate),
|
|
1431
|
+
rankedTargets,
|
|
1432
|
+
routingReason: response.reason?.trim() || "Selected by the local Qwen downstream router.",
|
|
1433
|
+
routingWarnings: [],
|
|
1434
|
+
routingProvider: routerModel
|
|
1435
|
+
};
|
|
1436
|
+
} catch {
|
|
1437
|
+
return {
|
|
1438
|
+
selectedTarget: null,
|
|
1439
|
+
rankedTargets: [],
|
|
1440
|
+
routingReason: null,
|
|
1441
|
+
routingWarnings: [
|
|
1442
|
+
"Downstream target routing could not complete, so no downstream target was selected."
|
|
1443
|
+
],
|
|
1444
|
+
routingProvider: null
|
|
1445
|
+
};
|
|
1446
|
+
}
|
|
1447
|
+
}
|
|
1213
1448
|
heuristicOptimize(options) {
|
|
1214
|
-
const
|
|
1215
|
-
|
|
1216
|
-
options.input.task ? `Task type: ${options.input.task}` : "",
|
|
1217
|
-
options.input.tone ? `Tone: ${options.input.tone}` : "",
|
|
1218
|
-
options.input.outputFormat ? `Output format: ${options.input.outputFormat}` : "",
|
|
1219
|
-
options.input.maxLength ? `Maximum length: ${options.input.maxLength}` : "",
|
|
1220
|
-
options.constraints.length ? `Critical constraints: ${options.constraints.join("; ")}` : ""
|
|
1221
|
-
].filter(Boolean);
|
|
1449
|
+
const isCodeRequest = isCodeFirstRequest(options.input);
|
|
1450
|
+
const lines = isCodeRequest ? buildCodeFirstHeuristicPrompt(options.input, options.constraints) : buildGeneralHeuristicPrompt(options.input, options.constraints);
|
|
1222
1451
|
const optimizedPrompt = lines.join("\n");
|
|
1223
|
-
const changes =
|
|
1452
|
+
const changes = isCodeRequest ? [
|
|
1453
|
+
"Compressed the prompt into a code-agent brief.",
|
|
1454
|
+
"Removed redundant narrative phrasing.",
|
|
1455
|
+
"Applied a Karpathy-style inspect-plan-act-test-reflect loop."
|
|
1456
|
+
] : ["Normalized prompt structure for downstream model consumption."];
|
|
1224
1457
|
if (options.input.mode === "compress" || options.input.mode === "concise") {
|
|
1225
1458
|
changes.push("Applied concise formatting to reduce token usage.");
|
|
1226
1459
|
}
|
|
@@ -1241,7 +1474,8 @@ var PromptOptimizer = class {
|
|
|
1241
1474
|
const finalPrompt = composeFinalPrompt({
|
|
1242
1475
|
optimizedPrompt: this.estimator.truncateToBudget(options.optimizedPrompt, Math.floor(options.maxTotalTokens * 0.5)),
|
|
1243
1476
|
input: options.input,
|
|
1244
|
-
context: compactContext
|
|
1477
|
+
context: compactContext,
|
|
1478
|
+
routingDecision: options.routingDecision
|
|
1245
1479
|
});
|
|
1246
1480
|
return {
|
|
1247
1481
|
finalPrompt,
|
|
@@ -1300,9 +1534,17 @@ ${contextBlock}`);
|
|
|
1300
1534
|
if (constraints.length > 0) {
|
|
1301
1535
|
sections.push(`Constraints:
|
|
1302
1536
|
- ${constraints.join("\n- ")}`);
|
|
1537
|
+
}
|
|
1538
|
+
if (isCodeFirstRequest(input.input)) {
|
|
1539
|
+
sections.push(`Execution loop:
|
|
1540
|
+
- Inspect the relevant files and current behavior.
|
|
1541
|
+
- Plan the smallest safe next step.
|
|
1542
|
+
- Act with minimal, reversible changes.
|
|
1543
|
+
- Test or validate the result.
|
|
1544
|
+
- Reflect on gaps or risks, then repeat.`);
|
|
1303
1545
|
}
|
|
1304
1546
|
const desiredOutput = [
|
|
1305
|
-
input.input.targetModel ? `Target model: ${input.input.targetModel}` : "Target model: claude",
|
|
1547
|
+
input.routingDecision.selectedTarget ? `Selected target: ${formatTargetLabel(input.routingDecision.selectedTarget)}` : input.input.targetModel ? `Target model: ${input.input.targetModel}` : "Target model: claude",
|
|
1306
1548
|
`Mode: ${input.input.mode}`,
|
|
1307
1549
|
`Preset: ${input.input.preset}`
|
|
1308
1550
|
];
|
|
@@ -1328,6 +1570,179 @@ function emptyRelevantContext() {
|
|
|
1328
1570
|
debugInfo: {}
|
|
1329
1571
|
};
|
|
1330
1572
|
}
|
|
1573
|
+
/**
 * Fills in defaults for caller-supplied targets and tags each with an internal id.
 *
 * The id is `provider:model:index`; a missing label becomes `provider:model`;
 * missing capabilities are inferred; missing cost/latency ranks default to the
 * 1-based position in the input list.
 *
 * @param targets Caller-supplied target candidates.
 * @returns A new array of normalized targets (input objects are not mutated).
 */
function normalizeAvailableTargets(targets) {
  return targets.map(function normalizeOne(target, index) {
    const position = index + 1;
    const qualifiedName = `${target.provider}:${target.model}`;
    return {
      ...target,
      id: `${qualifiedName}:${index}`,
      label: target.label ?? qualifiedName,
      capabilities: target.capabilities ?? inferCapabilities(target),
      costRank: target.costRank ?? position,
      latencyRank: target.latencyRank ?? position
    };
  });
}
|
|
1583
|
+
/**
 * Projects a target onto its six public fields, dropping anything else
 * (such as the internal `id`).
 *
 * @param target A target object.
 * @returns A new object with provider, model, label, capabilities, costRank
 *   and latencyRank copied from `target`.
 */
function stripInternalTargetFields(target) {
  const { provider, model, label, capabilities, costRank, latencyRank } = target;
  return { provider, model, label, capabilities, costRank, latencyRank };
}
|
|
1593
|
+
/**
 * Builds the system prompt given to the downstream router model.
 *
 * @param priority Routing priority value interpolated into the prompt.
 * @param workloadBias Workload bias value interpolated into the prompt.
 * @returns The instruction lines joined with newlines.
 */
function buildDownstreamRoutingSystemPrompt(priority, workloadBias) {
  const outputContract = [
    "You are a downstream model router for PromptPilot.",
    "Return strict JSON only with this shape:",
    '{"selectedTargetId":"string","rankedTargetIds":["string"],"reason":"string"}',
    "Choose only from the supplied candidate target IDs.",
    "Rank up to the requested top targets in best-first order."
  ];
  const tuning = ["Routing priority: " + priority + ".", "Workload bias: " + workloadBias + "."];
  const policy = [
    "Code-first means ambiguous prompts should default toward coding-capable or agentic-capable targets.",
    "Explicit email, support, chat, and lightweight writing prompts may prefer cheaper lighter targets.",
    "Do not invent targets. Do not output prose outside JSON."
  ];
  return outputContract.concat(tuning, policy).join("\n");
}
|
|
1607
|
+
/**
 * Guesses capability tags for a target from its provider, model and label text.
 *
 * Matching is case-insensitive. Most keywords match as substrings, but the
 * short tokens `ci` and `mini` must stand alone (not flanked by letters) so
 * that names such as "gemini" or "decilm" do not pick up unrelated tags.
 *
 * @param target Object with `provider`, `model` and optional `label`.
 * @returns Capability tags in a fixed order: coding, agentic, tool_use,
 *   refactor, debugging, writing, email (only those that matched).
 */
function inferCapabilities(target) {
  const lower = `${target.provider} ${target.model} ${target.label ?? ""}`.toLowerCase();
  const capabilities = /* @__PURE__ */ new Set();
  // "codex" and "coder" both contain "code", so one alternative covers them.
  if (/code|agent|tool/.test(lower)) {
    capabilities.add("coding");
  }
  if (/agent|tool/.test(lower)) {
    capabilities.add("agentic");
    capabilities.add("tool_use");
  }
  if (/refactor|coder|codex/.test(lower)) {
    capabilities.add("refactor");
  }
  // `ci` only counts as a standalone token (e.g. "ci-helper"), not inside a word.
  if (/debug|fix|(?<![a-z])ci(?![a-z])/.test(lower)) {
    capabilities.add("debugging");
  }
  // `mini` only counts as a standalone token (e.g. "gpt-4o-mini"), not "gemini".
  if (/write|email|chat|sonnet|(?<![a-z])mini(?![a-z])/.test(lower)) {
    capabilities.add("writing");
  }
  if (/email/.test(lower)) {
    capabilities.add("email");
  }
  return Array.from(capabilities);
}
|
|
1631
|
+
/**
 * Renders a target as a single `key=value; key=value` profile string.
 *
 * @param target Target with provider, model, label, costRank, latencyRank
 *   and optional capabilities.
 * @returns The profile string; capabilities are comma-joined, or "none" when
 *   missing or empty.
 */
function describeDownstreamTarget(target) {
  const capabilityList = target.capabilities?.join(",") || "none";
  const profile = {
    provider: target.provider,
    model: target.model,
    label: target.label,
    costRank: target.costRank,
    latencyRank: target.latencyRank,
    capabilities: capabilityList
  };
  return Object.entries(profile)
    .map(([key, value]) => `${key}=${value}`)
    .join("; ");
}
|
|
1641
|
+
/**
 * Returns the display name for a target.
 *
 * Uses the target's label when it contains visible text; otherwise falls back
 * to `provider:model`. A blank label is treated as missing so callers never
 * render an empty name.
 *
 * @param target Object with `provider`, `model` and optional `label`.
 * @returns The label (unchanged) or the `provider:model` fallback.
 */
function formatTargetLabel(target) {
  const hasVisibleLabel = typeof target.label === "string" && target.label.trim().length > 0;
  return hasVisibleLabel ? target.label : `${target.provider}:${target.model}`;
}
|
|
1644
|
+
/**
 * Reports whether a mode is one of the size-focused modes
 * ("compress", "concise" or "claude_cli").
 *
 * @param mode Optimization mode name.
 * @returns `true` for the three modes above, otherwise `false`.
 */
function isCompressionSensitiveMode(mode) {
  const sizeFocusedModes = ["compress", "concise", "claude_cli"];
  return sizeFocusedModes.includes(mode);
}
|
|
1647
|
+
/**
 * Cheap, model-free compression: removes politeness fillers and first-person
 * lead-ins, then tidies the spacing left behind.
 *
 * - "please" / "kindly" / "just" are removed as standalone words only, so
 *   hyphenated compounds such as "just-in-time" are left intact.
 * - "I want", "I need" and "I would like" are removed together with a
 *   following "to", so no dangling "to" is left at the start of the request.
 * - A separator left at the very start by a removed lead-in ("Please, ...")
 *   is dropped.
 *
 * @param text Raw prompt text.
 * @returns The compressed prompt.
 */
function cheapCompress(text) {
  return normalizeWhitespace(text)
    .replace(/(?<![\w-])(?:please|kindly|just)(?![\w-])/gi, "")
    .replace(/\bI\s+(?:want|need|would\s+like)(?:\s+to)?\b/gi, "")
    .replace(/\s+([,.;:!?])/g, "$1")
    .replace(/\s{2,}/g, " ")
    .trim()
    .replace(/^[,;:]\s*/, "");
}
|
|
1650
|
+
/**
 * Cleans the plain-text output of the optimizer model.
 *
 * If the normalized output shows no sign of leaked reasoning it is returned
 * with wrapping quotes removed. Otherwise the raw output is split into
 * paragraphs, and the last paragraph that does not look like reasoning, a
 * heading, or a list item is returned. When no paragraph qualifies, the whole
 * normalized output is returned.
 *
 * @param raw Raw text returned by the model.
 * @returns The sanitized prompt, or "" when the output is empty.
 */
function sanitizeTextOptimizationOutput(raw) {
  const normalized = normalizeWhitespace(raw);
  if (!normalized) {
    return "";
  }
  if (!containsReasoningLeak(normalized)) {
    return stripWrappingQuotes(normalized);
  }
  const candidates = raw
    // Accept CRLF as well as LF blank lines; `\n{2,}` alone never matches "\r\n\r\n".
    .split(/(?:\r?\n){2,}/)
    .map((chunk) => stripWrappingQuotes(normalizeWhitespace(chunk)))
    .filter(Boolean)
    .filter((chunk) => !containsReasoningLeak(chunk))
    .filter((chunk) => !/^(role|task|guidelines|thinking|thinking process|attempt|critique|final decision|analysis)\b/i.test(chunk))
    .filter((chunk) => !/^[-*]\s/.test(chunk))
    .filter((chunk) => !/^\d+\.\s/.test(chunk));
  return candidates.at(-1) ?? stripWrappingQuotes(normalized);
}
|
|
1661
|
+
/**
 * Detects phrases that indicate the model emitted its reasoning rather than
 * just the optimized prompt.
 *
 * @param text Text to inspect.
 * @returns `true` when any marker phrase occurs (case-insensitive).
 */
function containsReasoningLeak(text) {
  const leakMarkers = [
    /thinking process/i,
    /analyze the request/i,
    /drafting the optimized prompt/i,
    /critique \d/i,
    /attempt \d/i,
    /final decision/i
  ];
  return leakMarkers.some((marker) => marker.test(text));
}
|
|
1664
|
+
/**
 * Removes quote characters (`"`, `'` or backtick) that wrap the whole text.
 *
 * Quotes are removed only when the same quote character opens and closes the
 * text and does not also delimit something inside it. A quoted phrase that
 * merely sits at the start or end (`Rename it to "verbose"`) is left intact.
 * Apostrophes between letters do not count as interior quotes. Repeated
 * wrappers (for example triple backticks) and nested wrappers are peeled
 * completely.
 *
 * @param text Text that may be wrapped in quotes.
 * @returns The trimmed text without wrapping quotes.
 */
function stripWrappingQuotes(text) {
  let result = text.trim();
  for (;;) {
    const quote = result[0];
    if (result.length < 2 || !"\"'`".includes(quote) || !result.endsWith(quote)) {
      return result;
    }
    // Measure the run of identical quote characters at each end.
    let leading = 0;
    while (leading < result.length && result[leading] === quote) {
      leading += 1;
    }
    let trailing = 0;
    while (trailing < result.length && result[result.length - 1 - trailing] === quote) {
      trailing += 1;
    }
    const depth = Math.min(leading, trailing, Math.floor(result.length / 2));
    const inner = result.slice(depth, result.length - depth);
    // Apostrophes inside words ("don't") are not quote delimiters.
    const probe = quote === "'" ? inner.replace(/(?<=\p{L})'(?=\p{L})/gu, "") : inner;
    if (probe.includes(quote)) {
      // The outer quotes belong to separate quoted spans, not to a wrapper.
      return result;
    }
    result = inner.trim();
  }
}
|
|
1667
|
+
/**
 * Decides whether a request should be handled as a coding request.
 *
 * Checks, in order:
 *  1. `input.task` or `input.preset` equals "code";
 *  2. any entry of `input.targetHints` (treated as empty when null/undefined)
 *     is one of the coding-related hints listed below;
 *  3. `input.prompt` contains one of a fixed set of coding keywords as a whole
 *     word (case-insensitive). Whole-word matching means e.g. "tests" does not
 *     count as "test".
 *
 * @param input Request object; reads `task`, `preset`, `targetHints` and `prompt`.
 * @returns `true` when any of the three checks succeeds.
 */
function isCodeFirstRequest(input) {
  const codeHints = ["coding", "agentic", "refactor", "debugging", "tool_use", "architecture"];
  const codeKeywords = /\b(code|coding|repo|repository|refactor|patch|debug|bug|ci|test|typescript|javascript|agent|tool)\b/i;

  const declaredAsCode = input.task === "code" || input.preset === "code";
  if (declaredAsCode) {
    return true;
  }

  const hints = input.targetHints ?? [];
  const isCodeHint = (hint) => codeHints.includes(hint);
  if (hints.some(isCodeHint)) {
    return true;
  }

  return codeKeywords.test(input.prompt);
}
|
|
1678
|
+
/**
 * Assembles the lines of a heuristic prompt for a general (non-code) request.
 *
 * The first line is always `Request: …`, built from the prompt summarized to
 * at most 320 characters via `summarizePrompt`. The remaining lines are added
 * only when the corresponding value is truthy / non-empty, in this order:
 * task type, tone, output format, maximum length, critical constraints.
 *
 * @param input Request object; reads `prompt`, `task`, `tone`, `outputFormat`, `maxLength`.
 * @param constraints Array of constraint strings, joined with "; ".
 * @returns Array of prompt lines (never contains empty strings).
 */
function buildGeneralHeuristicPrompt(input, constraints) {
  const lines = [`Request: ${summarizePrompt(input.prompt, 320)}`];
  if (input.task) {
    lines.push(`Task type: ${input.task}`);
  }
  if (input.tone) {
    lines.push(`Tone: ${input.tone}`);
  }
  if (input.outputFormat) {
    lines.push(`Output format: ${input.outputFormat}`);
  }
  if (input.maxLength) {
    lines.push(`Maximum length: ${input.maxLength}`);
  }
  if (constraints.length) {
    lines.push(`Critical constraints: ${constraints.join("; ")}`);
  }
  return lines;
}
|
|
1688
|
+
/**
 * Assembles the lines of a heuristic prompt for a coding request.
 *
 * Output order: a `Goal:` line (from `summarizeCodeGoal`), an optional `Tone:`
 * line, an optional `Deliverables:` section rendered as a dash-prefixed list
 * (from `inferCodeDeliverables`), an optional `Constraints:` line, and a fixed
 * closing instruction about the working loop.
 *
 * @param input Request object; reads `prompt` and `tone`.
 * @param constraints Array of constraint strings, joined with "; ".
 * @returns Array of prompt sections (never contains empty strings).
 */
function buildCodeFirstHeuristicPrompt(input, constraints) {
  const deliverables = inferCodeDeliverables(input.prompt);
  const sections = [`Goal: ${summarizeCodeGoal(input.prompt)}`];
  if (input.tone) {
    sections.push(`Tone: ${input.tone}`);
  }
  if (deliverables.length) {
    // One section string containing a heading line followed by "- item" lines.
    sections.push(`Deliverables:\n- ${deliverables.join("\n- ")}`);
  }
  if (constraints.length) {
    sections.push(`Constraints: ${constraints.join("; ")}`);
  }
  sections.push("Use a Karpathy loop: inspect, plan, act, test, reflect, repeat.");
  return sections;
}
|
|
1699
|
+
/**
 * Shortens a prompt to at most `maxLength` characters.
 *
 * The prompt is first passed through `normalizeWhitespace` (defined outside
 * this block). If the result already fits, it is returned unchanged; otherwise
 * the first `maxLength - 1` characters are kept, trimmed, and followed by a
 * single ellipsis character (U+2026).
 *
 * @param prompt Prompt text.
 * @param maxLength Maximum allowed length of the result.
 * @returns The normalized prompt, truncated with an ellipsis when too long.
 */
function summarizePrompt(prompt, maxLength) {
  const normalized = normalizeWhitespace(prompt);
  const fits = normalized.length <= maxLength;
  return fits ? normalized : `${normalized.slice(0, maxLength - 1).trim()}\u2026`;
}
|
|
1706
|
+
/**
 * Produces the one-line goal statement for a coding request.
 *
 * The lower-cased prompt is checked against three keyword groups, first match
 * wins:
 *  1. authentication-related ("auth", "login", "token")  -> auth refactor goal;
 *  2. CI/debugging-related ("ci" as a whole word, "debug", "failing",
 *     "failure", "test")                                 -> debugging goal;
 *  3. "refactor"                                          -> phased refactor goal.
 * When nothing matches, the prompt itself is returned, summarized to at most
 * 220 characters via `summarizePrompt`.
 *
 * @param prompt Prompt text of the coding request.
 * @returns A canned goal sentence or the summarized prompt.
 */
function summarizeCodeGoal(prompt) {
  const lowered = prompt.toLowerCase();
  // "auth" already covers "authentication", so one alternative is enough.
  if (/auth|login|token/.test(lowered)) {
    return "Inspect the codebase, understand the authentication flow, and produce a safe incremental refactor plan.";
  }
  // "ci" must be a standalone word: as a bare substring it also matched
  // "pricing", "specific", "concise", ... and hijacked unrelated prompts
  // (including plain refactor requests) into the debugging goal.
  if (/\bci\b|debug|failing|failure|test/.test(lowered)) {
    return "Inspect the codebase and failing signals, identify root causes, and produce a practical debugging plan.";
  }
  if (/refactor/.test(lowered)) {
    return "Inspect the codebase and produce a phased refactor plan with minimal-risk execution steps.";
  }
  // Only summarize when no canned goal applies.
  return summarizePrompt(prompt, 220);
}
|
|
1720
|
+
/**
 * Derives a list of deliverables for a coding request from keywords found in
 * the prompt.
 *
 * Each rule below pairs a keyword pattern (tested against the lower-cased
 * prompt, substring match) with the deliverable it contributes; deliverables
 * appear in rule order. When no rule matches, a single generic deliverable is
 * returned instead. The result is capped at six entries.
 *
 * @param prompt Prompt text of the coding request.
 * @returns Array of one to six deliverable sentences.
 */
function inferCodeDeliverables(prompt) {
  const lowered = prompt.toLowerCase();
  const rules = [
    {
      pattern: /inspect|codebase|repo|repository/,
      deliverable: "Summarize the relevant modules, ownership boundaries, and current behavior."
    },
    {
      pattern: /shared abstraction|shared abstractions|duplicate|duplicated/,
      deliverable: "Identify duplicated logic and the best shared abstractions to extract."
    },
    {
      pattern: /incremental|phase|phased|rollout|step/,
      deliverable: "Propose an incremental plan with small, reversible steps."
    },
    {
      pattern: /risk|migration|compatibility|backward/,
      deliverable: "Call out migration risks, compatibility concerns, and rollback points."
    },
    {
      pattern: /test|tests/,
      deliverable: "List the tests or validation needed before and after each phase."
    },
    {
      pattern: /avoid hand-wavy|practical|concrete/,
      deliverable: "Keep the recommendations concrete, implementation-oriented, and free of vague architecture advice."
    }
  ];
  const matched = rules
    .filter((rule) => rule.pattern.test(lowered))
    .map((rule) => rule.deliverable);
  const deliverables = matched.length === 0
    ? ["Produce a compact, execution-ready plan for the coding task."]
    : matched;
  return deliverables.slice(0, 6);
}
|
|
1331
1746
|
|
|
1332
1747
|
// src/index.ts
|
|
1333
1748
|
function createOptimizer(config = {}) {
|