promptpilot 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/dist/cli.js +195 -108
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +6 -1
- package/dist/index.js +195 -108
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -561,6 +561,11 @@ var ContextManager = class {
|
|
|
561
561
|
}
|
|
562
562
|
session.updatedAt = timestamp;
|
|
563
563
|
await this.store.saveSession(session);
|
|
564
|
+
this.logger.debug("context saved", {
|
|
565
|
+
sessionId: options.sessionId,
|
|
566
|
+
entryCount: session.entries.length,
|
|
567
|
+
summaryCount: session.summaries.length
|
|
568
|
+
});
|
|
564
569
|
}
|
|
565
570
|
async summarizeContext(sessionId, prompt, task, budgetTokens, timeoutMs) {
|
|
566
571
|
const session = await this.store.loadSession(sessionId);
|
|
@@ -695,116 +700,47 @@ var DEFAULT_SMALL_MODEL_PREFERENCES = [
|
|
|
695
700
|
function getDefaultPreferredModels() {
|
|
696
701
|
return [...DEFAULT_SMALL_MODEL_PREFERENCES];
|
|
697
702
|
}
|
|
703
|
+
function getSuitableAutoModels(installedModels) {
|
|
704
|
+
return installedModels.filter((model) => isSuitableSmallModel(model));
|
|
705
|
+
}
|
|
706
|
+
function getQwenRouterModel(installedModels, explicitRouterModel) {
|
|
707
|
+
if (explicitRouterModel) {
|
|
708
|
+
const match = installedModels.find((model) => model.name === explicitRouterModel);
|
|
709
|
+
return match?.name ?? null;
|
|
710
|
+
}
|
|
711
|
+
const qwenRouters = getSuitableAutoModels(installedModels).filter((model) => /qwen/i.test(model.name)).sort((left, right) => scoreRouterModel(right) - scoreRouterModel(left));
|
|
712
|
+
return qwenRouters[0]?.name ?? null;
|
|
713
|
+
}
|
|
698
714
|
function selectOllamaModel(input) {
|
|
699
|
-
const
|
|
700
|
-
|
|
701
|
-
const preferredMatch = findPreferredMatch(smallCandidates, preferred);
|
|
702
|
-
if (preferredMatch) {
|
|
715
|
+
const smallCandidates = getSuitableAutoModels(input.installedModels);
|
|
716
|
+
if (smallCandidates.length === 1) {
|
|
703
717
|
return {
|
|
704
|
-
model:
|
|
705
|
-
reason: `Selected installed model "${
|
|
718
|
+
model: smallCandidates[0].name,
|
|
719
|
+
reason: `Selected installed model "${smallCandidates[0].name}" because it is the only suitable small local model available.`,
|
|
706
720
|
suitableForAutoUse: true
|
|
707
721
|
};
|
|
708
722
|
}
|
|
709
|
-
|
|
710
|
-
if (ranked[0]) {
|
|
723
|
+
if (smallCandidates.length > 1) {
|
|
711
724
|
return {
|
|
712
|
-
model:
|
|
713
|
-
reason: `
|
|
714
|
-
suitableForAutoUse:
|
|
725
|
+
model: "",
|
|
726
|
+
reason: `Multiple suitable small local models are available (${smallCandidates.map((model) => model.name).join(", ")}), so a Qwen router must choose between them.`,
|
|
727
|
+
suitableForAutoUse: false
|
|
715
728
|
};
|
|
716
729
|
}
|
|
717
|
-
const oversizedRanked = [...input.installedModels].filter((model) => isUsefulGenerationModel(model.name)).
|
|
730
|
+
const oversizedRanked = [...input.installedModels].filter((model) => isUsefulGenerationModel(model.name)).sort((left, right) => compareModelNames(left.name, right.name));
|
|
718
731
|
if (oversizedRanked[0]) {
|
|
719
732
|
return {
|
|
720
|
-
model: oversizedRanked[0].
|
|
721
|
-
reason: `Installed model "${oversizedRanked[0].
|
|
733
|
+
model: oversizedRanked[0].name,
|
|
734
|
+
reason: `Installed model "${oversizedRanked[0].name}" was detected, but it is larger than the preferred low-memory range for auto-use.`,
|
|
722
735
|
suitableForAutoUse: false
|
|
723
736
|
};
|
|
724
737
|
}
|
|
725
738
|
return {
|
|
726
|
-
model:
|
|
727
|
-
reason: "No
|
|
739
|
+
model: "",
|
|
740
|
+
reason: "No suitable local generation models were discovered for automatic routing.",
|
|
728
741
|
suitableForAutoUse: false
|
|
729
742
|
};
|
|
730
743
|
}
|
|
731
|
-
function buildPreferredOrder(input) {
|
|
732
|
-
const taskContext = `${input.task ?? ""} ${input.preset} ${input.mode}`.toLowerCase();
|
|
733
|
-
const configured = (input.preferredModels ?? []).map((model) => model.toLowerCase());
|
|
734
|
-
if (taskContext.includes("code")) {
|
|
735
|
-
return uniqueModels([
|
|
736
|
-
...configured,
|
|
737
|
-
"qwen2.5-coder:3b",
|
|
738
|
-
"qwen2.5:3b",
|
|
739
|
-
"phi3:mini",
|
|
740
|
-
"llama3.2:3b",
|
|
741
|
-
"qwen2.5:1.5b"
|
|
742
|
-
]);
|
|
743
|
-
}
|
|
744
|
-
if (taskContext.includes("compress") || taskContext.includes("summar")) {
|
|
745
|
-
return uniqueModels([
|
|
746
|
-
...configured,
|
|
747
|
-
"qwen2.5:3b",
|
|
748
|
-
"qwen2.5:1.5b",
|
|
749
|
-
"phi3:mini",
|
|
750
|
-
"llama3.2:3b"
|
|
751
|
-
]);
|
|
752
|
-
}
|
|
753
|
-
return uniqueModels([...configured, ...DEFAULT_SMALL_MODEL_PREFERENCES]);
|
|
754
|
-
}
|
|
755
|
-
function uniqueModels(models) {
|
|
756
|
-
return Array.from(new Set(models));
|
|
757
|
-
}
|
|
758
|
-
function findPreferredMatch(installedModels, preferred) {
|
|
759
|
-
const installedNames = installedModels.map((model) => model.name);
|
|
760
|
-
for (const preferredName of preferred) {
|
|
761
|
-
const direct = installedNames.find((name) => name.toLowerCase() === preferredName);
|
|
762
|
-
if (direct) {
|
|
763
|
-
return direct;
|
|
764
|
-
}
|
|
765
|
-
}
|
|
766
|
-
return null;
|
|
767
|
-
}
|
|
768
|
-
function scoreModel(modelName, preset, mode, task) {
|
|
769
|
-
const lower = modelName.toLowerCase();
|
|
770
|
-
let score = 0;
|
|
771
|
-
if (!isUsefulGenerationModel(lower)) {
|
|
772
|
-
return -100;
|
|
773
|
-
}
|
|
774
|
-
if (lower.includes("qwen2.5")) {
|
|
775
|
-
score += 4;
|
|
776
|
-
} else if (lower.includes("phi3")) {
|
|
777
|
-
score += 3.5;
|
|
778
|
-
} else if (lower.includes("llama3.2")) {
|
|
779
|
-
score += 3;
|
|
780
|
-
} else if (lower.includes("mistral")) {
|
|
781
|
-
score += 2;
|
|
782
|
-
}
|
|
783
|
-
const parameterSize = extractBillions(lower);
|
|
784
|
-
if (parameterSize !== null) {
|
|
785
|
-
if (parameterSize <= 4) {
|
|
786
|
-
score += 4;
|
|
787
|
-
} else if (parameterSize <= 8) {
|
|
788
|
-
score += 1;
|
|
789
|
-
} else {
|
|
790
|
-
score -= 4;
|
|
791
|
-
}
|
|
792
|
-
}
|
|
793
|
-
if (lower.includes("mini")) {
|
|
794
|
-
score += 2;
|
|
795
|
-
}
|
|
796
|
-
if (lower.includes("instruct") || lower.includes("chat")) {
|
|
797
|
-
score += 1;
|
|
798
|
-
}
|
|
799
|
-
const taskContext = `${task ?? ""} ${preset} ${mode}`.toLowerCase();
|
|
800
|
-
if (taskContext.includes("code") && lower.includes("coder")) {
|
|
801
|
-
score += 3;
|
|
802
|
-
}
|
|
803
|
-
if ((taskContext.includes("compress") || taskContext.includes("summar")) && lower.includes("qwen2.5")) {
|
|
804
|
-
score += 1;
|
|
805
|
-
}
|
|
806
|
-
return score;
|
|
807
|
-
}
|
|
808
744
|
function extractBillions(modelName) {
|
|
809
745
|
const match = modelName.match(/(\d+(?:\.\d+)?)b/);
|
|
810
746
|
if (!match) {
|
|
@@ -838,6 +774,25 @@ function isSuitableSmallModel(model) {
|
|
|
838
774
|
}
|
|
839
775
|
return /mini|1\.5b|2b|3b|4b/i.test(model.name);
|
|
840
776
|
}
|
|
777
|
+
function scoreRouterModel(model) {
|
|
778
|
+
const lower = model.name.toLowerCase();
|
|
779
|
+
let score = 0;
|
|
780
|
+
if (lower.includes("qwen2.5")) {
|
|
781
|
+
score += 3;
|
|
782
|
+
}
|
|
783
|
+
if (lower.includes("3b")) {
|
|
784
|
+
score += 2;
|
|
785
|
+
} else if (lower.includes("1.5b")) {
|
|
786
|
+
score += 1;
|
|
787
|
+
}
|
|
788
|
+
if (lower.includes("coder")) {
|
|
789
|
+
score -= 1;
|
|
790
|
+
}
|
|
791
|
+
return score;
|
|
792
|
+
}
|
|
793
|
+
function compareModelNames(left, right) {
|
|
794
|
+
return left.localeCompare(right);
|
|
795
|
+
}
|
|
841
796
|
|
|
842
797
|
// src/core/optimizer.ts
|
|
843
798
|
var DEFAULT_MODE = "claude_cli";
|
|
@@ -871,6 +826,7 @@ var PromptOptimizer = class {
|
|
|
871
826
|
host: config.host ?? "http://localhost:11434",
|
|
872
827
|
ollamaModel: config.ollamaModel,
|
|
873
828
|
preferredModels: config.preferredModels ?? getDefaultPreferredModels(),
|
|
829
|
+
modelRoutingStrategy: "qwen",
|
|
874
830
|
timeoutMs: config.timeoutMs ?? 3e4,
|
|
875
831
|
temperature: config.temperature ?? 0.1
|
|
876
832
|
};
|
|
@@ -916,6 +872,7 @@ var PromptOptimizer = class {
|
|
|
916
872
|
let providerChanges = [];
|
|
917
873
|
if (provider === "ollama") {
|
|
918
874
|
const modelSelection = await this.resolveOllamaModel({
|
|
875
|
+
prompt: originalPrompt,
|
|
919
876
|
mode,
|
|
920
877
|
preset,
|
|
921
878
|
task: input.task
|
|
@@ -1095,16 +1052,18 @@ var PromptOptimizer = class {
|
|
|
1095
1052
|
};
|
|
1096
1053
|
}
|
|
1097
1054
|
if (!this.client.listModels) {
|
|
1098
|
-
const fallback = this.config.preferredModels[0] ?? "qwen2.5:3b";
|
|
1099
1055
|
return {
|
|
1100
|
-
model:
|
|
1101
|
-
warnings: [
|
|
1102
|
-
|
|
1103
|
-
|
|
1056
|
+
model: "heuristic",
|
|
1057
|
+
warnings: [
|
|
1058
|
+
"Model auto-selection is unavailable in the current Ollama client, so prompt optimization is falling back to deterministic heuristic formatting."
|
|
1059
|
+
],
|
|
1060
|
+
reason: "Model discovery is unsupported, so Qwen-based model routing could not run.",
|
|
1061
|
+
forceHeuristic: true
|
|
1104
1062
|
};
|
|
1105
1063
|
}
|
|
1106
1064
|
try {
|
|
1107
1065
|
const installedModels = await this.client.listModels();
|
|
1066
|
+
const suitableModels = getSuitableAutoModels(installedModels);
|
|
1108
1067
|
const selection = selectOllamaModel({
|
|
1109
1068
|
installedModels,
|
|
1110
1069
|
mode: options.mode,
|
|
@@ -1112,7 +1071,7 @@ var PromptOptimizer = class {
|
|
|
1112
1071
|
task: options.task,
|
|
1113
1072
|
preferredModels: this.config.preferredModels
|
|
1114
1073
|
});
|
|
1115
|
-
if (
|
|
1074
|
+
if (suitableModels.length === 0) {
|
|
1116
1075
|
return {
|
|
1117
1076
|
model: selection.model,
|
|
1118
1077
|
warnings: [
|
|
@@ -1123,19 +1082,131 @@ var PromptOptimizer = class {
|
|
|
1123
1082
|
forceHeuristic: true
|
|
1124
1083
|
};
|
|
1125
1084
|
}
|
|
1085
|
+
if (suitableModels.length === 1) {
|
|
1086
|
+
return {
|
|
1087
|
+
model: selection.model,
|
|
1088
|
+
warnings: [],
|
|
1089
|
+
reason: selection.reason,
|
|
1090
|
+
forceHeuristic: false
|
|
1091
|
+
};
|
|
1092
|
+
}
|
|
1093
|
+
if (this.config.modelRoutingStrategy === "qwen") {
|
|
1094
|
+
const routed = await this.tryQwenModelRouting({
|
|
1095
|
+
prompt: options.prompt,
|
|
1096
|
+
task: options.task,
|
|
1097
|
+
mode: options.mode,
|
|
1098
|
+
preset: options.preset,
|
|
1099
|
+
installedModels,
|
|
1100
|
+
candidateModels: suitableModels.map((model) => model.name),
|
|
1101
|
+
fallbackModel: selection.model
|
|
1102
|
+
});
|
|
1103
|
+
return {
|
|
1104
|
+
model: routed.model,
|
|
1105
|
+
warnings: routed.warnings,
|
|
1106
|
+
reason: routed.reason,
|
|
1107
|
+
forceHeuristic: routed.model === "heuristic"
|
|
1108
|
+
};
|
|
1109
|
+
}
|
|
1126
1110
|
return {
|
|
1127
|
-
model:
|
|
1128
|
-
warnings:
|
|
1129
|
-
reason:
|
|
1130
|
-
forceHeuristic:
|
|
1111
|
+
model: "heuristic",
|
|
1112
|
+
warnings: ["Qwen model routing is required but was disabled, so prompt optimization is falling back to deterministic heuristic formatting."],
|
|
1113
|
+
reason: "Qwen model routing is required but was disabled.",
|
|
1114
|
+
forceHeuristic: true
|
|
1131
1115
|
};
|
|
1132
1116
|
} catch {
|
|
1133
|
-
const fallback = this.config.preferredModels[0] ?? "qwen2.5:3b";
|
|
1134
1117
|
return {
|
|
1135
|
-
model:
|
|
1136
|
-
warnings: [
|
|
1137
|
-
|
|
1138
|
-
|
|
1118
|
+
model: "heuristic",
|
|
1119
|
+
warnings: [
|
|
1120
|
+
"Failed to inspect local Ollama models, so prompt optimization is falling back to deterministic heuristic formatting."
|
|
1121
|
+
],
|
|
1122
|
+
reason: "Local Ollama model discovery failed, so Qwen-based model routing could not run.",
|
|
1123
|
+
forceHeuristic: true
|
|
1124
|
+
};
|
|
1125
|
+
}
|
|
1126
|
+
}
|
|
1127
|
+
async tryQwenModelRouting(options) {
|
|
1128
|
+
const routerModel = getQwenRouterModel(
|
|
1129
|
+
options.installedModels,
|
|
1130
|
+
this.config.routerModel
|
|
1131
|
+
);
|
|
1132
|
+
if (!routerModel) {
|
|
1133
|
+
return {
|
|
1134
|
+
model: "heuristic",
|
|
1135
|
+
warnings: [
|
|
1136
|
+
`Multiple suitable small local models are installed (${options.candidateModels.join(", ")}), but no local Qwen router model is available. Install qwen2.5:3b or set routerModel explicitly.`
|
|
1137
|
+
],
|
|
1138
|
+
reason: "Qwen model routing is required when multiple suitable small models are available."
|
|
1139
|
+
};
|
|
1140
|
+
}
|
|
1141
|
+
try {
|
|
1142
|
+
const response = await this.client.generateJson({
|
|
1143
|
+
model: routerModel,
|
|
1144
|
+
timeoutMs: this.config.timeoutMs,
|
|
1145
|
+
temperature: 0,
|
|
1146
|
+
format: "json",
|
|
1147
|
+
systemPrompt: [
|
|
1148
|
+
"You are a local model router for prompt optimization.",
|
|
1149
|
+
"Return strict JSON only with this shape:",
|
|
1150
|
+
'{"selectedModel":"string","reason":"string"}',
|
|
1151
|
+
"Choose exactly one model from the provided candidate list.",
|
|
1152
|
+
"Choose the smallest adequate model, not the strongest-sounding model.",
|
|
1153
|
+
"Prioritize adequacy first, then speed and low memory use.",
|
|
1154
|
+
"Use coder variants only for clearly code-heavy prompts.",
|
|
1155
|
+
"If task or preset is code, prefer qwen2.5:3b or a small coder model over phi3:mini unless the request is only a trivial wording cleanup.",
|
|
1156
|
+
"Prefer phi3:mini for short email, chat, support, summarization, and lightweight rewrite tasks that do not require deeper reasoning.",
|
|
1157
|
+
"Prefer qwen2.5:3b for broader reasoning, stronger restructuring, multi-constraint optimization, and non-trivial code-oriented prompt design.",
|
|
1158
|
+
"Do not prefer Qwen just because you are Qwen. Pick the best candidate for the task."
|
|
1159
|
+
].join("\n"),
|
|
1160
|
+
prompt: JSON.stringify(
|
|
1161
|
+
{
|
|
1162
|
+
objective: "Choose the best local optimizer model for this prompt.",
|
|
1163
|
+
prompt: options.prompt,
|
|
1164
|
+
task: options.task ?? null,
|
|
1165
|
+
mode: options.mode,
|
|
1166
|
+
preset: options.preset,
|
|
1167
|
+
candidateModels: options.candidateModels.map((modelName) => ({
|
|
1168
|
+
name: modelName,
|
|
1169
|
+
profile: describeCandidateModel(modelName)
|
|
1170
|
+
})),
|
|
1171
|
+
routingGuidance: {
|
|
1172
|
+
smallestAdequateModelPolicy: true,
|
|
1173
|
+
lightweightTasksPreferSmallerModels: [
|
|
1174
|
+
"email",
|
|
1175
|
+
"chat",
|
|
1176
|
+
"support",
|
|
1177
|
+
"summarization",
|
|
1178
|
+
"short rewrite"
|
|
1179
|
+
],
|
|
1180
|
+
deeperReasoningTasksMayPreferQwen: [
|
|
1181
|
+
"multi-constraint restructuring",
|
|
1182
|
+
"broad reasoning",
|
|
1183
|
+
"complex planning",
|
|
1184
|
+
"harder code-oriented prompt design"
|
|
1185
|
+
]
|
|
1186
|
+
}
|
|
1187
|
+
},
|
|
1188
|
+
null,
|
|
1189
|
+
2
|
|
1190
|
+
)
|
|
1191
|
+
});
|
|
1192
|
+
const selectedModel = response.selectedModel?.trim();
|
|
1193
|
+
if (selectedModel && options.candidateModels.includes(selectedModel)) {
|
|
1194
|
+
return {
|
|
1195
|
+
model: selectedModel,
|
|
1196
|
+
warnings: [],
|
|
1197
|
+
reason: response.reason?.trim() || `Qwen router selected "${selectedModel}" for this prompt.`
|
|
1198
|
+
};
|
|
1199
|
+
}
|
|
1200
|
+
return {
|
|
1201
|
+
model: "heuristic",
|
|
1202
|
+
warnings: ["Qwen router returned an invalid model choice, so prompt optimization is falling back to deterministic heuristic formatting."],
|
|
1203
|
+
reason: "Qwen router returned an invalid model selection."
|
|
1204
|
+
};
|
|
1205
|
+
} catch {
|
|
1206
|
+
return {
|
|
1207
|
+
model: "heuristic",
|
|
1208
|
+
warnings: ["Qwen router could not choose a model, so prompt optimization is falling back to deterministic heuristic formatting."],
|
|
1209
|
+
reason: "Qwen router failed to select a model."
|
|
1139
1210
|
};
|
|
1140
1211
|
}
|
|
1141
1212
|
}
|
|
@@ -1183,6 +1254,22 @@ var PromptOptimizer = class {
|
|
|
1183
1254
|
};
|
|
1184
1255
|
}
|
|
1185
1256
|
};
|
|
1257
|
+
function describeCandidateModel(modelName) {
|
|
1258
|
+
const lower = modelName.toLowerCase();
|
|
1259
|
+
if (lower.includes("phi3:mini")) {
|
|
1260
|
+
return "Very small and fast. Good for short rewrites, lightweight email/chat tasks, and simple prompt cleanup.";
|
|
1261
|
+
}
|
|
1262
|
+
if (lower.includes("qwen2.5:3b")) {
|
|
1263
|
+
return "Small general-purpose model with stronger reasoning and restructuring than ultra-light models. Better for broader or more complex prompt optimization.";
|
|
1264
|
+
}
|
|
1265
|
+
if (lower.includes("coder")) {
|
|
1266
|
+
return "Code-specialized model. Use only when the prompt is clearly code-heavy or refactor-oriented.";
|
|
1267
|
+
}
|
|
1268
|
+
if (lower.includes("llama3.2:3b")) {
|
|
1269
|
+
return "Small general chat/rewrite model. Reasonable middle option for general tasks.";
|
|
1270
|
+
}
|
|
1271
|
+
return "Local candidate model for prompt optimization.";
|
|
1272
|
+
}
|
|
1186
1273
|
function resolveSessionStore(config) {
|
|
1187
1274
|
if (typeof config.contextStore === "object" && config.contextStore !== null) {
|
|
1188
1275
|
return config.contextStore;
|