promptpilot 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,7 +1,7 @@
  #!/usr/bin/env node

  // src/cli.ts
- import { realpathSync } from "fs";
+ import { readFileSync, realpathSync } from "fs";
  import { fileURLToPath } from "url";

  // src/errors.ts
@@ -360,13 +360,13 @@ var modeGuidance = {
  clarity: "Improve clarity, remove ambiguity, and keep the request easy for a downstream model to follow.",
  concise: "Minimize token count while preserving user intent, constraints, and expected output.",
  detailed: "Make the request explicit and complete, including structure and success criteria.",
- structured: "Organize the request into clean sections with compact headings and bullet points where helpful.",
+ structured: "Organize the request into sections only when that improves clarity or token efficiency.",
  persuasive: "Refine wording so the request is compelling and likely to elicit a thoughtful response.",
  compress: "Aggressively compress redundant wording while preserving the meaning and critical constraints.",
  claude_cli: "Optimize specifically for Claude CLI: compact sections, direct instructions, and minimal boilerplate."
  };
  var presetGuidance = {
- code: "Favor precise technical requirements, edge cases, and expected output format for code tasks.",
+ code: "Favor precise technical requirements, edge cases, expected output format, and a compact inspect-plan-act-test-reflect loop for code tasks.",
  email: "Preserve the sender's goal, tone, and audience; aim for a realistic and usable writing request.",
  essay: "Preserve thesis, structure, and voice guidance while making the prompt clearer.",
  support: "Favor concise issue context, user impact, and desired resolution details.",
@@ -384,6 +384,10 @@ function getOptimizationSystemPrompt(mode, preset) {
  "- Preserve critical constraints and task goals.",
  "- Improve clarity, structure, and downstream usefulness.",
  "- Keep the result compact when the mode requests compression.",
+ "- Do not force sections when direct phrasing is shorter and equally clear.",
+ "- Remove redundancy aggressively when the source prompt repeats the same goal multiple ways.",
+ "- For code tasks, prefer a terse agent brief over narrative prose.",
+ "- For code tasks, structure the prompt around a Karpathy-style loop: inspect, plan, act, test, reflect, repeat.",
  `Mode guidance: ${modeGuidance[mode]}`,
  preset ? `Preset guidance: ${presetGuidance[preset]}` : "Preset guidance: none"
  ].join("\n");
@@ -712,7 +716,11 @@ function tokenize(value) {
  );
  }
  function extractConstraints(value) {
- return value.split(/\n+/).map((line) => line.trim()).filter((line) => /(must|should|avoid|do not|don't|never|exactly|at most|under|limit|max)/i.test(line)).slice(0, 8);
+ return Array.from(
+ new Set(
+ value.split(/\n+/).flatMap((line) => line.split(/(?<=[.!?])\s+/)).map((line) => line.trim().replace(/^[-*]\s*/, "")).filter((line) => line.length > 0 && line.length <= 180).filter((line) => /(must|should|avoid|do not|don't|never|exactly|at most|under|limit|max|preserve|keep)/i.test(line))
+ )
+ ).slice(0, 8);
  }
  function extractEntities(value) {
  return Array.from(
@@ -834,6 +842,9 @@ var DEFAULT_PROVIDER = "ollama";
  var DEFAULT_MAX_INPUT_TOKENS = 1200;
  var DEFAULT_MAX_CONTEXT_TOKENS = 800;
  var DEFAULT_MAX_TOTAL_TOKENS = 2200;
+ var DEFAULT_ROUTING_PRIORITY = "cheapest_adequate";
+ var DEFAULT_ROUTING_TOP_K = 3;
+ var DEFAULT_WORKLOAD_BIAS = "code_first";
  var PromptOptimizer = class {
  config;
  logger;
@@ -871,6 +882,10 @@ var PromptOptimizer = class {
  const maxInputTokens = input.maxInputTokens ?? this.config.maxInputTokens ?? DEFAULT_MAX_INPUT_TOKENS;
  const maxContextTokens = input.maxContextTokens ?? this.config.maxContextTokens ?? DEFAULT_MAX_CONTEXT_TOKENS;
  const maxTotalTokens = input.maxTotalTokens ?? this.config.maxTotalTokens ?? DEFAULT_MAX_TOTAL_TOKENS;
+ const routingEnabled = input.routingEnabled !== false;
+ const routingPriority = input.routingPriority ?? DEFAULT_ROUTING_PRIORITY;
+ const routingTopK = input.routingTopK ?? DEFAULT_ROUTING_TOP_K;
+ const workloadBias = input.workloadBias ?? DEFAULT_WORKLOAD_BIAS;
  const warnings = [];
  const changes = [];
  const useContext = input.useContext !== false && Boolean(input.sessionId);
@@ -900,6 +915,7 @@ var PromptOptimizer = class {
  );
  let provider = input.bypassOptimization ? "heuristic" : this.config.provider ?? DEFAULT_PROVIDER;
  let model = provider === "ollama" ? this.config.ollamaModel ?? "auto" : "heuristic";
+ let usedPreprocessedFallback = false;
  let optimizedPrompt = originalPrompt;
  let providerWarnings = [];
  let providerChanges = [];
@@ -934,6 +950,11 @@ var PromptOptimizer = class {
  optimizedPrompt = ollamaResult.optimizedPrompt;
  providerWarnings = ollamaResult.warnings;
  providerChanges = ollamaResult.changes;
+ if (ollamaResult.source === "preprocessed") {
+ provider = "heuristic";
+ model = "cheap-preprocess";
+ usedPreprocessedFallback = true;
+ }
  } else if (provider === "ollama") {
  provider = "heuristic";
  model = "heuristic";
@@ -942,7 +963,7 @@ var PromptOptimizer = class {
  ];
  }
  }
- if (provider === "heuristic") {
+ if (provider === "heuristic" && !usedPreprocessedFallback) {
  const fallback = this.heuristicOptimize({
  input: {
  ...input,
@@ -959,6 +980,22 @@ var PromptOptimizer = class {
  }
  warnings.push(...providerWarnings);
  changes.push(...providerChanges);
+ const routingDecision = await this.routeDownstreamTargets({
+ input: {
+ ...input,
+ prompt: originalPrompt,
+ mode,
+ preset,
+ routingPriority,
+ routingTopK,
+ workloadBias
+ },
+ routingEnabled,
+ routingPriority,
+ routingTopK,
+ workloadBias
+ });
+ warnings.push(...routingDecision.routingWarnings);
  let finalPrompt = composeFinalPrompt({
  optimizedPrompt,
  input: {
@@ -967,7 +1004,8 @@ var PromptOptimizer = class {
  mode,
  preset
  },
- context: relevantContext
+ context: relevantContext,
+ routingDecision
  });
  let estimatedTokensAfter = {
  prompt: this.estimator.estimateText(optimizedPrompt),
@@ -984,6 +1022,7 @@ var PromptOptimizer = class {
  },
  optimizedPrompt,
  context: relevantContext,
+ routingDecision,
  maxTotalTokens
  });
  finalPrompt = reduced.finalPrompt;
@@ -1019,6 +1058,11 @@ var PromptOptimizer = class {
  mode,
  provider,
  model,
+ selectedTarget: routingDecision.selectedTarget,
+ rankedTargets: routingDecision.rankedTargets,
+ routingReason: routingDecision.routingReason,
+ routingWarnings: routingDecision.routingWarnings,
+ routingProvider: routingDecision.routingProvider,
  warnings,
  changes,
  debugInfo: input.debug ? {
@@ -1027,7 +1071,8 @@ var PromptOptimizer = class {
  estimatedTokensAfter,
  extractedConstraints,
  preset,
- selectedModel: model
+ selectedModel: model,
+ routingDecision
  } : void 0
  };
  }
@@ -1050,29 +1095,88 @@ var PromptOptimizer = class {
  });
  }
  async tryOllamaOptimization(options) {
+ const preprocessedPrompt = cheapCompress(options.input.prompt);
+ const preprocessedTokenCount = this.estimator.estimateText(preprocessedPrompt);
+ const ultraMode = preprocessedTokenCount > 500;
  try {
  if (!await this.client.isAvailable()) {
- return null;
+ return {
+ optimizedPrompt: preprocessedPrompt,
+ changes: ["Applied cheap local preprocessing because Ollama was unavailable."],
+ warnings: ["Ollama was unavailable, so PromptPilot kept the cheap preprocessed prompt."],
+ source: "preprocessed"
+ };
+ }
+ const systemPrompt = ultraMode ? `${getOptimizationSystemPrompt(options.input.mode, options.input.preset)}
+ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemPrompt(options.input.mode, options.input.preset);
+ const optimizationPrompt = buildOptimizationPrompt(
+ {
+ ...options.input,
+ prompt: preprocessedPrompt
+ },
+ options.relevantContext,
+ options.extractedConstraints
+ );
+ const timeoutMs = options.input.timeoutMs ?? this.config.timeoutMs;
+ let optimizedPrompt = "";
+ let responseChanges = [];
+ let responseWarnings = [];
+ try {
+ const response = await this.client.generateJson({
+ systemPrompt,
+ prompt: optimizationPrompt,
+ timeoutMs,
+ model: options.model,
+ temperature: this.config.temperature,
+ format: "json"
+ });
+ optimizedPrompt = normalizeWhitespace(response.optimizedPrompt ?? "");
+ responseChanges = response.changes ?? [];
+ responseWarnings = response.warnings ?? [];
+ } catch {
+ const raw = await this.client.generate({
+ systemPrompt,
+ prompt: optimizationPrompt,
+ timeoutMs,
+ model: options.model,
+ temperature: this.config.temperature
+ });
+ optimizedPrompt = sanitizeTextOptimizationOutput(raw);
+ responseChanges = [`Applied text-only Ollama optimization with ${options.model}.`];
  }
- const response = await this.client.generateJson({
- systemPrompt: getOptimizationSystemPrompt(options.input.mode, options.input.preset),
- prompt: buildOptimizationPrompt(options.input, options.relevantContext, options.extractedConstraints),
- timeoutMs: options.input.timeoutMs ?? this.config.timeoutMs,
- model: options.model,
- temperature: this.config.temperature,
- format: "json"
- });
- const optimizedPrompt = normalizeWhitespace(response.optimizedPrompt ?? "");
  if (!optimizedPrompt) {
- return null;
+ return {
+ optimizedPrompt: preprocessedPrompt,
+ changes: ["Applied cheap local preprocessing because the model returned an empty optimization."],
+ warnings: ["The local optimizer returned an empty result, so PromptPilot kept the preprocessed prompt."],
+ source: "preprocessed"
+ };
+ }
+ const optimizedTokenCount = this.estimator.estimateText(optimizedPrompt);
+ if (isCompressionSensitiveMode(options.input.mode) && optimizedTokenCount >= preprocessedTokenCount) {
+ return {
+ optimizedPrompt: preprocessedPrompt,
+ changes: [
+ ...responseChanges,
+ "Kept the cheap preprocessed prompt because the model output was not smaller."
+ ],
+ warnings: responseWarnings,
+ source: "preprocessed"
+ };
  }
  return {
  optimizedPrompt,
- changes: response.changes ?? [`Applied Ollama optimization with ${options.model}.`],
- warnings: response.warnings ?? []
+ changes: responseChanges.length > 0 ? responseChanges : [`Applied Ollama optimization with ${options.model}.`],
+ warnings: responseWarnings,
+ source: "ollama"
  };
  } catch {
- return null;
+ return {
+ optimizedPrompt: preprocessedPrompt,
+ changes: ["Applied cheap local preprocessing because Ollama optimization failed."],
+ warnings: ["Ollama optimization failed, so PromptPilot kept the preprocessed prompt."],
+ source: "preprocessed"
+ };
  }
  }
  async resolveOllamaModel(options) {
@@ -1243,17 +1347,146 @@ var PromptOptimizer = class {
  };
  }
  }
+ async routeDownstreamTargets(options) {
+ const availableTargets = normalizeAvailableTargets(options.input.availableTargets ?? []);
+ if (!options.routingEnabled || availableTargets.length === 0) {
+ return {
+ selectedTarget: null,
+ rankedTargets: [],
+ routingReason: null,
+ routingWarnings: [],
+ routingProvider: null
+ };
+ }
+ if (availableTargets.length === 1) {
+ return {
+ selectedTarget: stripInternalTargetFields(availableTargets[0]),
+ rankedTargets: [
+ {
+ ...stripInternalTargetFields(availableTargets[0]),
+ rank: 1,
+ reason: "Only one downstream target was supplied."
+ }
+ ],
+ routingReason: "Only one downstream target was supplied, so it was selected directly.",
+ routingWarnings: [],
+ routingProvider: "direct"
+ };
+ }
+ if (!this.client.listModels) {
+ return {
+ selectedTarget: null,
+ rankedTargets: [],
+ routingReason: null,
+ routingWarnings: [
+ "Downstream target routing requires local Ollama model discovery so a Qwen router can run."
+ ],
+ routingProvider: null
+ };
+ }
+ try {
+ const installedModels = await this.client.listModels();
+ const routerModel = getQwenRouterModel(installedModels, this.config.routerModel);
+ if (!routerModel) {
+ return {
+ selectedTarget: null,
+ rankedTargets: [],
+ routingReason: null,
+ routingWarnings: [
+ "Downstream target routing could not run because no suitable local Qwen router model is installed."
+ ],
+ routingProvider: null
+ };
+ }
+ const response = await this.client.generateJson({
+ model: routerModel,
+ timeoutMs: options.input.timeoutMs ?? this.config.timeoutMs,
+ temperature: 0,
+ format: "json",
+ systemPrompt: buildDownstreamRoutingSystemPrompt(options.routingPriority, options.workloadBias),
+ prompt: JSON.stringify(
+ {
+ objective: "Rank the caller-supplied downstream targets for this prompt and choose the best top target.",
+ prompt: options.input.prompt,
+ task: options.input.task ?? null,
+ mode: options.input.mode,
+ preset: options.input.preset,
+ tone: options.input.tone ?? null,
+ targetHints: options.input.targetHints ?? [],
+ workloadBias: options.workloadBias,
+ routingPriority: options.routingPriority,
+ candidateTargets: availableTargets.map((target) => ({
+ id: target.id,
+ provider: target.provider,
+ model: target.model,
+ label: target.label ?? null,
+ costRank: target.costRank,
+ latencyRank: target.latencyRank,
+ capabilities: target.capabilities,
+ profile: describeDownstreamTarget(target)
+ }))
+ },
+ null,
+ 2
+ )
+ });
+ const rankedTargetIds = Array.from(
+ new Set((response.rankedTargetIds ?? []).map((value) => value.trim()).filter(Boolean))
+ ).slice(0, Math.max(1, options.routingTopK));
+ const rankedTargets = rankedTargetIds.map((id, index) => {
+ const target = availableTargets.find((candidate) => candidate.id === id);
+ if (!target) {
+ return null;
+ }
+ return {
+ ...stripInternalTargetFields(target),
+ rank: index + 1,
+ reason: index === 0 ? response.reason?.trim() || "Selected by the local Qwen downstream router." : `Ranked #${index + 1} by the local Qwen downstream router.`
+ };
+ }).filter((value) => value !== null);
+ const selectedTargetId = response.selectedTargetId?.trim();
+ const selectedTargetCandidate = (selectedTargetId && availableTargets.find((candidate) => candidate.id === selectedTargetId)) ?? (rankedTargets[0] ? availableTargets.find(
+ (candidate) => candidate.provider === rankedTargets[0].provider && candidate.model === rankedTargets[0].model && candidate.label === rankedTargets[0].label
+ ) ?? null : null);
+ if (!selectedTargetCandidate || rankedTargets.length === 0) {
+ return {
+ selectedTarget: null,
+ rankedTargets: [],
+ routingReason: null,
+ routingWarnings: [
+ "Downstream target routing returned an invalid selection, so no downstream target was chosen."
+ ],
+ routingProvider: routerModel
+ };
+ }
+ return {
+ selectedTarget: stripInternalTargetFields(selectedTargetCandidate),
+ rankedTargets,
+ routingReason: response.reason?.trim() || "Selected by the local Qwen downstream router.",
+ routingWarnings: [],
+ routingProvider: routerModel
+ };
+ } catch {
+ return {
+ selectedTarget: null,
+ rankedTargets: [],
+ routingReason: null,
+ routingWarnings: [
+ "Downstream target routing could not complete, so no downstream target was selected."
+ ],
+ routingProvider: null
+ };
+ }
+ }
  heuristicOptimize(options) {
- const lines = [
- `Request: ${options.input.prompt}`,
- options.input.task ? `Task type: ${options.input.task}` : "",
- options.input.tone ? `Tone: ${options.input.tone}` : "",
- options.input.outputFormat ? `Output format: ${options.input.outputFormat}` : "",
- options.input.maxLength ? `Maximum length: ${options.input.maxLength}` : "",
- options.constraints.length ? `Critical constraints: ${options.constraints.join("; ")}` : ""
- ].filter(Boolean);
+ const isCodeRequest = isCodeFirstRequest(options.input);
+ const lines = isCodeRequest ? buildCodeFirstHeuristicPrompt(options.input, options.constraints) : buildGeneralHeuristicPrompt(options.input, options.constraints);
  const optimizedPrompt = lines.join("\n");
- const changes = ["Normalized prompt structure for downstream model consumption."];
+ const changes = isCodeRequest ? [
+ "Compressed the prompt into a code-agent brief.",
+ "Removed redundant narrative phrasing.",
+ "Applied a Karpathy-style inspect-plan-act-test-reflect loop."
+ ] : ["Normalized prompt structure for downstream model consumption."];
  if (options.input.mode === "compress" || options.input.mode === "concise") {
  changes.push("Applied concise formatting to reduce token usage.");
  }
@@ -1274,7 +1507,8 @@ var PromptOptimizer = class {
  const finalPrompt = composeFinalPrompt({
  optimizedPrompt: this.estimator.truncateToBudget(options.optimizedPrompt, Math.floor(options.maxTotalTokens * 0.5)),
  input: options.input,
- context: compactContext
+ context: compactContext,
+ routingDecision: options.routingDecision
  });
  return {
  finalPrompt,
@@ -1333,9 +1567,17 @@ ${contextBlock}`);
  if (constraints.length > 0) {
  sections.push(`Constraints:
  - ${constraints.join("\n- ")}`);
+ }
+ if (isCodeFirstRequest(input.input)) {
+ sections.push(`Execution loop:
+ - Inspect the relevant files and current behavior.
+ - Plan the smallest safe next step.
+ - Act with minimal, reversible changes.
+ - Test or validate the result.
+ - Reflect on gaps or risks, then repeat.`);
  }
  const desiredOutput = [
- input.input.targetModel ? `Target model: ${input.input.targetModel}` : "Target model: claude",
+ input.routingDecision.selectedTarget ? `Selected target: ${formatTargetLabel(input.routingDecision.selectedTarget)}` : input.input.targetModel ? `Target model: ${input.input.targetModel}` : "Target model: claude",
  `Mode: ${input.input.mode}`,
  `Preset: ${input.input.preset}`
  ];
@@ -1361,16 +1603,337 @@ function emptyRelevantContext() {
  debugInfo: {}
  };
  }
+ function normalizeAvailableTargets(targets) {
+ return targets.map((target, index) => ({
+ ...target,
+ id: `${target.provider}:${target.model}:${index}`,
+ label: target.label ?? `${target.provider}:${target.model}`,
+ capabilities: target.capabilities ?? inferCapabilities(target),
+ costRank: target.costRank ?? index + 1,
+ latencyRank: target.latencyRank ?? index + 1
+ }));
+ }
+ function stripInternalTargetFields(target) {
+ return {
+ provider: target.provider,
+ model: target.model,
+ label: target.label,
+ capabilities: target.capabilities,
+ costRank: target.costRank,
+ latencyRank: target.latencyRank
+ };
+ }
+ function buildDownstreamRoutingSystemPrompt(priority, workloadBias) {
+ return [
+ "You are a downstream model router for PromptPilot.",
+ "Return strict JSON only with this shape:",
+ '{"selectedTargetId":"string","rankedTargetIds":["string"],"reason":"string"}',
+ "Choose only from the supplied candidate target IDs.",
+ "Rank up to the requested top targets in best-first order.",
+ `Routing priority: ${priority}.`,
+ `Workload bias: ${workloadBias}.`,
+ "Code-first means ambiguous prompts should default toward coding-capable or agentic-capable targets.",
+ "Explicit email, support, chat, and lightweight writing prompts may prefer cheaper lighter targets.",
+ "Do not invent targets. Do not output prose outside JSON."
+ ].join("\n");
+ }
+ function inferCapabilities(target) {
+ const lower = `${target.provider} ${target.model} ${target.label ?? ""}`.toLowerCase();
+ const capabilities = /* @__PURE__ */ new Set();
+ if (/code|codex|coder|agent|tool/.test(lower)) {
+ capabilities.add("coding");
+ }
+ if (/agent|tool/.test(lower)) {
+ capabilities.add("agentic");
+ capabilities.add("tool_use");
+ }
+ if (/refactor|coder|codex/.test(lower)) {
+ capabilities.add("refactor");
+ }
+ if (/debug|fix|ci/.test(lower)) {
+ capabilities.add("debugging");
+ }
+ if (/write|email|chat|sonnet|mini/.test(lower)) {
+ capabilities.add("writing");
+ }
+ if (/email/.test(lower)) {
+ capabilities.add("email");
+ }
+ return Array.from(capabilities);
+ }
+ function describeDownstreamTarget(target) {
+ return [
+ `provider=${target.provider}`,
+ `model=${target.model}`,
+ `label=${target.label}`,
+ `costRank=${target.costRank}`,
+ `latencyRank=${target.latencyRank}`,
+ `capabilities=${target.capabilities?.join(",") || "none"}`
+ ].join("; ");
+ }
+ function formatTargetLabel(target) {
+ return target.label ?? `${target.provider}:${target.model}`;
+ }
+ function isCompressionSensitiveMode(mode) {
+ return mode === "compress" || mode === "concise" || mode === "claude_cli";
+ }
+ function cheapCompress(text) {
+ return normalizeWhitespace(text).replace(/\b(?:please|kindly|just)\b/gi, "").replace(/\bI\s+(?:want|need|would\s+like\s+to)\b/gi, "").replace(/\s+([,.;:!?])/g, "$1").replace(/\s{2,}/g, " ").trim();
+ }
+ function sanitizeTextOptimizationOutput(raw) {
+ const normalized = normalizeWhitespace(raw);
+ if (!normalized) {
+ return "";
+ }
+ if (!containsReasoningLeak(normalized)) {
+ return stripWrappingQuotes(normalized);
+ }
+ const candidates = raw.split(/\n{2,}/).map((chunk) => stripWrappingQuotes(normalizeWhitespace(chunk))).filter(Boolean).filter((chunk) => !containsReasoningLeak(chunk)).filter((chunk) => !/^(role|task|guidelines|thinking|thinking process|attempt|critique|final decision|analysis)\b/i.test(chunk)).filter((chunk) => !/^[-*]\s/.test(chunk)).filter((chunk) => !/^\d+\.\s/.test(chunk));
+ return candidates.at(-1) ?? stripWrappingQuotes(normalized);
+ }
+ function containsReasoningLeak(text) {
+ return /(thinking process|analyze the request|drafting the optimized prompt|critique \d|attempt \d|final decision)/i.test(text);
+ }
+ function stripWrappingQuotes(text) {
+ return text.replace(/^["'`]+|["'`]+$/g, "").trim();
+ }
+ function isCodeFirstRequest(input) {
+ if (input.task === "code" || input.preset === "code") {
+ return true;
+ }
+ if ((input.targetHints ?? []).some((hint) => ["coding", "agentic", "refactor", "debugging", "tool_use", "architecture"].includes(hint))) {
+ return true;
+ }
+ return /\b(code|coding|repo|repository|refactor|patch|debug|bug|ci|test|typescript|javascript|agent|tool)\b/i.test(
+ input.prompt
+ );
+ }
+ function buildGeneralHeuristicPrompt(input, constraints) {
+ return [
+ `Request: ${summarizePrompt(input.prompt, 320)}`,
+ input.task ? `Task type: ${input.task}` : "",
+ input.tone ? `Tone: ${input.tone}` : "",
+ input.outputFormat ? `Output format: ${input.outputFormat}` : "",
+ input.maxLength ? `Maximum length: ${input.maxLength}` : "",
+ constraints.length ? `Critical constraints: ${constraints.join("; ")}` : ""
+ ].filter(Boolean);
+ }
+ function buildCodeFirstHeuristicPrompt(input, constraints) {
+ const deliverables = inferCodeDeliverables(input.prompt);
+ return [
+ `Goal: ${summarizeCodeGoal(input.prompt)}`,
+ input.tone ? `Tone: ${input.tone}` : "",
+ deliverables.length ? `Deliverables:
+ - ${deliverables.join("\n- ")}` : "",
+ constraints.length ? `Constraints: ${constraints.join("; ")}` : "",
+ "Use a Karpathy loop: inspect, plan, act, test, reflect, repeat."
+ ].filter(Boolean);
+ }
+ function summarizePrompt(prompt, maxLength) {
+ const normalized = normalizeWhitespace(prompt);
+ if (normalized.length <= maxLength) {
+ return normalized;
+ }
+ return `${normalized.slice(0, maxLength - 1).trim()}\u2026`;
+ }
+ function summarizeCodeGoal(prompt) {
+ const normalized = summarizePrompt(prompt, 220);
+ const lowered = prompt.toLowerCase();
+ if (/auth|authentication|login|token/.test(lowered)) {
+ return "Inspect the codebase, understand the authentication flow, and produce a safe incremental refactor plan.";
+ }
+ if (/ci|debug|failing|failure|test/.test(lowered)) {
+ return "Inspect the codebase and failing signals, identify root causes, and produce a practical debugging plan.";
+ }
+ if (/refactor/.test(lowered)) {
+ return "Inspect the codebase and produce a phased refactor plan with minimal-risk execution steps.";
+ }
+ return normalized;
+ }
+ function inferCodeDeliverables(prompt) {
+ const lowered = prompt.toLowerCase();
+ const deliverables = [];
+ if (/inspect|codebase|repo|repository/.test(lowered)) {
+ deliverables.push("Summarize the relevant modules, ownership boundaries, and current behavior.");
+ }
+ if (/shared abstraction|shared abstractions|duplicate|duplicated/.test(lowered)) {
+ deliverables.push("Identify duplicated logic and the best shared abstractions to extract.");
+ }
+ if (/incremental|phase|phased|rollout|step/.test(lowered)) {
+ deliverables.push("Propose an incremental plan with small, reversible steps.");
+ }
+ if (/risk|migration|compatibility|backward/.test(lowered)) {
+ deliverables.push("Call out migration risks, compatibility concerns, and rollback points.");
+ }
+ if (/test|tests/.test(lowered)) {
+ deliverables.push("List the tests or validation needed before and after each phase.");
+ }
+ if (/avoid hand-wavy|practical|concrete/.test(lowered)) {
+ deliverables.push("Keep the recommendations concrete, implementation-oriented, and free of vague architecture advice.");
+ }
+ if (deliverables.length === 0) {
+ deliverables.push("Produce a compact, execution-ready plan for the coding task.");
+ }
+ return deliverables.slice(0, 6);
+ }

  // src/index.ts
  function createOptimizer(config = {}) {
  return new PromptOptimizer(config);
  }

+ // src/cliWelcome.ts
+ import { basename } from "path";
+ var MIN_WIDE_COLUMNS = 84;
+ function renderWelcomeScreen(options) {
+ const columns = Math.max(60, options.columns ?? 100);
+ const color = options.color ?? false;
+ const user = options.user?.trim() || "pilot";
+ return columns >= MIN_WIDE_COLUMNS ? renderWideWelcome({ ...options, columns, color, user }) : renderCompactWelcome({ ...options, columns, color, user });
+ }
+ function renderWideWelcome(options) {
+ const width = clamp(options.columns - 5, 82, 109);
+ const innerWidth = width - 2;
+ const leftWidth = 28;
+ const rightWidth = innerWidth - leftWidth - 5;
+ const leftLines = [
+ style(`Welcome back, ${options.user}`, "bold", options.color),
+ "",
+ ...paintSprite(options.color),
+ "",
+ style(`${options.user} \u2022 ${basename(options.cwd)}`, "dim", options.color),
+ style(options.cwd, "dim", options.color)
+ ];
+ const rightLines = [
+ style("Launchpad", "accent", options.color),
+ "Run " + style('promptpilot optimize "fix this CI failure" --task code --plain', "bold", options.color),
+ "Pipe directly into Claude with " + style("| claude", "bold", options.color),
+ "",
+ style("Custom local model", "accent", options.color),
+ "Use " + style("--model promptpilot-compressor", "bold", options.color) + " for text-only local compression",
+ "",
+ style("Commands", "accent", options.color),
+ "optimize optimize, compress, and route prompts",
+ "--help show the full CLI reference"
+ ];
+ const rowCount = Math.max(leftLines.length, rightLines.length);
+ const header = `${style(" PromptPilot ", "accent", options.color)} ${style(`v${options.version}`, "dim", options.color)}`;
+ const topRule = `${style("\u250C", "accent", options.color)}${style("\u2500".repeat(innerWidth), "accent", options.color)}${style("\u2510", "accent", options.color)}`;
+ const bottomRule = `${style("\u2514", "accent", options.color)}${style("\u2500".repeat(innerWidth), "accent", options.color)}${style("\u2518", "accent", options.color)}`;
+ const body = new Array(rowCount).fill(null).map((_, index) => {
+ const left = padVisible(leftLines[index] ?? "", leftWidth);
+ const right = padVisible(rightLines[index] ?? "", rightWidth);
+ return `${style("\u2502", "accent", options.color)} ${left} ${style("\u2502", "accent", options.color)} ${right} ${style("\u2502", "accent", options.color)}`;
+ });
+ const footer = [
+ "",
+ style("Ready when you are.", "dim", options.color),
+ `Run ${style("promptpilot --help", "bold", options.color)} for the full option list.`
+ ];
+ return [header, topRule, ...body, bottomRule, ...footer].join("\n");
+ }
+ function renderCompactWelcome(options) {
+ const width = clamp(options.columns - 2, 58, 78);
+ const innerWidth = width - 2;
+ const lines = [
+ `${style("PromptPilot", "accent", options.color)} ${style(`v${options.version}`, "dim", options.color)}`,
+ style(`Welcome back, ${options.user}.`, "bold", options.color),
+ ...paintSprite(options.color),
+ style(options.cwd, "dim", options.color),
+ "",
+ style("Quick start", "accent", options.color),
+ 'promptpilot optimize "fix this CI failure" --task code --plain',
+ 'promptpilot optimize "..." --model promptpilot-compressor',
+ "",
+ style("Help", "accent", options.color),
+ "promptpilot --help"
+ ];
+ return [
+ `${style("\u250C", "accent", options.color)}${style("\u2500".repeat(innerWidth), "accent", options.color)}${style("\u2510", "accent", options.color)}`,
+ ...lines.map((line) => `${style("\u2502", "accent", options.color)} ${padVisible(line, innerWidth - 1)}${style("\u2502", "accent", options.color)}`),
+ `${style("\u2514", "accent", options.color)}${style("\u2500".repeat(innerWidth), "accent", options.color)}${style("\u2518", "accent", options.color)}`
+ ].join("\n");
+ }
+ function paintSprite(color) {
+ const ink = color ? "\x1B[38;5;215m" : "";
+ const reset = color ? "\x1B[0m" : "";
+ return [
+ `${ink} .-''''-.${reset}`,
+ `${ink} .' .--. '.${reset}`,
+ `${ink} / / oo \\ \\${reset}`,
+ `${ink} | \\_==_/ |${reset}`,
+ `${ink} | .-.__.-. |${reset}`,
+ `${ink} \\ \\_/ \\_/ /${reset}`,
+ `${ink} '._/|__|\\_.'${reset}`,
+ `${ink} /_/ \\_\\${reset}`
+ ];
+ }
+ function style(text, tone, color) {
+ if (!color) {
+ return text;
+ }
+ switch (tone) {
+ case "accent":
+ return `\x1B[38;5;215m${text}\x1B[0m`;
+ case "bold":
+ return `\x1B[1m${text}\x1B[0m`;
+ case "dim":
+ return `\x1B[38;5;245m${text}\x1B[0m`;
+ }
+ }
+ function padVisible(text, targetWidth) {
+ const truncated = truncateVisible(text, targetWidth);
+ const padding = Math.max(0, targetWidth - visibleWidth(truncated));
+ return `${truncated}${" ".repeat(padding)}`;
+ }
+ function truncateVisible(text, targetWidth) {
+ if (visibleWidth(text) <= targetWidth) {
+ return text;
+ }
+ let visible = 0;
+ let result = "";
+ let inEscape = false;
+ for (const char of text) {
+ result += char;
+ if (char === "\x1B") {
+ inEscape = true;
+ continue;
+ }
+ if (inEscape) {
+ if (char === "m") {
+ inEscape = false;
+ }
+ continue;
+ }
+ visible += 1;
+ if (visible >= Math.max(0, targetWidth - 1)) {
+ break;
+ }
+ }
+ return `${result}\u2026`;
+ }
+ function visibleWidth(text) {
+ return text.replace(/\u001b\[[0-9;]*m/g, "").length;
+ }
+ function clamp(value, min, max) {
+ return Math.max(min, Math.min(max, value));
+ }
+
  // src/cli.ts
- async function runCli(argv, io = { stdout: process.stdout, stderr: process.stderr, stdin: process.stdin }, dependencies = { createOptimizer, readStdin }) {
+ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stderr, stdin: process.stdin }, dependencies = { createOptimizer, readStdin, getCliInfo }) {
  const [command, ...rest] = argv;
- if (!command || command === "--help" || command === "-h") {
+ if (!command) {
+ const info = (dependencies.getCliInfo ?? getCliInfo)(io.stdout);
+ if (io.stdout.isTTY) {
+ io.stdout.write(`${renderWelcomeScreen(info)}
+ `);
+ return 0;
+ }
+ io.stdout.write(`${getHelpText()}
+ `);
+ return 0;
+ }
+ if (command === "--help" || command === "-h" || command === "help") {
  io.stdout.write(`${getHelpText()}
  `);
  return 0;
@@ -1433,6 +1996,12 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
  maxLength: parsed.maxLength,
  tags: parsed.tags,
  pinnedConstraints: parsed.pinnedConstraints,
+ availableTargets: parsed.targets,
+ routingEnabled: parsed.routingEnabled,
+ routingPriority: parsed.routingPriority,
+ routingTopK: parsed.routingTopK,
+ targetHints: parsed.targetHints,
+ workloadBias: parsed.workloadBias,
  debug: parsed.debug,
  plainOutput: parsed.plain,
  maxTotalTokens: parsed.maxTotalTokens,
@@ -1456,6 +2025,10 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
  `);
  io.stdout.write(`provider=${result.provider} model=${result.model} tokens=${result.estimatedTokensAfter.total} savings=${result.tokenSavings}
  `);
+ if (result.selectedTarget) {
+ io.stdout.write(`selected_target=${formatTarget(result.selectedTarget)}
+ `);
+ }
  if (result.warnings.length > 0) {
  io.stdout.write(`warnings=${result.warnings.join(" | ")}
  `);
@@ -1478,7 +2051,10 @@ function parseOptimizeArgs(args) {
  bypassOptimization: false,
  help: false,
  tags: [],
- pinnedConstraints: []
+ pinnedConstraints: [],
+ targets: [],
+ targetHints: [],
+ routingEnabled: true
  };
  const positionals = [];
  for (let index = 0; index < args.length; index += 1) {
@@ -1517,6 +2093,24 @@ function parseOptimizeArgs(args) {
  case "--pin-constraint":
  parsed.pinnedConstraints.push(args[++index]);
  break;
+ case "--target":
+ parsed.targets.push(parseTargetCandidate(args[++index], parsed.targets.length));
+ break;
+ case "--target-hint":
+ parsed.targetHints.push(args[++index]);
+ break;
+ case "--routing-priority":
+ parsed.routingPriority = args[++index];
+ break;
+ case "--routing-top-k":
+ parsed.routingTopK = Number(args[++index]);
+ break;
+ case "--workload-bias":
+ parsed.workloadBias = args[++index];
+ break;
+ case "--no-routing":
+ parsed.routingEnabled = false;
+ break;
  case "--host":
  parsed.host = args[++index];
  break;
@@ -1589,6 +2183,12 @@ function getHelpText() {
  " --max-length <n>",
  " --tag <value> Repeatable",
  " --pin-constraint <text> Repeatable",
+ " --target <provider:model> Repeatable",
+ " --target-hint <value> Repeatable",
+ " --routing-priority <value>",
+ " --routing-top-k <n>",
+ " --workload-bias <code_first>",
+ " --no-routing",
  " --host <url>",
  " --store <local|sqlite>",
  " --storage-dir <path>",
@@ -1606,6 +2206,20 @@ function getHelpText() {
  " --bypass-optimization"
  ].join("\n");
  }
+ function parseTargetCandidate(raw, index) {
+ const [provider, ...modelParts] = raw.split(":");
+ const model = modelParts.join(":").trim();
+ return {
+ provider: provider.trim(),
+ model,
+ label: raw,
+ costRank: index + 1,
+ latencyRank: index + 1
+ };
+ }
+ function formatTarget(target) {
+ return target.label ?? `${target.provider}:${target.model}`;
+ }
  async function readStdin(stdin = process.stdin) {
  if (!stdin || stdin.isTTY) {
  return "";
@@ -1620,6 +2234,36 @@ async function readStdin(stdin = process.stdin) {
  stdin.on("error", reject);
  });
  }
+ function getCliInfo(stdout) {
+ return {
+ cwd: process.cwd(),
+ version: readPackageVersion(),
+ color: shouldUseColor(stdout),
+ columns: stdout.columns,
+ user: process.env.USER ?? process.env.USERNAME
+ };
+ }
+ function shouldUseColor(stdout) {
+ if (!stdout.isTTY) {
+ return false;
+ }
+ if (process.env.NO_COLOR) {
+ return false;
+ }
+ if (process.env.TERM === "dumb") {
+ return false;
+ }
+ return true;
+ }
+ function readPackageVersion() {
+ try {
+ const packageJson = readFileSync(new URL("../package.json", import.meta.url), "utf8");
+ const parsed = JSON.parse(packageJson);
+ return parsed.version ?? "dev";
+ } catch {
+ return "dev";
+ }
+ }
  if (isMainModule()) {
  runCli(process.argv.slice(2)).then(
  (code) => {
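
For orientation, the sketch below shows how the routing options added in 0.1.4 appear to be passed to the optimizer and read back from its result. It is a minimal illustration, not part of the published package: the import specifier and the optimize() method name are assumptions, and the candidate model strings are placeholders, while the option and result field names (availableTargets, routingEnabled, routingPriority, routingTopK, workloadBias, selectedTarget, rankedTargets) are taken from the diff above.

// Hypothetical usage sketch; the `optimize` method name and import path are assumptions.
import { createOptimizer } from "promptpilot";

async function main() {
  const optimizer = createOptimizer({ provider: "ollama" });
  // The option names below mirror the fields read in dist/cli.js for this version.
  const result = await optimizer.optimize({
    prompt: "Refactor the auth module and add tests for token refresh.",
    mode: "concise",
    preset: "code",
    routingEnabled: true,
    routingPriority: "cheapest_adequate",
    routingTopK: 3,
    workloadBias: "code_first",
    availableTargets: [
      // Placeholder targets; costRank/latencyRank are 1 = cheapest/fastest.
      { provider: "anthropic", model: "claude-sonnet", costRank: 2, latencyRank: 2 },
      { provider: "openai", model: "gpt-4o-mini", costRank: 1, latencyRank: 1 }
    ]
  });
  // selectedTarget and rankedTargets are populated only when routing actually ran.
  console.log(result.selectedTarget, result.rankedTargets);
}

main().catch(console.error);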