promptpilot 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -834,6 +834,9 @@ var DEFAULT_PROVIDER = "ollama";
834
834
  var DEFAULT_MAX_INPUT_TOKENS = 1200;
835
835
  var DEFAULT_MAX_CONTEXT_TOKENS = 800;
836
836
  var DEFAULT_MAX_TOTAL_TOKENS = 2200;
837
+ var DEFAULT_ROUTING_PRIORITY = "cheapest_adequate";
838
+ var DEFAULT_ROUTING_TOP_K = 3;
839
+ var DEFAULT_WORKLOAD_BIAS = "code_first";
837
840
  var PromptOptimizer = class {
838
841
  config;
839
842
  logger;
@@ -871,6 +874,10 @@ var PromptOptimizer = class {
871
874
  const maxInputTokens = input.maxInputTokens ?? this.config.maxInputTokens ?? DEFAULT_MAX_INPUT_TOKENS;
872
875
  const maxContextTokens = input.maxContextTokens ?? this.config.maxContextTokens ?? DEFAULT_MAX_CONTEXT_TOKENS;
873
876
  const maxTotalTokens = input.maxTotalTokens ?? this.config.maxTotalTokens ?? DEFAULT_MAX_TOTAL_TOKENS;
877
+ const routingEnabled = input.routingEnabled !== false;
878
+ const routingPriority = input.routingPriority ?? DEFAULT_ROUTING_PRIORITY;
879
+ const routingTopK = input.routingTopK ?? DEFAULT_ROUTING_TOP_K;
880
+ const workloadBias = input.workloadBias ?? DEFAULT_WORKLOAD_BIAS;
874
881
  const warnings = [];
875
882
  const changes = [];
876
883
  const useContext = input.useContext !== false && Boolean(input.sessionId);
@@ -959,6 +966,22 @@ var PromptOptimizer = class {
959
966
  }
960
967
  warnings.push(...providerWarnings);
961
968
  changes.push(...providerChanges);
969
+ const routingDecision = await this.routeDownstreamTargets({
970
+ input: {
971
+ ...input,
972
+ prompt: originalPrompt,
973
+ mode,
974
+ preset,
975
+ routingPriority,
976
+ routingTopK,
977
+ workloadBias
978
+ },
979
+ routingEnabled,
980
+ routingPriority,
981
+ routingTopK,
982
+ workloadBias
983
+ });
984
+ warnings.push(...routingDecision.routingWarnings);
962
985
  let finalPrompt = composeFinalPrompt({
963
986
  optimizedPrompt,
964
987
  input: {
@@ -967,7 +990,8 @@ var PromptOptimizer = class {
967
990
  mode,
968
991
  preset
969
992
  },
970
- context: relevantContext
993
+ context: relevantContext,
994
+ routingDecision
971
995
  });
972
996
  let estimatedTokensAfter = {
973
997
  prompt: this.estimator.estimateText(optimizedPrompt),
@@ -984,6 +1008,7 @@ var PromptOptimizer = class {
984
1008
  },
985
1009
  optimizedPrompt,
986
1010
  context: relevantContext,
1011
+ routingDecision,
987
1012
  maxTotalTokens
988
1013
  });
989
1014
  finalPrompt = reduced.finalPrompt;
@@ -1019,6 +1044,11 @@ var PromptOptimizer = class {
1019
1044
  mode,
1020
1045
  provider,
1021
1046
  model,
1047
+ selectedTarget: routingDecision.selectedTarget,
1048
+ rankedTargets: routingDecision.rankedTargets,
1049
+ routingReason: routingDecision.routingReason,
1050
+ routingWarnings: routingDecision.routingWarnings,
1051
+ routingProvider: routingDecision.routingProvider,
1022
1052
  warnings,
1023
1053
  changes,
1024
1054
  debugInfo: input.debug ? {
@@ -1027,7 +1057,8 @@ var PromptOptimizer = class {
1027
1057
  estimatedTokensAfter,
1028
1058
  extractedConstraints,
1029
1059
  preset,
1030
- selectedModel: model
1060
+ selectedModel: model,
1061
+ routingDecision
1031
1062
  } : void 0
1032
1063
  };
1033
1064
  }
@@ -1243,6 +1274,137 @@ var PromptOptimizer = class {
1243
1274
  };
1244
1275
  }
1245
1276
  }
1277
+ async routeDownstreamTargets(options) {
1278
+ const availableTargets = normalizeAvailableTargets(options.input.availableTargets ?? []);
1279
+ if (!options.routingEnabled || availableTargets.length === 0) {
1280
+ return {
1281
+ selectedTarget: null,
1282
+ rankedTargets: [],
1283
+ routingReason: null,
1284
+ routingWarnings: [],
1285
+ routingProvider: null
1286
+ };
1287
+ }
1288
+ if (availableTargets.length === 1) {
1289
+ return {
1290
+ selectedTarget: stripInternalTargetFields(availableTargets[0]),
1291
+ rankedTargets: [
1292
+ {
1293
+ ...stripInternalTargetFields(availableTargets[0]),
1294
+ rank: 1,
1295
+ reason: "Only one downstream target was supplied."
1296
+ }
1297
+ ],
1298
+ routingReason: "Only one downstream target was supplied, so it was selected directly.",
1299
+ routingWarnings: [],
1300
+ routingProvider: "direct"
1301
+ };
1302
+ }
1303
+ if (!this.client.listModels) {
1304
+ return {
1305
+ selectedTarget: null,
1306
+ rankedTargets: [],
1307
+ routingReason: null,
1308
+ routingWarnings: [
1309
+ "Downstream target routing requires local Ollama model discovery so a Qwen router can run."
1310
+ ],
1311
+ routingProvider: null
1312
+ };
1313
+ }
1314
+ try {
1315
+ const installedModels = await this.client.listModels();
1316
+ const routerModel = getQwenRouterModel(installedModels, this.config.routerModel);
1317
+ if (!routerModel) {
1318
+ return {
1319
+ selectedTarget: null,
1320
+ rankedTargets: [],
1321
+ routingReason: null,
1322
+ routingWarnings: [
1323
+ "Downstream target routing could not run because no suitable local Qwen router model is installed."
1324
+ ],
1325
+ routingProvider: null
1326
+ };
1327
+ }
1328
+ const response = await this.client.generateJson({
1329
+ model: routerModel,
1330
+ timeoutMs: options.input.timeoutMs ?? this.config.timeoutMs,
1331
+ temperature: 0,
1332
+ format: "json",
1333
+ systemPrompt: buildDownstreamRoutingSystemPrompt(options.routingPriority, options.workloadBias),
1334
+ prompt: JSON.stringify(
1335
+ {
1336
+ objective: "Rank the caller-supplied downstream targets for this prompt and choose the best top target.",
1337
+ prompt: options.input.prompt,
1338
+ task: options.input.task ?? null,
1339
+ mode: options.input.mode,
1340
+ preset: options.input.preset,
1341
+ tone: options.input.tone ?? null,
1342
+ targetHints: options.input.targetHints ?? [],
1343
+ workloadBias: options.workloadBias,
1344
+ routingPriority: options.routingPriority,
1345
+ candidateTargets: availableTargets.map((target) => ({
1346
+ id: target.id,
1347
+ provider: target.provider,
1348
+ model: target.model,
1349
+ label: target.label ?? null,
1350
+ costRank: target.costRank,
1351
+ latencyRank: target.latencyRank,
1352
+ capabilities: target.capabilities,
1353
+ profile: describeDownstreamTarget(target)
1354
+ }))
1355
+ },
1356
+ null,
1357
+ 2
1358
+ )
1359
+ });
1360
+ const rankedTargetIds = Array.from(
1361
+ new Set((response.rankedTargetIds ?? []).map((value) => value.trim()).filter(Boolean))
1362
+ ).slice(0, Math.max(1, options.routingTopK));
1363
+ const rankedTargets = rankedTargetIds.map((id, index) => {
1364
+ const target = availableTargets.find((candidate) => candidate.id === id);
1365
+ if (!target) {
1366
+ return null;
1367
+ }
1368
+ return {
1369
+ ...stripInternalTargetFields(target),
1370
+ rank: index + 1,
1371
+ reason: index === 0 ? response.reason?.trim() || "Selected by the local Qwen downstream router." : `Ranked #${index + 1} by the local Qwen downstream router.`
1372
+ };
1373
+ }).filter((value) => value !== null);
1374
+ const selectedTargetId = response.selectedTargetId?.trim();
1375
+ const selectedTargetCandidate = (selectedTargetId && availableTargets.find((candidate) => candidate.id === selectedTargetId)) ?? (rankedTargets[0] ? availableTargets.find(
1376
+ (candidate) => candidate.provider === rankedTargets[0].provider && candidate.model === rankedTargets[0].model && candidate.label === rankedTargets[0].label
1377
+ ) ?? null : null);
1378
+ if (!selectedTargetCandidate || rankedTargets.length === 0) {
1379
+ return {
1380
+ selectedTarget: null,
1381
+ rankedTargets: [],
1382
+ routingReason: null,
1383
+ routingWarnings: [
1384
+ "Downstream target routing returned an invalid selection, so no downstream target was chosen."
1385
+ ],
1386
+ routingProvider: routerModel
1387
+ };
1388
+ }
1389
+ return {
1390
+ selectedTarget: stripInternalTargetFields(selectedTargetCandidate),
1391
+ rankedTargets,
1392
+ routingReason: response.reason?.trim() || "Selected by the local Qwen downstream router.",
1393
+ routingWarnings: [],
1394
+ routingProvider: routerModel
1395
+ };
1396
+ } catch {
1397
+ return {
1398
+ selectedTarget: null,
1399
+ rankedTargets: [],
1400
+ routingReason: null,
1401
+ routingWarnings: [
1402
+ "Downstream target routing could not complete, so no downstream target was selected."
1403
+ ],
1404
+ routingProvider: null
1405
+ };
1406
+ }
1407
+ }
1246
1408
  heuristicOptimize(options) {
1247
1409
  const lines = [
1248
1410
  `Request: ${options.input.prompt}`,
@@ -1274,7 +1436,8 @@ var PromptOptimizer = class {
1274
1436
  const finalPrompt = composeFinalPrompt({
1275
1437
  optimizedPrompt: this.estimator.truncateToBudget(options.optimizedPrompt, Math.floor(options.maxTotalTokens * 0.5)),
1276
1438
  input: options.input,
1277
- context: compactContext
1439
+ context: compactContext,
1440
+ routingDecision: options.routingDecision
1278
1441
  });
1279
1442
  return {
1280
1443
  finalPrompt,
@@ -1335,7 +1498,7 @@ ${contextBlock}`);
1335
1498
  - ${constraints.join("\n- ")}`);
1336
1499
  }
1337
1500
  const desiredOutput = [
1338
- input.input.targetModel ? `Target model: ${input.input.targetModel}` : "Target model: claude",
1501
+ input.routingDecision.selectedTarget ? `Selected target: ${formatTargetLabel(input.routingDecision.selectedTarget)}` : input.input.targetModel ? `Target model: ${input.input.targetModel}` : "Target model: claude",
1339
1502
  `Mode: ${input.input.mode}`,
1340
1503
  `Preset: ${input.input.preset}`
1341
1504
  ];
@@ -1361,6 +1524,77 @@ function emptyRelevantContext() {
1361
1524
  debugInfo: {}
1362
1525
  };
1363
1526
  }
1527
/**
 * Fills in the routing fields on each caller-supplied target.
 *
 * Each entry keeps its own properties and gains:
 * - `id`: always `provider:model:index` (overrides any supplied `id`);
 * - `label`: the supplied label, else `provider:model`;
 * - `capabilities`: the supplied list, else the result of inferCapabilities;
 * - `costRank` / `latencyRank`: the supplied value, else the 1-based position.
 *
 * @param {object[]} targets - Candidate targets with `provider` and `model`.
 * @returns {object[]} New objects; the inputs are not mutated.
 */
function normalizeAvailableTargets(targets) {
  return targets.map(function applyTargetDefaults(entry, position) {
    const qualifiedName = `${entry.provider}:${entry.model}`;
    const positionalRank = position + 1;
    return {
      ...entry,
      id: `${qualifiedName}:${position}`,
      label: entry.label ?? qualifiedName,
      capabilities: entry.capabilities ?? inferCapabilities(entry),
      costRank: entry.costRank ?? positionalRank,
      latencyRank: entry.latencyRank ?? positionalRank
    };
  });
}
1537
/**
 * Returns a copy of a target holding only `provider`, `model`, `label`,
 * `capabilities`, `costRank` and `latencyRank`; every other property
 * (such as `id`) is left out.
 *
 * @param {object} target - A target object.
 * @returns {object} A new object with exactly the six fields above.
 */
function stripInternalTargetFields(target) {
  const { provider, model, label, capabilities, costRank, latencyRank } = target;
  return { provider, model, label, capabilities, costRank, latencyRank };
}
1547
/**
 * Builds the system prompt sent to the downstream router model.
 *
 * The prompt is a fixed instruction list with the routing priority and the
 * workload bias interpolated, joined by newlines.
 *
 * @param {string} priority - Value placed after "Routing priority:".
 * @param {string} workloadBias - Value placed after "Workload bias:".
 * @returns {string} The newline-joined system prompt.
 */
function buildDownstreamRoutingSystemPrompt(priority, workloadBias) {
  const responseContract = [
    "You are a downstream model router for PromptPilot.",
    "Return strict JSON only with this shape:",
    '{"selectedTargetId":"string","rankedTargetIds":["string"],"reason":"string"}',
    "Choose only from the supplied candidate target IDs.",
    "Rank up to the requested top targets in best-first order."
  ];
  const callerSettings = [
    `Routing priority: ${priority}.`,
    `Workload bias: ${workloadBias}.`
  ];
  const routingGuidance = [
    "Code-first means ambiguous prompts should default toward coding-capable or agentic-capable targets.",
    "Explicit email, support, chat, and lightweight writing prompts may prefer cheaper lighter targets.",
    "Do not invent targets. Do not output prose outside JSON."
  ];
  return [...responseContract, ...callerSettings, ...routingGuidance].join("\n");
}
1561
/**
 * Guesses capability tags for a target from its provider, model and label.
 *
 * The three fields are joined, lower-cased and checked against keyword rules.
 * Most keywords match as substrings (so "code" also covers "codex", "coder",
 * "codellama"). The short keywords "mini" and "ci" must appear as whole
 * tokens, because as substrings they tagged every "gemini" model as a writing
 * target and names such as "sciphi" as debugging targets.
 *
 * @param {{provider: string, model: string, label?: string}} target
 * @returns {string[]} Distinct tags in a fixed order: coding, agentic,
 *   tool_use, refactor, debugging, writing, email.
 */
function inferCapabilities(target) {
  const lower = `${target.provider} ${target.model} ${target.label ?? ""}`.toLowerCase();
  // Alphanumeric runs, used for the keywords that must match as whole words.
  const tokens = new Set(lower.split(/[^a-z0-9]+/).filter(Boolean));
  const capabilities = /* @__PURE__ */ new Set();
  if (/code|agent|tool/.test(lower)) {
    capabilities.add("coding");
  }
  if (/agent|tool/.test(lower)) {
    capabilities.add("agentic");
    capabilities.add("tool_use");
  }
  if (/refactor|coder|codex/.test(lower)) {
    capabilities.add("refactor");
  }
  if (/debug|fix/.test(lower) || tokens.has("ci")) {
    capabilities.add("debugging");
  }
  if (/write|email|chat|sonnet/.test(lower) || tokens.has("mini")) {
    capabilities.add("writing");
  }
  if (/email/.test(lower)) {
    capabilities.add("email");
  }
  return Array.from(capabilities);
}
1585
/**
 * Renders a target as one `key=value; key=value` line covering provider,
 * model, label, costRank, latencyRank and capabilities.
 *
 * Capabilities are comma-joined; a missing or empty list is shown as "none".
 *
 * @param {object} target - Target to describe.
 * @returns {string} The "; "-separated summary.
 */
function describeDownstreamTarget(target) {
  const summary = {
    provider: target.provider,
    model: target.model,
    label: target.label,
    costRank: target.costRank,
    latencyRank: target.latencyRank,
    capabilities: target.capabilities?.join(",") || "none"
  };
  return Object.entries(summary)
    .map(([field, value]) => `${field}=${value}`)
    .join("; ");
}
1595
/**
 * Returns the display name for a target: its `label` when that is neither
 * null nor undefined, otherwise `provider:model`.
 *
 * @param {{provider: string, model: string, label?: string|null}} target
 * @returns {string}
 */
function formatTargetLabel(target) {
  const { label } = target;
  if (label !== null && label !== undefined) {
    return label;
  }
  return `${target.provider}:${target.model}`;
}
1364
1598
 
1365
1599
  // src/index.ts
1366
1600
  function createOptimizer(config = {}) {
@@ -1433,6 +1667,12 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
1433
1667
  maxLength: parsed.maxLength,
1434
1668
  tags: parsed.tags,
1435
1669
  pinnedConstraints: parsed.pinnedConstraints,
1670
+ availableTargets: parsed.targets,
1671
+ routingEnabled: parsed.routingEnabled,
1672
+ routingPriority: parsed.routingPriority,
1673
+ routingTopK: parsed.routingTopK,
1674
+ targetHints: parsed.targetHints,
1675
+ workloadBias: parsed.workloadBias,
1436
1676
  debug: parsed.debug,
1437
1677
  plainOutput: parsed.plain,
1438
1678
  maxTotalTokens: parsed.maxTotalTokens,
@@ -1456,6 +1696,10 @@ async function runCli(argv, io = { stdout: process.stdout, stderr: process.stder
1456
1696
  `);
1457
1697
  io.stdout.write(`provider=${result.provider} model=${result.model} tokens=${result.estimatedTokensAfter.total} savings=${result.tokenSavings}
1458
1698
  `);
1699
+ if (result.selectedTarget) {
1700
+ io.stdout.write(`selected_target=${formatTarget(result.selectedTarget)}
1701
+ `);
1702
+ }
1459
1703
  if (result.warnings.length > 0) {
1460
1704
  io.stdout.write(`warnings=${result.warnings.join(" | ")}
1461
1705
  `);
@@ -1478,7 +1722,10 @@ function parseOptimizeArgs(args) {
1478
1722
  bypassOptimization: false,
1479
1723
  help: false,
1480
1724
  tags: [],
1481
- pinnedConstraints: []
1725
+ pinnedConstraints: [],
1726
+ targets: [],
1727
+ targetHints: [],
1728
+ routingEnabled: true
1482
1729
  };
1483
1730
  const positionals = [];
1484
1731
  for (let index = 0; index < args.length; index += 1) {
@@ -1517,6 +1764,24 @@ function parseOptimizeArgs(args) {
1517
1764
  case "--pin-constraint":
1518
1765
  parsed.pinnedConstraints.push(args[++index]);
1519
1766
  break;
1767
+ case "--target":
1768
+ parsed.targets.push(parseTargetCandidate(args[++index], parsed.targets.length));
1769
+ break;
1770
+ case "--target-hint":
1771
+ parsed.targetHints.push(args[++index]);
1772
+ break;
1773
+ case "--routing-priority":
1774
+ parsed.routingPriority = args[++index];
1775
+ break;
1776
+ case "--routing-top-k":
1777
+ parsed.routingTopK = Number(args[++index]);
1778
+ break;
1779
+ case "--workload-bias":
1780
+ parsed.workloadBias = args[++index];
1781
+ break;
1782
+ case "--no-routing":
1783
+ parsed.routingEnabled = false;
1784
+ break;
1520
1785
  case "--host":
1521
1786
  parsed.host = args[++index];
1522
1787
  break;
@@ -1589,6 +1854,12 @@ function getHelpText() {
1589
1854
  " --max-length <n>",
1590
1855
  " --tag <value> Repeatable",
1591
1856
  " --pin-constraint <text> Repeatable",
1857
+ " --target <provider:model> Repeatable",
1858
+ " --target-hint <value> Repeatable",
1859
+ " --routing-priority <value>",
1860
+ " --routing-top-k <n>",
1861
+ " --workload-bias <code_first>",
1862
+ " --no-routing",
1592
1863
  " --host <url>",
1593
1864
  " --store <local|sqlite>",
1594
1865
  " --storage-dir <path>",
@@ -1606,6 +1877,20 @@ function getHelpText() {
1606
1877
  " --bypass-optimization"
1607
1878
  ].join("\n");
1608
1879
  }
1880
/**
 * Parses one `--target <provider:model>` value into a target candidate.
 *
 * The text before the first ":" is the provider; everything after it is the
 * model, so model names may contain ":" themselves. Both parts are trimmed.
 * The label is the raw value as typed, and both ranks default to the 1-based
 * position of the flag.
 *
 * @param {string} raw - Value that followed `--target`.
 * @param {number} index - Zero-based position among the parsed targets.
 * @returns {{provider: string, model: string, label: string,
 *   costRank: number, latencyRank: number}}
 * @throws {Error} When the value is missing or blank, e.g. `--target` was the
 *   last argument. This previously crashed with a TypeError on `raw.split`.
 */
function parseTargetCandidate(raw, index) {
  if (typeof raw !== "string" || raw.trim() === "") {
    throw new Error("--target requires a value in the form <provider:model>");
  }
  const [provider, ...modelParts] = raw.split(":");
  const model = modelParts.join(":").trim();
  return {
    provider: provider.trim(),
    model,
    label: raw,
    costRank: index + 1,
    latencyRank: index + 1
  };
}
1891
/**
 * Returns the text printed for a selected target: its `label` unless that is
 * null or undefined, in which case `provider:model` is used.
 *
 * @param {{provider: string, model: string, label?: string|null}} target
 * @returns {string}
 */
function formatTarget(target) {
  const hasLabel = target.label != null;
  return hasLabel ? target.label : `${target.provider}:${target.model}`;
}
1609
1894
  async function readStdin(stdin = process.stdin) {
1610
1895
  if (!stdin || stdin.isTTY) {
1611
1896
  return "";