promptpilot 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -2,6 +2,28 @@ type ProviderType = "ollama" | "heuristic";
2
2
  type OptimizationMode = "clarity" | "concise" | "detailed" | "structured" | "persuasive" | "compress" | "claude_cli";
3
3
  type PromptPreset = "code" | "email" | "essay" | "support" | "summarization" | "chat";
4
4
  type ModelRoutingStrategy = "qwen";
5
/**
 * Objective the downstream router is told to optimise for; the value is
 * interpolated verbatim into the router's system prompt.
 */
type RoutingPriority =
  | "cheapest_adequate"
  | "best_quality"
  | "fastest_adequate";

/**
 * Tie-breaking bias forwarded to the router. "code_first" tells the router
 * that ambiguous prompts should lean toward coding/agentic-capable targets.
 */
type WorkloadBias = "code_first";

/** Capability tags usable on a target candidate and as caller-supplied target hints. */
type TargetCapability =
  | "coding"
  | "agentic"
  | "tool_use"
  | "refactor"
  | "debugging"
  | "architecture"
  | "writing"
  | "email"
  | "support"
  | "chat"
  | "summarization";
8
/**
 * A downstream model the caller is able to send the optimized prompt to.
 *
 * Only `provider` and `model` are required. During routing the optional
 * fields are filled in when omitted: `label` becomes `provider:model`,
 * `capabilities` are inferred from the provider/model/label text, and both
 * ranks default to the candidate's 1-based position in the supplied list.
 */
interface TargetModelCandidate {
  /** Provider identifier, as supplied by the caller. */
  provider: string;
  /** Model identifier within that provider. */
  model: string;
  /** Optional human-readable name shown in the composed prompt and routing output. */
  label?: string;
  /** Optional capability tags describing what the target is suited for. */
  capabilities?: ReadonlyArray<TargetCapability>;
  /** Optional relative cost ordering among the supplied candidates. */
  costRank?: number;
  /** Optional relative latency ordering among the supplied candidates. */
  latencyRank?: number;
}
16
/** A target candidate annotated with its position in the router's ranking. */
interface RankedTargetCandidate extends TargetModelCandidate {
  /** 1-based position in the ranking (1 is the best match). */
  rank: number;
  /** Short explanation of why the candidate received this rank. */
  reason: string;
}
20
/**
 * Outcome of downstream target routing.
 *
 * When routing is disabled, no targets are supplied, or routing fails,
 * `selectedTarget`, `routingReason` and (except for an invalid router answer)
 * `routingProvider` are `null` and `rankedTargets` is empty.
 */
interface RoutingDecision {
  /** The chosen downstream target, or `null` when none was chosen. */
  selectedTarget: TargetModelCandidate | null;
  /** Candidates in best-first order; empty when nothing was ranked. */
  rankedTargets: Array<RankedTargetCandidate>;
  /** Explanation for the selection, or `null` when nothing was selected. */
  routingReason: string | null;
  /** Human-readable problems encountered while routing. */
  routingWarnings: Array<string>;
  /**
   * What made the decision: `"direct"` when a single target was supplied,
   * the router model name when a local router ran, otherwise `null`.
   */
  routingProvider: string | null;
}
5
27
  interface ContextEntry {
6
28
  id: string;
7
29
  sessionId: string;
@@ -60,6 +82,12 @@ interface OptimizePromptInput {
60
82
  timeoutMs?: number;
61
83
  tags?: string[];
62
84
  pinnedConstraints?: string[];
85
+ availableTargets?: TargetModelCandidate[];
86
+ routingEnabled?: boolean;
87
+ routingPriority?: RoutingPriority;
88
+ routingTopK?: number;
89
+ targetHints?: TargetCapability[];
90
+ workloadBias?: WorkloadBias;
63
91
  }
64
92
  interface OptimizePromptResult {
65
93
  originalPrompt: string;
@@ -73,6 +101,11 @@ interface OptimizePromptResult {
73
101
  mode: OptimizationMode;
74
102
  provider: ProviderType;
75
103
  model: string;
104
+ selectedTarget: TargetModelCandidate | null;
105
+ rankedTargets: RankedTargetCandidate[];
106
+ routingReason: string | null;
107
+ routingWarnings: string[];
108
+ routingProvider: string | null;
76
109
  warnings: string[];
77
110
  changes: string[];
78
111
  debugInfo?: Record<string, unknown>;
@@ -158,6 +191,7 @@ declare class PromptOptimizer {
158
191
  private tryOllamaOptimization;
159
192
  private resolveOllamaModel;
160
193
  private tryQwenModelRouting;
194
+ private routeDownstreamTargets;
161
195
  private heuristicOptimize;
162
196
  private reduceToBudget;
163
197
  }
@@ -289,4 +323,4 @@ declare class SQLiteSessionStore implements SessionStore {
289
323
  declare function createOptimizer(config?: OptimizerConfig): PromptOptimizer;
290
324
  declare function optimizePrompt(input: OptimizePromptInput, config?: OptimizerConfig): Promise<OptimizePromptResult>;
291
325
 
292
- export { ContextCompressor, type ContextEntry, ContextManager, ContextStoreError, type ContextSummary, FileSessionStore, InvalidPromptError, type Logger, type ModelRoutingStrategy, OllamaClient, type OllamaClientLike, type OllamaGenerateOptions, type OllamaModelInfo, OllamaUnavailableError, type OptimizationMode, type OptimizePromptInput, type OptimizePromptResult, type OptimizerConfig, PromptOptimizer, type PromptPreset, type ProviderType, type RelevantContextResult, SQLiteSessionStore, type SessionData, type SessionStore, TokenBudgetExceededError, TokenEstimator, type TokenUsageEstimate, createOptimizer, getDefaultPreferredModels, optimizePrompt, selectOllamaModel };
326
+ export { ContextCompressor, type ContextEntry, ContextManager, ContextStoreError, type ContextSummary, FileSessionStore, InvalidPromptError, type Logger, type ModelRoutingStrategy, OllamaClient, type OllamaClientLike, type OllamaGenerateOptions, type OllamaModelInfo, OllamaUnavailableError, type OptimizationMode, type OptimizePromptInput, type OptimizePromptResult, type OptimizerConfig, PromptOptimizer, type PromptPreset, type ProviderType, type RankedTargetCandidate, type RelevantContextResult, type RoutingDecision, type RoutingPriority, SQLiteSessionStore, type SessionData, type SessionStore, type TargetCapability, type TargetModelCandidate, TokenBudgetExceededError, TokenEstimator, type TokenUsageEstimate, type WorkloadBias, createOptimizer, getDefaultPreferredModels, optimizePrompt, selectOllamaModel };
package/dist/index.js CHANGED
@@ -801,6 +801,9 @@ var DEFAULT_PROVIDER = "ollama";
801
801
  var DEFAULT_MAX_INPUT_TOKENS = 1200;
802
802
  var DEFAULT_MAX_CONTEXT_TOKENS = 800;
803
803
  var DEFAULT_MAX_TOTAL_TOKENS = 2200;
804
// Fallbacks applied when the optimize input omits the corresponding routing option.
var DEFAULT_ROUTING_PRIORITY = "cheapest_adequate",
  DEFAULT_ROUTING_TOP_K = 3,
  DEFAULT_WORKLOAD_BIAS = "code_first";
804
807
  var PromptOptimizer = class {
805
808
  config;
806
809
  logger;
@@ -838,6 +841,10 @@ var PromptOptimizer = class {
838
841
  const maxInputTokens = input.maxInputTokens ?? this.config.maxInputTokens ?? DEFAULT_MAX_INPUT_TOKENS;
839
842
  const maxContextTokens = input.maxContextTokens ?? this.config.maxContextTokens ?? DEFAULT_MAX_CONTEXT_TOKENS;
840
843
  const maxTotalTokens = input.maxTotalTokens ?? this.config.maxTotalTokens ?? DEFAULT_MAX_TOTAL_TOKENS;
844
+ const routingEnabled = input.routingEnabled !== false;
845
+ const routingPriority = input.routingPriority ?? DEFAULT_ROUTING_PRIORITY;
846
+ const routingTopK = input.routingTopK ?? DEFAULT_ROUTING_TOP_K;
847
+ const workloadBias = input.workloadBias ?? DEFAULT_WORKLOAD_BIAS;
841
848
  const warnings = [];
842
849
  const changes = [];
843
850
  const useContext = input.useContext !== false && Boolean(input.sessionId);
@@ -926,6 +933,22 @@ var PromptOptimizer = class {
926
933
  }
927
934
  warnings.push(...providerWarnings);
928
935
  changes.push(...providerChanges);
936
+ const routingDecision = await this.routeDownstreamTargets({
937
+ input: {
938
+ ...input,
939
+ prompt: originalPrompt,
940
+ mode,
941
+ preset,
942
+ routingPriority,
943
+ routingTopK,
944
+ workloadBias
945
+ },
946
+ routingEnabled,
947
+ routingPriority,
948
+ routingTopK,
949
+ workloadBias
950
+ });
951
+ warnings.push(...routingDecision.routingWarnings);
929
952
  let finalPrompt = composeFinalPrompt({
930
953
  optimizedPrompt,
931
954
  input: {
@@ -934,7 +957,8 @@ var PromptOptimizer = class {
934
957
  mode,
935
958
  preset
936
959
  },
937
- context: relevantContext
960
+ context: relevantContext,
961
+ routingDecision
938
962
  });
939
963
  let estimatedTokensAfter = {
940
964
  prompt: this.estimator.estimateText(optimizedPrompt),
@@ -951,6 +975,7 @@ var PromptOptimizer = class {
951
975
  },
952
976
  optimizedPrompt,
953
977
  context: relevantContext,
978
+ routingDecision,
954
979
  maxTotalTokens
955
980
  });
956
981
  finalPrompt = reduced.finalPrompt;
@@ -986,6 +1011,11 @@ var PromptOptimizer = class {
986
1011
  mode,
987
1012
  provider,
988
1013
  model,
1014
+ selectedTarget: routingDecision.selectedTarget,
1015
+ rankedTargets: routingDecision.rankedTargets,
1016
+ routingReason: routingDecision.routingReason,
1017
+ routingWarnings: routingDecision.routingWarnings,
1018
+ routingProvider: routingDecision.routingProvider,
989
1019
  warnings,
990
1020
  changes,
991
1021
  debugInfo: input.debug ? {
@@ -994,7 +1024,8 @@ var PromptOptimizer = class {
994
1024
  estimatedTokensAfter,
995
1025
  extractedConstraints,
996
1026
  preset,
997
- selectedModel: model
1027
+ selectedModel: model,
1028
+ routingDecision
998
1029
  } : void 0
999
1030
  };
1000
1031
  }
@@ -1210,6 +1241,137 @@ var PromptOptimizer = class {
1210
1241
  };
1211
1242
  }
1212
1243
  }
1244
+ async routeDownstreamTargets(options) {
1245
+ const availableTargets = normalizeAvailableTargets(options.input.availableTargets ?? []);
1246
+ if (!options.routingEnabled || availableTargets.length === 0) {
1247
+ return {
1248
+ selectedTarget: null,
1249
+ rankedTargets: [],
1250
+ routingReason: null,
1251
+ routingWarnings: [],
1252
+ routingProvider: null
1253
+ };
1254
+ }
1255
+ if (availableTargets.length === 1) {
1256
+ return {
1257
+ selectedTarget: stripInternalTargetFields(availableTargets[0]),
1258
+ rankedTargets: [
1259
+ {
1260
+ ...stripInternalTargetFields(availableTargets[0]),
1261
+ rank: 1,
1262
+ reason: "Only one downstream target was supplied."
1263
+ }
1264
+ ],
1265
+ routingReason: "Only one downstream target was supplied, so it was selected directly.",
1266
+ routingWarnings: [],
1267
+ routingProvider: "direct"
1268
+ };
1269
+ }
1270
+ if (!this.client.listModels) {
1271
+ return {
1272
+ selectedTarget: null,
1273
+ rankedTargets: [],
1274
+ routingReason: null,
1275
+ routingWarnings: [
1276
+ "Downstream target routing requires local Ollama model discovery so a Qwen router can run."
1277
+ ],
1278
+ routingProvider: null
1279
+ };
1280
+ }
1281
+ try {
1282
+ const installedModels = await this.client.listModels();
1283
+ const routerModel = getQwenRouterModel(installedModels, this.config.routerModel);
1284
+ if (!routerModel) {
1285
+ return {
1286
+ selectedTarget: null,
1287
+ rankedTargets: [],
1288
+ routingReason: null,
1289
+ routingWarnings: [
1290
+ "Downstream target routing could not run because no suitable local Qwen router model is installed."
1291
+ ],
1292
+ routingProvider: null
1293
+ };
1294
+ }
1295
+ const response = await this.client.generateJson({
1296
+ model: routerModel,
1297
+ timeoutMs: options.input.timeoutMs ?? this.config.timeoutMs,
1298
+ temperature: 0,
1299
+ format: "json",
1300
+ systemPrompt: buildDownstreamRoutingSystemPrompt(options.routingPriority, options.workloadBias),
1301
+ prompt: JSON.stringify(
1302
+ {
1303
+ objective: "Rank the caller-supplied downstream targets for this prompt and choose the best top target.",
1304
+ prompt: options.input.prompt,
1305
+ task: options.input.task ?? null,
1306
+ mode: options.input.mode,
1307
+ preset: options.input.preset,
1308
+ tone: options.input.tone ?? null,
1309
+ targetHints: options.input.targetHints ?? [],
1310
+ workloadBias: options.workloadBias,
1311
+ routingPriority: options.routingPriority,
1312
+ candidateTargets: availableTargets.map((target) => ({
1313
+ id: target.id,
1314
+ provider: target.provider,
1315
+ model: target.model,
1316
+ label: target.label ?? null,
1317
+ costRank: target.costRank,
1318
+ latencyRank: target.latencyRank,
1319
+ capabilities: target.capabilities,
1320
+ profile: describeDownstreamTarget(target)
1321
+ }))
1322
+ },
1323
+ null,
1324
+ 2
1325
+ )
1326
+ });
1327
+ const rankedTargetIds = Array.from(
1328
+ new Set((response.rankedTargetIds ?? []).map((value) => value.trim()).filter(Boolean))
1329
+ ).slice(0, Math.max(1, options.routingTopK));
1330
+ const rankedTargets = rankedTargetIds.map((id, index) => {
1331
+ const target = availableTargets.find((candidate) => candidate.id === id);
1332
+ if (!target) {
1333
+ return null;
1334
+ }
1335
+ return {
1336
+ ...stripInternalTargetFields(target),
1337
+ rank: index + 1,
1338
+ reason: index === 0 ? response.reason?.trim() || "Selected by the local Qwen downstream router." : `Ranked #${index + 1} by the local Qwen downstream router.`
1339
+ };
1340
+ }).filter((value) => value !== null);
1341
+ const selectedTargetId = response.selectedTargetId?.trim();
1342
+ const selectedTargetCandidate = (selectedTargetId && availableTargets.find((candidate) => candidate.id === selectedTargetId)) ?? (rankedTargets[0] ? availableTargets.find(
1343
+ (candidate) => candidate.provider === rankedTargets[0].provider && candidate.model === rankedTargets[0].model && candidate.label === rankedTargets[0].label
1344
+ ) ?? null : null);
1345
+ if (!selectedTargetCandidate || rankedTargets.length === 0) {
1346
+ return {
1347
+ selectedTarget: null,
1348
+ rankedTargets: [],
1349
+ routingReason: null,
1350
+ routingWarnings: [
1351
+ "Downstream target routing returned an invalid selection, so no downstream target was chosen."
1352
+ ],
1353
+ routingProvider: routerModel
1354
+ };
1355
+ }
1356
+ return {
1357
+ selectedTarget: stripInternalTargetFields(selectedTargetCandidate),
1358
+ rankedTargets,
1359
+ routingReason: response.reason?.trim() || "Selected by the local Qwen downstream router.",
1360
+ routingWarnings: [],
1361
+ routingProvider: routerModel
1362
+ };
1363
+ } catch {
1364
+ return {
1365
+ selectedTarget: null,
1366
+ rankedTargets: [],
1367
+ routingReason: null,
1368
+ routingWarnings: [
1369
+ "Downstream target routing could not complete, so no downstream target was selected."
1370
+ ],
1371
+ routingProvider: null
1372
+ };
1373
+ }
1374
+ }
1213
1375
  heuristicOptimize(options) {
1214
1376
  const lines = [
1215
1377
  `Request: ${options.input.prompt}`,
@@ -1241,7 +1403,8 @@ var PromptOptimizer = class {
1241
1403
  const finalPrompt = composeFinalPrompt({
1242
1404
  optimizedPrompt: this.estimator.truncateToBudget(options.optimizedPrompt, Math.floor(options.maxTotalTokens * 0.5)),
1243
1405
  input: options.input,
1244
- context: compactContext
1406
+ context: compactContext,
1407
+ routingDecision: options.routingDecision
1245
1408
  });
1246
1409
  return {
1247
1410
  finalPrompt,
@@ -1302,7 +1465,7 @@ ${contextBlock}`);
1302
1465
  - ${constraints.join("\n- ")}`);
1303
1466
  }
1304
1467
  const desiredOutput = [
1305
- input.input.targetModel ? `Target model: ${input.input.targetModel}` : "Target model: claude",
1468
+ input.routingDecision.selectedTarget ? `Selected target: ${formatTargetLabel(input.routingDecision.selectedTarget)}` : input.input.targetModel ? `Target model: ${input.input.targetModel}` : "Target model: claude",
1306
1469
  `Mode: ${input.input.mode}`,
1307
1470
  `Preset: ${input.input.preset}`
1308
1471
  ];
@@ -1328,6 +1491,77 @@ function emptyRelevantContext() {
1328
1491
  debugInfo: {}
1329
1492
  };
1330
1493
  }
1494
/**
 * Prepares caller-supplied targets for routing.
 *
 * Each entry is copied and given an internal `id` of the form
 * `provider:model:index`, plus defaults for any omitted optional field:
 * `label` falls back to `provider:model`, `capabilities` are inferred from
 * the target's text, and both ranks fall back to the 1-based list position.
 *
 * @param targets Candidate targets as supplied by the caller.
 * @returns A new array of normalized targets; the inputs are not mutated.
 */
function normalizeAvailableTargets(targets) {
  return targets.map((candidate, position) => {
    const qualifiedName = `${candidate.provider}:${candidate.model}`;
    const positionalRank = position + 1;
    return {
      ...candidate,
      id: `${qualifiedName}:${position}`,
      label: candidate.label ?? qualifiedName,
      capabilities: candidate.capabilities ?? inferCapabilities(candidate),
      costRank: candidate.costRank ?? positionalRank,
      latencyRank: candidate.latencyRank ?? positionalRank
    };
  });
}
1504
/**
 * Projects a target onto its public fields, dropping everything else
 * (notably the internal routing `id`).
 *
 * @param target A normalized or caller-supplied target.
 * @returns A new object with provider, model, label, capabilities, costRank and latencyRank.
 */
function stripInternalTargetFields(target) {
  const { provider, model, label, capabilities, costRank, latencyRank } = target;
  return { provider, model, label, capabilities, costRank, latencyRank };
}
1514
/**
 * Builds the system prompt for the downstream router model.
 *
 * The prompt fixes the JSON response shape, restricts the router to the
 * supplied candidate IDs, and states the routing priority and workload bias.
 *
 * @param priority     Routing priority, interpolated verbatim.
 * @param workloadBias Workload bias, interpolated verbatim.
 * @returns The newline-joined system prompt.
 */
function buildDownstreamRoutingSystemPrompt(priority, workloadBias) {
  const responseContract = [
    "You are a downstream model router for PromptPilot.",
    "Return strict JSON only with this shape:",
    '{"selectedTargetId":"string","rankedTargetIds":["string"],"reason":"string"}',
    "Choose only from the supplied candidate target IDs.",
    "Rank up to the requested top targets in best-first order."
  ];
  const callerPolicy = [
    `Routing priority: ${priority}.`,
    `Workload bias: ${workloadBias}.`
  ];
  const guidance = [
    "Code-first means ambiguous prompts should default toward coding-capable or agentic-capable targets.",
    "Explicit email, support, chat, and lightweight writing prompts may prefer cheaper lighter targets.",
    "Do not invent targets. Do not output prose outside JSON."
  ];
  return responseContract.concat(callerPolicy, guidance).join("\n");
}
1528
/**
 * Guesses capability tags for a target from keywords in its provider, model
 * and label text (case-insensitive). Used only when the caller did not supply
 * `capabilities` explicitly.
 *
 * The short keywords "ci" and "mini" must not be embedded in a longer word:
 * an unanchored match would tag "gemini" as a writing model and
 * "concise"/"precise" as debugging models.
 *
 * @param target Target with `provider`, `model` and optional `label`.
 * @returns Inferred capability tags in a stable order; possibly empty.
 */
function inferCapabilities(target) {
  const lower = `${target.provider} ${target.model} ${target.label ?? ""}`.toLowerCase();
  const capabilities = /* @__PURE__ */ new Set();
  // "code" also covers "codex" and "coder".
  if (/code|agent|tool/.test(lower)) {
    capabilities.add("coding");
  }
  if (/agent|tool/.test(lower)) {
    capabilities.add("agentic");
    capabilities.add("tool_use");
  }
  if (/refactor|coder|codex/.test(lower)) {
    capabilities.add("refactor");
  }
  // "ci" only counts as a standalone token (e.g. "ci-bot", "gh ci"), not inside a word.
  if (/debug|fix|(?<![a-z])ci(?![a-z])/.test(lower)) {
    capabilities.add("debugging");
  }
  // "mini" only counts as a standalone token (e.g. "gpt-4o-mini"), not inside "gemini".
  if (/write|email|chat|sonnet|(?<![a-z])mini(?![a-z])/.test(lower)) {
    capabilities.add("writing");
  }
  if (/email/.test(lower)) {
    capabilities.add("email");
  }
  return Array.from(capabilities);
}
1552
/**
 * Renders a target as a single-line `key=value; ...` profile string that is
 * embedded in the router prompt.
 *
 * @param target Target to describe.
 * @returns The profile string; capabilities render as "none" when missing or empty.
 */
function describeDownstreamTarget(target) {
  const fields = [
    ["provider", target.provider],
    ["model", target.model],
    ["label", target.label],
    ["costRank", target.costRank],
    ["latencyRank", target.latencyRank],
    ["capabilities", target.capabilities?.join(",") || "none"]
  ];
  return fields.map(([key, value]) => `${key}=${value}`).join("; ");
}
1562
/**
 * Returns the display name for a target: its `label` when one is set
 * (including an empty string), otherwise `provider:model`.
 *
 * @param target Target to label.
 * @returns The label to show for the target.
 */
function formatTargetLabel(target) {
  const { label } = target;
  if (label !== null && label !== undefined) {
    return label;
  }
  return `${target.provider}:${target.model}`;
}
1331
1565
 
1332
1566
  // src/index.ts
1333
1567
  function createOptimizer(config = {}) {