promptpilot 0.1.7 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +301 -16
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.js +144 -7
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -199,6 +199,7 @@ declare class PromptOptimizer {
|
|
|
199
199
|
private heuristicOptimize;
|
|
200
200
|
private reduceToBudget;
|
|
201
201
|
}
|
|
202
|
+
declare const CLAUDE_TIER_TARGETS: TargetModelCandidate[];
|
|
202
203
|
|
|
203
204
|
declare class InvalidPromptError extends Error {
|
|
204
205
|
constructor(message?: string);
|
|
@@ -328,4 +329,4 @@ declare class SQLiteSessionStore implements SessionStore {
|
|
|
328
329
|
declare function createOptimizer(config?: OptimizerConfig): PromptOptimizer;
|
|
329
330
|
declare function optimizePrompt(input: OptimizePromptInput, config?: OptimizerConfig): Promise<OptimizePromptResult>;
|
|
330
331
|
|
|
331
|
-
export { ContextCompressor, type ContextEntry, ContextManager, ContextStoreError, type ContextSummary, FileSessionStore, InvalidPromptError, type Logger, type ModelRoutingStrategy, OllamaClient, type OllamaClientLike, type OllamaGenerateOptions, type OllamaModelInfo, OllamaUnavailableError, type OptimizationMode, type OptimizePromptInput, type OptimizePromptResult, type OptimizerConfig, PromptOptimizer, type PromptPreset, type ProviderType, type RankedTargetCandidate, type RelevantContextResult, type RoutingDecision, type RoutingPriority, SQLiteSessionStore, type SessionData, type SessionStore, type TargetCapability, type TargetModelCandidate, TokenBudgetExceededError, TokenEstimator, type TokenUsageEstimate, type WorkloadBias, createOptimizer, getDefaultPreferredModels, optimizePrompt, selectOllamaModel };
|
|
332
|
+
export { CLAUDE_TIER_TARGETS, ContextCompressor, type ContextEntry, ContextManager, ContextStoreError, type ContextSummary, FileSessionStore, InvalidPromptError, type Logger, type ModelRoutingStrategy, OllamaClient, type OllamaClientLike, type OllamaGenerateOptions, type OllamaModelInfo, OllamaUnavailableError, type OptimizationMode, type OptimizePromptInput, type OptimizePromptResult, type OptimizerConfig, PromptOptimizer, type PromptPreset, type ProviderType, type RankedTargetCandidate, type RelevantContextResult, type RoutingDecision, type RoutingPriority, SQLiteSessionStore, type SessionData, type SessionStore, type TargetCapability, type TargetModelCandidate, TokenBudgetExceededError, TokenEstimator, type TokenUsageEstimate, type WorkloadBias, createOptimizer, getDefaultPreferredModels, optimizePrompt, selectOllamaModel };
|
package/dist/index.js
CHANGED
|
@@ -1383,6 +1383,18 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
|
|
|
1383
1383
|
};
|
|
1384
1384
|
}
|
|
1385
1385
|
if (!this.client.listModels) {
|
|
1386
|
+
if (isClaudeTiersOnlyTargetSet(availableTargets)) {
|
|
1387
|
+
const selected = selectClaudeTierHeuristic(options.input, options.routingPriority, availableTargets);
|
|
1388
|
+
if (selected) {
|
|
1389
|
+
return {
|
|
1390
|
+
selectedTarget: stripInternalTargetFields(selected),
|
|
1391
|
+
rankedTargets: [{ ...stripInternalTargetFields(selected), rank: 1, reason: "Selected by Claude tier heuristic (no local Qwen router available)." }],
|
|
1392
|
+
routingReason: "Selected by Claude tier heuristic (no local Qwen router available).",
|
|
1393
|
+
routingWarnings: [],
|
|
1394
|
+
routingProvider: "heuristic"
|
|
1395
|
+
};
|
|
1396
|
+
}
|
|
1397
|
+
}
|
|
1386
1398
|
return {
|
|
1387
1399
|
selectedTarget: null,
|
|
1388
1400
|
rankedTargets: [],
|
|
@@ -1407,12 +1419,23 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
|
|
|
1407
1419
|
routingProvider: null
|
|
1408
1420
|
};
|
|
1409
1421
|
}
|
|
1422
|
+
const claudeTiersOnly = isClaudeTiersOnlyTargetSet(availableTargets);
|
|
1423
|
+
const routingCandidates = claudeTiersOnly ? filterClaudeTierCandidates(availableTargets, options.input, options.routingPriority) : availableTargets;
|
|
1424
|
+
if (routingCandidates.length === 1) {
|
|
1425
|
+
return {
|
|
1426
|
+
selectedTarget: stripInternalTargetFields(routingCandidates[0]),
|
|
1427
|
+
rankedTargets: [{ ...stripInternalTargetFields(routingCandidates[0]), rank: 1, reason: "Selected by Claude tier pre-filter based on prompt signals." }],
|
|
1428
|
+
routingReason: "Selected by Claude tier pre-filter based on prompt signals.",
|
|
1429
|
+
routingWarnings: [],
|
|
1430
|
+
routingProvider: "heuristic"
|
|
1431
|
+
};
|
|
1432
|
+
}
|
|
1410
1433
|
const response = await this.client.generateJson({
|
|
1411
1434
|
model: routerModel,
|
|
1412
1435
|
timeoutMs: options.input.timeoutMs ?? this.config.timeoutMs,
|
|
1413
1436
|
temperature: 0,
|
|
1414
1437
|
format: "json",
|
|
1415
|
-
systemPrompt: buildDownstreamRoutingSystemPrompt(options.routingPriority, options.workloadBias),
|
|
1438
|
+
systemPrompt: buildDownstreamRoutingSystemPrompt(options.routingPriority, options.workloadBias, claudeTiersOnly),
|
|
1416
1439
|
prompt: JSON.stringify(
|
|
1417
1440
|
{
|
|
1418
1441
|
objective: "Rank the caller-supplied downstream targets for this prompt and choose the best top target.",
|
|
@@ -1424,7 +1447,7 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
|
|
|
1424
1447
|
targetHints: options.input.targetHints ?? [],
|
|
1425
1448
|
workloadBias: options.workloadBias,
|
|
1426
1449
|
routingPriority: options.routingPriority,
|
|
1427
|
-
candidateTargets:
|
|
1450
|
+
candidateTargets: routingCandidates.map((target) => ({
|
|
1428
1451
|
id: target.id,
|
|
1429
1452
|
provider: target.provider,
|
|
1430
1453
|
model: target.model,
|
|
@@ -1443,7 +1466,7 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
|
|
|
1443
1466
|
new Set((response.rankedTargetIds ?? []).map((value) => value.trim()).filter(Boolean))
|
|
1444
1467
|
).slice(0, Math.max(1, options.routingTopK));
|
|
1445
1468
|
const rankedTargets = rankedTargetIds.map((id, index) => {
|
|
1446
|
-
const target =
|
|
1469
|
+
const target = routingCandidates.find((candidate) => candidate.id === id);
|
|
1447
1470
|
if (!target) {
|
|
1448
1471
|
return null;
|
|
1449
1472
|
}
|
|
@@ -1454,7 +1477,7 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
|
|
|
1454
1477
|
};
|
|
1455
1478
|
}).filter((value) => value !== null);
|
|
1456
1479
|
const selectedTargetId = response.selectedTargetId?.trim();
|
|
1457
|
-
const selectedTargetCandidate = (selectedTargetId &&
|
|
1480
|
+
const selectedTargetCandidate = (selectedTargetId && routingCandidates.find((candidate) => candidate.id === selectedTargetId)) ?? (rankedTargets[0] ? routingCandidates.find(
|
|
1458
1481
|
(candidate) => candidate.provider === rankedTargets[0].provider && candidate.model === rankedTargets[0].model && candidate.label === rankedTargets[0].label
|
|
1459
1482
|
) ?? null : null);
|
|
1460
1483
|
if (!selectedTargetCandidate || rankedTargets.length === 0) {
|
|
@@ -1476,6 +1499,18 @@ Mode: Ultra compression. Minimize tokens aggressively.` : getOptimizationSystemP
|
|
|
1476
1499
|
routingProvider: routerModel
|
|
1477
1500
|
};
|
|
1478
1501
|
} catch {
|
|
1502
|
+
if (isClaudeTiersOnlyTargetSet(availableTargets)) {
|
|
1503
|
+
const selected = selectClaudeTierHeuristic(options.input, options.routingPriority, availableTargets);
|
|
1504
|
+
if (selected) {
|
|
1505
|
+
return {
|
|
1506
|
+
selectedTarget: stripInternalTargetFields(selected),
|
|
1507
|
+
rankedTargets: [{ ...stripInternalTargetFields(selected), rank: 1, reason: "Selected by Claude tier heuristic (Qwen routing failed)." }],
|
|
1508
|
+
routingReason: "Selected by Claude tier heuristic (Qwen routing failed).",
|
|
1509
|
+
routingWarnings: ["Qwen downstream routing failed; fell back to Claude tier heuristic."],
|
|
1510
|
+
routingProvider: "heuristic"
|
|
1511
|
+
};
|
|
1512
|
+
}
|
|
1513
|
+
}
|
|
1479
1514
|
return {
|
|
1480
1515
|
selectedTarget: null,
|
|
1481
1516
|
rankedTargets: [],
|
|
@@ -1624,8 +1659,8 @@ function stripInternalTargetFields(target) {
|
|
|
1624
1659
|
latencyRank: target.latencyRank
|
|
1625
1660
|
};
|
|
1626
1661
|
}
|
|
1627
|
-
function buildDownstreamRoutingSystemPrompt(priority, workloadBias) {
|
|
1628
|
-
|
|
1662
|
+
function buildDownstreamRoutingSystemPrompt(priority, workloadBias, claudeTiersOnly = false) {
|
|
1663
|
+
const lines = [
|
|
1629
1664
|
"You are a downstream model router for PromptPilot.",
|
|
1630
1665
|
"Return strict JSON only with this shape:",
|
|
1631
1666
|
'{"selectedTargetId":"string","rankedTargetIds":["string"],"reason":"string"}',
|
|
@@ -1636,7 +1671,108 @@ function buildDownstreamRoutingSystemPrompt(priority, workloadBias) {
|
|
|
1636
1671
|
"Code-first means ambiguous prompts should default toward coding-capable or agentic-capable targets.",
|
|
1637
1672
|
"Explicit email, support, chat, and lightweight writing prompts may prefer cheaper lighter targets.",
|
|
1638
1673
|
"Do not invent targets. Do not output prose outside JSON."
|
|
1639
|
-
]
|
|
1674
|
+
];
|
|
1675
|
+
if (claudeTiersOnly) {
|
|
1676
|
+
lines.push(
|
|
1677
|
+
"You are choosing between Claude model tiers (Haiku, Sonnet, Opus).",
|
|
1678
|
+
"Haiku: fastest and cheapest. ONLY suitable for email, chat, support, summarization, and trivial one-sentence rewrites. Do NOT use Haiku for any coding, debugging, refactoring, or technical tasks.",
|
|
1679
|
+
"Sonnet: balanced cost and capability. The DEFAULT for all coding, debugging, refactoring, writing, and general-purpose tasks. If the prompt mentions code, a file, a module, a bug, or any technical work, choose Sonnet at minimum.",
|
|
1680
|
+
"Opus: most capable and most expensive. Use for complex architecture decisions, multi-constraint agentic planning, system design, long-horizon reasoning, or when the prompt explicitly requires the strongest model.",
|
|
1681
|
+
"When routing priority is cheapest_adequate: Haiku for non-technical lightweight tasks only, Sonnet for anything involving code or technical content, Opus only when clearly necessary.",
|
|
1682
|
+
"When routing priority is best_quality: Opus for all code and reasoning tasks, Sonnet for writing and non-technical tasks.",
|
|
1683
|
+
"When routing priority is fastest_adequate: Haiku only for lightweight non-technical tasks, Sonnet otherwise.",
|
|
1684
|
+
"IMPORTANT: refactor, debug, fix, auth, module, CI, test, and TypeScript are all coding signals \u2014 always choose Sonnet or Opus for these, never Haiku."
|
|
1685
|
+
);
|
|
1686
|
+
}
|
|
1687
|
+
return lines.join("\n");
|
|
1688
|
+
}
|
|
1689
|
+
/**
 * Ready-made downstream target list covering three Anthropic Claude entries
 * (Haiku, Sonnet, Opus). Each entry carries the provider, model id, a
 * "provider:model" label, a capability tag list, and cost/latency ranks.
 * Ranks run 1 (Haiku) to 3 (Opus); presumably lower means cheaper/faster,
 * which matches the router prompt describing Haiku as "fastest and cheapest"
 * -- confirm against the ranking code, which is not visible here.
 * The array is exported from the package entry point.
 */
var CLAUDE_TIER_TARGETS = [
|
|
1690
|
+
{
|
|
1691
|
+
provider: "anthropic",
|
|
1692
|
+
model: "claude-haiku-4-5",
|
|
1693
|
+
label: "anthropic:claude-haiku-4-5",
|
|
1694
|
+
capabilities: ["writing", "email", "support", "chat", "summarization"],
|
|
1695
|
+
costRank: 1,
|
|
1696
|
+
latencyRank: 1
|
|
1697
|
+
},
|
|
1698
|
+
{
|
|
1699
|
+
provider: "anthropic",
|
|
1700
|
+
model: "claude-sonnet-4-6",
|
|
1701
|
+
label: "anthropic:claude-sonnet-4-6",
|
|
1702
|
+
capabilities: ["coding", "writing", "agentic", "tool_use", "refactor", "debugging"],
|
|
1703
|
+
costRank: 2,
|
|
1704
|
+
latencyRank: 2
|
|
1705
|
+
},
|
|
1706
|
+
{
|
|
1707
|
+
provider: "anthropic",
|
|
1708
|
+
model: "claude-opus-4-6",
|
|
1709
|
+
label: "anthropic:claude-opus-4-6",
|
|
1710
|
+
capabilities: ["coding", "agentic", "tool_use", "refactor", "debugging", "architecture", "writing"],
|
|
1711
|
+
costRank: 3,
|
|
1712
|
+
latencyRank: 3
|
|
1713
|
+
}
|
|
1714
|
+
];
|
|
1715
|
+
/**
 * Report whether the caller-supplied targets consist solely of Claude tiers.
 *
 * True only when there are at least two targets and every one of them has
 * provider "anthropic" and a model name containing "haiku", "sonnet" or
 * "opus" (case-insensitive). A single target, or any target from another
 * provider or with another model name, yields false.
 *
 * @param targets Array of target candidates exposing `provider` and `model`.
 * @returns {boolean} Whether the Claude-tier routing path applies.
 */
function isClaudeTiersOnlyTargetSet(targets) {
|
|
1716
|
+
return targets.length >= 2 && targets.every(
|
|
1717
|
+
(t) => t.provider === "anthropic" && /haiku|sonnet|opus/i.test(t.model)
|
|
1718
|
+
);
|
|
1719
|
+
}
|
|
1720
|
+
/**
 * Decide whether a routing request looks like coding / technical work.
 *
 * A request counts as a code signal when any of the following holds:
 *  - `input.task` or `input.preset` equals "code" (case-insensitive);
 *  - `input.targetHints` contains one of the coding-oriented capability tags;
 *  - `input.prompt` mentions one of the technical keywords as a whole word.
 *
 * Keywords are matched together with their common inflections ("tests",
 * "debugging", "refactoring", "endpoints", ...). Because the pattern is
 * anchored with word boundaries, matching only the bare stem would miss
 * those everyday forms.
 *
 * @param input Request object; reads `task`, `preset`, `targetHints`, `prompt`.
 * @returns {boolean} True when the request should be treated as technical.
 */
function isCodeSignal(input) {
  const task = (input.task ?? "").toLowerCase();
  const preset = (input.preset ?? "").toLowerCase();
  if (task === "code" || preset === "code") {
    return true;
  }

  const codeHints = ["coding", "agentic", "tool_use", "refactor", "debugging", "architecture"];
  const hints = input.targetHints ?? [];
  if (hints.some((h) => codeHints.includes(h))) {
    return true;
  }

  // Word-boundary anchored so short stems such as "ci" or "api" do not fire
  // inside unrelated words ("specific", "capital").
  const codeKeywords = /\b(refactor(?:s|ed|ing)?|debug(?:s|ged|ging)?|fix(?:es|ed|ing)?|auth|modules?|ci|test(?:s|ed|ing)?|typescript|javascript|functions?|class(?:es)?|apis?|endpoints?|builds?|deploy(?:s|ed|ing|ments?)?|lint(?:s|ed|ing)?|migrations?)\b/i;
  // A missing prompt is treated as empty text rather than being coerced to
  // the string "undefined" by RegExp.prototype.test.
  return codeKeywords.test(input.prompt ?? "");
}
|
|
1726
|
+
/**
 * Decide whether a routing request calls for architecture-level reasoning.
 *
 * True when `input.targetHints` contains "architecture", or when the prompt
 * mentions an architecture / system-design / long-horizon planning keyword
 * as a whole word. Plural and derived forms ("microservices",
 * "architectural", "monolithic", "agentic planning") are accepted; with a
 * stem-only pattern the trailing word boundary rejected all of them.
 *
 * @param input Request object; reads `targetHints` and `prompt`.
 * @returns {boolean} True when the request carries an architecture signal.
 */
function isArchitectureSignal(input) {
  const hints = input.targetHints ?? [];
  if (hints.includes("architecture")) {
    return true;
  }

  const architectureKeywords = /\b(architect(?:s|ed|ing|ure|ures|ural)?|design systems?|migration plans?|multi.?step|long.?horizon|agentic.*plan(?:s|ning)?|system designs?|microservices?|monoliths?|monolithic)\b/i;
  // A missing prompt is treated as empty text.
  return architectureKeywords.test(input.prompt ?? "");
}
|
|
1730
|
+
/**
 * Narrow a Claude-tier target list before it is handed to the router model.
 *
 * Rules per routing priority:
 *  - "best_quality": keep Opus and Sonnet.
 *  - "cheapest_adequate": architecture prompts keep Opus and Sonnet; code
 *    prompts collapse to the first Sonnet target (or, without a Sonnet, to
 *    every non-Haiku target); everything else keeps Haiku and Sonnet.
 *  - "fastest_adequate": drop Opus.
 *  - any other priority: no narrowing.
 *
 * Whenever a rule would remove every candidate, the original `targets` array
 * is returned instead, so the result is never emptier than the input. This
 * includes the code-prompt branch, which previously returned an empty list
 * for an all-Haiku target set.
 *
 * @param targets Candidate targets; each exposes a `model` string.
 * @param input Request object forwarded to the signal detectors.
 * @param priority Routing priority string.
 * @returns Either a narrowed array or the original `targets` array.
 */
function filterClaudeTierCandidates(targets, input, priority) {
  // Apply a predicate but never hand back an empty candidate list.
  const narrowTo = (predicate) => {
    const kept = targets.filter(predicate);
    return kept.length > 0 ? kept : targets;
  };
  const isUpperTier = (t) => /opus|sonnet/i.test(t.model);

  switch (priority) {
    case "best_quality":
      return narrowTo(isUpperTier);
    case "cheapest_adequate": {
      // Architecture is checked first so such prompts can still reach Opus.
      if (isArchitectureSignal(input)) {
        return narrowTo(isUpperTier);
      }
      if (isCodeSignal(input)) {
        const sonnet = targets.find((t) => /sonnet/i.test(t.model));
        return sonnet ? [sonnet] : narrowTo((t) => !/haiku/i.test(t.model));
      }
      return narrowTo((t) => /haiku|sonnet/i.test(t.model));
    }
    case "fastest_adequate":
      return narrowTo((t) => !/opus/i.test(t.model));
    default:
      return targets;
  }
}
|
|
1753
|
+
/**
 * Pick a single Claude tier without consulting a router model.
 *
 * The first target whose model name contains "haiku", "sonnet" or "opus"
 * represents each tier. The request is classified from `task`, `preset`,
 * `targetHints` and keyword matches in `prompt`, then mapped to a tier:
 *  - "best_quality": Opus, then Sonnet, then Haiku.
 *  - architecture-level prompts: Opus first.
 *  - code prompts: Sonnet first.
 *  - "fastest_adequate" otherwise: Haiku first.
 *  - lightweight tasks (email/chat/support/summarization): Haiku first.
 *  - anything else: Sonnet first.
 *
 * Every preference order falls through to the remaining tiers, so a target
 * is returned whenever at least one tier is present. Previously several
 * branches stopped after two tiers and returned null even though a usable
 * target existed.
 *
 * Keywords are matched with common inflections ("tests", "microservices",
 * "debugging"), and the lists also cover build/deploy/lint/migration and
 * system design/microservice/monolith. Keep them in step with the keyword
 * lists used by isCodeSignal and isArchitectureSignal.
 *
 * @param input Request object; reads `task`, `preset`, `targetHints`, `prompt`.
 * @param priority Routing priority string.
 * @param targets Candidate targets; each exposes a `model` string.
 * @returns The chosen target, or null when no target matches any tier name.
 */
function selectClaudeTierHeuristic(input, priority, targets) {
  const firstTier = (pattern) => targets.find((t) => pattern.test(t.model)) ?? null;
  const haiku = firstTier(/haiku/i);
  const sonnet = firstTier(/sonnet/i);
  const opus = firstTier(/opus/i);

  if (priority === "best_quality") {
    return opus ?? sonnet ?? haiku;
  }

  const task = (input.task ?? "").toLowerCase();
  const preset = (input.preset ?? "").toLowerCase();
  const hints = input.targetHints ?? [];
  const prompt = input.prompt ?? "";

  const architectureKeywords = /\b(architect(?:s|ed|ing|ure|ures|ural)?|design systems?|migration plans?|multi.?step|complex.*refactor(?:s|ed|ing)?|long.?horizon|agentic.*plan(?:s|ning)?|system designs?|microservices?|monoliths?|monolithic)\b/i;
  const needsOpus = hints.includes("architecture") || architectureKeywords.test(prompt);
  // NOTE(review): under "fastest_adequate" this can still select Opus, while
  // filterClaudeTierCandidates removes Opus for that priority. Confirm which
  // of the two behaviours is intended; it is left unchanged here.
  if (needsOpus) {
    return opus ?? sonnet ?? haiku;
  }

  const codeHints = ["coding", "agentic", "tool_use", "refactor", "debugging", "architecture"];
  const codeKeywords = /\b(refactor(?:s|ed|ing)?|debug(?:s|ged|ging)?|fix(?:es|ed|ing)?|auth|modules?|ci|test(?:s|ed|ing)?|typescript|javascript|functions?|class(?:es)?|apis?|endpoints?|builds?|deploy(?:s|ed|ing|ments?)?|lint(?:s|ed|ing)?|migrations?)\b/i;
  const isCodeTask = task === "code" || preset === "code" || hints.some((h) => codeHints.includes(h)) || codeKeywords.test(prompt);
  if (isCodeTask) {
    return sonnet ?? opus ?? haiku;
  }

  // With speed as the priority, any non-technical prompt goes to the
  // lightest tier, whether or not it is explicitly lightweight.
  if (priority === "fastest_adequate") {
    return haiku ?? sonnet ?? opus;
  }

  const lightweightKinds = ["email", "chat", "support", "summarization"];
  const isLightweight = lightweightKinds.includes(task) || lightweightKinds.includes(preset) || hints.some((h) => lightweightKinds.includes(h));
  if (isLightweight) {
    return haiku ?? sonnet ?? opus;
  }

  return sonnet ?? haiku ?? opus;
}
|
|
1641
1777
|
function inferCapabilities(target) {
|
|
1642
1778
|
const lower = `${target.provider} ${target.model} ${target.label ?? ""}`.toLowerCase();
|
|
@@ -1789,6 +1925,7 @@ async function optimizePrompt(input, config = {}) {
|
|
|
1789
1925
|
return optimizer.optimize(input);
|
|
1790
1926
|
}
|
|
1791
1927
|
export {
|
|
1928
|
+
CLAUDE_TIER_TARGETS,
|
|
1792
1929
|
ContextCompressor,
|
|
1793
1930
|
ContextManager,
|
|
1794
1931
|
ContextStoreError,
|