teleportation-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/hooks/config-loader.mjs +93 -0
- package/.claude/hooks/heartbeat.mjs +331 -0
- package/.claude/hooks/notification.mjs +35 -0
- package/.claude/hooks/permission_request.mjs +307 -0
- package/.claude/hooks/post_tool_use.mjs +137 -0
- package/.claude/hooks/pre_tool_use.mjs +451 -0
- package/.claude/hooks/session-register.mjs +274 -0
- package/.claude/hooks/session_end.mjs +256 -0
- package/.claude/hooks/session_start.mjs +308 -0
- package/.claude/hooks/stop.mjs +277 -0
- package/.claude/hooks/user_prompt_submit.mjs +91 -0
- package/LICENSE +21 -0
- package/README.md +243 -0
- package/lib/auth/api-key.js +110 -0
- package/lib/auth/credentials.js +341 -0
- package/lib/backup/manager.js +461 -0
- package/lib/cli/daemon-commands.js +299 -0
- package/lib/cli/index.js +303 -0
- package/lib/cli/session-commands.js +294 -0
- package/lib/cli/snapshot-commands.js +223 -0
- package/lib/cli/worktree-commands.js +291 -0
- package/lib/config/manager.js +306 -0
- package/lib/daemon/lifecycle.js +336 -0
- package/lib/daemon/pid-manager.js +160 -0
- package/lib/daemon/teleportation-daemon.js +2009 -0
- package/lib/handoff/config.js +102 -0
- package/lib/handoff/example.js +152 -0
- package/lib/handoff/git-handoff.js +351 -0
- package/lib/handoff/handoff.js +277 -0
- package/lib/handoff/index.js +25 -0
- package/lib/handoff/session-state.js +238 -0
- package/lib/install/installer.js +555 -0
- package/lib/machine-coders/claude-code-adapter.js +329 -0
- package/lib/machine-coders/example.js +239 -0
- package/lib/machine-coders/gemini-cli-adapter.js +406 -0
- package/lib/machine-coders/index.js +103 -0
- package/lib/machine-coders/interface.js +168 -0
- package/lib/router/classifier.js +251 -0
- package/lib/router/example.js +92 -0
- package/lib/router/index.js +69 -0
- package/lib/router/mech-llms-client.js +277 -0
- package/lib/router/models.js +188 -0
- package/lib/router/router.js +382 -0
- package/lib/session/cleanup.js +100 -0
- package/lib/session/metadata.js +258 -0
- package/lib/session/mute-checker.js +114 -0
- package/lib/session-registry/manager.js +302 -0
- package/lib/snapshot/manager.js +390 -0
- package/lib/utils/errors.js +166 -0
- package/lib/utils/logger.js +148 -0
- package/lib/utils/retry.js +155 -0
- package/lib/worktree/manager.js +301 -0
- package/package.json +66 -0
- package/teleportation-cli.cjs +2987 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model Registry
|
|
3
|
+
*
|
|
4
|
+
* Static model definitions with cost and capability metadata.
|
|
5
|
+
* This serves as a fallback when the llms.mechdna.net registry is unavailable,
|
|
6
|
+
* and provides the tier classification logic.
|
|
7
|
+
*
|
|
8
|
+
* Pricing is per 1M tokens (as of Dec 2024).
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
// Quality tiers (from llms.mechdna.net)
export const QUALITY_TIERS = {
  FLAGSHIP: 'flagship', // Best quality, highest cost
  HIGH: 'high',         // Very good quality, moderate-high cost
  STANDARD: 'standard', // Good balance of quality and cost
  FAST: 'fast',         // Optimized for speed, lower cost
  BUDGET: 'budget'      // Cheapest, acceptable quality for simple tasks
};

// Map our simple routing tiers (cheap/mid/best) onto quality tiers.
export const TIER_TO_QUALITY = {
  cheap: [QUALITY_TIERS.BUDGET, QUALITY_TIERS.FAST],
  mid: [QUALITY_TIERS.STANDARD, QUALITY_TIERS.HIGH],
  best: [QUALITY_TIERS.FLAGSHIP]
};

// Static fallback model registry, used when the remote registry is
// unreachable. Pricing is per 1M tokens (as of Dec 2024).
export const FALLBACK_MODELS = [
  // --- Fast/budget tier: cheapest options ---
  {
    id: 'gpt-4o-mini',
    provider: 'openai',
    quality: QUALITY_TIERS.FAST,
    pricing: { input: 0.15, output: 0.60 }, // per 1M tokens
    contextWindow: 128000,
    maxOutputTokens: 16384,
    capabilities: ['streaming', 'tools', 'vision'],
    speed: 'very_fast',
    goodFor: ['simple-qa', 'small-edit', 'classification', 'extraction']
  },
  {
    id: 'claude-3-5-haiku-latest',
    provider: 'anthropic',
    quality: QUALITY_TIERS.FAST,
    pricing: { input: 0.80, output: 4.00 },
    contextWindow: 200000,
    maxOutputTokens: 8192,
    capabilities: ['streaming', 'tools', 'vision'],
    speed: 'very_fast',
    goodFor: ['simple-qa', 'small-edit', 'code-review', 'summarization']
  },
  {
    id: 'gemini-2.0-flash',
    provider: 'google',
    quality: QUALITY_TIERS.FAST,
    pricing: { input: 0.10, output: 0.40 },
    contextWindow: 1000000,
    maxOutputTokens: 8192,
    capabilities: ['streaming', 'tools', 'vision', 'grounding', 'multimodal'],
    speed: 'very_fast',
    goodFor: ['simple-qa', 'search-grounded', 'multimodal']
  },

  // --- High tier: good balance of quality and cost ---
  {
    id: 'gpt-4o',
    provider: 'openai',
    quality: QUALITY_TIERS.HIGH,
    pricing: { input: 2.50, output: 10.00 },
    contextWindow: 128000,
    maxOutputTokens: 16384,
    capabilities: ['streaming', 'tools', 'vision'],
    speed: 'fast',
    goodFor: ['refactor', 'multi-file', 'code-generation', 'analysis']
  },
  {
    id: 'claude-sonnet-4-20250514',
    provider: 'anthropic',
    quality: QUALITY_TIERS.HIGH,
    pricing: { input: 3.00, output: 15.00 },
    contextWindow: 200000,
    maxOutputTokens: 8192,
    capabilities: ['streaming', 'tools', 'vision', 'thinking'],
    speed: 'fast',
    goodFor: ['refactor', 'multi-file', 'code-generation', 'debugging']
  },
  {
    id: 'gemini-2.5-pro-preview-06-05',
    provider: 'google',
    quality: QUALITY_TIERS.HIGH,
    pricing: { input: 2.50, output: 15.00 },
    contextWindow: 1000000,
    maxOutputTokens: 65536,
    capabilities: ['streaming', 'tools', 'vision', 'thinking', 'grounding'],
    speed: 'medium',
    goodFor: ['large-context', 'analysis', 'research', 'code-generation']
  },

  // --- Flagship tier: best quality, highest cost ---
  {
    id: 'claude-opus-4-20250514',
    provider: 'anthropic',
    quality: QUALITY_TIERS.FLAGSHIP,
    pricing: { input: 15.00, output: 75.00 },
    contextWindow: 200000,
    maxOutputTokens: 32000,
    capabilities: ['streaming', 'tools', 'vision', 'thinking'],
    speed: 'slow',
    goodFor: ['architecture', 'planning', 'complex-reasoning', 'ambiguous-tasks']
  },
  {
    id: 'o1',
    provider: 'openai',
    quality: QUALITY_TIERS.FLAGSHIP,
    pricing: { input: 15.00, output: 60.00 },
    contextWindow: 200000,
    maxOutputTokens: 100000,
    capabilities: ['reasoning'],
    speed: 'slow',
    goodFor: ['complex-reasoning', 'math', 'planning', 'architecture']
  },
  {
    id: 'o3',
    provider: 'openai',
    quality: QUALITY_TIERS.FLAGSHIP,
    pricing: { input: 10.00, output: 40.00 },
    contextWindow: 200000,
    maxOutputTokens: 100000,
    capabilities: ['reasoning', 'tools'],
    speed: 'slow',
    goodFor: ['complex-reasoning', 'math', 'planning', 'architecture']
  }
];
|
|
134
|
+
|
|
135
|
+
/**
 * List fallback models that match a quality tier, cheapest first.
 * @param {string} quality - One of the QUALITY_TIERS values.
 * @returns {Array<Object>} Matching models sorted by ascending input price.
 */
export function getModelsByQuality(quality) {
  const matching = FALLBACK_MODELS.filter((model) => model.quality === quality);
  // filter() already returned a fresh array, so sorting in place is safe.
  matching.sort((first, second) => first.pricing.input - second.pricing.input);
  return matching;
}
|
|
143
|
+
|
|
144
|
+
/**
 * List fallback models for a simple tier (cheap/mid/best), cheapest first.
 * Unknown tiers fall back to the standard quality level.
 * @param {string} tier - 'cheap', 'mid', or 'best'.
 * @returns {Array<Object>} Matching models sorted by ascending input price.
 */
export function getModelsByTier(tier) {
  const acceptedQualities = TIER_TO_QUALITY[tier] || [QUALITY_TIERS.STANDARD];
  const candidates = FALLBACK_MODELS.filter((model) =>
    acceptedQualities.includes(model.quality)
  );
  return candidates.sort((a, b) => a.pricing.input - b.pricing.input);
}
|
|
153
|
+
|
|
154
|
+
/**
 * Pick the single cheapest fallback model for a tier.
 * @param {string} tier - 'cheap', 'mid', or 'best'.
 * @returns {Object|null} Cheapest model entry, or null when none match.
 */
export function getCheapestForTier(tier) {
  // getModelsByTier() returns models sorted by input price, so the
  // first entry (if any) is the cheapest.
  const [cheapest = null] = getModelsByTier(tier);
  return cheapest;
}
|
|
161
|
+
|
|
162
|
+
/**
 * Tiers in escalation order, from cheapest to most capable.
 * @returns {string[]} ['cheap', 'mid', 'best']
 */
export function getEscalationOrder() {
  const escalationPath = ['cheap', 'mid', 'best'];
  return escalationPath;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Get the next tier to escalate to after the given one.
 * @param {string} currentTier - 'cheap', 'mid', or 'best'.
 * @returns {string|null} The next tier, or null when already at the top
 *   (or when the tier is unknown).
 */
export function getNextTier(currentTier) {
  const escalationPath = ['cheap', 'mid', 'best'];
  const position = escalationPath.indexOf(currentTier);
  const hasNext = position !== -1 && position < escalationPath.length - 1;
  return hasNext ? escalationPath[position + 1] : null;
}
|
|
178
|
+
|
|
179
|
+
// Aggregate default export mirroring the named exports, for consumers
// that prefer `import models from './models.js'`.
export default {
  QUALITY_TIERS,
  TIER_TO_QUALITY,
  FALLBACK_MODELS,
  getModelsByQuality,
  getModelsByTier,
  getCheapestForTier,
  getEscalationOrder,
  getNextTier
};
|
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cost-Aware Model Router
|
|
3
|
+
*
|
|
4
|
+
* Routes coding tasks to the cheapest capable model, with automatic escalation
|
|
5
|
+
* on failure. Uses llms.mechdna.net for model registry and completions.
|
|
6
|
+
*
|
|
7
|
+
* Flow:
|
|
8
|
+
* 1. Classify task → determine tier (cheap/mid/best)
|
|
9
|
+
* 2. Get cheapest model for that tier
|
|
10
|
+
* 3. Execute completion
|
|
11
|
+
* 4. If result looks bad, escalate to next tier
|
|
12
|
+
* 5. Return result with cost info
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { MechLLMsClient } from './mech-llms-client.js';
|
|
16
|
+
import { classifyTask, buildUseCaseString } from './classifier.js';
|
|
17
|
+
import {
|
|
18
|
+
getCheapestForTier,
|
|
19
|
+
getNextTier,
|
|
20
|
+
getModelsByTier,
|
|
21
|
+
TIER_TO_QUALITY
|
|
22
|
+
} from './models.js';
|
|
23
|
+
|
|
24
|
+
// Translate our simple routing tiers into the quality-tier names the
// llms.mechdna.net API understands.
export const TIER_TO_API_QUALITY = {
  cheap: 'fast',     // fast/budget tier
  mid: 'high',       // high/standard tier
  best: 'flagship'   // flagship tier
};
|
|
30
|
+
|
|
31
|
+
/**
 * Cost-Aware Model Router.
 *
 * Classifies a task into a tier (cheap/mid/best), picks the cheapest
 * capable model for that tier via llms.mechdna.net (falling back to the
 * local registry when the API is unavailable), executes the completion,
 * and automatically escalates to the next tier when the call fails or
 * the result looks inadequate.
 */
export class Router {
  /**
   * @param {Object} options
   * @param {string} [options.apiKey] - Mech API key
   * @param {boolean} [options.useRecommendations] - Use /recommend API instead of local classification
   * @param {number} [options.maxEscalations] - Max escalation attempts (default: 2)
   * @param {boolean} [options.verbose] - Enable verbose logging
   */
  constructor(options = {}) {
    this.client = new MechLLMsClient({ apiKey: options.apiKey });
    // Use ?? rather than || so explicitly falsy settings are honored;
    // previously `maxEscalations: 0` silently became the default of 2.
    this.useRecommendations = options.useRecommendations ?? false;
    this.maxEscalations = options.maxEscalations ?? 2;
    this.verbose = options.verbose ?? false;

    // Cache for model registry (refreshed at most once per TTL)
    this._modelCache = null;
    this._modelCacheTime = 0;
    this._modelCacheTTL = 5 * 60 * 1000; // 5 minutes
  }

  /**
   * Log message if verbose mode is enabled.
   */
  _log(...args) {
    if (this.verbose) {
      console.log('[Router]', ...args);
    }
  }

  /**
   * Get model registry from the API, cached for the TTL.
   * @returns {Promise<Array|null>} Models, or null when the API is
   *   unreachable (callers then fall back to the local registry).
   */
  async _getModelRegistry() {
    const now = Date.now();
    if (this._modelCache && (now - this._modelCacheTime) < this._modelCacheTTL) {
      return this._modelCache;
    }

    try {
      const result = await this.client.getModelRegistry({ sortBy: 'cost', sortOrder: 'asc' });
      this._modelCache = result.models || [];
      this._modelCacheTime = now;
      return this._modelCache;
    } catch (err) {
      this._log('Failed to fetch model registry, using fallback:', err.message);
      return null; // Will use fallback models
    }
  }

  /**
   * Get the cheapest model for a tier.
   * Tries the API first, falls back to the local registry.
   * @param {string} tier - 'cheap', 'mid', or 'best'.
   * @returns {Promise<Object>} Model entry.
   * @throws {Error} When neither the API nor the fallback has a model.
   */
  async _getCheapestModel(tier) {
    const quality = TIER_TO_API_QUALITY[tier];

    try {
      // Try API first
      const model = await this.client.getCheapestModel(quality);
      if (model) {
        this._log(`API returned cheapest ${quality} model: ${model.id}`);
        return model;
      }
    } catch (err) {
      this._log('API model lookup failed:', err.message);
    }

    // Fallback to local registry
    const fallback = getCheapestForTier(tier);
    if (fallback) {
      this._log(`Using fallback model: ${fallback.id}`);
      return fallback;
    }

    throw new Error(`No model found for tier: ${tier}`);
  }

  /**
   * Get a model recommendation from the /recommend API.
   * @returns {Promise<Object|null>} Recommended model, or null when the
   *   API fails or returns no recommendations (caller falls back to
   *   local classification).
   */
  async _getRecommendedModel(prompt, context) {
    const useCase = buildUseCaseString(prompt, context);

    try {
      const result = await this.client.getRecommendations({ useCase });
      if (result.recommendations?.length > 0) {
        const rec = result.recommendations[0];
        this._log(`API recommended: ${rec.model.id} (score: ${rec.score}, reason: ${rec.reasoning})`);
        return rec.model;
      }
    } catch (err) {
      this._log('Recommendation API failed:', err.message);
    }

    return null;
  }

  /**
   * Check if a completion result looks bad (should escalate).
   *
   * Escalation triggers: empty content, a suspiciously short answer when
   * a long one was expected, the model declaring inability, or a
   * content-filter stop.
   *
   * @param {Object} result - Completion result
   * @param {Object} options - Original request options
   * @returns {boolean} True if should escalate
   */
  _shouldEscalate(result, options = {}) {
    // No content
    if (!result.choices?.[0]?.message?.content) {
      this._log('Escalating: No content in response');
      return true;
    }

    const content = result.choices[0].message.content;

    // Very short response for a complex task
    if (options.expectLongResponse && content.length < 100) {
      this._log('Escalating: Response too short for expected complexity');
      return true;
    }

    // Model explicitly says it can't do something
    const cantDoPatterns = [
      /i (?:can't|cannot|am unable to)/i,
      /i don't (?:have|know)/i,
      /beyond my (?:capabilities|ability)/i,
      /i'm not (?:able|capable)/i
    ];

    for (const pattern of cantDoPatterns) {
      if (pattern.test(content)) {
        this._log('Escalating: Model indicated inability');
        return true;
      }
    }

    // Content filter or safety stop
    if (result.choices[0].finish_reason === 'content_filter') {
      this._log('Escalating: Content filter triggered');
      return true;
    }

    return false;
  }

  /**
   * Route a request to the appropriate model.
   *
   * @param {Object} options
   * @param {string} options.prompt - User prompt (for classification)
   * @param {Array} options.messages - Chat messages
   * @param {Object} [options.context] - Context for classification
   * @param {number} [options.maxTokens] - Max tokens
   * @param {number} [options.temperature] - Temperature
   * @param {Array} [options.tools] - Tools for function calling
   * @param {string} [options.forceTier] - Force a specific tier
   * @param {string} [options.forceModel] - Force a specific model
   * @returns {Promise<Object>} Result with content, model, cost, usage
   */
  async route(options) {
    const {
      prompt,
      messages,
      context = {},
      maxTokens = 4096,
      temperature,
      tools,
      forceTier,
      forceModel
    } = options;

    // If model is forced, just use it (no classification, no escalation)
    if (forceModel) {
      this._log(`Using forced model: ${forceModel}`);
      return this._executeCompletion(forceModel, messages, { maxTokens, temperature, tools });
    }

    // Determine starting tier
    let tier;
    let model;
    let classification;

    if (forceTier) {
      tier = forceTier;
      this._log(`Using forced tier: ${tier}`);
    } else if (this.useRecommendations) {
      // Use API recommendations
      model = await this._getRecommendedModel(prompt, context);
      if (model) {
        // NOTE: no startingTier is passed here, so any escalation from a
        // recommended model begins at the 'cheap' tier.
        return this._executeWithEscalation(model, messages, options);
      }
      // Fallback to local classification
      classification = classifyTask(prompt, context);
      tier = classification.tier;
    } else {
      // Local classification
      classification = classifyTask(prompt, context);
      tier = classification.tier;
      this._log(`Classified as ${tier}: ${classification.reason} (confidence: ${classification.confidence})`);
    }

    // Get cheapest model for tier
    model = await this._getCheapestModel(tier);

    return this._executeWithEscalation(model, messages, {
      ...options,
      startingTier: tier,
      classification
    });
  }

  /**
   * Execute completion with automatic escalation on failure.
   *
   * Escalates both on thrown errors and on results _shouldEscalate()
   * flags, up to maxEscalations attempts; rethrows the last error when
   * escalation is exhausted.
   */
  async _executeWithEscalation(model, messages, options) {
    const { startingTier, maxTokens, temperature, tools } = options;
    let currentTier = startingTier || 'cheap';
    let currentModel = model;
    let attempts = 0;

    while (attempts <= this.maxEscalations) {
      this._log(`Attempt ${attempts + 1}: Using ${currentModel.id} (${currentModel.provider})`);

      try {
        const result = await this._executeCompletion(
          currentModel.id,
          messages,
          { maxTokens, temperature, tools }
        );

        // Check if we should escalate
        if (this._shouldEscalate(result, options) && attempts < this.maxEscalations) {
          const nextTier = getNextTier(currentTier);
          if (nextTier) {
            this._log(`Escalating from ${currentTier} to ${nextTier}`);
            currentTier = nextTier;
            currentModel = await this._getCheapestModel(nextTier);
            attempts++;
            continue;
          }
        }

        // Return successful result
        return this._formatResult(result, currentModel, attempts, currentTier);

      } catch (err) {
        this._log(`Error with ${currentModel.id}:`, err.message);

        // Try escalating on error
        if (attempts < this.maxEscalations) {
          const nextTier = getNextTier(currentTier);
          if (nextTier) {
            this._log(`Escalating due to error: ${currentTier} → ${nextTier}`);
            currentTier = nextTier;
            currentModel = await this._getCheapestModel(nextTier);
            attempts++;
            continue;
          }
        }

        throw err;
      }
    }

    // Defensive: loop always returns or throws, but keep a clear error
    // in case maxEscalations is misconfigured.
    throw new Error('Max escalation attempts reached');
  }

  /**
   * Execute a single completion against the client.
   */
  async _executeCompletion(modelId, messages, options = {}) {
    const { maxTokens, temperature, tools } = options;

    return this.client.createCompletion({
      model: modelId,
      messages,
      maxTokens,
      temperature,
      tools
    });
  }

  /**
   * Format the result for return.
   * @returns {Object} { content, model, provider, tier, cost,
   *   costBreakdown, usage, escalations, finishReason, raw }
   */
  _formatResult(result, model, escalationCount, tier) {
    const content = result.choices?.[0]?.message?.content || '';
    const cost = result.x_llm_cost?.total || 0;

    return {
      content,
      model: model.id,
      provider: model.provider || result.x_llm_provider,
      tier,
      cost,
      costBreakdown: result.x_llm_cost,
      usage: result.usage,
      escalations: escalationCount,
      finishReason: result.choices?.[0]?.finish_reason,
      raw: result
    };
  }

  /**
   * Simple completion without routing (direct model call).
   * @returns {Promise<Object>} { content, model, cost, costBreakdown, usage, raw }
   */
  async complete(options) {
    const { model, messages, maxTokens, temperature, tools } = options;

    const result = await this.client.createCompletion({
      model,
      messages,
      maxTokens,
      temperature,
      tools
    });

    return {
      content: result.choices?.[0]?.message?.content || '',
      model,
      cost: result.x_llm_cost?.total || 0,
      costBreakdown: result.x_llm_cost,
      usage: result.usage,
      raw: result
    };
  }

  /**
   * Get usage summary from the client.
   */
  async getUsageSummary(filters = {}) {
    return this.client.getUsageSummary(filters);
  }
}
|
|
366
|
+
|
|
367
|
+
/**
 * Factory helper: create a Router instance.
 * @param {Object} [options] - Passed straight through to the Router constructor.
 * @returns {Router}
 */
export function createRouter(options = {}) {
  const router = new Router(options);
  return router;
}
|
|
373
|
+
|
|
374
|
+
/**
 * One-shot convenience wrapper: build a throwaway Router and route a
 * single request through it.
 * @param {string} prompt - User prompt (used for classification).
 * @param {Array} messages - Chat messages.
 * @param {Object} [options] - Router construction and routing options.
 * @returns {Promise<Object>} Routed completion result.
 */
export async function route(prompt, messages, options = {}) {
  return new Router(options).route({ prompt, messages, ...options });
}

export default Router;
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Session cleanup utilities
|
|
4
|
+
* Handles cleanup of session-related caches and resources
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Clean up all session-related caches and resources.
 * Best-effort: a missing or failing mute-checker module is ignored.
 * @param {string} sessionId - Session ID to clean up
 * @returns {Promise<void>}
 */
export async function cleanupSession(sessionId) {
  if (!sessionId) return;

  // 1. Clear mute cache (optional dependency — loaded lazily)
  try {
    const muteChecker = await import('./mute-checker.js');
    muteChecker.clearMuteCache?.(sessionId);
  } catch (e) {
    // Mute checker might not be available, ignore
  }

  // 2. Clear any other session-specific caches
  // (Add more cleanup tasks here as needed)
  // Note: If adding async cleanup tasks, use Promise.allSettled() here
}
|
|
31
|
+
|
|
32
|
+
/**
 * Clean up all sessions (used for global cleanup).
 * Best-effort: a missing or failing mute-checker module is ignored.
 * @returns {Promise<void>}
 */
export async function cleanupAllSessions() {
  try {
    const muteChecker = await import('./mute-checker.js');
    muteChecker.clearAllMuteCache?.();
  } catch (e) {
    // Mute checker might not be available, ignore
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Check if a session has timed out based on last activity.
 * @param {number} lastActivityTimestamp - Last activity timestamp (milliseconds)
 * @param {number} timeoutMs - Timeout in milliseconds (default: 1 hour)
 * @returns {boolean} - True if session has timed out
 */
export function isSessionTimedOut(lastActivityTimestamp, timeoutMs = 3600000) {
  // A missing timestamp (null/undefined, but not 0) means the timeout
  // cannot be determined — treat as not timed out.
  if (lastActivityTimestamp == null) return false;
  return Date.now() - lastActivityTimestamp > timeoutMs;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Get session age in milliseconds.
 * @param {number} lastActivityTimestamp - Last activity timestamp (milliseconds)
 * @returns {number} - Age in milliseconds, or 0 if no timestamp
 */
export function getSessionAge(lastActivityTimestamp) {
  // null/undefined (but not 0) means "no timestamp" — report zero age.
  return lastActivityTimestamp == null ? 0 : Date.now() - lastActivityTimestamp;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Format session age as human-readable string (e.g. "2d 3h", "5m 12s").
 * @param {number} lastActivityTimestamp - Last activity timestamp (milliseconds)
 * @returns {string} - Human-readable age string, 'unknown' when no timestamp
 */
export function formatSessionAge(lastActivityTimestamp) {
  // Missing timestamps (null/undefined) yield zero age, reported as 'unknown'.
  const age = lastActivityTimestamp == null ? 0 : Date.now() - lastActivityTimestamp;
  if (age === 0) return 'unknown';

  const totalSeconds = Math.floor(age / 1000);
  const totalMinutes = Math.floor(totalSeconds / 60);
  const totalHours = Math.floor(totalMinutes / 60);
  const totalDays = Math.floor(totalHours / 24);

  // Report the two most significant units only.
  if (totalDays > 0) return `${totalDays}d ${totalHours % 24}h`;
  if (totalHours > 0) return `${totalHours}h ${totalMinutes % 60}m`;
  if (totalMinutes > 0) return `${totalMinutes}m ${totalSeconds % 60}s`;
  return `${totalSeconds}s`;
}
|
|
100
|
+
|