@theihtisham/budget-llm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/.env.example +21 -0
  2. package/LICENSE +21 -0
  3. package/README.md +293 -0
  4. package/dist/config.d.ts +77 -0
  5. package/dist/config.d.ts.map +1 -0
  6. package/dist/config.js +246 -0
  7. package/dist/config.js.map +1 -0
  8. package/dist/database.d.ts +24 -0
  9. package/dist/database.d.ts.map +1 -0
  10. package/dist/database.js +414 -0
  11. package/dist/database.js.map +1 -0
  12. package/dist/providers.d.ts +20 -0
  13. package/dist/providers.d.ts.map +1 -0
  14. package/dist/providers.js +208 -0
  15. package/dist/providers.js.map +1 -0
  16. package/dist/proxy.d.ts +7 -0
  17. package/dist/proxy.d.ts.map +1 -0
  18. package/dist/proxy.js +181 -0
  19. package/dist/proxy.js.map +1 -0
  20. package/dist/rate-limiter.d.ts +8 -0
  21. package/dist/rate-limiter.d.ts.map +1 -0
  22. package/dist/rate-limiter.js +72 -0
  23. package/dist/rate-limiter.js.map +1 -0
  24. package/dist/router.d.ts +33 -0
  25. package/dist/router.d.ts.map +1 -0
  26. package/dist/router.js +186 -0
  27. package/dist/router.js.map +1 -0
  28. package/dist/server.d.ts +3 -0
  29. package/dist/server.d.ts.map +1 -0
  30. package/dist/server.js +705 -0
  31. package/dist/server.js.map +1 -0
  32. package/dist/task-classifier.d.ts +4 -0
  33. package/dist/task-classifier.d.ts.map +1 -0
  34. package/dist/task-classifier.js +123 -0
  35. package/dist/task-classifier.js.map +1 -0
  36. package/dist/types.d.ts +205 -0
  37. package/dist/types.d.ts.map +1 -0
  38. package/dist/types.js +46 -0
  39. package/dist/types.js.map +1 -0
  40. package/dist/utils/encryption.d.ts +4 -0
  41. package/dist/utils/encryption.d.ts.map +1 -0
  42. package/dist/utils/encryption.js +40 -0
  43. package/dist/utils/encryption.js.map +1 -0
  44. package/package.json +63 -0
  45. package/src/config.ts +254 -0
  46. package/src/database.ts +496 -0
  47. package/src/providers.ts +315 -0
  48. package/src/proxy.ts +226 -0
  49. package/src/rate-limiter.ts +81 -0
  50. package/src/router.ts +228 -0
  51. package/src/server.ts +754 -0
  52. package/src/task-classifier.ts +134 -0
  53. package/src/types/sql.js.d.ts +27 -0
  54. package/src/types.ts +258 -0
  55. package/src/utils/encryption.ts +36 -0
  56. package/tests/config.test.ts +85 -0
  57. package/tests/database.test.ts +194 -0
  58. package/tests/encryption.test.ts +57 -0
  59. package/tests/rate-limiter.test.ts +83 -0
  60. package/tests/router.test.ts +182 -0
  61. package/tests/server.test.ts +253 -0
  62. package/tests/setup.ts +15 -0
  63. package/tests/task-classifier.test.ts +117 -0
  64. package/tsconfig.json +25 -0
  65. package/vitest.config.ts +15 -0
@@ -0,0 +1,134 @@
1
+ import type { TaskType, ChatMessage } from './types';
2
+
3
+ // Keyword patterns for each task type, ordered by specificity
4
/** Per-role score multipliers; roles left out fall back to the classifier's default weight. */
type RoleWeights = { [Role in ChatMessage['role']]?: number };

/** One row of the keyword table used to classify a conversation. */
interface TaskPattern {
  /** Task type reported when this row achieves the highest score. */
  type: TaskType;
  /** Lower-case substrings searched for in the lower-cased message text. */
  keywords: string[];
  /** Multiplier applied to per-message keyword hits, keyed by message role. */
  roleWeights: RoleWeights;
}
9
+
10
/**
 * Builds one keyword-table row. Patterns that do not pass explicit weights
 * get a fresh `{ user: 1.0 }` object, so rows never share a weights instance.
 */
function definePattern(
  type: TaskType,
  keywords: string[],
  roleWeights: TaskPattern['roleWeights'] = { user: 1.0 },
): TaskPattern {
  return { type, keywords, roleWeights };
}

/**
 * Keyword table consulted by the classifier. Row order matters: when two
 * rows reach the same score, the one listed first wins.
 */
const TASK_PATTERNS: TaskPattern[] = [
  definePattern(
    'code',
    [
      'function', 'class', 'method', 'variable', 'import', 'export',
      'return', 'interface', 'type ', 'const ', 'let ', 'var ',
      'async', 'await', 'promise', 'callback', 'error handling',
      'algorithm', 'data structure', 'api endpoint', 'sql query',
      'database', 'debug', 'fix bug', 'refactor', 'code review',
      'typescript', 'javascript', 'python', 'rust', 'golang',
      'implementation', 'compile', 'deploy', 'unit test',
      'regex', 'html', 'css', 'component', 'render',
      'write a function', 'write a class', 'create a module',
      'implement', 'optimize', 'performance',
    ],
    // Only the code row gives system messages extra weight.
    { system: 1.5, user: 1.0 },
  ),
  definePattern('math', [
    'calculate', 'equation', 'formula', 'derivative', 'integral',
    'polynomial', 'theorem', 'proof', 'linear algebra', 'calculus',
    'probability', 'statistics', 'matrix', 'vector', 'solve for x',
    'mathematical', 'compute', 'arithmetic', 'geometric',
  ]),
  definePattern('reasoning', [
    'analyze', 'evaluate', 'compare', 'contrast', 'pros and cons',
    'should i', 'which is better', 'explain why', 'logical',
    'argument', 'evidence', 'implication', 'consequence',
    'what would happen', 'predict', 'deduce', 'infer',
    'step by step reasoning', 'think through',
  ]),
  definePattern('creative', [
    'write a story', 'poem', 'creative', 'imagine', 'fictional',
    'narrative', 'dialogue', 'character', 'plot', 'brainstorm',
    'idea', 'invent', 'design a', 'slogan', 'tagline',
    'marketing copy', 'blog post', 'article about',
    'song lyrics', 'joke', 'creative writing',
  ]),
  definePattern('translation', [
    'translate', 'translation', 'in french', 'in spanish',
    'in german', 'in japanese', 'in chinese', 'in korean',
    'convert to', 'say this in',
  ]),
  definePattern('summarization', [
    'summarize', 'summary', 'tldr', 'brief overview',
    'key points', 'main ideas', 'condense', 'shorten',
    'bullet points', 'executive summary',
  ]),
  definePattern('analysis', [
    'analyze', 'analysis', 'break down', 'examine',
    'data analysis', 'trends', 'insights', 'report',
    'metrics', 'dashboard', 'visualization',
  ]),
  definePattern('chat', [
    'hello', 'hi ', 'hey', 'how are you', 'thanks', 'thank you',
    'help me', 'can you', 'what is', 'who is', 'when did',
    'where is', 'tell me about',
  ]),
];
96
+
97
/**
 * Picks the task type whose keyword list best matches a conversation.
 *
 * Scoring, for each row of `TASK_PATTERNS`:
 *  - +1 for every keyword found anywhere in the conversation text (all
 *    message contents, lower-cased and joined with a single space);
 *  - +0.5 x role weight for every (message, keyword) pair where that message
 *    contains the keyword. Roles missing from the row's `roleWeights` use 0.5.
 *
 * Matching is plain substring search on lower-cased text. The row with the
 * strictly highest score wins, so ties go to the row listed first.
 *
 * @param messages Conversation to classify; may be empty.
 * @returns The best-matching task type, or `'chat'` when nothing matches.
 */
export function classifyTask(messages: ChatMessage[]): TaskType {
  // Lower-case each message exactly once instead of once per keyword.
  // Content that is not a string at runtime (e.g. `null` in an unvalidated
  // payload) is treated as empty text rather than raising a TypeError.
  const normalized = messages.map((msg) => ({
    role: msg.role,
    content: typeof msg.content === 'string' ? msg.content.toLowerCase() : '',
  }));
  const text = normalized.map((msg) => msg.content).join(' ');

  let bestType: TaskType = 'chat';
  let bestScore = 0;

  for (const pattern of TASK_PATTERNS) {
    let score = 0;

    // Conversation-wide hits.
    for (const keyword of pattern.keywords) {
      if (text.includes(keyword)) {
        score += 1;
      }
    }

    // Per-message hits, weighted by the role of the message.
    for (const msg of normalized) {
      const weight = pattern.roleWeights[msg.role] ?? 0.5;
      for (const keyword of pattern.keywords) {
        if (msg.content.includes(keyword)) {
          score += weight * 0.5;
        }
      }
    }

    // Strict comparison keeps the earliest row on a tie.
    if (score > bestScore) {
      bestScore = score;
      bestType = pattern.type;
    }
  }

  return bestType;
}
129
+
130
/**
 * Rough token estimate for a conversation.
 *
 * Counts the characters of every message's content and role name, then
 * assumes roughly four characters per token, rounding up.
 *
 * @param messages Conversation to measure.
 * @returns Estimated token count (0 for an empty conversation).
 */
export function estimateTokens(messages: ChatMessage[]): number {
  const charsPerToken = 4;
  let charCount = 0;
  for (const { content, role } of messages) {
    charCount += content.length + role.length;
  }
  return Math.ceil(charCount / charsPerToken);
}
@@ -0,0 +1,27 @@
1
/**
 * Minimal ambient typings for the parts of `sql.js` declared here.
 * Only `Database`, `Statement` and the default `initSqlJs` export are exposed.
 */
declare module 'sql.js' {
  /** Object resolved by `initSqlJs`; exposes the `Database` constructor. */
  interface SqlJsStatic {
    /**
     * Creates a database, optionally from the bytes of an existing database
     * file. The bytes may be any array-like of numbers (e.g. `Uint8Array`) or
     * a Node `Buffer`; previously the union was grouped inside `ArrayLike`,
     * which also admitted arrays *of* Buffers.
     */
    Database: new (data?: ArrayLike<number> | Buffer | null) => Database;
  }

  interface Database {
    /** Executes a statement with optional positional parameters; returns the database. */
    run(sql: string, params?: (string | number | null | undefined)[]): Database;
    /** Executes SQL and returns one `{ columns, values }` result set per statement. */
    exec(sql: string, params?: (string | number | null | undefined)[]): Array<{ columns: string[]; values: unknown[][] }>;
    prepare(sql: string): Statement;
    /** Serializes the database to a byte array. */
    export(): Uint8Array;
    close(): void;
    getRowsModified(): number;
  }

  interface Statement {
    bind(params?: (string | number | null | undefined)[]): boolean;
    step(): boolean;
    getAsObject(): Record<string, unknown>;
    // NOTE(review): upstream sql.js documents `get(params?)` as returning the
    // whole current row as an array of values rather than a single column
    // selected by index. Signature left unchanged here to keep callers
    // compiling; confirm against call sites before relying on it.
    get(column: number): unknown;
    getColumnNames(): string[];
    free(): boolean;
    reset(): boolean;
  }

  export default function initSqlJs(config?: { locateFile?: (file: string) => string }): Promise<SqlJsStatic>;
  export { Database, Statement };
}
package/src/types.ts ADDED
@@ -0,0 +1,258 @@
1
+ // ---- Provider & Model Types ----
2
+
3
/** Identifiers of the upstream LLM providers known to the proxy. */
export type ProviderId = 'openai' | 'anthropic' | 'google' | 'deepseek';

/** Connection and fallback settings for a single provider. */
export interface ProviderConfig {
  id: ProviderId;
  name: string;
  baseUrl: string;
  apiKey: string;
  enabled: boolean;
  /** Fallback ordering: a lower number means a higher priority. */
  priority: number;
  timeoutMs: number;
}

/** Catalog entry describing one model's pricing, limits and scores. */
export interface ModelInfo {
  id: string;
  provider: ProviderId;
  displayName: string;
  /** USD per 1M input tokens. */
  inputPricePer1M: number;
  /** USD per 1M output tokens. */
  outputPricePer1M: number;
  contextWindow: number;
  maxOutputTokens: number;
  capabilities: ModelCapability[];
  /** Overall quality, 1-10. */
  qualityScore: number;
  /** Latency speed, 1-10. */
  speedScore: number;
  /** Cost efficiency, 1-10 (10 = cheapest). */
  costScore: number;
}

/** Kinds of work a model can be tagged as suitable for. */
export type ModelCapability =
  | 'code'
  | 'creative'
  | 'reasoning'
  | 'chat'
  | 'summarization'
  | 'translation'
  | 'analysis'
  | 'math';

/** Every model capability, plus `'auto'`. */
export type TaskType = ModelCapability | 'auto';
49
+
50
+ // ---- Request/Response Types ----
51
+
52
/** Roles a chat message may carry. */
type ChatRole = 'system' | 'user' | 'assistant';

/** A single message of a conversation. */
export interface ChatMessage {
  role: ChatRole;
  content: string;
}

/** Body of an incoming chat-completion request, including BudgetLLM extensions. */
export interface ProxyRequest {
  model?: string;
  messages: ChatMessage[];
  temperature?: number;
  max_tokens?: number;
  top_p?: number;
  stream?: boolean;
  /** BudgetLLM extension: force a specific task type hint */
  task_type?: TaskType;
  /** BudgetLLM extension: request ID for tracking */
  request_id?: string;
  /** BudgetLLM extension: per-request budget cap in USD */
  budget_cap?: number;
}

/** One entry of `ProxyResponse.choices`. */
interface ProxyChoice {
  index: number;
  message: ChatMessage;
  finish_reason: string;
}

/** Token counts reported in `ProxyResponse.usage`. */
interface TokenUsage {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
}

/** Chat-completion response returned by the proxy, with an added cost breakdown. */
export interface ProxyResponse {
  id: string;
  object: 'chat.completion';
  created: number;
  model: string;
  choices: ProxyChoice[];
  usage: TokenUsage;
  /** BudgetLLM extension: cost breakdown */
  cost: CostBreakdown;
}

/** Cost of one request, split by direction, with the model that served it. */
export interface CostBreakdown {
  inputCost: number;
  outputCost: number;
  totalCost: number;
  currency: string;
  model: string;
  provider: ProviderId;
  /** How much was saved vs sending the request to GPT-4. */
  savingsVsGpt4: number;
}
100
+
101
+ // ---- Budget Types ----
102
+
103
/** Spending limits: one per day, one per month and one per single request. */
export interface BudgetConfig {
  dailyBudget: number;
  monthlyBudget: number;
  perRequestCap: number;
}

/** Usage figures for one budget window. */
interface BudgetWindowStatus {
  spent: number;
  limit: number;
  remaining: number;
  percentUsed: number;
}

/** Usage figures for the daily and the monthly budget windows. */
export interface BudgetStatus {
  daily: BudgetWindowStatus;
  monthly: BudgetWindowStatus;
}
113
+
114
+ // ---- Cost Tracking Types ----
115
+
116
/** Cost and usage details recorded for one request. */
export interface CostRecord {
  id: string;
  requestId: string;
  timestamp: string;
  provider: ProviderId;
  model: string;
  taskType: TaskType;
  inputTokens: number;
  outputTokens: number;
  inputCost: number;
  outputCost: number;
  totalCost: number;
  latencyMs: number;
  cached: boolean;
}

/** Aggregated totals across requests, with per-provider and per-model splits. */
export interface CostSummary {
  totalSpent: number;
  totalRequests: number;
  totalInputTokens: number;
  totalOutputTokens: number;
  cacheHitRate: number;
  averageLatencyMs: number;
  /** One entry for every provider id. */
  byProvider: { [Provider in ProviderId]: ProviderSummary };
  /** Keyed by an arbitrary string. */
  byModel: { [model: string]: ModelSummary };
  savingsVsGpt4: number;
}

/** Totals attributed to a single provider. */
export interface ProviderSummary {
  totalSpent: number;
  requestCount: number;
  inputTokens: number;
  outputTokens: number;
}

/** Totals attributed to a single model: the provider-level fields plus average latency. */
export interface ModelSummary extends ProviderSummary {
  averageLatencyMs: number;
}
158
+
159
+ // ---- Cache Types ----
160
+
161
/** A cached response together with its usage, cost and lifetime bookkeeping. */
export interface CacheEntry {
  /** Hash identifying the prompt this entry answers. */
  promptHash: string;
  model: string;
  provider: ProviderId;
  /** The cached response, stored as a string. */
  response: string;
  inputTokens: number;
  outputTokens: number;
  cost: number;
  /** Creation time as a number — NOTE(review): unit not visible here; confirm against the writer. */
  createdAt: number;
  /** Expiry time as a number, presumably in the same unit as `createdAt` — confirm. */
  expiresAt: number;
  hitCount: number;
}
173
+
174
+ // ---- Rate Limiting Types ----
175
+
176
/** State of one rate-limiting token bucket. */
export interface TokenBucket {
  tokens: number;
  maxTokens: number;
  /** Tokens per second. */
  refillRate: number;
  lastRefill: number;
}

/** Outcome of a rate-limit check. */
export interface RateLimitStatus {
  allowed: boolean;
  remaining: number;
  resetMs: number;
}
188
+
189
+ // ---- Routing Types ----
190
+
191
/** The provider/model chosen for a request, with the rationale and cost estimates. */
export interface RoutingDecision {
  provider: ProviderId;
  model: string;
  /** Free-text explanation of the choice. */
  reason: string;
  estimatedCost: number;
  estimatedSavings: number;
}
198
+
199
+ // ---- Dashboard Types ----
200
+
201
/** Headline totals shown in the dashboard overview. */
interface DashboardOverview {
  totalSpent: number;
  totalRequests: number;
  totalSaved: number;
  cacheHitRate: number;
}

/** One row of the dashboard's recent-requests list. */
interface DashboardRequestRow {
  time: string;
  model: string;
  provider: string;
  taskType: string;
  cost: number;
  tokens: number;
  cached: boolean;
}

/** Cost and request count for one date. */
interface DailyCostPoint {
  date: string;
  cost: number;
  requests: number;
}

/** Request count and cost attributed to one model. */
interface ModelUsageSlice {
  model: string;
  count: number;
  cost: number;
}

/** Everything the dashboard needs, gathered in one payload. */
export interface DashboardData {
  overview: DashboardOverview;
  budget: BudgetStatus;
  recentRequests: DashboardRequestRow[];
  costByDay: DailyCostPoint[];
  modelDistribution: ModelUsageSlice[];
}
221
+
222
+ // ---- Error Types ----
223
+
224
/**
 * Raised when spending goes past a budget limit.
 *
 * The message shows the amount spent with four decimals and the limit with
 * two, e.g. `Budget exceeded (daily): $1.2346 / $5.00`.
 */
export class BudgetExceededError extends Error {
  /** Which budget was exceeded. */
  public readonly type: 'daily' | 'monthly' | 'per_request';
  /** Amount spent, as passed by the caller. */
  public readonly spent: number;
  /** The limit that was exceeded. */
  public readonly limit: number;

  constructor(type: 'daily' | 'monthly' | 'per_request', spent: number, limit: number) {
    const spentText = spent.toFixed(4);
    const limitText = limit.toFixed(2);
    super(`Budget exceeded (${type}): $${spentText} / $${limitText}`);
    this.type = type;
    this.spent = spent;
    this.limit = limit;
    this.name = 'BudgetExceededError';
  }
}
234
+
235
/**
 * Raised when a request is rejected by the rate limiter.
 *
 * The message reports the wait in whole seconds, rounded up from `resetMs`.
 */
export class RateLimitError extends Error {
  /** Wait before retrying, in milliseconds. */
  public readonly resetMs: number;

  constructor(resetMs: number) {
    const retryAfterSeconds = Math.ceil(resetMs / 1000);
    super(`Rate limit exceeded. Retry after ${retryAfterSeconds}s`);
    this.resetMs = resetMs;
    this.name = 'RateLimitError';
  }
}
241
+
242
/**
 * Raised when a provider call fails.
 *
 * The provider id and status code are kept as fields and are also embedded,
 * together with the supplied detail text, in the error message.
 */
export class ProviderError extends Error {
  /** Provider that produced the failure. */
  public readonly provider: ProviderId;
  /** Status code associated with the failure. */
  public readonly statusCode: number;

  constructor(provider: ProviderId, statusCode: number, message: string) {
    super(`Provider ${provider} error (${statusCode}): ${message}`);
    this.provider = provider;
    this.statusCode = statusCode;
    this.name = 'ProviderError';
  }
}
252
+
253
/** Raised when no LLM provider is available; carries a fixed message. */
export class NoProviderAvailableError extends Error {
  constructor() {
    const message = 'No LLM provider available — check API keys and provider health';
    super(message);
    this.name = 'NoProviderAvailableError';
  }
}
@@ -0,0 +1,36 @@
1
+ import crypto from 'crypto';
2
+ import { env } from '../config';
3
+
4
/** Cipher name handed to `createCipheriv` / `createDecipheriv`. */
const ALGORITHM = 'aes-256-gcm';

/** Bytes of random IV generated per message and stored first in the payload. */
const IV_LENGTH = 16;

/** Bytes of authentication tag stored in the payload right after the IV. */
const TAG_LENGTH = 16;
7
+
8
/**
 * Derives the cipher key from `env.ENCRYPTION_KEY`.
 *
 * The configured value is hashed with SHA-256, so the result is always
 * exactly 32 bytes, as AES-256 requires.
 */
function getKey(): Buffer {
  const hasher = crypto.createHash('sha256');
  hasher.update(env.ENCRYPTION_KEY);
  return hasher.digest();
}
13
+
14
/**
 * Encrypts UTF-8 text with the module's cipher and key.
 *
 * @param plaintext Text to encrypt.
 * @returns Base64 of `IV | auth tag | ciphertext`, with a fresh random IV per call.
 */
export function encrypt(plaintext: string): string {
  const nonce = crypto.randomBytes(IV_LENGTH);
  const cipher = crypto.createCipheriv(ALGORITHM, getKey(), nonce);

  const head = cipher.update(plaintext, 'utf8');
  const tail = cipher.final();
  // The auth tag only becomes available once the cipher has been finalized.
  const authTag = cipher.getAuthTag();

  return Buffer.concat([nonce, authTag, head, tail]).toString('base64');
}
22
+
23
/**
 * Decrypts a payload produced by `encrypt`.
 *
 * Expected layout after base64 decoding:
 * `IV (IV_LENGTH bytes) | auth tag (TAG_LENGTH bytes) | ciphertext`.
 *
 * @param ciphertext Base64-encoded payload.
 * @returns The decrypted UTF-8 text.
 * @throws Error when the payload is too short to hold a full IV and auth
 *   tag, or when authentication fails (wrong key or tampered data).
 */
export function decrypt(ciphertext: string): string {
  const data = Buffer.from(ciphertext, 'base64');

  // Reject truncated input up front. Without this check a short payload
  // would be sliced into a shorter-than-expected auth tag, which weakens
  // authentication wherever such tags are accepted.
  if (data.length < IV_LENGTH + TAG_LENGTH) {
    throw new Error('Invalid ciphertext: payload is too short');
  }

  const iv = data.subarray(0, IV_LENGTH);
  const tag = data.subarray(IV_LENGTH, IV_LENGTH + TAG_LENGTH);
  const encrypted = data.subarray(IV_LENGTH + TAG_LENGTH);

  // Pin the tag length so only full-length tags are ever valid.
  const decipher = crypto.createDecipheriv(ALGORITHM, getKey(), iv, {
    authTagLength: TAG_LENGTH,
  });
  decipher.setAuthTag(tag);

  // Join the raw bytes before decoding rather than relying on implicit
  // Buffer-to-string coercion of the first chunk.
  return Buffer.concat([decipher.update(encrypted), decipher.final()]).toString('utf8');
}
33
+
34
/**
 * Computes the SHA-256 digest of the given text.
 *
 * @param messages Text to hash.
 * @returns The digest as a lower-case hexadecimal string (64 characters).
 */
export function hashPrompt(messages: string): string {
  const hasher = crypto.createHash('sha256');
  hasher.update(messages);
  return hasher.digest('hex');
}
@@ -0,0 +1,85 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import './setup';
3
+ import { env, getProviders, MODEL_CATALOG, getDefaultBudget } from '../src/config';
4
+
5
// Checks of the configuration module: parsed environment, provider list,
// model catalog and default budget.
describe('Config', () => {
  /** Asserts that a catalog score lies in the inclusive 1-10 range. */
  const expectScoreInRange = (score: number): void => {
    expect(score).toBeGreaterThanOrEqual(1);
    expect(score).toBeLessThanOrEqual(10);
  };

  it('should parse environment variables', () => {
    const { PORT, NODE_ENV, ENCRYPTION_KEY } = env;
    expect(PORT).toBe(3211);
    expect(NODE_ENV).toBe('test');
    expect(ENCRYPTION_KEY).toBeTruthy();
  });

  it('should return provider configurations', () => {
    const providers = getProviders();
    const providerIds = providers.map((provider) => provider.id);
    expect(providers).toHaveLength(4);
    expect(providerIds).toEqual(
      expect.arrayContaining(['openai', 'anthropic', 'google', 'deepseek'])
    );
  });

  it('should mark providers as enabled when API key is set', () => {
    getProviders().forEach((provider) => {
      expect(provider.enabled).toBe(true);
    });
  });

  it('should have valid model catalog entries', () => {
    MODEL_CATALOG.forEach((model) => {
      // Identification fields must be present.
      expect(model.id).toBeTruthy();
      expect(model.provider).toBeTruthy();
      expect(model.displayName).toBeTruthy();

      // Prices and limits must be strictly positive.
      expect(model.inputPricePer1M).toBeGreaterThan(0);
      expect(model.outputPricePer1M).toBeGreaterThan(0);
      expect(model.contextWindow).toBeGreaterThan(0);
      expect(model.maxOutputTokens).toBeGreaterThan(0);
      expect(model.capabilities.length).toBeGreaterThan(0);

      // Scores are all on the same 1-10 scale.
      expectScoreInRange(model.qualityScore);
      expectScoreInRange(model.speedScore);
      expectScoreInRange(model.costScore);
    });
  });

  it('should include models from all providers', () => {
    const catalogProviders = new Set(MODEL_CATALOG.map((model) => model.provider));
    for (const id of ['openai', 'anthropic', 'google', 'deepseek'] as const) {
      expect(catalogProviders.has(id)).toBe(true);
    }
  });

  it('should return default budget', () => {
    const { dailyBudget, monthlyBudget, perRequestCap } = getDefaultBudget();
    expect(dailyBudget).toBeGreaterThan(0);
    expect(monthlyBudget).toBeGreaterThan(0);
    expect(perRequestCap).toBeGreaterThan(0);
  });

  it('should have provider config with required fields', () => {
    getProviders().forEach((provider) => {
      expect(provider.id).toBeTruthy();
      expect(provider.name).toBeTruthy();
      expect(provider.baseUrl).toBeTruthy();
      expect(provider.baseUrl).toMatch(/^https:\/\//);
      expect(typeof provider.priority).toBe('number');
      expect(typeof provider.timeoutMs).toBe('number');
    });
  });

  it('should have unique model IDs', () => {
    const ids = MODEL_CATALOG.map((model) => model.id);
    expect(ids.length).toBe(new Set(ids).size);
  });

  it('should have unique provider priorities', () => {
    const priorities = getProviders().map((provider) => provider.priority);
    expect(priorities.length).toBe(new Set(priorities).size);
  });
});