@theihtisham/budget-llm 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.editorconfig ADDED
@@ -0,0 +1,12 @@
1
+ root = true
2
+
3
+ [*]
4
+ charset = utf-8
5
+ end_of_line = lf
6
+ indent_style = space
7
+ indent_size = 2
8
+ insert_final_newline = true
9
+ trim_trailing_whitespace = true
10
+
11
+ [*.md]
12
+ trim_trailing_whitespace = false
@@ -0,0 +1,43 @@
1
+ name: Bug Report
2
+ description: Report a bug or unexpected behavior
3
+ labels: [bug]
4
+ body:
5
+ - type: textarea
6
+ id: description
7
+ attributes:
8
+ label: Bug Description
9
+ description: What happened?
10
+ placeholder: "When I run budget-llm..."
11
+ validations:
12
+ required: true
13
+
14
+ - type: textarea
15
+ id: steps
16
+ attributes:
17
+ label: Steps to Reproduce
18
+ description: How can we reproduce this?
19
+ placeholder: "1. Install...\n2. Configure...\n3. Run..."
20
+ validations:
21
+ required: true
22
+
23
+ - type: textarea
24
+ id: expected
25
+ attributes:
26
+ label: Expected Behavior
27
+ description: What should have happened?
28
+ validations:
29
+ required: true
30
+
31
+ - type: textarea
32
+ id: environment
33
+ attributes:
34
+ label: Environment
35
+ description: Node version, OS, etc.
36
+ placeholder: "Node 20.11.0, macOS 14, budget-llm 1.0.0"
37
+
38
+ - type: textarea
39
+ id: logs
40
+ attributes:
41
+ label: Relevant Logs
42
+ description: Paste any relevant log output
43
+ render: shell
@@ -0,0 +1,33 @@
1
+ name: Feature Request
2
+ description: Suggest a new feature or enhancement
3
+ labels: [enhancement]
4
+ body:
5
+ - type: textarea
6
+ id: problem
7
+ attributes:
8
+ label: Problem
9
+ description: What problem does this feature solve?
10
+ placeholder: "I'm frustrated when..."
11
+ validations:
12
+ required: true
13
+
14
+ - type: textarea
15
+ id: solution
16
+ attributes:
17
+ label: Proposed Solution
18
+ description: How should it work?
19
+ validations:
20
+ required: true
21
+
22
+ - type: dropdown
23
+ id: type
24
+ attributes:
25
+ label: Feature Type
26
+ options:
27
+ - New Tool/Resource
28
+ - Performance Improvement
29
+ - Developer Experience
30
+ - Documentation
31
+ - Other
32
+ validations:
33
+ required: true
@@ -0,0 +1,18 @@
1
+ ## Description
2
+
3
+ <!-- Brief description of changes -->
4
+
5
+ ## Type of Change
6
+
7
+ - [ ] Bug fix
8
+ - [ ] New feature
9
+ - [ ] Breaking change
10
+ - [ ] Documentation update
11
+ - [ ] Refactor / cleanup
12
+
13
+ ## Checklist
14
+
15
+ - [ ] Tests pass (`npm test`)
16
+ - [ ] Type checks pass (`npm run lint`)
17
+ - [ ] No secrets or credentials introduced
18
+ - [ ] Commit messages follow [conventional commits](https://www.conventionalcommits.org/)
@@ -0,0 +1,16 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: npm
4
+ directory: /
5
+ schedule:
6
+ interval: weekly
7
+ day: monday
8
+ open-pull-requests-limit: 5
9
+ commit-message:
10
+ prefix: chore
11
+ include: scope
12
+
13
+ - package-ecosystem: github-actions
14
+ directory: /
15
+ schedule:
16
+ interval: monthly
@@ -0,0 +1,36 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ build-and-test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ node-version: [18, 20, 22]
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - uses: actions/setup-node@v4
18
+ with:
19
+ node-version: ${{ matrix.node-version }}
20
+ cache: npm
21
+ - run: npm ci
22
+ - run: npm run lint
23
+ - run: npm test
24
+ - run: npm run build
25
+
26
+ coverage:
27
+ runs-on: ubuntu-latest
28
+ needs: build-and-test
29
+ steps:
30
+ - uses: actions/checkout@v4
31
+ - uses: actions/setup-node@v4
32
+ with:
33
+ node-version: 20
34
+ cache: npm
35
+ - run: npm ci
36
+ - run: npm run test:coverage
@@ -0,0 +1,23 @@
1
+ name: Publish to npm
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ publish:
9
+ runs-on: ubuntu-latest
10
+ permissions:
11
+ contents: read
12
+ id-token: write
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ - uses: actions/setup-node@v4
16
+ with:
17
+ node-version: 20
18
+ registry-url: https://registry.npmjs.org
19
+ - run: npm ci
20
+ - run: npm run build
21
+ - run: npm publish --provenance --access public
22
+ env:
23
+ NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
@@ -0,0 +1,27 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone.
6
+
7
+ ## Our Standards
8
+
9
+ Examples of behavior that contributes to a positive environment:
10
+ - Demonstrating empathy and kindness toward other people
11
+ - Being respectful of differing opinions, viewpoints, and experiences
12
+ - Giving and gracefully accepting constructive feedback
13
+ - Accepting responsibility and apologizing to those affected by our mistakes
14
+
15
+ Examples of unacceptable behavior:
16
+ - The use of sexualized language or imagery
17
+ - Trolling, insulting or derogatory comments
18
+ - Public or private harassment
19
+ - Publishing others' private information without explicit permission
20
+
21
+ ## Enforcement
22
+
23
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to **Theihtisham@outlook.com**.
24
+
25
+ ## Attribution
26
+
27
+ This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 2.1.
@@ -0,0 +1,41 @@
1
+ # Contributing to Budget LLM
2
+
3
+ Thank you for your interest!
4
+
5
+ ## Development Setup
6
+
7
+ ```bash
8
+ git clone https://github.com/theihtisham/budget-llm.git
9
+ cd budget-llm
10
+ npm install
11
+ cp .env.example .env
12
+ npm run build
13
+ npm test
14
+ ```
15
+
16
+ ## Project Structure
17
+
18
+ ```
19
+ src/
20
+ server.ts — Express server entry point
21
+ router.ts — Intelligent model routing
22
+ config.ts — Configuration management
23
+ database.ts — SQLite usage tracking
24
+ encryption.ts — API key encryption
25
+ rate-limiter.ts — Per-provider rate limiting
26
+ task-classifier.ts — Task complexity classification
27
+ tests/ — Vitest test suite
28
+ ```
29
+
30
+ ## How to Contribute
31
+
32
+ 1. Fork the repository
33
+ 2. Create a feature branch: `git checkout -b feature/my-feature`
34
+ 3. Make changes and add tests
35
+ 4. Run checks: `npm run lint && npm test`
36
+ 5. Commit with [conventional commits](https://www.conventionalcommits.org/)
37
+ 6. Push and open a Pull Request
38
+
39
+ ## License
40
+
41
+ By contributing, you agree that your contributions will be licensed under the MIT License.
package/Dockerfile ADDED
@@ -0,0 +1,9 @@
1
+ FROM node:20-slim
2
+
3
+ WORKDIR /app
4
+ COPY package*.json ./
5
+ RUN npm ci --production
6
+ COPY dist/ ./dist/
7
+
8
+ EXPOSE 3000
9
+ ENTRYPOINT ["node", "dist/server.js"]
package/README.md CHANGED
@@ -1,293 +1,242 @@
1
- # BudgetLLM
1
+ <div align="center">
2
2
 
3
- > Cut your AI costs by 60% one API endpoint that automatically picks the cheapest model for every request.
3
+ <img width="100%" height="180" src="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 960 180'%3E%3Cdefs%3E%3ClinearGradient id='g' x1='0%25' y1='0%25' x2='100%25' y2='100%25'%3E%3Cstop offset='0%25' stop-color='%23fbbf24'/%3E%3Cstop offset='100%25' stop-color='%23f472b6'/%3E%3C/linearGradient%3E%3C/defs%3E%3Crect width='960' height='180' fill='%230a0a1a' rx='16'/%3E%3Crect x='2' y='2' width='956' height='176' fill='none' stroke='url(%23g)' stroke-width='2' rx='15'/%3E%3Ctext x='480' y='75' text-anchor='middle' fill='white' font-family='system-ui' font-size='38' font-weight='bold'%3E%F0%9F%92%B0 Budget LLM%3C/text%3E%3Ctext x='480' y='115' text-anchor='middle' fill='%23a5a5c0' font-family='system-ui' font-size='18'%3ECut AI Costs by 60% %E2%80%94 Smart Model Routing Proxy%3C/text%3E%3Ctext x='480' y='148' text-anchor='middle' fill='%236b6b88' font-family='monospace' font-size='13'%3EExpress %C2%B7 OpenAI %C2%B7 Anthropic %C2%B7 Google %C2%B7 DeepSeek %C2%B7 SQLite %C2%B7 Auto-Classification%3C/text%3E%3C/svg%3E" alt="Budget LLM Banner"/>
4
4
 
5
- [![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
6
- [![Node.js](https://img.shields.io/badge/node-%3E%3D18-green.svg)](https://nodejs.org)
7
- [![TypeScript](https://img.shields.io/badge/TypeScript-Strict-3178C6.svg)](https://www.typescriptlang.org/)
8
- [![Vitest](https://img.shields.io/badge/tested%20with-Vitest-6E9F18.svg)](https://vitest.dev/)
5
+ [![CI](https://img.shields.io/github/actions/workflow/status/theihtisham/budget-llm/ci.yml?style=for-the-badge&label=CI)](https://github.com/theihtisham/budget-llm/actions/workflows/ci.yml)
6
+ [![npm version](https://img.shields.io/npm/v/@theihtisham/budget-llm.svg?style=for-the-badge&color=FF6D00)](https://www.npmjs.com/package/@theihtisham/budget-llm)
7
+ [![npm downloads](https://img.shields.io/npm/dt/@theihtisham/budget-llm?style=for-the-badge&color=0A9D2A)](https://www.npmjs.com/package/@theihtisham/budget-llm)
8
+ [![GitHub stars](https://img.shields.io/github/stars/theihtisham/budget-llm?style=for-the-badge&color=FFD700)](https://github.com/theihtisham/budget-llm/stargazers)
9
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg?style=for-the-badge)](https://opensource.org/licenses/MIT)
10
+ [![TypeScript](https://img.shields.io/badge/TypeScript-5.8-3178C6?style=for-the-badge&logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
9
11
 
10
- BudgetLLM is a smart multi-provider LLM cost optimizer and routing proxy. It sits between your application and LLM providers (OpenAI, Anthropic, Google, DeepSeek) and automatically routes each request to the cheapest capable model based on the task type.
12
+ **One API endpoint that automatically picks the cheapest LLM for every request.** Drops in front of your existing OpenAI SDK calls — routes simple tasks to cheap models, complex tasks to powerful ones. Tracks spending per API key with SQLite.
11
13
 
12
- ## How It Works
14
+ [Quick Start](#-quick-start) · [How It Works](#-how-it-works) · [Supported Providers](#-supported-providers) · [Configuration](#-configuration)
13
15
 
16
+ </div>
17
+
18
+ ---
19
+
20
+ ## Why Budget LLM?
21
+
22
+ Most LLM requests don't need GPT-4. Budget LLM classifies each request's complexity and routes it to the cheapest model that can handle it — automatically.
23
+
24
+ ### What Makes This Different
25
+
26
+ | Feature | Budget LLM | LiteLLM | OpenRouter |
27
+ |---------|-----------|---------|------------|
28
+ | Cost Reduction | Up to 60% automatic | Manual model selection | Manual |
29
+ | Task Classification | Built-in complexity scoring | No | No |
30
+ | Per-Key Tracking | SQLite per API key spending | Limited | Dashboard only |
31
+ | Drop-In Proxy | Works with existing OpenAI SDK | Yes | Yes |
32
+ | Encryption | AES-256 API key encryption | No | No |
33
+ | Rate Limiting | Per-provider with backoff | Basic | Built-in |
34
+ | Self-Hosted | Yes, fully local | Yes | No |
35
+ | Zero Config | Works out of the box | Needs config | Needs account |
36
+
37
+ ---
38
+
39
+ ## Architecture
40
+
41
+ ```mermaid
42
+ graph TD
43
+ subgraph "Client"
44
+ APP[Your Application<br/>OpenAI SDK]
45
+ end
46
+
47
+ subgraph "Budget LLM Proxy"
48
+ SRV[Express Server<br/>CORS + Helmet + Compression]
49
+ RTR[Smart Router<br/>Cost-Optimized Model Selection]
50
+ CLS[Task Classifier<br/>Complexity Scoring]
51
+ RL[Rate Limiter<br/>Per-Provider Throttling]
52
+ ENC[Encryption<br/>AES-256 Key Storage]
53
+ end
54
+
55
+ subgraph "LLM Providers"
56
+ OAI[OpenAI<br/>GPT-4o / GPT-4o-mini]
57
+ ANT[Anthropic<br/>Claude 4.6 / Haiku]
58
+ GGL[Google<br/>Gemini Pro / Flash]
59
+ DSP[DeepSeek<br/>Chat / Coder]
60
+ end
61
+
62
+ subgraph "Storage"
63
+ DB[SQLite DB<br/>Usage + Costs + Keys]
64
+ end
65
+
66
+ APP -->|OpenAI-compatible API| SRV
67
+ SRV --> CLS
68
+ CLS --> RTR
69
+ RTR --> RL
70
+ RL --> OAI
71
+ RL --> ANT
72
+ RL --> GGL
73
+ RL --> DSP
74
+ RTR --> DB
75
+ ENC --> DB
76
+
77
+ style SRV fill:#fbbf24,color:#000
78
+ style RTR fill:#f472b6,color:#fff
79
+ style CLS fill:#34d399,color:#fff
14
80
  ```
15
- Your App
16
- |
17
- | POST /v1/chat/completions (OpenAI-compatible)
18
- v
19
- +------------------+
20
- | BudgetLLM |
21
- | Proxy Server |
22
- +------------------+
23
- | | | |
24
- | Task | Budget | Cache | Rate Limit
25
- | Classification | Enforcement | Lookup | Check
26
- | | | |
27
- v v v v
28
- +----------------------------------------------------------+
29
- | Smart Router |
30
- | code tasks -> cheapest capable model |
31
- | creative tasks -> highest quality model |
32
- | chat tasks -> fastest cheapest model |
33
- | reasoning -> best reasoning model |
34
- +----------------------------------------------------------+
35
- | | | |
36
- v v v v
37
- +--------+ +--------+ +--------+ +--------+
38
- | OpenAI | |Anthropic| | Google | |DeepSeek|
39
- +--------+ +--------+ +--------+ +--------+
40
- | | | |
41
- +-------------+------+------+-------------+
42
- |
43
- Cost Tracking
44
- & Response Cache
45
- |
46
- v
47
- SQLite Database
81
+
82
+ ### Request Flow
83
+
84
+ ```mermaid
85
+ sequenceDiagram
86
+ participant C as Client
87
+ participant S as Budget LLM
88
+ participant T as Classifier
89
+ participant R as Router
90
+ participant P as LLM Provider
91
+
92
+ C->>S: POST /v1/chat/completions
93
+ S->>T: Classify task complexity
94
+ T->>T: Score: simple(0-3), medium(4-6), complex(7-10)
95
+ T->>R: Complexity score + budget
96
+ R->>R: Select cheapest capable model
97
+ R->>P: Forward request
98
+ P-->>R: Response
99
+ R->>R: Log tokens + cost to SQLite
100
+ R-->>C: Response (OpenAI format)
48
101
  ```
49
102
 
50
- ## Features
51
-
52
- - **Smart Routing** - Automatically classifies tasks (code, creative, reasoning, chat, etc.) and picks the cheapest model that can handle it well
53
- - **Budget Limits** - Daily, monthly, and per-request cost caps with automatic enforcement
54
- - **Fallback Chain** - If the primary provider fails, automatically tries the next best provider
55
- - **Cost Tracking** - Per-request, per-day, per-model cost logging with SQLite storage
56
- - **Response Caching** - Identical prompts return cached responses (free!) with configurable TTL
57
- - **Rate Limiting** - Token bucket algorithm with per-IP limits
58
- - **Real-time Dashboard** - Beautiful web UI showing costs, savings, usage, and budget status
59
- - **OpenAI-Compatible** - Drop-in replacement for the OpenAI API. Just change the base URL.
60
- - **Security** - API key encryption (AES-256-GCM), rate limiting, input validation, no prompt content logging
61
-
62
- ## Cost Comparison
63
-
64
- BudgetLLM automatically picks the cheapest model for each task type. Here's how costs compare:
65
-
66
- | Task Type | Default Route | Cost/1M Tokens | vs GPT-4 Turbo | Savings |
67
- |-----------|--------------|----------------|-----------------|---------|
68
- | Chat | Gemini 2.0 Flash | $0.10 / $0.40 | $10 / $30 | **97%** |
69
- | Code | GPT-4o Mini | $0.15 / $0.60 | $10 / $30 | **98%** |
70
- | Creative | GPT-4o | $2.50 / $10.00 | $10 / $30 | **67%** |
71
- | Reasoning | DeepSeek R1 | $0.55 / $2.19 | $10 / $30 | **93%** |
72
- | Summarization | Gemini 2.0 Flash | $0.10 / $0.40 | $10 / $30 | **97%** |
73
- | Translation | Gemini 2.0 Flash Lite | $0.075 / $0.30 | $10 / $30 | **99%** |
74
- | Math | DeepSeek R1 | $0.55 / $2.19 | $10 / $30 | **93%** |
75
- | Analysis | DeepSeek V3 | $0.27 / $1.10 | $10 / $30 | **96%** |
76
-
77
- ### Model Catalog
78
-
79
- | Model | Provider | Input/1M | Output/1M | Quality | Speed | Cost |
80
- |-------|----------|----------|-----------|---------|-------|------|
81
- | GPT-4o | OpenAI | $2.50 | $10.00 | 9/10 | 7/10 | 5/10 |
82
- | GPT-4o Mini | OpenAI | $0.15 | $0.60 | 7/10 | 9/10 | 9/10 |
83
- | GPT-4 Turbo | OpenAI | $10.00 | $30.00 | 9/10 | 6/10 | 3/10 |
84
- | Claude Sonnet 4 | Anthropic | $3.00 | $15.00 | 9/10 | 7/10 | 5/10 |
85
- | Claude 3.5 Haiku | Anthropic | $0.80 | $4.00 | 7/10 | 9/10 | 7/10 |
86
- | Gemini 2.0 Flash | Google | $0.10 | $0.40 | 7/10 | 10/10 | 10/10 |
87
- | Gemini 2.0 Flash Lite | Google | $0.075 | $0.30 | 6/10 | 10/10 | 10/10 |
88
- | DeepSeek V3 | DeepSeek | $0.27 | $1.10 | 8/10 | 8/10 | 8/10 |
89
- | DeepSeek R1 | DeepSeek | $0.55 | $2.19 | 9/10 | 5/10 | 7/10 |
103
+ ---
90
104
 
91
105
  ## Quick Start
92
106
 
93
- ### 1. Install
94
-
95
107
  ```bash
96
- git clone https://github.com/your-username/budget-llm.git
108
+ # Install
109
+ npm install -g @theihtisham/budget-llm
110
+
111
+ # Or from source
112
+ git clone https://github.com/theihtisham/budget-llm.git
97
113
  cd budget-llm
98
- npm install
114
+ npm install && npm run build
99
115
  ```
100
116
 
101
- ### 2. Configure
117
+ ### Configure
102
118
 
103
119
  ```bash
104
120
  cp .env.example .env
105
121
  ```
106
122
 
107
- Edit `.env` and add your API keys:
123
+ Edit `.env`:
108
124
 
109
125
  ```env
110
- # Add at least one provider API key
126
+ # At least one provider required
111
127
  OPENAI_API_KEY=sk-...
112
128
  ANTHROPIC_API_KEY=sk-ant-...
113
- GOOGLE_API_KEY=AIza...
114
- DEEPSEEK_API_KEY=sk-...
129
+ GOOGLE_API_KEY=...
130
+ DEEPSEEK_API_KEY=...
115
131
 
116
- # Set your budget limits
117
- DEFAULT_DAILY_BUDGET=10.00
118
- DEFAULT_MONTHLY_BUDGET=200.00
119
- DEFAULT_PER_REQUEST_CAP=1.00
132
+ # Server
133
+ PORT=3000
134
+ ENCRYPTION_KEY=your-32-char-encryption-key-here
120
135
  ```
121
136
 
122
- ### 3. Run
137
+ ### Run
123
138
 
124
139
  ```bash
125
- # Development
126
- npm run dev
127
-
128
- # Production
129
- npm run build
140
+ # Start proxy
130
141
  npm start
131
- ```
132
-
133
- ### 4. Use It
134
-
135
- BudgetLLM is a drop-in replacement for the OpenAI API. Just change the base URL:
136
142
 
137
- **Before (direct OpenAI):**
138
- ```javascript
139
- const openai = new OpenAI({
140
- apiKey: 'sk-your-key',
141
- baseURL: 'https://api.openai.com/v1',
142
- });
143
+ # Point your OpenAI SDK at it
144
+ # base_url: http://localhost:3000/v1
143
145
  ```
144
146
 
145
- **After (via BudgetLLM):**
146
- ```javascript
147
- const openai = new OpenAI({
148
- apiKey: 'anything', // BudgetLLM doesn't require a client key
149
- baseURL: 'http://localhost:3210/v1',
150
- });
151
- ```
152
-
153
- That's it. Every request is now automatically optimized for cost.
147
+ ### Use with OpenAI SDK
154
148
 
155
- ### 5. Optional: Task Type Hints
149
+ ```python
150
+ from openai import OpenAI
156
151
 
157
- Help BudgetLLM pick the best model by specifying a task type:
152
+ client = OpenAI(
153
+ base_url="http://localhost:3000/v1",
154
+ api_key="your-budget-llm-key" # Any key for auth
155
+ )
158
156
 
159
- ```javascript
160
- const response = await openai.chat.completions.create({
161
- model: 'auto', // let BudgetLLM decide
162
- messages: [{ role: 'user', content: 'Write a sort function' }],
163
- // BudgetLLM extension:
164
- task_type: 'code', // forces code-optimized routing
165
- });
157
+ # Budget LLM auto-routes to cheapest capable model
158
+ response = client.chat.completions.create(
159
+ model="auto", # Magic: auto-selects cheapest
160
+ messages=[{"role": "user", "content": "Hello!"}]
161
+ )
166
162
  ```
167
163
 
168
- ## API Reference
164
+ ---
169
165
 
170
- ### OpenAI-Compatible Endpoints
166
+ ## Supported Providers
171
167
 
172
- | Method | Endpoint | Description |
173
- |--------|----------|-------------|
174
- | POST | `/v1/chat/completions` | Chat completion (drop-in replacement) |
175
- | GET | `/v1/models` | List available models |
168
+ | Provider | Models | Cost Tier |
169
+ |----------|--------|-----------|
170
+ | **OpenAI** | GPT-4o, GPT-4o-mini, GPT-3.5-turbo | $$ / $ |
171
+ | **Anthropic** | Claude Opus 4.6, Sonnet 4.6, Haiku 4.5 | $$$ / $$ / $ |
172
+ | **Google** | Gemini Pro, Gemini Flash | $$ / $ |
173
+ | **DeepSeek** | DeepSeek Chat, DeepSeek Coder | $ |
176
174
 
177
- ### BudgetLLM Extensions
175
+ ### Task Classification
178
176
 
179
- The `/v1/chat/completions` endpoint accepts these additional fields:
177
+ | Score | Complexity | Route To |
178
+ |-------|-----------|----------|
179
+ | 0-3 | Simple (greeting, formatting) | Cheapest model |
180
+ | 4-6 | Medium (summarization, basic code) | Mid-tier model |
181
+ | 7-10 | Complex (reasoning, advanced code) | Best available model |
180
182
 
181
- | Field | Type | Description |
182
- |-------|------|-------------|
183
- | `task_type` | string | Force routing: `code`, `creative`, `reasoning`, `chat`, `summarization`, `translation`, `analysis`, `math`, `auto` |
184
- | `request_id` | string | Custom request ID for tracking |
185
- | `budget_cap` | number | Per-request budget cap in USD |
183
+ ---
186
184
 
187
- The response includes a `cost` field:
185
+ ## Configuration
188
186
 
189
- ```json
190
- {
191
- "cost": {
192
- "inputCost": 0.0000015,
193
- "outputCost": 0.000006,
194
- "totalCost": 0.0000075,
195
- "currency": "USD",
196
- "model": "gpt-4o-mini",
197
- "provider": "openai",
198
- "savingsVsGpt4": 0.00012
199
- }
200
- }
201
- ```
187
+ ### Environment Variables
202
188
 
203
- ### Management API
189
+ | Variable | Required | Description |
190
+ |----------|----------|-------------|
191
+ | `OPENAI_API_KEY` | Yes* | OpenAI API key |
192
+ | `ANTHROPIC_API_KEY` | Yes* | Anthropic API key |
193
+ | `GOOGLE_API_KEY` | No | Google AI API key |
194
+ | `DEEPSEEK_API_KEY` | No | DeepSeek API key |
195
+ | `PORT` | No | Server port (default: 3000) |
196
+ | `ENCRYPTION_KEY` | No | AES-256 key for API key storage |
197
+ | `DB_PATH` | No | SQLite database path |
204
198
 
205
- | Method | Endpoint | Description |
206
- |--------|----------|-------------|
207
- | GET | `/health` | Health check |
208
- | GET | `/api/dashboard` | Full dashboard data |
209
- | GET | `/api/costs?days=30` | Cost summary |
210
- | GET | `/api/budget` | Budget config and status |
211
- | PUT | `/api/budget` | Update budget limits |
212
- | GET | `/api/rate-limit` | Rate limit status |
213
- | DELETE | `/api/cache` | Clear response cache |
214
- | GET | `/dashboard` | Web dashboard UI |
199
+ *At least one provider API key required.
215
200
 
216
- ## Architecture
201
+ ---
217
202
 
218
- ```
219
- src/
220
- server.ts # Express HTTP server with all routes
221
- proxy.ts # Main request handler (orchestrates everything)
222
- router.ts # Smart model routing engine
223
- task-classifier.ts # Task type detection from prompts
224
- providers.ts # Provider adapters (OpenAI, Anthropic, Google, DeepSeek)
225
- database.ts # SQLite database layer
226
- rate-limiter.ts # Token bucket rate limiter
227
- config.ts # Configuration, model catalog, logging
228
- types.ts # TypeScript type definitions
229
- utils/
230
- encryption.ts # AES-256-GCM encryption for API keys
231
- ```
203
+ ## Docker
232
204
 
233
- ### Routing Algorithm
234
-
235
- BudgetLLM scores each model using weighted criteria that vary by task type:
236
-
237
- | Task Type | Quality Weight | Cost Weight | Speed Weight |
238
- |-----------|---------------|-------------|--------------|
239
- | Code | 40% | 45% | 15% |
240
- | Creative | 70% | 15% | 15% |
241
- | Reasoning | 65% | 25% | 10% |
242
- | Chat | 20% | 60% | 20% |
243
- | Summarization | 20% | 60% | 20% |
244
- | Translation | 20% | 60% | 20% |
245
- | Analysis | 50% | 35% | 15% |
246
-
247
- Models also get a +20 score bonus if they declare the task type as a capability, and a -10 penalty otherwise.
248
-
249
- ### Fallback Chain
250
-
251
- When the primary provider fails, BudgetLLM tries up to 3 alternative providers in priority order. This means your app stays up even if one provider has an outage.
252
-
253
- ### Caching
254
-
255
- Identical prompts (same messages, model, temperature, max_tokens) are cached with a configurable TTL. Cached responses are served instantly with zero cost. The cache uses content-hash matching so even rephrased requests that happen to match the exact same parameters get the benefit.
256
-
257
- ## Security
205
+ ```bash
206
+ docker build -t budget-llm .
207
+ docker run -p 3000:3000 --env-file .env budget-llm
208
+ ```
258
209
 
259
- - **API Key Encryption** - Stored keys are encrypted with AES-256-GCM
260
- - **Rate Limiting** - Token bucket algorithm prevents abuse
261
- - **Input Validation** - Zod schema validation on all inputs
262
- - **No Prompt Logging** - Request metadata is logged, but prompt content is never stored in logs
263
- - **Helmet** - HTTP security headers via helmet middleware
264
- - **Budget Enforcement** - Hard limits prevent runaway spending
210
+ ---
265
211
 
266
212
  ## Development
267
213
 
268
214
  ```bash
269
- # Install dependencies
270
215
  npm install
271
-
272
- # Run in development mode with hot reload
273
- npm run dev
274
-
275
- # Run tests
216
+ npm run build
276
217
  npm test
218
+ npm run test:coverage
219
+ npm run dev # Watch mode with tsx
220
+ ```
277
221
 
278
- # Run tests in watch mode
279
- npm run test:watch
222
+ ---
280
223
 
281
- # Generate coverage report
282
- npm run test:coverage
224
+ ## Trending Tags
283
225
 
284
- # Type check
285
- npm run lint
226
+ `llm` `cost-optimization` `ai-proxy` `model-routing` `openai` `anthropic` `google-ai` `deepseek` `express` `typescript` `budget` `api-gateway` `smart-routing` `task-classification` `developer-tools` `cost-tracking`
286
227
 
287
- # Build for production
288
- npm run build
289
- ```
228
+ ---
290
229
 
291
230
  ## License
292
231
 
293
- [MIT](LICENSE)
232
+ MIT License — see [LICENSE](LICENSE) for details.
233
+
234
+ ---
235
+
236
+ <div align="center">
237
+
238
+ **Built by [theihtisham](https://github.com/theihtisham)**
239
+
240
+ [GitHub](https://github.com/theihtisham) · [npm](https://www.npmjs.com/~theihtisham) · [Email](mailto:Theihtisham@outlook.com)
241
+
242
+ </div>
package/SECURITY.md ADDED
@@ -0,0 +1,22 @@
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ | Version | Supported |
6
+ | ------- | --------- |
7
+ | 1.x | Yes |
8
+
9
+ ## Reporting a Vulnerability
10
+
11
+ If you discover a security vulnerability, please report it responsibly:
12
+
13
+ - **Email**: Theihtisham@outlook.com
14
+ - **Subject**: [Security] Vulnerability in ai-agent-memory
15
+
16
+ Please include:
17
+ 1. Description of the vulnerability
18
+ 2. Steps to reproduce
19
+ 3. Potential impact
20
+ 4. Suggested fix (if any)
21
+
22
+ We aim to respond within 48 hours and patch critical issues within 7 days.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@theihtisham/budget-llm",
3
- "version": "1.0.0",
4
- "description": "Cut your AI costs by 60% one API endpoint that automatically picks the cheapest model for every request.",
3
+ "version": "1.1.0",
4
+ "description": "Cut your AI costs by 60% \u2014 one API endpoint that automatically picks the cheapest model for every request.",
5
5
  "main": "dist/server.js",
6
6
  "types": "dist/server.d.ts",
7
7
  "scripts": {
@@ -25,8 +25,11 @@
25
25
  "budget",
26
26
  "routing"
27
27
  ],
28
- "author": "",
28
+ "author": "theihtisham",
29
29
  "license": "MIT",
30
+ "publishConfig": {
31
+ "access": "public"
32
+ },
30
33
  "dependencies": {
31
34
  "compression": "^1.7.5",
32
35
  "cors": "^2.8.5",
@@ -53,11 +56,12 @@
53
56
  "engines": {
54
57
  "node": ">=18.0.0"
55
58
  },
56
- "publishConfig": {
57
- "access": "public"
58
- },
59
59
  "repository": {
60
60
  "type": "git",
61
- "url": "https://github.com/theihtisham/budget-llm"
62
- }
63
- }
61
+ "url": "git+https://github.com/theihtisham/budget-llm.git"
62
+ },
63
+ "bugs": {
64
+ "url": "https://github.com/theihtisham/budget-llm/issues"
65
+ },
66
+ "homepage": "https://github.com/theihtisham/budget-llm#readme"
67
+ }