@theihtisham/budget-llm 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.editorconfig +12 -0
- package/.github/ISSUE_TEMPLATE/bug_report.yml +43 -0
- package/.github/ISSUE_TEMPLATE/feature_request.yml +33 -0
- package/.github/PULL_REQUEST_TEMPLATE.md +18 -0
- package/.github/dependabot.yml +16 -0
- package/.github/workflows/ci.yml +36 -0
- package/.github/workflows/release.yml +23 -0
- package/CODE_OF_CONDUCT.md +27 -0
- package/CONTRIBUTING.md +41 -0
- package/Dockerfile +9 -0
- package/README.md +178 -229
- package/SECURITY.md +22 -0
- package/package.json +13 -9
package/.editorconfig
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
name: Bug Report
|
|
2
|
+
description: Report a bug or unexpected behavior
|
|
3
|
+
labels: [bug]
|
|
4
|
+
body:
|
|
5
|
+
- type: textarea
|
|
6
|
+
id: description
|
|
7
|
+
attributes:
|
|
8
|
+
label: Bug Description
|
|
9
|
+
description: What happened?
|
|
10
|
+
placeholder: "When I run budget-llm..."
|
|
11
|
+
validations:
|
|
12
|
+
required: true
|
|
13
|
+
|
|
14
|
+
- type: textarea
|
|
15
|
+
id: steps
|
|
16
|
+
attributes:
|
|
17
|
+
label: Steps to Reproduce
|
|
18
|
+
description: How can we reproduce this?
|
|
19
|
+
placeholder: "1. Install...\n2. Configure...\n3. Run..."
|
|
20
|
+
validations:
|
|
21
|
+
required: true
|
|
22
|
+
|
|
23
|
+
- type: textarea
|
|
24
|
+
id: expected
|
|
25
|
+
attributes:
|
|
26
|
+
label: Expected Behavior
|
|
27
|
+
description: What should have happened?
|
|
28
|
+
validations:
|
|
29
|
+
required: true
|
|
30
|
+
|
|
31
|
+
- type: textarea
|
|
32
|
+
id: environment
|
|
33
|
+
attributes:
|
|
34
|
+
label: Environment
|
|
35
|
+
description: Node version, OS, etc.
|
|
36
|
+
placeholder: "Node 20.11.0, macOS 14, budget-llm 1.1.0"
|
|
37
|
+
|
|
38
|
+
- type: textarea
|
|
39
|
+
id: logs
|
|
40
|
+
attributes:
|
|
41
|
+
label: Relevant Logs
|
|
42
|
+
description: Paste any relevant log output
|
|
43
|
+
render: shell
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: Feature Request
|
|
2
|
+
description: Suggest a new feature or enhancement
|
|
3
|
+
labels: [enhancement]
|
|
4
|
+
body:
|
|
5
|
+
- type: textarea
|
|
6
|
+
id: problem
|
|
7
|
+
attributes:
|
|
8
|
+
label: Problem
|
|
9
|
+
description: What problem does this feature solve?
|
|
10
|
+
placeholder: "I'm frustrated when..."
|
|
11
|
+
validations:
|
|
12
|
+
required: true
|
|
13
|
+
|
|
14
|
+
- type: textarea
|
|
15
|
+
id: solution
|
|
16
|
+
attributes:
|
|
17
|
+
label: Proposed Solution
|
|
18
|
+
description: How should it work?
|
|
19
|
+
validations:
|
|
20
|
+
required: true
|
|
21
|
+
|
|
22
|
+
- type: dropdown
|
|
23
|
+
id: type
|
|
24
|
+
attributes:
|
|
25
|
+
label: Feature Type
|
|
26
|
+
options:
|
|
27
|
+
- New Tool/Resource
|
|
28
|
+
- Performance Improvement
|
|
29
|
+
- Developer Experience
|
|
30
|
+
- Documentation
|
|
31
|
+
- Other
|
|
32
|
+
validations:
|
|
33
|
+
required: true
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
## Description
|
|
2
|
+
|
|
3
|
+
<!-- Brief description of changes -->
|
|
4
|
+
|
|
5
|
+
## Type of Change
|
|
6
|
+
|
|
7
|
+
- [ ] Bug fix
|
|
8
|
+
- [ ] New feature
|
|
9
|
+
- [ ] Breaking change
|
|
10
|
+
- [ ] Documentation update
|
|
11
|
+
- [ ] Refactor / cleanup
|
|
12
|
+
|
|
13
|
+
## Checklist
|
|
14
|
+
|
|
15
|
+
- [ ] Tests pass (`npm test`)
|
|
16
|
+
- [ ] Type checks pass (`npm run lint`)
|
|
17
|
+
- [ ] No secrets or credentials introduced
|
|
18
|
+
- [ ] Commit messages follow [conventional commits](https://www.conventionalcommits.org/)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
updates:
|
|
3
|
+
- package-ecosystem: npm
|
|
4
|
+
directory: /
|
|
5
|
+
schedule:
|
|
6
|
+
interval: weekly
|
|
7
|
+
day: monday
|
|
8
|
+
open-pull-requests-limit: 5
|
|
9
|
+
commit-message:
|
|
10
|
+
prefix: chore
|
|
11
|
+
include: scope
|
|
12
|
+
|
|
13
|
+
- package-ecosystem: github-actions
|
|
14
|
+
directory: /
|
|
15
|
+
schedule:
|
|
16
|
+
interval: monthly
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
build-and-test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
node-version: [18, 20, 22]
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
- uses: actions/setup-node@v4
|
|
18
|
+
with:
|
|
19
|
+
node-version: ${{ matrix.node-version }}
|
|
20
|
+
cache: npm
|
|
21
|
+
- run: npm ci
|
|
22
|
+
- run: npm run lint
|
|
23
|
+
- run: npm test
|
|
24
|
+
- run: npm run build
|
|
25
|
+
|
|
26
|
+
coverage:
|
|
27
|
+
runs-on: ubuntu-latest
|
|
28
|
+
needs: build-and-test
|
|
29
|
+
steps:
|
|
30
|
+
- uses: actions/checkout@v4
|
|
31
|
+
- uses: actions/setup-node@v4
|
|
32
|
+
with:
|
|
33
|
+
node-version: 20
|
|
34
|
+
cache: npm
|
|
35
|
+
- run: npm ci
|
|
36
|
+
- run: npm run test:coverage
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
name: Publish to npm
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
publish:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
permissions:
|
|
11
|
+
contents: read
|
|
12
|
+
id-token: write
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
- uses: actions/setup-node@v4
|
|
16
|
+
with:
|
|
17
|
+
node-version: 20
|
|
18
|
+
registry-url: https://registry.npmjs.org
|
|
19
|
+
- run: npm ci
|
|
20
|
+
- run: npm run build
|
|
21
|
+
- run: npm publish --provenance --access public
|
|
22
|
+
env:
|
|
23
|
+
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
|
2
|
+
|
|
3
|
+
## Our Pledge
|
|
4
|
+
|
|
5
|
+
We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone.
|
|
6
|
+
|
|
7
|
+
## Our Standards
|
|
8
|
+
|
|
9
|
+
Examples of behavior that contributes to a positive environment:
|
|
10
|
+
- Demonstrating empathy and kindness toward other people
|
|
11
|
+
- Being respectful of differing opinions, viewpoints, and experiences
|
|
12
|
+
- Giving and gracefully accepting constructive feedback
|
|
13
|
+
- Accepting responsibility and apologizing to those affected by our mistakes
|
|
14
|
+
|
|
15
|
+
Examples of unacceptable behavior:
|
|
16
|
+
- The use of sexualized language or imagery
|
|
17
|
+
- Trolling, insulting or derogatory comments
|
|
18
|
+
- Public or private harassment
|
|
19
|
+
- Publishing others' private information without explicit permission
|
|
20
|
+
|
|
21
|
+
## Enforcement
|
|
22
|
+
|
|
23
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to **Theihtisham@outlook.com**.
|
|
24
|
+
|
|
25
|
+
## Attribution
|
|
26
|
+
|
|
27
|
+
This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 2.1.
|
package/CONTRIBUTING.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Contributing to Budget LLM
|
|
2
|
+
|
|
3
|
+
Thank you for your interest!
|
|
4
|
+
|
|
5
|
+
## Development Setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
git clone https://github.com/theihtisham/budget-llm.git
|
|
9
|
+
cd budget-llm
|
|
10
|
+
npm install
|
|
11
|
+
cp .env.example .env
|
|
12
|
+
npm run build
|
|
13
|
+
npm test
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Project Structure
|
|
17
|
+
|
|
18
|
+
```
|
|
19
|
+
src/
|
|
20
|
+
server.ts — Express server entry point
|
|
21
|
+
router.ts — Intelligent model routing
|
|
22
|
+
config.ts — Configuration management
|
|
23
|
+
database.ts — SQLite usage tracking
|
|
24
|
+
encryption.ts — API key encryption
|
|
25
|
+
rate-limiter.ts — Per-provider rate limiting
|
|
26
|
+
task-classifier.ts — Task complexity classification
|
|
27
|
+
tests/ — Vitest test suite
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## How to Contribute
|
|
31
|
+
|
|
32
|
+
1. Fork the repository
|
|
33
|
+
2. Create a feature branch: `git checkout -b feature/my-feature`
|
|
34
|
+
3. Make changes and add tests
|
|
35
|
+
4. Run checks: `npm run lint && npm test`
|
|
36
|
+
5. Commit with [conventional commits](https://www.conventionalcommits.org/)
|
|
37
|
+
6. Push and open a Pull Request
|
|
38
|
+
|
|
39
|
+
## License
|
|
40
|
+
|
|
41
|
+
By contributing, you agree that your contributions will be licensed under the MIT License.
|
package/Dockerfile
ADDED
package/README.md
CHANGED
|
@@ -1,293 +1,242 @@
|
|
|
1
|
-
|
|
1
|
+
<div align="center">
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
<img width="100%" height="180" src="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 960 180'%3E%3Cdefs%3E%3ClinearGradient id='g' x1='0%25' y1='0%25' x2='100%25' y2='100%25'%3E%3Cstop offset='0%25' stop-color='%23fbbf24'/%3E%3Cstop offset='100%25' stop-color='%23f472b6'/%3E%3C/linearGradient%3E%3C/defs%3E%3Crect width='960' height='180' fill='%230a0a1a' rx='16'/%3E%3Crect x='2' y='2' width='956' height='176' fill='none' stroke='url(%23g)' stroke-width='2' rx='15'/%3E%3Ctext x='480' y='75' text-anchor='middle' fill='white' font-family='system-ui' font-size='38' font-weight='bold'%3E%F0%9F%92%B0 Budget LLM%3C/text%3E%3Ctext x='480' y='115' text-anchor='middle' fill='%23a5a5c0' font-family='system-ui' font-size='18'%3ECut AI Costs by 60% %E2%80%94 Smart Model Routing Proxy%3C/text%3E%3Ctext x='480' y='148' text-anchor='middle' fill='%236b6b88' font-family='monospace' font-size='13'%3EExpress %C2%B7 OpenAI %C2%B7 Anthropic %C2%B7 Google %C2%B7 DeepSeek %C2%B7 SQLite %C2%B7 Auto-Classification%3C/text%3E%3C/svg%3E" alt="Budget LLM Banner"/>
|
|
4
4
|
|
|
5
|
-
[](https://github.com/theihtisham/budget-llm/actions/workflows/ci.yml)
|
|
6
|
+
[](https://www.npmjs.com/package/@theihtisham/budget-llm)
|
|
7
|
+
[](https://www.npmjs.com/package/@theihtisham/budget-llm)
|
|
8
|
+
[](https://github.com/theihtisham/budget-llm/stargazers)
|
|
9
|
+
[](https://opensource.org/licenses/MIT)
|
|
10
|
+
[](https://www.typescriptlang.org/)
|
|
9
11
|
|
|
10
|
-
|
|
12
|
+
**One API endpoint that automatically picks the cheapest LLM for every request.** Drops in front of your existing OpenAI SDK calls — routes simple tasks to cheap models, complex tasks to powerful ones. Tracks spending per API key with SQLite.
|
|
11
13
|
|
|
12
|
-
|
|
14
|
+
[Quick Start](#quick-start) · [How It Works](#architecture) · [Supported Providers](#supported-providers) · [Configuration](#configuration)
|
|
13
15
|
|
|
16
|
+
</div>
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Why Budget LLM?
|
|
21
|
+
|
|
22
|
+
Most LLM requests don't need GPT-4. Budget LLM classifies each request's complexity and routes it to the cheapest model that can handle it — automatically.
|
|
23
|
+
|
|
24
|
+
### What Makes This Different
|
|
25
|
+
|
|
26
|
+
| Feature | Budget LLM | LiteLLM | OpenRouter |
|
|
27
|
+
|---------|-----------|---------|------------|
|
|
28
|
+
| Cost Reduction | Up to 60% automatic | Manual model selection | Manual |
|
|
29
|
+
| Task Classification | Built-in complexity scoring | No | No |
|
|
30
|
+
| Per-Key Tracking | SQLite per API key spending | Limited | Dashboard only |
|
|
31
|
+
| Drop-In Proxy | Works with existing OpenAI SDK | Yes | Yes |
|
|
32
|
+
| Encryption | AES-256 API key encryption | No | No |
|
|
33
|
+
| Rate Limiting | Per-provider with backoff | Basic | Built-in |
|
|
34
|
+
| Self-Hosted | Yes, fully local | Yes | No |
|
|
35
|
+
| Zero Config | Works out of the box | Needs config | Needs account |
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Architecture
|
|
40
|
+
|
|
41
|
+
```mermaid
|
|
42
|
+
graph TD
|
|
43
|
+
subgraph "Client"
|
|
44
|
+
APP[Your Application<br/>OpenAI SDK]
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
subgraph "Budget LLM Proxy"
|
|
48
|
+
SRV[Express Server<br/>CORS + Helmet + Compression]
|
|
49
|
+
RTR[Smart Router<br/>Cost-Optimized Model Selection]
|
|
50
|
+
CLS[Task Classifier<br/>Complexity Scoring]
|
|
51
|
+
RL[Rate Limiter<br/>Per-Provider Throttling]
|
|
52
|
+
ENC[Encryption<br/>AES-256 Key Storage]
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
subgraph "LLM Providers"
|
|
56
|
+
OAI[OpenAI<br/>GPT-4o / GPT-4o-mini]
|
|
57
|
+
ANT[Anthropic<br/>Claude 4.6 / Haiku]
|
|
58
|
+
GGL[Google<br/>Gemini Pro / Flash]
|
|
59
|
+
DSP[DeepSeek<br/>Chat / Coder]
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
subgraph "Storage"
|
|
63
|
+
DB[SQLite DB<br/>Usage + Costs + Keys]
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
APP -->|OpenAI-compatible API| SRV
|
|
67
|
+
SRV --> CLS
|
|
68
|
+
CLS --> RTR
|
|
69
|
+
RTR --> RL
|
|
70
|
+
RL --> OAI
|
|
71
|
+
RL --> ANT
|
|
72
|
+
RL --> GGL
|
|
73
|
+
RL --> DSP
|
|
74
|
+
RTR --> DB
|
|
75
|
+
ENC --> DB
|
|
76
|
+
|
|
77
|
+
style SRV fill:#fbbf24,color:#000
|
|
78
|
+
style RTR fill:#f472b6,color:#fff
|
|
79
|
+
style CLS fill:#34d399,color:#fff
|
|
14
80
|
```
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
| | | |
|
|
36
|
-
v v v v
|
|
37
|
-
+--------+ +--------+ +--------+ +--------+
|
|
38
|
-
| OpenAI | |Anthropic| | Google | |DeepSeek|
|
|
39
|
-
+--------+ +--------+ +--------+ +--------+
|
|
40
|
-
| | | |
|
|
41
|
-
+-------------+------+------+-------------+
|
|
42
|
-
|
|
|
43
|
-
Cost Tracking
|
|
44
|
-
& Response Cache
|
|
45
|
-
|
|
|
46
|
-
v
|
|
47
|
-
SQLite Database
|
|
81
|
+
|
|
82
|
+
### Request Flow
|
|
83
|
+
|
|
84
|
+
```mermaid
|
|
85
|
+
sequenceDiagram
|
|
86
|
+
participant C as Client
|
|
87
|
+
participant S as Budget LLM
|
|
88
|
+
participant T as Classifier
|
|
89
|
+
participant R as Router
|
|
90
|
+
participant P as LLM Provider
|
|
91
|
+
|
|
92
|
+
C->>S: POST /v1/chat/completions
|
|
93
|
+
S->>T: Classify task complexity
|
|
94
|
+
T->>T: Score: simple(0-3), medium(4-6), complex(7-10)
|
|
95
|
+
T->>R: Complexity score + budget
|
|
96
|
+
R->>R: Select cheapest capable model
|
|
97
|
+
R->>P: Forward request
|
|
98
|
+
P-->>R: Response
|
|
99
|
+
R->>R: Log tokens + cost to SQLite
|
|
100
|
+
R-->>C: Response (OpenAI format)
|
|
48
101
|
```
|
|
49
102
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
- **Smart Routing** - Automatically classifies tasks (code, creative, reasoning, chat, etc.) and picks the cheapest model that can handle it well
|
|
53
|
-
- **Budget Limits** - Daily, monthly, and per-request cost caps with automatic enforcement
|
|
54
|
-
- **Fallback Chain** - If the primary provider fails, automatically tries the next best provider
|
|
55
|
-
- **Cost Tracking** - Per-request, per-day, per-model cost logging with SQLite storage
|
|
56
|
-
- **Response Caching** - Identical prompts return cached responses (free!) with configurable TTL
|
|
57
|
-
- **Rate Limiting** - Token bucket algorithm with per-IP limits
|
|
58
|
-
- **Real-time Dashboard** - Beautiful web UI showing costs, savings, usage, and budget status
|
|
59
|
-
- **OpenAI-Compatible** - Drop-in replacement for the OpenAI API. Just change the base URL.
|
|
60
|
-
- **Security** - API key encryption (AES-256-GCM), rate limiting, input validation, no prompt content logging
|
|
61
|
-
|
|
62
|
-
## Cost Comparison
|
|
63
|
-
|
|
64
|
-
BudgetLLM automatically picks the cheapest model for each task type. Here's how costs compare:
|
|
65
|
-
|
|
66
|
-
| Task Type | Default Route | Cost/1M Tokens | vs GPT-4 Turbo | Savings |
|
|
67
|
-
|-----------|--------------|----------------|-----------------|---------|
|
|
68
|
-
| Chat | Gemini 2.0 Flash | $0.10 / $0.40 | $10 / $30 | **97%** |
|
|
69
|
-
| Code | GPT-4o Mini | $0.15 / $0.60 | $10 / $30 | **98%** |
|
|
70
|
-
| Creative | GPT-4o | $2.50 / $10.00 | $10 / $30 | **67%** |
|
|
71
|
-
| Reasoning | DeepSeek R1 | $0.55 / $2.19 | $10 / $30 | **93%** |
|
|
72
|
-
| Summarization | Gemini 2.0 Flash | $0.10 / $0.40 | $10 / $30 | **97%** |
|
|
73
|
-
| Translation | Gemini 2.0 Flash Lite | $0.075 / $0.30 | $10 / $30 | **99%** |
|
|
74
|
-
| Math | DeepSeek R1 | $0.55 / $2.19 | $10 / $30 | **93%** |
|
|
75
|
-
| Analysis | DeepSeek V3 | $0.27 / $1.10 | $10 / $30 | **96%** |
|
|
76
|
-
|
|
77
|
-
### Model Catalog
|
|
78
|
-
|
|
79
|
-
| Model | Provider | Input/1M | Output/1M | Quality | Speed | Cost |
|
|
80
|
-
|-------|----------|----------|-----------|---------|-------|------|
|
|
81
|
-
| GPT-4o | OpenAI | $2.50 | $10.00 | 9/10 | 7/10 | 5/10 |
|
|
82
|
-
| GPT-4o Mini | OpenAI | $0.15 | $0.60 | 7/10 | 9/10 | 9/10 |
|
|
83
|
-
| GPT-4 Turbo | OpenAI | $10.00 | $30.00 | 9/10 | 6/10 | 3/10 |
|
|
84
|
-
| Claude Sonnet 4 | Anthropic | $3.00 | $15.00 | 9/10 | 7/10 | 5/10 |
|
|
85
|
-
| Claude 3.5 Haiku | Anthropic | $0.80 | $4.00 | 7/10 | 9/10 | 7/10 |
|
|
86
|
-
| Gemini 2.0 Flash | Google | $0.10 | $0.40 | 7/10 | 10/10 | 10/10 |
|
|
87
|
-
| Gemini 2.0 Flash Lite | Google | $0.075 | $0.30 | 6/10 | 10/10 | 10/10 |
|
|
88
|
-
| DeepSeek V3 | DeepSeek | $0.27 | $1.10 | 8/10 | 8/10 | 8/10 |
|
|
89
|
-
| DeepSeek R1 | DeepSeek | $0.55 | $2.19 | 9/10 | 5/10 | 7/10 |
|
|
103
|
+
---
|
|
90
104
|
|
|
91
105
|
## Quick Start
|
|
92
106
|
|
|
93
|
-
### 1. Install
|
|
94
|
-
|
|
95
107
|
```bash
|
|
96
|
-
|
|
108
|
+
# Install
|
|
109
|
+
npm install -g @theihtisham/budget-llm
|
|
110
|
+
|
|
111
|
+
# Or from source
|
|
112
|
+
git clone https://github.com/theihtisham/budget-llm.git
|
|
97
113
|
cd budget-llm
|
|
98
|
-
npm install
|
|
114
|
+
npm install && npm run build
|
|
99
115
|
```
|
|
100
116
|
|
|
101
|
-
###
|
|
117
|
+
### Configure
|
|
102
118
|
|
|
103
119
|
```bash
|
|
104
120
|
cp .env.example .env
|
|
105
121
|
```
|
|
106
122
|
|
|
107
|
-
Edit `.env
|
|
123
|
+
Edit `.env`:
|
|
108
124
|
|
|
109
125
|
```env
|
|
110
|
-
#
|
|
126
|
+
# At least one provider required
|
|
111
127
|
OPENAI_API_KEY=sk-...
|
|
112
128
|
ANTHROPIC_API_KEY=sk-ant-...
|
|
113
|
-
GOOGLE_API_KEY
|
|
114
|
-
DEEPSEEK_API_KEY
|
|
129
|
+
GOOGLE_API_KEY=...
|
|
130
|
+
DEEPSEEK_API_KEY=...
|
|
115
131
|
|
|
116
|
-
#
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
DEFAULT_PER_REQUEST_CAP=1.00
|
|
132
|
+
# Server
|
|
133
|
+
PORT=3000
|
|
134
|
+
ENCRYPTION_KEY=your-32-char-encryption-key-here
|
|
120
135
|
```
|
|
121
136
|
|
|
122
|
-
###
|
|
137
|
+
### Run
|
|
123
138
|
|
|
124
139
|
```bash
|
|
125
|
-
#
|
|
126
|
-
npm run dev
|
|
127
|
-
|
|
128
|
-
# Production
|
|
129
|
-
npm run build
|
|
140
|
+
# Start proxy
|
|
130
141
|
npm start
|
|
131
|
-
```
|
|
132
|
-
|
|
133
|
-
### 4. Use It
|
|
134
|
-
|
|
135
|
-
BudgetLLM is a drop-in replacement for the OpenAI API. Just change the base URL:
|
|
136
142
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
const openai = new OpenAI({
|
|
140
|
-
apiKey: 'sk-your-key',
|
|
141
|
-
baseURL: 'https://api.openai.com/v1',
|
|
142
|
-
});
|
|
143
|
+
# Point your OpenAI SDK at it
|
|
144
|
+
# base_url: http://localhost:3000/v1
|
|
143
145
|
```
|
|
144
146
|
|
|
145
|
-
|
|
146
|
-
```javascript
|
|
147
|
-
const openai = new OpenAI({
|
|
148
|
-
apiKey: 'anything', // BudgetLLM doesn't require a client key
|
|
149
|
-
baseURL: 'http://localhost:3210/v1',
|
|
150
|
-
});
|
|
151
|
-
```
|
|
152
|
-
|
|
153
|
-
That's it. Every request is now automatically optimized for cost.
|
|
147
|
+
### Use with OpenAI SDK
|
|
154
148
|
|
|
155
|
-
|
|
149
|
+
```python
|
|
150
|
+
from openai import OpenAI
|
|
156
151
|
|
|
157
|
-
|
|
152
|
+
client = OpenAI(
|
|
153
|
+
base_url="http://localhost:3000/v1",
|
|
154
|
+
api_key="your-budget-llm-key" # Any key for auth
|
|
155
|
+
)
|
|
158
156
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
task_type: 'code', // forces code-optimized routing
|
|
165
|
-
});
|
|
157
|
+
# Budget LLM auto-routes to cheapest capable model
|
|
158
|
+
response = client.chat.completions.create(
|
|
159
|
+
model="auto", # Magic: auto-selects cheapest
|
|
160
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
161
|
+
)
|
|
166
162
|
```
|
|
167
163
|
|
|
168
|
-
|
|
164
|
+
---
|
|
169
165
|
|
|
170
|
-
|
|
166
|
+
## Supported Providers
|
|
171
167
|
|
|
172
|
-
|
|
|
173
|
-
|
|
174
|
-
|
|
|
175
|
-
|
|
|
168
|
+
| Provider | Models | Cost Tier |
|
|
169
|
+
|----------|--------|-----------|
|
|
170
|
+
| **OpenAI** | GPT-4o, GPT-4o-mini, GPT-3.5-turbo | $$ / $ |
|
|
171
|
+
| **Anthropic** | Claude Opus 4.6, Sonnet 4.6, Haiku 4.5 | $$$ / $$ / $ |
|
|
172
|
+
| **Google** | Gemini Pro, Gemini Flash | $$ / $ |
|
|
173
|
+
| **DeepSeek** | DeepSeek Chat, DeepSeek Coder | $ |
|
|
176
174
|
|
|
177
|
-
###
|
|
175
|
+
### Task Classification
|
|
178
176
|
|
|
179
|
-
|
|
177
|
+
| Score | Complexity | Route To |
|
|
178
|
+
|-------|-----------|----------|
|
|
179
|
+
| 0-3 | Simple (greeting, formatting) | Cheapest model |
|
|
180
|
+
| 4-6 | Medium (summarization, basic code) | Mid-tier model |
|
|
181
|
+
| 7-10 | Complex (reasoning, advanced code) | Best available model |
|
|
180
182
|
|
|
181
|
-
|
|
182
|
-
|-------|------|-------------|
|
|
183
|
-
| `task_type` | string | Force routing: `code`, `creative`, `reasoning`, `chat`, `summarization`, `translation`, `analysis`, `math`, `auto` |
|
|
184
|
-
| `request_id` | string | Custom request ID for tracking |
|
|
185
|
-
| `budget_cap` | number | Per-request budget cap in USD |
|
|
183
|
+
---
|
|
186
184
|
|
|
187
|
-
|
|
185
|
+
## Configuration
|
|
188
186
|
|
|
189
|
-
|
|
190
|
-
{
|
|
191
|
-
"cost": {
|
|
192
|
-
"inputCost": 0.0000015,
|
|
193
|
-
"outputCost": 0.000006,
|
|
194
|
-
"totalCost": 0.0000075,
|
|
195
|
-
"currency": "USD",
|
|
196
|
-
"model": "gpt-4o-mini",
|
|
197
|
-
"provider": "openai",
|
|
198
|
-
"savingsVsGpt4": 0.00012
|
|
199
|
-
}
|
|
200
|
-
}
|
|
201
|
-
```
|
|
187
|
+
### Environment Variables
|
|
202
188
|
|
|
203
|
-
|
|
189
|
+
| Variable | Required | Description |
|
|
190
|
+
|----------|----------|-------------|
|
|
191
|
+
| `OPENAI_API_KEY` | Yes* | OpenAI API key |
|
|
192
|
+
| `ANTHROPIC_API_KEY` | Yes* | Anthropic API key |
|
|
193
|
+
| `GOOGLE_API_KEY` | Yes* | Google AI API key |
|
|
194
|
+
| `DEEPSEEK_API_KEY` | Yes* | DeepSeek API key |
|
|
195
|
+
| `PORT` | No | Server port (default: 3000) |
|
|
196
|
+
| `ENCRYPTION_KEY` | No | AES-256 key for API key storage |
|
|
197
|
+
| `DB_PATH` | No | SQLite database path |
|
|
204
198
|
|
|
205
|
-
|
|
206
|
-
|--------|----------|-------------|
|
|
207
|
-
| GET | `/health` | Health check |
|
|
208
|
-
| GET | `/api/dashboard` | Full dashboard data |
|
|
209
|
-
| GET | `/api/costs?days=30` | Cost summary |
|
|
210
|
-
| GET | `/api/budget` | Budget config and status |
|
|
211
|
-
| PUT | `/api/budget` | Update budget limits |
|
|
212
|
-
| GET | `/api/rate-limit` | Rate limit status |
|
|
213
|
-
| DELETE | `/api/cache` | Clear response cache |
|
|
214
|
-
| GET | `/dashboard` | Web dashboard UI |
|
|
199
|
+
*At least one provider API key required.
|
|
215
200
|
|
|
216
|
-
|
|
201
|
+
---
|
|
217
202
|
|
|
218
|
-
|
|
219
|
-
src/
|
|
220
|
-
server.ts # Express HTTP server with all routes
|
|
221
|
-
proxy.ts # Main request handler (orchestrates everything)
|
|
222
|
-
router.ts # Smart model routing engine
|
|
223
|
-
task-classifier.ts # Task type detection from prompts
|
|
224
|
-
providers.ts # Provider adapters (OpenAI, Anthropic, Google, DeepSeek)
|
|
225
|
-
database.ts # SQLite database layer
|
|
226
|
-
rate-limiter.ts # Token bucket rate limiter
|
|
227
|
-
config.ts # Configuration, model catalog, logging
|
|
228
|
-
types.ts # TypeScript type definitions
|
|
229
|
-
utils/
|
|
230
|
-
encryption.ts # AES-256-GCM encryption for API keys
|
|
231
|
-
```
|
|
203
|
+
## Docker
|
|
232
204
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
| Task Type | Quality Weight | Cost Weight | Speed Weight |
|
|
238
|
-
|-----------|---------------|-------------|--------------|
|
|
239
|
-
| Code | 40% | 45% | 15% |
|
|
240
|
-
| Creative | 70% | 15% | 15% |
|
|
241
|
-
| Reasoning | 65% | 25% | 10% |
|
|
242
|
-
| Chat | 20% | 60% | 20% |
|
|
243
|
-
| Summarization | 20% | 60% | 20% |
|
|
244
|
-
| Translation | 20% | 60% | 20% |
|
|
245
|
-
| Analysis | 50% | 35% | 15% |
|
|
246
|
-
|
|
247
|
-
Models also get a +20 score bonus if they declare the task type as a capability, and a -10 penalty otherwise.
|
|
248
|
-
|
|
249
|
-
### Fallback Chain
|
|
250
|
-
|
|
251
|
-
When the primary provider fails, BudgetLLM tries up to 3 alternative providers in priority order. This means your app stays up even if one provider has an outage.
|
|
252
|
-
|
|
253
|
-
### Caching
|
|
254
|
-
|
|
255
|
-
Identical prompts (same messages, model, temperature, max_tokens) are cached with a configurable TTL. Cached responses are served instantly with zero cost. The cache uses content-hash matching so even rephrased requests that happen to match the exact same parameters get the benefit.
|
|
256
|
-
|
|
257
|
-
## Security
|
|
205
|
+
```bash
|
|
206
|
+
docker build -t budget-llm .
|
|
207
|
+
docker run -p 3000:3000 --env-file .env budget-llm
|
|
208
|
+
```
|
|
258
209
|
|
|
259
|
-
|
|
260
|
-
- **Rate Limiting** - Token bucket algorithm prevents abuse
|
|
261
|
-
- **Input Validation** - Zod schema validation on all inputs
|
|
262
|
-
- **No Prompt Logging** - Request metadata is logged, but prompt content is never stored in logs
|
|
263
|
-
- **Helmet** - HTTP security headers via helmet middleware
|
|
264
|
-
- **Budget Enforcement** - Hard limits prevent runaway spending
|
|
210
|
+
---
|
|
265
211
|
|
|
266
212
|
## Development
|
|
267
213
|
|
|
268
214
|
```bash
|
|
269
|
-
# Install dependencies
|
|
270
215
|
npm install
|
|
271
|
-
|
|
272
|
-
# Run in development mode with hot reload
|
|
273
|
-
npm run dev
|
|
274
|
-
|
|
275
|
-
# Run tests
|
|
216
|
+
npm run build
|
|
276
217
|
npm test
|
|
218
|
+
npm run test:coverage
|
|
219
|
+
npm run dev # Watch mode with tsx
|
|
220
|
+
```
|
|
277
221
|
|
|
278
|
-
|
|
279
|
-
npm run test:watch
|
|
222
|
+
---
|
|
280
223
|
|
|
281
|
-
|
|
282
|
-
npm run test:coverage
|
|
224
|
+
## Trending Tags
|
|
283
225
|
|
|
284
|
-
|
|
285
|
-
npm run lint
|
|
226
|
+
`llm` `cost-optimization` `ai-proxy` `model-routing` `openai` `anthropic` `google-ai` `deepseek` `express` `typescript` `budget` `api-gateway` `smart-routing` `task-classification` `developer-tools` `cost-tracking`
|
|
286
227
|
|
|
287
|
-
|
|
288
|
-
npm run build
|
|
289
|
-
```
|
|
228
|
+
---
|
|
290
229
|
|
|
291
230
|
## License
|
|
292
231
|
|
|
293
|
-
[
|
|
232
|
+
MIT License — see [LICENSE](LICENSE) for details.
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
<div align="center">
|
|
237
|
+
|
|
238
|
+
**Built by [theihtisham](https://github.com/theihtisham)**
|
|
239
|
+
|
|
240
|
+
[GitHub](https://github.com/theihtisham) · [npm](https://www.npmjs.com/~theihtisham) · [Email](mailto:Theihtisham@outlook.com)
|
|
241
|
+
|
|
242
|
+
</div>
|
package/SECURITY.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
## Supported Versions
|
|
4
|
+
|
|
5
|
+
| Version | Supported |
|
|
6
|
+
| ------- | --------- |
|
|
7
|
+
| 1.x | Yes |
|
|
8
|
+
|
|
9
|
+
## Reporting a Vulnerability
|
|
10
|
+
|
|
11
|
+
If you discover a security vulnerability, please report it responsibly:
|
|
12
|
+
|
|
13
|
+
- **Email**: Theihtisham@outlook.com
|
|
14
|
+
- **Subject**: [Security] Vulnerability in budget-llm
|
|
15
|
+
|
|
16
|
+
Please include:
|
|
17
|
+
1. Description of the vulnerability
|
|
18
|
+
2. Steps to reproduce
|
|
19
|
+
3. Potential impact
|
|
20
|
+
4. Suggested fix (if any)
|
|
21
|
+
|
|
22
|
+
We aim to respond within 48 hours and patch critical issues within 7 days.
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@theihtisham/budget-llm",
|
|
3
|
-
"version": "1.
|
|
4
|
-
"description": "Cut your AI costs by 60%
|
|
3
|
+
"version": "1.1.0",
|
|
4
|
+
"description": "Cut your AI costs by 60% \u2014 one API endpoint that automatically picks the cheapest model for every request.",
|
|
5
5
|
"main": "dist/server.js",
|
|
6
6
|
"types": "dist/server.d.ts",
|
|
7
7
|
"scripts": {
|
|
@@ -25,8 +25,11 @@
|
|
|
25
25
|
"budget",
|
|
26
26
|
"routing"
|
|
27
27
|
],
|
|
28
|
-
"author": "",
|
|
28
|
+
"author": "theihtisham",
|
|
29
29
|
"license": "MIT",
|
|
30
|
+
"publishConfig": {
|
|
31
|
+
"access": "public"
|
|
32
|
+
},
|
|
30
33
|
"dependencies": {
|
|
31
34
|
"compression": "^1.7.5",
|
|
32
35
|
"cors": "^2.8.5",
|
|
@@ -53,11 +56,12 @@
|
|
|
53
56
|
"engines": {
|
|
54
57
|
"node": ">=18.0.0"
|
|
55
58
|
},
|
|
56
|
-
"publishConfig": {
|
|
57
|
-
"access": "public"
|
|
58
|
-
},
|
|
59
59
|
"repository": {
|
|
60
60
|
"type": "git",
|
|
61
|
-
"url": "https://github.com/theihtisham/budget-llm"
|
|
62
|
-
}
|
|
63
|
-
|
|
61
|
+
"url": "git+https://github.com/theihtisham/budget-llm.git"
|
|
62
|
+
},
|
|
63
|
+
"bugs": {
|
|
64
|
+
"url": "https://github.com/theihtisham/budget-llm/issues"
|
|
65
|
+
},
|
|
66
|
+
"homepage": "https://github.com/theihtisham/budget-llm#readme"
|
|
67
|
+
}
|