@theihtisham/budget-llm 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.editorconfig ADDED
@@ -0,0 +1,12 @@
1
+ root = true
2
+
3
+ [*]
4
+ charset = utf-8
5
+ end_of_line = lf
6
+ indent_style = space
7
+ indent_size = 2
8
+ insert_final_newline = true
9
+ trim_trailing_whitespace = true
10
+
11
+ [*.md]
12
+ trim_trailing_whitespace = false
@@ -0,0 +1,43 @@
1
+ name: Bug Report
2
+ description: Report a bug or unexpected behavior
3
+ labels: [bug]
4
+ body:
5
+ - type: textarea
6
+ id: description
7
+ attributes:
8
+ label: Bug Description
9
+ description: What happened?
10
+ placeholder: "When I run budget-llm..."
11
+ validations:
12
+ required: true
13
+
14
+ - type: textarea
15
+ id: steps
16
+ attributes:
17
+ label: Steps to Reproduce
18
+ description: How can we reproduce this?
19
+ placeholder: "1. Install...\n2. Configure...\n3. Run..."
20
+ validations:
21
+ required: true
22
+
23
+ - type: textarea
24
+ id: expected
25
+ attributes:
26
+ label: Expected Behavior
27
+ description: What should have happened?
28
+ validations:
29
+ required: true
30
+
31
+ - type: textarea
32
+ id: environment
33
+ attributes:
34
+ label: Environment
35
+ description: Node version, OS, etc.
36
+ placeholder: "Node 20.11.0, macOS 14, budget-llm 1.0.0"
37
+
38
+ - type: textarea
39
+ id: logs
40
+ attributes:
41
+ label: Relevant Logs
42
+ description: Paste any relevant log output
43
+ render: shell
@@ -0,0 +1,33 @@
1
+ name: Feature Request
2
+ description: Suggest a new feature or enhancement
3
+ labels: [enhancement]
4
+ body:
5
+ - type: textarea
6
+ id: problem
7
+ attributes:
8
+ label: Problem
9
+ description: What problem does this feature solve?
10
+ placeholder: "I'm frustrated when..."
11
+ validations:
12
+ required: true
13
+
14
+ - type: textarea
15
+ id: solution
16
+ attributes:
17
+ label: Proposed Solution
18
+ description: How should it work?
19
+ validations:
20
+ required: true
21
+
22
+ - type: dropdown
23
+ id: type
24
+ attributes:
25
+ label: Feature Type
26
+ options:
27
+ - New Tool/Resource
28
+ - Performance Improvement
29
+ - Developer Experience
30
+ - Documentation
31
+ - Other
32
+ validations:
33
+ required: true
@@ -0,0 +1,18 @@
1
+ ## Description
2
+
3
+ <!-- Brief description of changes -->
4
+
5
+ ## Type of Change
6
+
7
+ - [ ] Bug fix
8
+ - [ ] New feature
9
+ - [ ] Breaking change
10
+ - [ ] Documentation update
11
+ - [ ] Refactor / cleanup
12
+
13
+ ## Checklist
14
+
15
+ - [ ] Tests pass (`npm test`)
16
+ - [ ] Type checks pass (`npm run lint`)
17
+ - [ ] No secrets or credentials introduced
18
+ - [ ] Commit messages follow [conventional commits](https://www.conventionalcommits.org/)
@@ -0,0 +1,16 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: npm
4
+ directory: /
5
+ schedule:
6
+ interval: weekly
7
+ day: monday
8
+ open-pull-requests-limit: 5
9
+ commit-message:
10
+ prefix: chore
11
+ include: scope
12
+
13
+ - package-ecosystem: github-actions
14
+ directory: /
15
+ schedule:
16
+ interval: monthly
@@ -0,0 +1,36 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ build-and-test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ node-version: [18, 20, 22]
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - uses: actions/setup-node@v4
18
+ with:
19
+ node-version: ${{ matrix.node-version }}
20
+ cache: npm
21
+ - run: npm ci
22
+ - run: npm run lint
23
+ - run: npm test
24
+ - run: npm run build
25
+
26
+ coverage:
27
+ runs-on: ubuntu-latest
28
+ needs: build-and-test
29
+ steps:
30
+ - uses: actions/checkout@v4
31
+ - uses: actions/setup-node@v4
32
+ with:
33
+ node-version: 20
34
+ cache: npm
35
+ - run: npm ci
36
+ - run: npm run test:coverage
@@ -0,0 +1,23 @@
1
+ name: Publish to npm
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ publish:
9
+ runs-on: ubuntu-latest
10
+ permissions:
11
+ contents: read
12
+ id-token: write
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ - uses: actions/setup-node@v4
16
+ with:
17
+ node-version: 20
18
+ registry-url: https://registry.npmjs.org
19
+ - run: npm ci
20
+ - run: npm run build
21
+ - run: npm publish --provenance --access public
22
+ env:
23
+ NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
@@ -0,0 +1,27 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone.
6
+
7
+ ## Our Standards
8
+
9
+ Examples of behavior that contributes to a positive environment:
10
+ - Demonstrating empathy and kindness toward other people
11
+ - Being respectful of differing opinions, viewpoints, and experiences
12
+ - Giving and gracefully accepting constructive feedback
13
+ - Accepting responsibility and apologizing to those affected by our mistakes
14
+
15
+ Examples of unacceptable behavior:
16
+ - The use of sexualized language or imagery
17
+ - Trolling, insulting or derogatory comments
18
+ - Public or private harassment
19
+ - Publishing others' private information without explicit permission
20
+
21
+ ## Enforcement
22
+
23
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to **Theihtisham@outlook.com**.
24
+
25
+ ## Attribution
26
+
27
+ This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 2.1.
@@ -0,0 +1,41 @@
1
+ # Contributing to Budget LLM
2
+
3
+ Thank you for your interest!
4
+
5
+ ## Development Setup
6
+
7
+ ```bash
8
+ git clone https://github.com/theihtisham/budget-llm.git
9
+ cd budget-llm
10
+ npm install
11
+ cp .env.example .env
12
+ npm run build
13
+ npm test
14
+ ```
15
+
16
+ ## Project Structure
17
+
18
+ ```
19
+ src/
20
+ server.ts — Express server entry point
21
+ router.ts — Intelligent model routing
22
+ config.ts — Configuration management
23
+ database.ts — SQLite usage tracking
24
+ encryption.ts — API key encryption
25
+ rate-limiter.ts — Per-provider rate limiting
26
+ task-classifier.ts — Task complexity classification
27
+ tests/ — Vitest test suite
28
+ ```
29
+
30
+ ## How to Contribute
31
+
32
+ 1. Fork the repository
33
+ 2. Create a feature branch: `git checkout -b feature/my-feature`
34
+ 3. Make changes and add tests
35
+ 4. Run checks: `npm run lint && npm test`
36
+ 5. Commit with [conventional commits](https://www.conventionalcommits.org/)
37
+ 6. Push and open a Pull Request
38
+
39
+ ## License
40
+
41
+ By contributing, you agree that your contributions will be licensed under the MIT License.
package/Dockerfile ADDED
@@ -0,0 +1,9 @@
1
+ FROM node:20-slim
2
+
3
+ WORKDIR /app
4
+ COPY package*.json ./
5
+ RUN npm ci --production
6
+ COPY dist/ ./dist/
7
+
8
+ EXPOSE 3000
9
+ ENTRYPOINT ["node", "dist/server.js"]
package/README.md CHANGED
@@ -1,293 +1,242 @@
1
- # BudgetLLM
1
+ <div align="center">
2
2
 
3
- > Cut your AI costs by 60% one API endpoint that automatically picks the cheapest model for every request.
3
+ <img width="100%" height="180" src="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 960 180'%3E%3Cdefs%3E%3ClinearGradient id='g' x1='0%25' y1='0%25' x2='100%25' y2='100%25'%3E%3Cstop offset='0%25' stop-color='%23fbbf24'/%3E%3Cstop offset='100%25' stop-color='%23f472b6'/%3E%3C/linearGradient%3E%3C/defs%3E%3Crect width='960' height='180' fill='%230a0a1a' rx='16'/%3E%3Crect x='2' y='2' width='956' height='176' fill='none' stroke='url(%23g)' stroke-width='2' rx='15'/%3E%3Ctext x='480' y='75' text-anchor='middle' fill='white' font-family='system-ui' font-size='38' font-weight='bold'%3E%F0%9F%92%B0 Budget LLM%3C/text%3E%3Ctext x='480' y='115' text-anchor='middle' fill='%23a5a5c0' font-family='system-ui' font-size='18'%3ECut AI Costs by 60% %E2%80%94 Smart Model Routing Proxy%3C/text%3E%3Ctext x='480' y='148' text-anchor='middle' fill='%236b6b88' font-family='monospace' font-size='13'%3EExpress %C2%B7 OpenAI %C2%B7 Anthropic %C2%B7 Google %C2%B7 DeepSeek %C2%B7 SQLite %C2%B7 Auto-Classification%3C/text%3E%3C/svg%3E" alt="Budget LLM Banner"/>
4
4
 
5
- [![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
6
- [![Node.js](https://img.shields.io/badge/node-%3E%3D18-green.svg)](https://nodejs.org)
7
- [![TypeScript](https://img.shields.io/badge/TypeScript-Strict-3178C6.svg)](https://www.typescriptlang.org/)
8
- [![Vitest](https://img.shields.io/badge/tested%20with-Vitest-6E9F18.svg)](https://vitest.dev/)
5
+ [![CI](https://img.shields.io/github/actions/workflow/status/theihtisham/budget-llm/ci.yml?style=for-the-badge&label=CI)](https://github.com/theihtisham/budget-llm/actions/workflows/ci.yml)
6
+ [![npm version](https://img.shields.io/npm/v/@theihtisham/budget-llm.svg?style=for-the-badge&color=FF6D00)](https://www.npmjs.com/package/@theihtisham/budget-llm)
7
+ [![npm downloads](https://img.shields.io/npm/dt/@theihtisham/budget-llm?style=for-the-badge&color=0A9D2A)](https://www.npmjs.com/package/@theihtisham/budget-llm)
8
+ [![GitHub stars](https://img.shields.io/github/stars/theihtisham/budget-llm?style=for-the-badge&color=FFD700)](https://github.com/theihtisham/budget-llm/stargazers)
9
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg?style=for-the-badge)](https://opensource.org/licenses/MIT)
10
+ [![TypeScript](https://img.shields.io/badge/TypeScript-5.8-3178C6?style=for-the-badge&logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
9
11
 
10
- BudgetLLM is a smart multi-provider LLM cost optimizer and routing proxy. It sits between your application and LLM providers (OpenAI, Anthropic, Google, DeepSeek) and automatically routes each request to the cheapest capable model based on the task type.
12
+ **One API endpoint that automatically picks the cheapest LLM for every request.** Drops in front of your existing OpenAI SDK calls — routes simple tasks to cheap models, complex tasks to powerful ones. Tracks spending per API key with SQLite.
11
13
 
12
- ## How It Works
14
+ [Quick Start](#-quick-start) · [How It Works](#-how-it-works) · [Supported Providers](#-supported-providers) · [Configuration](#-configuration)
13
15
 
16
+ </div>
17
+
18
+ ---
19
+
20
+ ## Why Budget LLM?
21
+
22
+ Most LLM requests don't need GPT-4. Budget LLM classifies each request's complexity and routes it to the cheapest model that can handle it — automatically.
23
+
24
+ ### What Makes This Different
25
+
26
+ | Feature | Budget LLM | LiteLLM | OpenRouter |
27
+ |---------|-----------|---------|------------|
28
+ | Cost Reduction | Up to 60% automatic | Manual model selection | Manual |
29
+ | Task Classification | Built-in complexity scoring | No | No |
30
+ | Per-Key Tracking | SQLite per API key spending | Limited | Dashboard only |
31
+ | Drop-In Proxy | Works with existing OpenAI SDK | Yes | Yes |
32
+ | Encryption | AES-256 API key encryption | No | No |
33
+ | Rate Limiting | Per-provider with backoff | Basic | Built-in |
34
+ | Self-Hosted | Yes, fully local | Yes | No |
35
+ | Zero Config | Works out of the box | Needs config | Needs account |
36
+
37
+ ---
38
+
39
+ ## Architecture
40
+
41
+ ```mermaid
42
+ graph TD
43
+ subgraph "Client"
44
+ APP[Your Application<br/>OpenAI SDK]
45
+ end
46
+
47
+ subgraph "Budget LLM Proxy"
48
+ SRV[Express Server<br/>CORS + Helmet + Compression]
49
+ RTR[Smart Router<br/>Cost-Optimized Model Selection]
50
+ CLS[Task Classifier<br/>Complexity Scoring]
51
+ RL[Rate Limiter<br/>Per-Provider Throttling]
52
+ ENC[Encryption<br/>AES-256 Key Storage]
53
+ end
54
+
55
+ subgraph "LLM Providers"
56
+ OAI[OpenAI<br/>GPT-4o / GPT-4o-mini]
57
+ ANT[Anthropic<br/>Claude 4.6 / Haiku]
58
+ GGL[Google<br/>Gemini Pro / Flash]
59
+ DSP[DeepSeek<br/>Chat / Coder]
60
+ end
61
+
62
+ subgraph "Storage"
63
+ DB[SQLite DB<br/>Usage + Costs + Keys]
64
+ end
65
+
66
+ APP -->|OpenAI-compatible API| SRV
67
+ SRV --> CLS
68
+ CLS --> RTR
69
+ RTR --> RL
70
+ RL --> OAI
71
+ RL --> ANT
72
+ RL --> GGL
73
+ RL --> DSP
74
+ RTR --> DB
75
+ ENC --> DB
76
+
77
+ style SRV fill:#fbbf24,color:#000
78
+ style RTR fill:#f472b6,color:#fff
79
+ style CLS fill:#34d399,color:#fff
14
80
  ```
15
- Your App
16
- |
17
- | POST /v1/chat/completions (OpenAI-compatible)
18
- v
19
- +------------------+
20
- | BudgetLLM |
21
- | Proxy Server |
22
- +------------------+
23
- | | | |
24
- | Task | Budget | Cache | Rate Limit
25
- | Classification | Enforcement | Lookup | Check
26
- | | | |
27
- v v v v
28
- +----------------------------------------------------------+
29
- | Smart Router |
30
- | code tasks -> cheapest capable model |
31
- | creative tasks -> highest quality model |
32
- | chat tasks -> fastest cheapest model |
33
- | reasoning -> best reasoning model |
34
- +----------------------------------------------------------+
35
- | | | |
36
- v v v v
37
- +--------+ +--------+ +--------+ +--------+
38
- | OpenAI | |Anthropic| | Google | |DeepSeek|
39
- +--------+ +--------+ +--------+ +--------+
40
- | | | |
41
- +-------------+------+------+-------------+
42
- |
43
- Cost Tracking
44
- & Response Cache
45
- |
46
- v
47
- SQLite Database
81
+
82
+ ### Request Flow
83
+
84
+ ```mermaid
85
+ sequenceDiagram
86
+ participant C as Client
87
+ participant S as Budget LLM
88
+ participant T as Classifier
89
+ participant R as Router
90
+ participant P as LLM Provider
91
+
92
+ C->>S: POST /v1/chat/completions
93
+ S->>T: Classify task complexity
94
+ T->>T: Score: simple(0-3), medium(4-6), complex(7-10)
95
+ T->>R: Complexity score + budget
96
+ R->>R: Select cheapest capable model
97
+ R->>P: Forward request
98
+ P-->>R: Response
99
+ R->>R: Log tokens + cost to SQLite
100
+ R-->>C: Response (OpenAI format)
48
101
  ```
49
102
 
50
- ## Features
51
-
52
- - **Smart Routing** - Automatically classifies tasks (code, creative, reasoning, chat, etc.) and picks the cheapest model that can handle it well
53
- - **Budget Limits** - Daily, monthly, and per-request cost caps with automatic enforcement
54
- - **Fallback Chain** - If the primary provider fails, automatically tries the next best provider
55
- - **Cost Tracking** - Per-request, per-day, per-model cost logging with SQLite storage
56
- - **Response Caching** - Identical prompts return cached responses (free!) with configurable TTL
57
- - **Rate Limiting** - Token bucket algorithm with per-IP limits
58
- - **Real-time Dashboard** - Beautiful web UI showing costs, savings, usage, and budget status
59
- - **OpenAI-Compatible** - Drop-in replacement for the OpenAI API. Just change the base URL.
60
- - **Security** - API key encryption (AES-256-GCM), rate limiting, input validation, no prompt content logging
61
-
62
- ## Cost Comparison
63
-
64
- BudgetLLM automatically picks the cheapest model for each task type. Here's how costs compare:
65
-
66
- | Task Type | Default Route | Cost/1M Tokens | vs GPT-4 Turbo | Savings |
67
- |-----------|--------------|----------------|-----------------|---------|
68
- | Chat | Gemini 2.0 Flash | $0.10 / $0.40 | $10 / $30 | **97%** |
69
- | Code | GPT-4o Mini | $0.15 / $0.60 | $10 / $30 | **98%** |
70
- | Creative | GPT-4o | $2.50 / $10.00 | $10 / $30 | **67%** |
71
- | Reasoning | DeepSeek R1 | $0.55 / $2.19 | $10 / $30 | **93%** |
72
- | Summarization | Gemini 2.0 Flash | $0.10 / $0.40 | $10 / $30 | **97%** |
73
- | Translation | Gemini 2.0 Flash Lite | $0.075 / $0.30 | $10 / $30 | **99%** |
74
- | Math | DeepSeek R1 | $0.55 / $2.19 | $10 / $30 | **93%** |
75
- | Analysis | DeepSeek V3 | $0.27 / $1.10 | $10 / $30 | **96%** |
76
-
77
- ### Model Catalog
78
-
79
- | Model | Provider | Input/1M | Output/1M | Quality | Speed | Cost |
80
- |-------|----------|----------|-----------|---------|-------|------|
81
- | GPT-4o | OpenAI | $2.50 | $10.00 | 9/10 | 7/10 | 5/10 |
82
- | GPT-4o Mini | OpenAI | $0.15 | $0.60 | 7/10 | 9/10 | 9/10 |
83
- | GPT-4 Turbo | OpenAI | $10.00 | $30.00 | 9/10 | 6/10 | 3/10 |
84
- | Claude Sonnet 4 | Anthropic | $3.00 | $15.00 | 9/10 | 7/10 | 5/10 |
85
- | Claude 3.5 Haiku | Anthropic | $0.80 | $4.00 | 7/10 | 9/10 | 7/10 |
86
- | Gemini 2.0 Flash | Google | $0.10 | $0.40 | 7/10 | 10/10 | 10/10 |
87
- | Gemini 2.0 Flash Lite | Google | $0.075 | $0.30 | 6/10 | 10/10 | 10/10 |
88
- | DeepSeek V3 | DeepSeek | $0.27 | $1.10 | 8/10 | 8/10 | 8/10 |
89
- | DeepSeek R1 | DeepSeek | $0.55 | $2.19 | 9/10 | 5/10 | 7/10 |
103
+ ---
90
104
 
91
105
  ## Quick Start
92
106
 
93
- ### 1. Install
94
-
95
107
  ```bash
96
- git clone https://github.com/your-username/budget-llm.git
108
+ # Install
109
+ npm install -g @theihtisham/budget-llm
110
+
111
+ # Or from source
112
+ git clone https://github.com/theihtisham/budget-llm.git
97
113
  cd budget-llm
98
- npm install
114
+ npm install && npm run build
99
115
  ```
100
116
 
101
- ### 2. Configure
117
+ ### Configure
102
118
 
103
119
  ```bash
104
120
  cp .env.example .env
105
121
  ```
106
122
 
107
- Edit `.env` and add your API keys:
123
+ Edit `.env`:
108
124
 
109
125
  ```env
110
- # Add at least one provider API key
126
+ # At least one provider required
111
127
  OPENAI_API_KEY=sk-...
112
128
  ANTHROPIC_API_KEY=sk-ant-...
113
- GOOGLE_API_KEY=AIza...
114
- DEEPSEEK_API_KEY=sk-...
129
+ GOOGLE_API_KEY=...
130
+ DEEPSEEK_API_KEY=...
115
131
 
116
- # Set your budget limits
117
- DEFAULT_DAILY_BUDGET=10.00
118
- DEFAULT_MONTHLY_BUDGET=200.00
119
- DEFAULT_PER_REQUEST_CAP=1.00
132
+ # Server
133
+ PORT=3000
134
+ ENCRYPTION_KEY=your-32-char-encryption-key-here
120
135
  ```
121
136
 
122
- ### 3. Run
137
+ ### Run
123
138
 
124
139
  ```bash
125
- # Development
126
- npm run dev
127
-
128
- # Production
129
- npm run build
140
+ # Start proxy
130
141
  npm start
131
- ```
132
-
133
- ### 4. Use It
134
-
135
- BudgetLLM is a drop-in replacement for the OpenAI API. Just change the base URL:
136
142
 
137
- **Before (direct OpenAI):**
138
- ```javascript
139
- const openai = new OpenAI({
140
- apiKey: 'sk-your-key',
141
- baseURL: 'https://api.openai.com/v1',
142
- });
143
+ # Point your OpenAI SDK at it
144
+ # base_url: http://localhost:3000/v1
143
145
  ```
144
146
 
145
- **After (via BudgetLLM):**
146
- ```javascript
147
- const openai = new OpenAI({
148
- apiKey: 'anything', // BudgetLLM doesn't require a client key
149
- baseURL: 'http://localhost:3210/v1',
150
- });
151
- ```
152
-
153
- That's it. Every request is now automatically optimized for cost.
147
+ ### Use with OpenAI SDK
154
148
 
155
- ### 5. Optional: Task Type Hints
149
+ ```python
150
+ from openai import OpenAI
156
151
 
157
- Help BudgetLLM pick the best model by specifying a task type:
152
+ client = OpenAI(
153
+ base_url="http://localhost:3000/v1",
154
+ api_key="your-budget-llm-key" # Any key for auth
155
+ )
158
156
 
159
- ```javascript
160
- const response = await openai.chat.completions.create({
161
- model: 'auto', // let BudgetLLM decide
162
- messages: [{ role: 'user', content: 'Write a sort function' }],
163
- // BudgetLLM extension:
164
- task_type: 'code', // forces code-optimized routing
165
- });
157
+ # Budget LLM auto-routes to cheapest capable model
158
+ response = client.chat.completions.create(
159
+ model="auto", # Magic: auto-selects cheapest
160
+ messages=[{"role": "user", "content": "Hello!"}]
161
+ )
166
162
  ```
167
163
 
168
- ## API Reference
164
+ ---
169
165
 
170
- ### OpenAI-Compatible Endpoints
166
+ ## Supported Providers
171
167
 
172
- | Method | Endpoint | Description |
173
- |--------|----------|-------------|
174
- | POST | `/v1/chat/completions` | Chat completion (drop-in replacement) |
175
- | GET | `/v1/models` | List available models |
168
+ | Provider | Models | Cost Tier |
169
+ |----------|--------|-----------|
170
+ | **OpenAI** | GPT-4o, GPT-4o-mini, GPT-3.5-turbo | $$ / $ |
171
+ | **Anthropic** | Claude Opus 4.6, Sonnet 4.6, Haiku 4.5 | $$$ / $$ / $ |
172
+ | **Google** | Gemini Pro, Gemini Flash | $$ / $ |
173
+ | **DeepSeek** | DeepSeek Chat, DeepSeek Coder | $ |
176
174
 
177
- ### BudgetLLM Extensions
175
+ ### Task Classification
178
176
 
179
- The `/v1/chat/completions` endpoint accepts these additional fields:
177
+ | Score | Complexity | Route To |
178
+ |-------|-----------|----------|
179
+ | 0-3 | Simple (greeting, formatting) | Cheapest model |
180
+ | 4-6 | Medium (summarization, basic code) | Mid-tier model |
181
+ | 7-10 | Complex (reasoning, advanced code) | Best available model |
180
182
 
181
- | Field | Type | Description |
182
- |-------|------|-------------|
183
- | `task_type` | string | Force routing: `code`, `creative`, `reasoning`, `chat`, `summarization`, `translation`, `analysis`, `math`, `auto` |
184
- | `request_id` | string | Custom request ID for tracking |
185
- | `budget_cap` | number | Per-request budget cap in USD |
183
+ ---
186
184
 
187
- The response includes a `cost` field:
185
+ ## Configuration
188
186
 
189
- ```json
190
- {
191
- "cost": {
192
- "inputCost": 0.0000015,
193
- "outputCost": 0.000006,
194
- "totalCost": 0.0000075,
195
- "currency": "USD",
196
- "model": "gpt-4o-mini",
197
- "provider": "openai",
198
- "savingsVsGpt4": 0.00012
199
- }
200
- }
201
- ```
187
+ ### Environment Variables
202
188
 
203
- ### Management API
189
+ | Variable | Required | Description |
190
+ |----------|----------|-------------|
191
+ | `OPENAI_API_KEY` | Yes* | OpenAI API key |
192
+ | `ANTHROPIC_API_KEY` | Yes* | Anthropic API key |
193
+ | `GOOGLE_API_KEY` | No | Google AI API key |
194
+ | `DEEPSEEK_API_KEY` | No | DeepSeek API key |
195
+ | `PORT` | No | Server port (default: 3000) |
196
+ | `ENCRYPTION_KEY` | No | AES-256 key for API key storage |
197
+ | `DB_PATH` | No | SQLite database path |
204
198
 
205
- | Method | Endpoint | Description |
206
- |--------|----------|-------------|
207
- | GET | `/health` | Health check |
208
- | GET | `/api/dashboard` | Full dashboard data |
209
- | GET | `/api/costs?days=30` | Cost summary |
210
- | GET | `/api/budget` | Budget config and status |
211
- | PUT | `/api/budget` | Update budget limits |
212
- | GET | `/api/rate-limit` | Rate limit status |
213
- | DELETE | `/api/cache` | Clear response cache |
214
- | GET | `/dashboard` | Web dashboard UI |
199
+ *At least one provider API key required.
215
200
 
216
- ## Architecture
201
+ ---
217
202
 
218
- ```
219
- src/
220
- server.ts # Express HTTP server with all routes
221
- proxy.ts # Main request handler (orchestrates everything)
222
- router.ts # Smart model routing engine
223
- task-classifier.ts # Task type detection from prompts
224
- providers.ts # Provider adapters (OpenAI, Anthropic, Google, DeepSeek)
225
- database.ts # SQLite database layer
226
- rate-limiter.ts # Token bucket rate limiter
227
- config.ts # Configuration, model catalog, logging
228
- types.ts # TypeScript type definitions
229
- utils/
230
- encryption.ts # AES-256-GCM encryption for API keys
231
- ```
203
+ ## Docker
232
204
 
233
- ### Routing Algorithm
234
-
235
- BudgetLLM scores each model using weighted criteria that vary by task type:
236
-
237
- | Task Type | Quality Weight | Cost Weight | Speed Weight |
238
- |-----------|---------------|-------------|--------------|
239
- | Code | 40% | 45% | 15% |
240
- | Creative | 70% | 15% | 15% |
241
- | Reasoning | 65% | 25% | 10% |
242
- | Chat | 20% | 60% | 20% |
243
- | Summarization | 20% | 60% | 20% |
244
- | Translation | 20% | 60% | 20% |
245
- | Analysis | 50% | 35% | 15% |
246
-
247
- Models also get a +20 score bonus if they declare the task type as a capability, and a -10 penalty otherwise.
248
-
249
- ### Fallback Chain
250
-
251
- When the primary provider fails, BudgetLLM tries up to 3 alternative providers in priority order. This means your app stays up even if one provider has an outage.
252
-
253
- ### Caching
254
-
255
- Identical prompts (same messages, model, temperature, max_tokens) are cached with a configurable TTL. Cached responses are served instantly with zero cost. The cache uses content-hash matching so even rephrased requests that happen to match the exact same parameters get the benefit.
256
-
257
- ## Security
205
+ ```bash
206
+ docker build -t budget-llm .
207
+ docker run -p 3000:3000 --env-file .env budget-llm
208
+ ```
258
209
 
259
- - **API Key Encryption** - Stored keys are encrypted with AES-256-GCM
260
- - **Rate Limiting** - Token bucket algorithm prevents abuse
261
- - **Input Validation** - Zod schema validation on all inputs
262
- - **No Prompt Logging** - Request metadata is logged, but prompt content is never stored in logs
263
- - **Helmet** - HTTP security headers via helmet middleware
264
- - **Budget Enforcement** - Hard limits prevent runaway spending
210
+ ---
265
211
 
266
212
  ## Development
267
213
 
268
214
  ```bash
269
- # Install dependencies
270
215
  npm install
271
-
272
- # Run in development mode with hot reload
273
- npm run dev
274
-
275
- # Run tests
216
+ npm run build
276
217
  npm test
218
+ npm run test:coverage
219
+ npm run dev # Watch mode with tsx
220
+ ```
277
221
 
278
- # Run tests in watch mode
279
- npm run test:watch
222
+ ---
280
223
 
281
- # Generate coverage report
282
- npm run test:coverage
224
+ ## Trending Tags
283
225
 
284
- # Type check
285
- npm run lint
226
+ `llm` `cost-optimization` `ai-proxy` `model-routing` `openai` `anthropic` `google-ai` `deepseek` `express` `typescript` `budget` `api-gateway` `smart-routing` `task-classification` `developer-tools` `cost-tracking`
286
227
 
287
- # Build for production
288
- npm run build
289
- ```
228
+ ---
290
229
 
291
230
  ## License
292
231
 
293
- [MIT](LICENSE)
232
+ MIT License — see [LICENSE](LICENSE) for details.
233
+
234
+ ---
235
+
236
+ <div align="center">
237
+
238
+ **Built by [theihtisham](https://github.com/theihtisham)**
239
+
240
+ [GitHub](https://github.com/theihtisham) · [npm](https://www.npmjs.com/~theihtisham) · [Email](mailto:Theihtisham@outlook.com)
241
+
242
+ </div>
package/SECURITY.md ADDED
@@ -0,0 +1,22 @@
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ | Version | Supported |
6
+ | ------- | --------- |
7
+ | 1.x | Yes |
8
+
9
+ ## Reporting a Vulnerability
10
+
11
+ If you discover a security vulnerability, please report it responsibly:
12
+
13
+ - **Email**: Theihtisham@outlook.com
14
+ - **Subject**: [Security] Vulnerability in ai-agent-memory
15
+
16
+ Please include:
17
+ 1. Description of the vulnerability
18
+ 2. Steps to reproduce
19
+ 3. Potential impact
20
+ 4. Suggested fix (if any)
21
+
22
+ We aim to respond within 48 hours and patch critical issues within 7 days.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@theihtisham/budget-llm",
3
- "version": "1.0.0",
4
- "description": "Cut your AI costs by 60% one API endpoint that automatically picks the cheapest model for every request.",
3
+ "version": "1.1.0",
4
+ "description": "Cut your AI costs by 60% \u2014 one API endpoint that automatically picks the cheapest model for every request.",
5
5
  "main": "dist/server.js",
6
6
  "types": "dist/server.d.ts",
7
7
  "scripts": {
@@ -25,8 +25,11 @@
25
25
  "budget",
26
26
  "routing"
27
27
  ],
28
- "author": "",
28
+ "author": "theihtisham",
29
29
  "license": "MIT",
30
+ "publishConfig": {
31
+ "access": "public"
32
+ },
30
33
  "dependencies": {
31
34
  "compression": "^1.7.5",
32
35
  "cors": "^2.8.5",
@@ -53,11 +56,12 @@
53
56
  "engines": {
54
57
  "node": ">=18.0.0"
55
58
  },
56
- "publishConfig": {
57
- "access": "public"
58
- },
59
59
  "repository": {
60
60
  "type": "git",
61
- "url": "https://github.com/theihtisham/budget-llm"
62
- }
63
- }
61
+ "url": "git+https://github.com/theihtisham/budget-llm.git"
62
+ },
63
+ "bugs": {
64
+ "url": "https://github.com/theihtisham/budget-llm/issues"
65
+ },
66
+ "homepage": "https://github.com/theihtisham/budget-llm#readme"
67
+ }