llmapi-v2 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. package/.env.example +40 -0
  2. package/Dockerfile +17 -0
  3. package/dist/config.d.ts +48 -0
  4. package/dist/config.js +98 -0
  5. package/dist/config.js.map +1 -0
  6. package/dist/converter/request.d.ts +6 -0
  7. package/dist/converter/request.js +184 -0
  8. package/dist/converter/request.js.map +1 -0
  9. package/dist/converter/response.d.ts +6 -0
  10. package/dist/converter/response.js +76 -0
  11. package/dist/converter/response.js.map +1 -0
  12. package/dist/converter/stream.d.ts +54 -0
  13. package/dist/converter/stream.js +318 -0
  14. package/dist/converter/stream.js.map +1 -0
  15. package/dist/converter/types.d.ts +239 -0
  16. package/dist/converter/types.js +6 -0
  17. package/dist/converter/types.js.map +1 -0
  18. package/dist/data/posts.d.ts +19 -0
  19. package/dist/data/posts.js +462 -0
  20. package/dist/data/posts.js.map +1 -0
  21. package/dist/index.d.ts +1 -0
  22. package/dist/index.js +233 -0
  23. package/dist/index.js.map +1 -0
  24. package/dist/middleware/api-key-auth.d.ts +6 -0
  25. package/dist/middleware/api-key-auth.js +76 -0
  26. package/dist/middleware/api-key-auth.js.map +1 -0
  27. package/dist/middleware/quota-guard.d.ts +10 -0
  28. package/dist/middleware/quota-guard.js +27 -0
  29. package/dist/middleware/quota-guard.js.map +1 -0
  30. package/dist/middleware/rate-limiter.d.ts +5 -0
  31. package/dist/middleware/rate-limiter.js +50 -0
  32. package/dist/middleware/rate-limiter.js.map +1 -0
  33. package/dist/middleware/request-logger.d.ts +6 -0
  34. package/dist/middleware/request-logger.js +37 -0
  35. package/dist/middleware/request-logger.js.map +1 -0
  36. package/dist/middleware/session-auth.d.ts +19 -0
  37. package/dist/middleware/session-auth.js +99 -0
  38. package/dist/middleware/session-auth.js.map +1 -0
  39. package/dist/providers/aliyun.d.ts +13 -0
  40. package/dist/providers/aliyun.js +20 -0
  41. package/dist/providers/aliyun.js.map +1 -0
  42. package/dist/providers/base-provider.d.ts +36 -0
  43. package/dist/providers/base-provider.js +133 -0
  44. package/dist/providers/base-provider.js.map +1 -0
  45. package/dist/providers/deepseek.d.ts +11 -0
  46. package/dist/providers/deepseek.js +18 -0
  47. package/dist/providers/deepseek.js.map +1 -0
  48. package/dist/providers/registry.d.ts +18 -0
  49. package/dist/providers/registry.js +98 -0
  50. package/dist/providers/registry.js.map +1 -0
  51. package/dist/providers/types.d.ts +17 -0
  52. package/dist/providers/types.js +3 -0
  53. package/dist/providers/types.js.map +1 -0
  54. package/dist/routes/admin.d.ts +1 -0
  55. package/dist/routes/admin.js +153 -0
  56. package/dist/routes/admin.js.map +1 -0
  57. package/dist/routes/auth.d.ts +2 -0
  58. package/dist/routes/auth.js +318 -0
  59. package/dist/routes/auth.js.map +1 -0
  60. package/dist/routes/blog.d.ts +1 -0
  61. package/dist/routes/blog.js +29 -0
  62. package/dist/routes/blog.js.map +1 -0
  63. package/dist/routes/dashboard.d.ts +1 -0
  64. package/dist/routes/dashboard.js +184 -0
  65. package/dist/routes/dashboard.js.map +1 -0
  66. package/dist/routes/messages.d.ts +1 -0
  67. package/dist/routes/messages.js +309 -0
  68. package/dist/routes/messages.js.map +1 -0
  69. package/dist/routes/models.d.ts +1 -0
  70. package/dist/routes/models.js +39 -0
  71. package/dist/routes/models.js.map +1 -0
  72. package/dist/routes/payment.d.ts +1 -0
  73. package/dist/routes/payment.js +150 -0
  74. package/dist/routes/payment.js.map +1 -0
  75. package/dist/routes/sitemap.d.ts +1 -0
  76. package/dist/routes/sitemap.js +38 -0
  77. package/dist/routes/sitemap.js.map +1 -0
  78. package/dist/services/alipay.d.ts +27 -0
  79. package/dist/services/alipay.js +106 -0
  80. package/dist/services/alipay.js.map +1 -0
  81. package/dist/services/database.d.ts +4 -0
  82. package/dist/services/database.js +170 -0
  83. package/dist/services/database.js.map +1 -0
  84. package/dist/services/health-checker.d.ts +13 -0
  85. package/dist/services/health-checker.js +95 -0
  86. package/dist/services/health-checker.js.map +1 -0
  87. package/dist/services/mailer.d.ts +3 -0
  88. package/dist/services/mailer.js +91 -0
  89. package/dist/services/mailer.js.map +1 -0
  90. package/dist/services/metrics.d.ts +56 -0
  91. package/dist/services/metrics.js +94 -0
  92. package/dist/services/metrics.js.map +1 -0
  93. package/dist/services/remote-control.d.ts +20 -0
  94. package/dist/services/remote-control.js +209 -0
  95. package/dist/services/remote-control.js.map +1 -0
  96. package/dist/services/remote-ws.d.ts +5 -0
  97. package/dist/services/remote-ws.js +143 -0
  98. package/dist/services/remote-ws.js.map +1 -0
  99. package/dist/services/usage.d.ts +13 -0
  100. package/dist/services/usage.js +39 -0
  101. package/dist/services/usage.js.map +1 -0
  102. package/dist/utils/errors.d.ts +27 -0
  103. package/dist/utils/errors.js +48 -0
  104. package/dist/utils/errors.js.map +1 -0
  105. package/dist/utils/logger.d.ts +2 -0
  106. package/dist/utils/logger.js +14 -0
  107. package/dist/utils/logger.js.map +1 -0
  108. package/docker-compose.yml +19 -0
  109. package/package.json +39 -0
  110. package/public/robots.txt +8 -0
  111. package/src/config.ts +140 -0
  112. package/src/converter/request.ts +207 -0
  113. package/src/converter/response.ts +85 -0
  114. package/src/converter/stream.ts +373 -0
  115. package/src/converter/types.ts +257 -0
  116. package/src/data/posts.ts +474 -0
  117. package/src/index.ts +219 -0
  118. package/src/middleware/api-key-auth.ts +82 -0
  119. package/src/middleware/quota-guard.ts +28 -0
  120. package/src/middleware/rate-limiter.ts +61 -0
  121. package/src/middleware/request-logger.ts +36 -0
  122. package/src/middleware/session-auth.ts +91 -0
  123. package/src/providers/aliyun.ts +16 -0
  124. package/src/providers/base-provider.ts +148 -0
  125. package/src/providers/deepseek.ts +14 -0
  126. package/src/providers/registry.ts +111 -0
  127. package/src/providers/types.ts +26 -0
  128. package/src/routes/admin.ts +169 -0
  129. package/src/routes/auth.ts +369 -0
  130. package/src/routes/blog.ts +28 -0
  131. package/src/routes/dashboard.ts +208 -0
  132. package/src/routes/messages.ts +346 -0
  133. package/src/routes/models.ts +37 -0
  134. package/src/routes/payment.ts +189 -0
  135. package/src/routes/sitemap.ts +40 -0
  136. package/src/services/alipay.ts +116 -0
  137. package/src/services/database.ts +187 -0
  138. package/src/services/health-checker.ts +115 -0
  139. package/src/services/mailer.ts +90 -0
  140. package/src/services/metrics.ts +104 -0
  141. package/src/services/remote-control.ts +226 -0
  142. package/src/services/remote-ws.ts +145 -0
  143. package/src/services/usage.ts +57 -0
  144. package/src/types/express.d.ts +46 -0
  145. package/src/utils/errors.ts +44 -0
  146. package/src/utils/logger.ts +8 -0
  147. package/tsconfig.json +17 -0
  148. package/views/pages/404.ejs +14 -0
  149. package/views/pages/admin.ejs +307 -0
  150. package/views/pages/blog-post.ejs +378 -0
  151. package/views/pages/blog.ejs +148 -0
  152. package/views/pages/dashboard.ejs +441 -0
  153. package/views/pages/docs.ejs +807 -0
  154. package/views/pages/index.ejs +416 -0
  155. package/views/pages/login.ejs +170 -0
  156. package/views/pages/orders.ejs +111 -0
  157. package/views/pages/pricing.ejs +379 -0
  158. package/views/pages/register.ejs +397 -0
  159. package/views/pages/remote.ejs +334 -0
  160. package/views/pages/settings.ejs +373 -0
  161. package/views/partials/header.ejs +70 -0
  162. package/views/partials/nav.ejs +140 -0
@@ -0,0 +1,474 @@
1
/**
 * A single blog post entry.
 * Instances live in the module-level `posts` array below and are looked up
 * by the accessor functions at the bottom of this file.
 */
export interface BlogPost {
  // URL-safe identifier; used as the lookup key in getPostBySlug.
  slug: string;
  title: string;
  // Short summary text (suitable for listings / meta description).
  description: string;
  content: string; // Markdown
  // Free-form category label; exact-match filtered by getPostsByCategory.
  category: string;
  // SEO keyword list attached to the post.
  keywords: string[];
  readTime: number; // minutes
  date: string; // YYYY-MM-DD — sortable lexicographically (see getRecentPosts)
  author: string;
}
12
+
13
// Static, hard-coded blog content served by the blog routes.
// NOTE(review): each `content` field is Markdown inside a template literal,
// so literal backticks are escaped as \` — preserve that escaping when editing.
export const posts: BlogPost[] = [
  {
    slug: 'claude-code-with-qwen-setup-guide',
    title: 'Claude Code with Alternative Models — Complete Setup Guide',
    description: 'How to use Claude Code with alternative models like Qwen, DeepSeek, and Gemini. Why DIY proxy setups fail and how LLM API gives you a turnkey solution in 2 minutes.',
    category: 'Tutorial',
    keywords: ['Claude Code', 'Qwen', 'DeepSeek', 'Claude Code alternative', 'cheap Claude Code', 'API setup'],
    readTime: 8,
    date: '2026-04-08',
    author: 'LLM API Team',
    content: `
## Claude Code Is Incredible — But the Bills Are Not

Claude Code is the best AI coding agent available today. Nobody disputes that. But at **$3/$15 per million tokens** (input/output) for Claude Sonnet 4.6, costs spiral fast. Developers routinely report spending **$500–2,000/month** on API usage alone. The Pro plan ($20/month) runs dry after roughly 45 minutes of heavy use. The Max plan ($200/month) helps, but it is still a hard sell for indie developers and small teams.

So naturally, people look for alternatives.

## The Alternative Model Landscape

Several powerful coding models exist today — Qwen3-Coder-Plus from Alibaba, DeepSeek V3.2, Gemini 2.5 Pro from Google — and many of them approach Claude-level quality for routine coding tasks at a fraction of the cost.

| Model | Input (per 1M tokens) | Output (per 1M tokens) | Approx. Quality |
|-------|----------------------|------------------------|-----------------|
| Claude Sonnet 4.6 | $3.00 | $15.00 | Baseline |
| Qwen3-Coder-Plus | ~$0.50 | ~$1.00 | ~87% of Claude |
| DeepSeek V3.2 | ~$0.14 | ~$0.28 | ~80% of Claude |

That is a **10–15x cost reduction** with minimal quality loss for most day-to-day coding.

## Why DIY Setup Is Painful

You might think: "I'll just point Claude Code at one of these APIs myself." In practice, this turns into a multi-day project:

- **LiteLLM compatibility breaks constantly.** Claude Code updates its CLI frequently, and each update risks breaking whatever proxy shim you built. GitHub issues are full of developers stuck on pinned versions.
- **Tool calling fails silently.** Qwen and DeepSeek handle tool calls differently from Anthropic's format. A small mismatch means your edit/write/bash tools appear to work but produce garbage or simply do nothing.
- **Model name mapping is fragile.** Claude Code sends \`claude-sonnet-4-6\` as the model name. DashScope expects \`qwen3-coder-plus\`. DeepSeek expects something else entirely. You end up writing and maintaining custom translation logic.
- **Streaming disconnects during long sessions.** A 30-minute refactoring session will hit timeout and reconnection issues that require custom retry logic on your proxy.
- **No automatic failover.** When one provider has an outage (and they all do), your setup just breaks until you manually intervene.

Most developers who try the DIY route spend 3–10 hours on setup, then another 2–4 hours per month on maintenance. That time has a cost.

## The Turnkey Solution: LLM API

[LLM API](https://llmapi.pro) handles all of this for you. The entire setup is two environment variables:

\`\`\`bash
# macOS / Linux
export ANTHROPIC_BASE_URL=https://llmapi.pro
export ANTHROPIC_API_KEY=sk-relay-your-key-here
\`\`\`

\`\`\`powershell
# Windows PowerShell
$env:ANTHROPIC_BASE_URL = "https://llmapi.pro"
$env:ANTHROPIC_API_KEY = "sk-relay-your-key-here"
\`\`\`

Then start Claude Code as normal:
\`\`\`bash
claude
\`\`\`

That is it. Model mapping, tool calling translation, streaming reliability, failover — all handled on our side. When Claude Code pushes an update, we patch compatibility the same day so you never notice.

## What Works

- Streaming responses
- All Claude Code built-in tools (Read, Write, Edit, Bash, Grep, Glob)
- Multi-turn conversations with tool results
- System prompts and extended thinking
- Multiple concurrent tool calls

## Frequently Asked Questions

**Q: Will Claude Code know it is not talking to the real Claude?**
A: No. LLM API returns responses in the exact Anthropic Messages API format. Claude Code treats it identically.

**Q: Can I switch back to official Claude anytime?**
A: Yes. Remove the \`ANTHROPIC_BASE_URL\` variable and you are back on official Anthropic instantly.

**Q: What models does LLM API use behind the scenes?**
A: We continuously evaluate and route to the best available coding models — including Qwen3-Coder-Plus and others — so you always get the best quality-to-cost ratio without lifting a finger.
`,
  },

  {
    slug: 'claude-code-cost-optimization-2026',
    title: 'Claude Code Cost Optimization: 5 Strategies to Cut Your AI Coding Bills by 90%',
    description: 'Practical strategies to reduce Claude Code API costs from $500+/month to under $50, including compatible API providers, usage optimization, and smart model routing.',
    category: 'Guide',
    keywords: ['Claude Code cost', 'Claude Code expensive', 'Claude Code pricing', 'reduce AI coding cost', 'Claude Code budget'],
    readTime: 10,
    date: '2026-04-06',
    author: 'LLM API Team',
    content: `
## The Claude Code Cost Problem

If you have been using Claude Code for serious development, you have experienced the bill shock. Anthropic's own data shows the average developer spends **$6/day** on Claude Code API — that is **~$180/month**. Heavy users report **$500–2,000/month**.

The Pro plan ($20/month) runs out after about 45 minutes of heavy use. The Max plan ($200/month) helps, but it is a hard sell for solo developers and small teams.

Here are 5 practical strategies to dramatically cut your costs.

## Strategy 1: Use a Compatible API Provider (Saves 80–95%)

This is the single most impactful change you can make. Claude Code supports any Anthropic-compatible API endpoint, which means you can swap the expensive backend without changing your workflow at all.

[LLM API](https://llmapi.pro) provides a fully compatible endpoint that routes your requests to high-quality coding models at a fraction of Anthropic's pricing:

\`\`\`bash
export ANTHROPIC_BASE_URL=https://llmapi.pro
export ANTHROPIC_API_KEY=your-key
\`\`\`

**Savings: $180/month to ~$15/month** for equivalent usage. All tool calling, streaming, and multi-turn conversations work identically.

Why not set up your own proxy to alternative models like Qwen or DeepSeek? Because maintaining API compatibility is a moving target. Claude Code updates frequently, tool calling formats differ between providers, and you will spend hours debugging silent failures. A managed provider absorbs that complexity for you.

## Strategy 2: Smart Model Routing (Saves 30–50% on Top)

Not every Claude Code request needs the most capable model. A smart routing layer can direct:
- **Complex coding tasks** (tool calls present, long context) to the best available coding model
- **Simple chat and questions** (no tools, short context) to a fast, cheap model

LLM API does this automatically. You do not need to configure anything — it analyzes each request and picks the optimal model.

## Strategy 3: Optimize Your Claude Code Usage Patterns

- **Be specific in prompts** — vague instructions cause more back-and-forth, which means more API calls
- **Use the \`/compact\` command** — compresses conversation context, reducing token usage on every subsequent turn
- **Avoid unnecessary file reads** — each tool call is a round-trip API request with tokens in both directions
- **Use \`.claudeignore\`** — prevent Claude Code from indexing large generated files, node_modules, or build artifacts

## Strategy 4: Set Spending Limits

Claude Code supports spending limits via environment variables:

\`\`\`bash
# Set a daily spending cap
export CLAUDE_CODE_MAX_DAILY_COST=5.00
\`\`\`

This prevents runaway costs during marathon coding sessions where you lose track of usage.

## Strategy 5: Use the Right Tool for the Job

Not everything needs Claude Code:
- **Simple autocomplete** — GitHub Copilot ($10/month) handles this well
- **Quick one-off questions** — ChatGPT or Claude free tier is sufficient
- **Full agentic coding** — Claude Code with a compatible provider like LLM API

Reserve Claude Code for the agentic workflows where it shines, and use lighter tools for everything else.

## Cost Summary

| Usage Level | Anthropic Direct | LLM API | Savings |
|------------|-----------------|---------|---------|
| Light (2M tokens/month) | $36 | ~$3 | 92% |
| Medium (10M tokens/month) | $180 | ~$15 | 92% |
| Heavy (50M tokens/month) | $900 | ~$75 | 92% |
| Team of 5 | $900+ | ~$49 (team plan) | 95% |

## Getting Started

The fastest path to cutting costs:
1. Sign up at [llmapi.pro](https://llmapi.pro) — free tier available, no credit card required
2. Set two environment variables
3. Keep using Claude Code exactly as before

Your workflow stays the same. Your bill drops by 90%.
`,
  },

  {
    slug: 'claude-code-alternatives-compared-2026',
    title: 'Claude Code Alternatives in 2026: Comprehensive Comparison (Gemini CLI, Aider, OpenCode)',
    description: 'Detailed comparison of Claude Code alternatives including Gemini CLI, Aider, OpenCode, and compatible API providers. Benchmarks, pricing, and real-world experience.',
    category: 'Comparison',
    keywords: ['Claude Code alternative', 'Gemini CLI vs Claude Code', 'Aider vs Claude Code', 'AI coding agent comparison', 'best AI coding tool 2026', 'Qwen', 'DeepSeek'],
    readTime: 12,
    date: '2026-04-04',
    author: 'LLM API Team',
    content: `
## The AI Coding Agent Landscape in 2026

Claude Code dominated 2025 with 83% market share among AI coding agents. But rising costs and usage limits have pushed developers to explore alternatives. Here is how the options compare — and why one approach stands above the rest.

## The Contenders

### 1. Claude Code (Official Anthropic)
- **Model:** Claude Sonnet 4.6 / Opus 4.6
- **Price:** $20/month (Pro, limited) or $100–200/month (Max) or API ($3–15/MTok)
- **Strengths:** Best-in-class tool use, agentic capabilities, code quality
- **Weaknesses:** Expensive, usage limits frustrate power users

### 2. Claude Code + LLM API (Best Value)
- **Model:** Best available coding models, automatically selected and routed
- **Price:** Starting at $5/month
- **Strengths:** Identical Claude Code CLI and workflow, 90% cheaper, full tool support, zero setup friction
- **Weaknesses:** Slightly lower quality on complex multi-file reasoning tasks

Behind the scenes, services like LLM API leverage powerful open-source and commercial coding models — including technologies like Qwen3-Coder-Plus and DeepSeek — to deliver high-quality results. You do not need to worry about which model is running; the provider handles selection, compatibility, and failover.

### 3. Gemini CLI
- **Model:** Gemini 2.5 Pro
- **Price:** Free (1,000 requests/day)
- **Strengths:** Free, massive 1M token context window, good code quality
- **Weaknesses:** Completely different CLI and workflow, no Claude Code compatibility, Google ecosystem lock-in

### 4. Aider
- **Model:** Any (GPT-4o, Claude, Gemini, local models)
- **Price:** Free (open source) + model API costs
- **Strengths:** Multi-model support, git-native, well-established community
- **Weaknesses:** Less polished agentic flow, steeper learning curve, different mental model

### 5. OpenCode
- **Model:** 75+ providers supported
- **Price:** Free (open source) + model API costs
- **Strengths:** Most flexible, community-driven, broad provider support
- **Weaknesses:** Newer project, smaller ecosystem, requires more configuration

## Benchmark Comparison

| Tool + Backend | SWE-bench Verified | Tool Calling | Monthly Cost (Medium Use) |
|---------------|-------------------|--------------|--------------------------|
| Claude Code (official) | 79.6% | Excellent | $100–200 |
| Claude Code + LLM API | ~69% | Very Good | $5–15 |
| Gemini CLI | ~65% | Good | Free |
| Aider + GPT-4o | ~50% | Good | $30–60 |
| Aider + Claude Sonnet | ~72% | Excellent | $100+ |

## The Key Insight: Keep the Best CLI, Optimize the Backend

Switching to a completely different tool (Gemini CLI, Aider, OpenCode) means relearning workflows, losing muscle memory, and dealing with different capabilities. The Claude Code agent framework — its tool calling, file operations, git integration, conversation management — is genuinely the best available.

The smart move is to keep Claude Code's framework and optimize what powers it.

\`\`\`bash
export ANTHROPIC_BASE_URL=https://llmapi.pro
export ANTHROPIC_API_KEY=your-key
claude
\`\`\`

Two environment variables. Same CLI. Same workflow. 90% lower cost.

## When to Choose What

- **Money is no object and you need peak quality** — Claude Code with official Anthropic API
- **You want Claude Code's workflow at 90% less cost** — Claude Code + LLM API
- **You want free and accept a different workflow** — Gemini CLI
- **You want maximum flexibility and enjoy configuration** — Aider or OpenCode

## Conclusion

The AI coding landscape in 2026 offers real choice. But Claude Code's agent framework remains the gold standard. The smartest optimization is keeping the framework you know and love while letting a compatible provider handle the expensive model layer.
`,
  },

  {
    slug: 'qwen-coder-vs-claude-sonnet-for-coding',
    title: 'AI Coding Model Quality Comparison: What Powers Your Claude Code?',
    description: 'How do alternative coding models compare to Claude Sonnet 4.6? Benchmarks, tool calling accuracy, and why you should let your provider handle model selection.',
    category: 'Comparison',
    keywords: ['Qwen vs Claude', 'Qwen3 Coder', 'Claude Sonnet 4.6', 'DeepSeek coding', 'AI coding model comparison', 'best coding model 2026'],
    readTime: 7,
    date: '2026-04-02',
    author: 'LLM API Team',
    content: `
## The Question Every Developer Asks

*"If I switch away from official Claude, how much quality am I actually losing?"*

It is a fair question. We run continuous benchmarks against real-world coding tasks so you do not have to. Here is what we have found.

## The Benchmark Picture

The coding model landscape in 2026 includes strong contenders from multiple providers — Alibaba's Qwen3-Coder-Plus, DeepSeek's V3.2, Google's Gemini 2.5 Pro, and others. We evaluate them all.

| Benchmark | Our Best Coding Model | Claude Sonnet 4.6 | Gap |
|-----------|----------------------|-------------------|-----|
| SWE-bench Verified | ~69.6% | 79.6% | -12.5% |
| HumanEval | 95%+ | 97%+ | -2% |
| Tool Calling Accuracy | 96.5% | 99%+ | -3% |

**The headline:** Our models reach **87% of Claude's capability** on hard coding benchmarks, and are nearly equal on everyday tasks. For most development work — writing functions, fixing bugs, running tests — you will not notice a difference.

## Where Alternative Models Match Claude

- **Routine code generation** — functions, classes, CRUD operations, boilerplate
- **Bug fixes** — reading error messages, tracing issues, applying targeted fixes
- **File operations** — reading, writing, and editing files via tool calls
- **Shell commands** — running tests, git operations, build scripts
- **Code review** — identifying issues, suggesting improvements
- **Documentation** — comments, docstrings, technical writing

This covers roughly **90% of what developers use Claude Code for** on a daily basis.

## Where Claude Still Leads

- **Complex multi-file refactoring** — Claude maintains context better across 10+ simultaneous file changes
- **Architectural reasoning** — system design decisions and trade-off analysis are noticeably stronger
- **Edge case handling** — Claude catches more corner cases in intricate logic
- **Very long sessions** — Claude's prompt caching helps maintain quality across extended conversations

These tasks matter, but they represent a small fraction of total usage for most developers.

## Why You Should Not Pick Models Yourself

The coding model landscape changes fast. A model that was best-in-class three months ago may be surpassed by a new release. Qwen, DeepSeek, and others ship updates frequently, and each update shifts the quality picture.

Trying to keep up with this yourself means:
- Monitoring benchmark releases across multiple providers
- Testing new models against your specific use cases
- Updating proxy configurations and model mappings
- Dealing with compatibility issues every time you switch

**We do this so you do not have to.** LLM API continuously evaluates the latest coding models and routes your requests to the best available option. When a new model outperforms the current one, we switch — transparently, with no action required on your end.

## Cost-Adjusted Value

The real question is not "which model is better" but "which gives more value per dollar":

| Task | Claude (cost) | LLM API (cost) | Quality Difference |
|------|--------------|----------------|-------------------|
| Simple function | $0.05 | $0.003 | None |
| Bug fix | $0.10 | $0.007 | Negligible |
| Full feature (50 tool calls) | $2.00 | $0.15 | Minor |
| Complex refactor (200 tool calls) | $8.00 | $0.60 | Noticeable |

For the price of one Claude session, you can run **13 sessions** through LLM API. Even if an occasional task needs a retry, the economics are overwhelmingly in your favor.

## Our Recommendation

Use LLM API as your daily driver. For the rare complex architectural task where you feel you need peak quality, you can always switch back to official Claude temporarily:

\`\`\`bash
# Daily driver — LLM API
export ANTHROPIC_BASE_URL=https://llmapi.pro
export ANTHROPIC_API_KEY=your-key

# For the occasional complex task — switch back
unset ANTHROPIC_BASE_URL
\`\`\`

Most developers find that LLM API handles **90%+ of their work** with no noticeable difference. Let us handle the model selection — you focus on building.
`,
  },

  {
    slug: 'self-hosted-claude-code-proxy-vs-managed-service',
    title: 'Self-Hosted Claude Code Proxy vs Managed Service: Which Should You Choose?',
    description: 'Why self-hosting LiteLLM or claude-code-router for Claude Code leads to endless maintenance. Real failure stories and why a managed service like LLM API saves time and money.',
    category: 'Guide',
    keywords: ['LiteLLM Claude Code', 'Claude Code proxy', 'self-hosted AI proxy', 'Claude Code setup', 'managed AI API', 'Qwen proxy', 'DeepSeek proxy'],
    readTime: 6,
    date: '2026-03-28',
    author: 'LLM API Team',
    content: `
## The DIY Temptation

You have seen the open-source tools: LiteLLM, claude-code-router, ccproxy. They are free. You are a developer. Why not self-host a proxy to route Claude Code through cheaper models like Qwen or DeepSeek?

Because "free" tools have a price measured in your time, frustration, and lost productivity. Here is what actually happens.

## Self-Hosting: The Reality

### Week 1: Setup (3–6 Hours)

You install LiteLLM, write a config to map Claude model names to your chosen provider, start the proxy, set your environment variables, and... it almost works. Tool calling is broken. You dig through GitHub issues, find a workaround involving a custom request transformer, and get it limping along.

### Week 2: The First Breakage

Claude Code pushes version 1.0.8. It changes how it sends tool results. Your proxy returns a 400 error on every tool call. You spend 2 hours finding the issue, pinning Claude Code to 1.0.7, and disabling auto-updates. You are now stuck on an old version.

### Month 2: Provider API Changes

DashScope updates their Anthropic-compatible endpoint. The response format for streaming changes subtly. Your long coding sessions now disconnect after 10 minutes. Another evening spent debugging.

### Month 3: The Silent Failures

This is the worst kind. Your proxy appears to work, but Qwen's tool calling format differs just enough that file edits occasionally produce malformed output. You do not notice until Claude Code has corrupted three files in your project. You spend a day recovering from git.

## Real Failures We Have Seen

These are from actual GitHub issues and community reports:

1. **LiteLLM v1.35 broke Anthropic streaming** — tool call deltas arrived out of order, causing Claude Code to hang mid-response. Fix took the LiteLLM team 4 days. Self-hosters were stuck the entire time.

2. **DeepSeek's tool_use format silently differs** — the \`input\` field uses a different JSON structure than Anthropic expects. Claude Code accepts it but the tools do not execute. Everything looks fine in the logs. Files just do not get written.

3. **Qwen model routing returns wrong stop reason** — \`end_turn\` vs \`stop\` vs \`tool_use\` mismatches cause Claude Code to either stop prematurely or enter infinite tool-calling loops.

4. **Certificate issues on corporate networks** — self-hosted proxies behind corporate firewalls need custom CA certs. LiteLLM's SSL handling has known issues with self-signed certificates, causing intermittent connection failures.

5. **Memory leaks on long-running proxy processes** — LiteLLM's proxy server gradually consumes more RAM over multi-day uptime. After 3–4 days, response times degrade and the proxy needs a restart.

## The Managed Alternative: 2 Minutes, Zero Maintenance

\`\`\`bash
export ANTHROPIC_BASE_URL=https://llmapi.pro
export ANTHROPIC_API_KEY=your-key
\`\`\`

Done. No server. No config files. No version pinning. No debugging streaming failures at 11 PM.

### What LLM API handles for you:
- Model name mapping — \`claude-sonnet-4-6\` routes to the best available coding model automatically
- Compatibility patches — when Claude Code updates, we patch the same day
- Smart routing — complex tasks get the best model, simple tasks get the cheapest
- Automatic failover — if one provider goes down, traffic shifts instantly
- Streaming reliability — connection management, retries, and keepalive handled server-side
- Usage tracking and billing — one dashboard, one bill

## The Real Cost Comparison

| | Self-Hosted | LLM API |
|---|---|---|
| API costs | ~$0.50/MTok | ~$0.50/MTok + small margin |
| Server costs | $5–20/month (VPS) | $0 |
| Setup time | 3–6 hours | 2 minutes |
| Monthly maintenance | 2–4 hours | 0 |
| Downtime risk | You are on call | Not your problem |
| Breakage from CLI updates | You fix it | We fix it |

If your time is worth $50/hour, self-hosting costs you **$150–300** in the first month in time alone. Every month after that adds another **$100–200** in maintenance.

## When Self-Hosting Makes Sense

Be honest with yourself. Self-host only if:
- You have strict air-gapped data sovereignty requirements
- You are running local models on your own hardware
- You genuinely enjoy infrastructure tinkering as a hobby
- You need highly custom routing logic that no service provides

For everyone else — and that is the vast majority of developers — a managed service is the rational choice.

## Try It Risk-Free

[Sign up for LLM API](https://llmapi.pro) — free tier available, no credit card required. Set two environment variables and get back to writing code. If it does not work for you, you can always go the self-hosted route later. But we think you will prefer spending your evenings on your actual project instead of debugging proxy configurations.
`,
  },
];
455
+
456
+ export function getPostBySlug(slug: string): BlogPost | undefined {
457
+ return posts.find(p => p.slug === slug);
458
+ }
459
+
460
+ export function getPostsByCategory(category: string): BlogPost[] {
461
+ return posts.filter(p => p.category === category);
462
+ }
463
+
464
+ export function getRecentPosts(limit = 10): BlogPost[] {
465
+ return [...posts].sort((a, b) => b.date.localeCompare(a.date)).slice(0, limit);
466
+ }
467
+
468
+ export function getAllCategories(): { name: string; count: number }[] {
469
+ const map = new Map<string, number>();
470
+ for (const p of posts) {
471
+ map.set(p.category, (map.get(p.category) || 0) + 1);
472
+ }
473
+ return Array.from(map.entries()).map(([name, count]) => ({ name, count }));
474
+ }