freellmpool 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. freellmpool-0.3.0/.env.example +61 -0
  2. freellmpool-0.3.0/.github/ISSUE_TEMPLATE/add-provider.md +23 -0
  3. freellmpool-0.3.0/.github/workflows/ci.yml +29 -0
  4. freellmpool-0.3.0/.gitignore +31 -0
  5. freellmpool-0.3.0/CHANGELOG.md +74 -0
  6. freellmpool-0.3.0/CONTRIBUTING.md +59 -0
  7. freellmpool-0.3.0/LICENSE +21 -0
  8. freellmpool-0.3.0/PKG-INFO +251 -0
  9. freellmpool-0.3.0/README.md +224 -0
  10. freellmpool-0.3.0/docs/ACCOUNTS.md +114 -0
  11. freellmpool-0.3.0/docs/AGENTS.md +90 -0
  12. freellmpool-0.3.0/docs/ARCHITECTURE.md +71 -0
  13. freellmpool-0.3.0/examples/agent_openai_sdk.py +38 -0
  14. freellmpool-0.3.0/pyproject.toml +73 -0
  15. freellmpool-0.3.0/src/freellmpool/__init__.py +27 -0
  16. freellmpool-0.3.0/src/freellmpool/__main__.py +4 -0
  17. freellmpool-0.3.0/src/freellmpool/cli.py +239 -0
  18. freellmpool-0.3.0/src/freellmpool/client.py +249 -0
  19. freellmpool-0.3.0/src/freellmpool/config.py +81 -0
  20. freellmpool-0.3.0/src/freellmpool/errors.py +37 -0
  21. freellmpool-0.3.0/src/freellmpool/models.py +76 -0
  22. freellmpool-0.3.0/src/freellmpool/providers.toml +231 -0
  23. freellmpool-0.3.0/src/freellmpool/proxy.py +363 -0
  24. freellmpool-0.3.0/src/freellmpool/quota.py +101 -0
  25. freellmpool-0.3.0/src/freellmpool/router.py +215 -0
  26. freellmpool-0.3.0/tests/conftest.py +59 -0
  27. freellmpool-0.3.0/tests/helpers.py +47 -0
  28. freellmpool-0.3.0/tests/test_cli.py +17 -0
  29. freellmpool-0.3.0/tests/test_client.py +63 -0
  30. freellmpool-0.3.0/tests/test_config.py +63 -0
  31. freellmpool-0.3.0/tests/test_proxy.py +201 -0
  32. freellmpool-0.3.0/tests/test_quota.py +47 -0
  33. freellmpool-0.3.0/tests/test_router.py +139 -0
@@ -0,0 +1,61 @@
1
+ # freellmpool — API keys for free-tier providers.
2
+ #
3
+ # Copy this file to `.env` (which is gitignored) and fill in the keys for
4
+ # whichever providers you want to use. You do NOT need all of them — freellmpool
5
+ # uses whatever you have configured and skips the rest. Every key below is for
6
+ # a FREE tier; no credit card is required for any of these.
7
+ #
8
+ # freellmpool reads these from the environment. Either `export` them in your
9
+ # shell, or use a tool like `direnv` / `python-dotenv`, or pass them inline.
10
+ # freellmpool itself does not parse this file — it only reads the environment.
11
+
12
+ # Groq — https://console.groq.com/keys (free, very fast)
13
+ GROQ_API_KEY=
14
+
15
+ # Cerebras — https://cloud.cerebras.ai (free, very fast)
16
+ CEREBRAS_API_KEY=
17
+
18
+ # OpenRouter — https://openrouter.ai/keys (many ':free' models)
19
+ OPENROUTER_API_KEY=
20
+
21
+ # Google Gemini — https://aistudio.google.com/apikey (generous free tier)
22
+ GEMINI_API_KEY=
23
+
24
+ # GitHub Models — https://github.com/settings/tokens (any PAT works; free)
25
+ GITHUB_TOKEN=
26
+
27
+ # Cloudflare Workers AI — https://dash.cloudflare.com/profile/api-tokens
28
+ # Needs both the token and your account id.
29
+ CLOUDFLARE_API_TOKEN=
30
+ CLOUDFLARE_ACCOUNT_ID=
31
+
32
+ # Mistral — https://console.mistral.ai/api-keys (free tier)
33
+ MISTRAL_API_KEY=
34
+
35
+ # Cohere — https://dashboard.cohere.com/api-keys (free trial keys)
36
+ COHERE_API_KEY=
37
+
38
+ # SambaNova — https://cloud.sambanova.ai/apis (free tier)
39
+ SAMBANOVA_API_KEY=
40
+
41
+ # NVIDIA NIM — https://build.nvidia.com (free credits, huge model catalog)
42
+ NVIDIA_API_KEY=
43
+
44
+ # Z.ai / Zhipu GLM — https://z.ai (free GLM flash models)
45
+ ZHIPU_API_KEY=
46
+
47
+ # Ollama Cloud — https://ollama.com/settings/keys
48
+ OLLAMA_API_KEY=
49
+
50
+ # LongCat (Meituan) — https://longcat.chat
51
+ LONGCAT_API_KEY=
52
+
53
+ # ---------------------------------------------------------------------------
54
+ # Zero-setup providers (no key needed — included automatically):
55
+ # • Pollinations and OVHcloud AI Endpoints (anonymous/keyless)
56
+ # • LLM7 (works without a key; set LLM7_API_KEY for higher limits)
57
+ # So freellmpool works out of the box even with this whole file empty.
58
+ # ---------------------------------------------------------------------------
59
+
60
+ # LLM7 — optional key for higher limits: https://token.llm7.io
61
+ LLM7_API_KEY=
@@ -0,0 +1,23 @@
1
+ ---
2
+ name: Add a free provider
3
+ about: Propose adding a new free-tier LLM provider to the catalog
4
+ title: "Add provider: <name>"
5
+ labels: ["good first issue", "provider"]
6
+ ---
7
+
8
+ **Provider:** <name + homepage>
9
+
10
+ **Free tier?** (link to the free-tier docs — must be usable without a credit card)
11
+
12
+ **OpenAI-compatible?** (does it expose `/v1/chat/completions`? If yes, this is a
13
+ one-block PR to `src/freellmpool/providers.toml`. If no, it needs a small adapter.)
14
+
15
+ **Base URL:**
16
+
17
+ **Models to include** (name + free daily request limit if known):
18
+ -
19
+
20
+ **Env var for the API key:** `<PROVIDER>_API_KEY`
21
+
22
+ See [CONTRIBUTING.md](../../CONTRIBUTING.md) for the (small) steps. New providers
23
+ are the most valuable contribution to freellmpool — thank you!
@@ -0,0 +1,29 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ python-version: ["3.11", "3.12", "3.13"]
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ - name: Set up Python ${{ matrix.python-version }}
19
+ uses: actions/setup-python@v5
20
+ with:
21
+ python-version: ${{ matrix.python-version }}
22
+ - name: Install
23
+ run: |
24
+ python -m pip install --upgrade pip
25
+ pip install -e ".[dev]"
26
+ - name: Lint
27
+ run: ruff check src tests
28
+ - name: Test
29
+ run: pytest
@@ -0,0 +1,31 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .eggs/
6
+ build/
7
+ dist/
8
+ .venv/
9
+ venv/
10
+ env/
11
+
12
+ # Tooling
13
+ .pytest_cache/
14
+ .ruff_cache/
15
+ .mypy_cache/
16
+ .coverage
17
+ htmlcov/
18
+
19
+ # Secrets & local state — never commit these
20
+ .env
21
+ .env.*
22
+ !.env.example
23
+ *.key
24
+ **/api_key
25
+ .freellmpool/
26
+ freellmpool-state/
27
+
28
+ # Editor / OS
29
+ .idea/
30
+ .vscode/
31
+ .DS_Store
@@ -0,0 +1,74 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here. The format is based on
4
+ [Keep a Changelog](https://keepachangelog.com/), and the project aims to follow
5
+ [Semantic Versioning](https://semver.org/).
6
+
7
+ ## [0.3.0] — 2026-06-03
8
+
9
+ ### Changed
10
+ - **Renamed `llmbuffet` → `freellmpool`** (clearer, keyword-rich, no name
11
+ collision). Python API is now `from freellmpool import Pool`; CLI is
12
+ `freellmpool` (with `ffp` as a short alias); config lives under
13
+ `~/.config/freellmpool/`; env vars are `FREELLMPOOL_*`.
14
+
15
+ ### Added
16
+ - **Codex / Responses API shim** — the proxy now serves `POST /v1/responses`
17
+ (non-streaming + typed SSE events), so OpenAI Codex CLI and other
18
+ Responses-based agents can run on pooled free inference.
19
+ - **Pollinations** — a second keyless provider (16 providers / 56 models total),
20
+ strengthening the zero-config path.
21
+ - Agent docs for Codex CLI in `docs/AGENTS.md`; honest **Limitations** section
22
+ in the README.
23
+
24
+ ## [0.2.0] — 2026-06-03
25
+
26
+ ### Added
27
+ - **Six more providers** (15 total / 53 models): NVIDIA NIM, OVHcloud AI
28
+ Endpoints, LLM7, Ollama Cloud, Z.ai/Zhipu GLM, LongCat; expanded model lists
29
+ for Groq, Cerebras, OpenRouter, GitHub Models, SambaNova, Mistral, Gemini.
30
+ - **Keyless / zero-setup providers.** OVHcloud works with no API key
31
+ (anonymous); LLM7's key is optional. `pip install freellmpool && freellmpool ask`
32
+ now works with no signup at all. Catalog gains `auth` and `key_optional`.
33
+ - **Model selection.** New `freellmpool models` lists every `provider/model` id;
34
+ `ask -m provider/model` pins an exact model on an exact provider.
35
+ - **Streaming proxy.** The proxy honors `stream: true` with a buffered
36
+ OpenAI-style SSE stream, so stream-only clients (chat UIs, agents) work.
37
+ - **429 cooldown.** A rate-limited provider is deprioritized for a cooldown
38
+ window instead of being retried immediately.
39
+ - **Reasoning-model handling.** Thinking models get a `max_tokens` floor and
40
+ `<think>…</think>` blocks are stripped from output.
41
+ - `freellmpool ask --json` requests JSON and strips code fences.
42
+
43
+ ### Hardening (post-review)
44
+ - Proxy now validates all request fields and returns OpenAI-style `400`s for
45
+ malformed input; a catch-all ensures no request can kill a server thread.
46
+ - Optional proxy auth: `--api-key` / `FREELLMPOOL_PROXY_KEY` requires a Bearer
47
+ token; a warning fires when binding to a non-loopback host without one.
48
+ - Quota store is now thread-safe (lock + unique temp file) and best-effort, so
49
+ a persistence hiccup can't abort a successful completion.
50
+ - A provider that returns `429` has its remaining models skipped for that
51
+ request; cooldowns update under a lock with `max()`.
52
+ - Verified live against 11 providers + the OpenAI SDK (non-streaming & SSE).
53
+ Fixed the LongCat model id (`LongCat-2.0-Preview`); LLM7 leads the keyless
54
+ pool (most reliable zero-key provider).
55
+
56
+ ## [0.1.0] — 2026-06-02
57
+
58
+ Initial release.
59
+
60
+ ### Added
61
+ - Provider catalog (`providers.toml`) covering 9 free-tier providers and 24
62
+ models: Groq, Cerebras, OpenRouter, Google Gemini, GitHub Models, Cloudflare
63
+ Workers AI, Mistral, Cohere, SambaNova.
64
+ - Quota-aware, least-used-first router with automatic failover across providers.
65
+ - Persistent per-provider/day quota tracking (`~/.config/freellmpool/quota.json`,
66
+ resets at UTC midnight).
67
+ - OpenAI-compatible proxy server (`freellmpool proxy`) exposing
68
+ `/v1/chat/completions` and `/v1/models` — a drop-in `OPENAI_BASE_URL`.
69
+ - CLI: `ask`, `providers`, `quota`, `proxy`.
70
+ - Python API: `from freellmpool import Pool`.
71
+ - Three request/response adapters (openai, gemini, cloudflare) and per-user
72
+ catalog overrides via `~/.config/freellmpool/providers.toml`.
73
+ - Full unit-test suite with a faked transport (no network) and CI on Python
74
+ 3.11–3.13.
@@ -0,0 +1,59 @@
1
+ # Contributing to freellmpool
2
+
3
+ Thanks for helping! The two highest-value contributions are **adding free
4
+ providers** and **keeping the existing catalog accurate** as free tiers drift.
5
+
6
+ ## Dev setup
7
+
8
+ ```bash
9
+ git clone https://github.com/0xzr/freellmpool
10
+ cd freellmpool
11
+ python -m venv .venv && source .venv/bin/activate
12
+ pip install -e ".[dev]"
13
+ pytest # 0 network calls — everything is faked
14
+ ruff check src tests
15
+ ```
16
+
17
+ ## Adding a provider
18
+
19
+ The whole catalog is [`src/freellmpool/providers.toml`](src/freellmpool/providers.toml).
20
+ Most providers are OpenAI-compatible, so adding one is just a TOML block:
21
+
22
+ ```toml
23
+ [[provider]]
24
+ id = "myprovider"
25
+ label = "My Provider"
26
+ adapter = "openai" # "openai" | "gemini" | "cloudflare"
27
+ base_url = "https://api.myprovider.ai/v1"
28
+ key_env = "MYPROVIDER_API_KEY" # name of the env var the user sets; never the key itself
29
+ models = [
30
+ { name = "some-model", rpd = 0 }, # rpd = free-tier requests-per-day hint; 0 = unknown
31
+ ]
32
+ ```
33
+
34
+ Rules of thumb:
35
+
36
+ - **Free tier only.** freellmpool is about free pools. If a provider needs a card
37
+ on file to use the tier, it doesn't belong in the default catalog.
38
+ - **Never commit a key.** Only the *name* of the env var goes in the catalog.
39
+ - If the provider isn't OpenAI-compatible, it needs a small adapter in
40
+ [`src/freellmpool/client.py`](src/freellmpool/client.py) (see the `gemini` one for
41
+ a ~30-line template) and a unit test in `tests/`.
42
+ - Add the env var to [`.env.example`](.env.example) and the signup steps to
43
+ [`docs/ACCOUNTS.md`](docs/ACCOUNTS.md).
44
+
45
+ ## Fixing a stale limit or endpoint
46
+
47
+ Free tiers change constantly. If a model name, base URL, or daily limit is
48
+ wrong, a one-line PR to `providers.toml` is perfect and very welcome.
49
+
50
+ ## Tests
51
+
52
+ Every code path is unit-tested without touching the network via an injected
53
+ fake transport (`tests/helpers.py`). Please keep it that way — new behavior
54
+ should come with a fake-backed test. Run `pytest` and `ruff check` before
55
+ opening a PR.
56
+
57
+ ## Code of conduct
58
+
59
+ Be kind. Assume good faith. We're all here to make free LLMs easier to use.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 0xzr (github.com/0xzr)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,251 @@
1
+ Metadata-Version: 2.4
2
+ Name: freellmpool
3
+ Version: 0.3.0
4
+ Summary: Pool the free tiers of 15+ LLM providers behind one OpenAI-compatible endpoint. Free, zero-config, with automatic failover and quota tracking.
5
+ Project-URL: Homepage, https://github.com/0xzr/freellmpool
6
+ Project-URL: Repository, https://github.com/0xzr/freellmpool
7
+ Project-URL: Issues, https://github.com/0xzr/freellmpool/issues
8
+ Author: 0xzr
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: cerebras,failover,free,gateway,gemini,groq,llm,load-balancer,openai,openrouter,proxy,router
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Libraries
20
+ Classifier: Topic :: Utilities
21
+ Requires-Python: >=3.11
22
+ Requires-Dist: httpx>=0.27
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest>=8.0; extra == 'dev'
25
+ Requires-Dist: ruff>=0.6; extra == 'dev'
26
+ Description-Content-Type: text/markdown
27
+
28
+ # freellmpool — pool every free LLM API into one endpoint
29
+
30
+ **A free, OpenAI-compatible LLM gateway that pools the free tiers of 16 providers (Groq, Cerebras, NVIDIA NIM, Gemini, OpenRouter, GitHub Models, Cloudflare & more) behind one `/v1` endpoint — with automatic failover and quota tracking. Works out of the box with zero API keys.**
31
+
32
+ [![PyPI](https://img.shields.io/pypi/v/freellmpool.svg)](https://pypi.org/project/freellmpool/)
33
+ [![CI](https://github.com/0xzr/freellmpool/actions/workflows/ci.yml/badge.svg)](https://github.com/0xzr/freellmpool/actions/workflows/ci.yml)
34
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
35
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org)
36
+
37
+ > One free tier is a toy. **Sixteen, stacked, are tens of thousands of free requests a day.** Point your OpenAI client at `freellmpool` and stop paying for a hobby project's inference.
38
+
39
+ Groq, Cerebras, NVIDIA NIM, Google Gemini, OpenRouter, GitHub Models, Cloudflare Workers AI, Mistral, Cohere, and more each hand out a generous **free tier** — but each has its own SDK, rate limits, and daily cap. `freellmpool` puts all of them into one pool:
40
+
41
+ - 🔌 **One OpenAI-compatible endpoint.** Point any OpenAI SDK / tool at `freellmpool` and it just works — `/v1/chat/completions`, `/v1/models`, and a `/v1/responses` shim for **Codex CLI & agents**.
42
+ - 🟢 **Zero config.** Works with **no API keys at all** — keyless providers are built in. `pip install` → `ask` → done.
43
+ - 🔁 **Automatic failover.** Rate-limited or 5xx on one provider? `freellmpool` transparently rolls to the next, with a cooldown so it stops hammering a throttled pool.
44
+ - 📊 **Quota-aware routing.** Spreads load least-used-first and respects each free daily limit, so you squeeze the most out of every tier.
45
+ - 🤖 **Built for agents.** Streaming (SSE), a Codex/Responses shim, and mid-run failover — exactly where long agent loops usually die.
46
+ - 🪶 **Tiny.** Pure-Python, one dependency (`httpx`). The proxy runs on the standard library. No keys are ever stored in the repo.
47
+
48
+ ---
49
+
50
+ ## Install
51
+
52
+ ```bash
53
+ pip install freellmpool # or: pipx install freellmpool
54
+ ```
55
+
56
+ ## Zero-config: it works with no keys at all
57
+
58
+ Three providers in the catalog need **no signup** (Pollinations and OVHcloud are keyless; LLM7's key is optional), so this works the moment you install:
59
+
60
+ ```bash
61
+ pip install freellmpool
62
+ freellmpool ask "Explain the CAP theorem in one sentence."
63
+ ```
64
+
65
+ Add provider keys (below) to unlock more models, higher limits, and better failover.
66
+
67
+ ## 60-second quickstart (with keys)
68
+
69
+ 1. Grab one or more free API keys — **all free, no credit card**. You only need
70
+ **one** to start (Groq and Cerebras are the fastest to sign up for).
71
+ 👉 **[docs/ACCOUNTS.md](docs/ACCOUNTS.md) has 1-minute, click-by-click steps for every provider.**
72
+
73
+ | Provider | Get a key |
74
+ |---|---|
75
+ | Groq | <https://console.groq.com/keys> |
76
+ | Cerebras | <https://cloud.cerebras.ai> |
77
+ | OpenRouter | <https://openrouter.ai/keys> |
78
+ | Google Gemini | <https://aistudio.google.com/apikey> |
79
+ | GitHub Models | any GitHub PAT |
80
+
81
+ 2. Export the ones you have (see [`.env.example`](.env.example) for all of them):
82
+
83
+ ```bash
84
+ export GROQ_API_KEY=gsk_...
85
+ export CEREBRAS_API_KEY=csk-...
86
+ ```
87
+
88
+ 3. Ask something:
89
+
90
+ ```bash
91
+ freellmpool ask "Explain the CAP theorem in one sentence."
92
+ ```
93
+
94
+ or pipe context in:
95
+
96
+ ```bash
97
+ cat error.log | freellmpool ask "What's the root cause here?"
98
+ ```
99
+
100
+ Check what's wired up:
101
+
102
+ ```bash
103
+ freellmpool providers
104
+ ```
105
+
106
+ ```
107
+ freellmpool catalog: 16 providers, 56 models
108
+
109
+ ✓ ovh OVHcloud AI Endpoints (keyless) 5 models [configured]
110
+ ✓ llm7 LLM7 (key optional) 1 models [configured]
111
+ · groq Groq 6 models [set GROQ_API_KEY]
112
+ · cerebras Cerebras 4 models [set CEREBRAS_API_KEY]
113
+ · nvidia NVIDIA NIM 5 models [set NVIDIA_API_KEY]
114
+ ...
115
+ ```
116
+
117
+ ## Choosing a model or provider
118
+
119
+ By default freellmpool auto-picks the least-used provider you have. To pin a choice:
120
+
121
+ ```bash
122
+ freellmpool models # list every provider/model id
123
+ freellmpool ask -m groq/llama-3.3-70b-versatile "hi" # exact provider + model
124
+ freellmpool ask -m llama-3.3-70b-versatile "hi" # that model on any provider
125
+ freellmpool ask -p cerebras,groq "hi" # restrict to these providers
126
+ ```
127
+
128
+ Same idea through the proxy via the OpenAI `model` field: `"auto"`, `"groq"`, or `"groq/llama-3.3-70b-versatile"`.
129
+
130
+ ### Providers in the box
131
+
132
+ | Provider | Key env | Notes |
133
+ |---|---|---|
134
+ | Pollinations | — | **keyless**, works out of the box |
135
+ | OVHcloud AI Endpoints | — | **keyless**, works out of the box |
136
+ | LLM7 | `LLM7_API_KEY` | key optional |
137
+ | Groq | `GROQ_API_KEY` | very fast |
138
+ | Cerebras | `CEREBRAS_API_KEY` | very fast, large daily cap |
139
+ | NVIDIA NIM | `NVIDIA_API_KEY` | big model catalog (build.nvidia.com) |
140
+ | OpenRouter | `OPENROUTER_API_KEY` | many `:free` models |
141
+ | Google Gemini | `GEMINI_API_KEY` | generous free tier |
142
+ | GitHub Models | `GITHUB_TOKEN` | any PAT works |
143
+ | Cloudflare Workers AI | `CLOUDFLARE_API_TOKEN` + `CLOUDFLARE_ACCOUNT_ID` | |
144
+ | Mistral | `MISTRAL_API_KEY` | |
145
+ | Cohere | `COHERE_API_KEY` | |
146
+ | SambaNova | `SAMBANOVA_API_KEY` | |
147
+ | Z.ai / Zhipu GLM | `ZHIPU_API_KEY` | |
148
+ | Ollama Cloud | `OLLAMA_API_KEY` | |
149
+ | LongCat (Meituan) | `LONGCAT_API_KEY` | |
150
+
151
+ Full signup steps for each: **[docs/ACCOUNTS.md](docs/ACCOUNTS.md)**.
152
+
153
+ ## The killer feature: a drop-in OpenAI proxy
154
+
155
+ Run the gateway:
156
+
157
+ ```bash
158
+ freellmpool proxy --port 8080
159
+ ```
160
+
161
+ Now point **any** OpenAI-compatible app or SDK at it — no other changes:
162
+
163
+ ```bash
164
+ export OPENAI_BASE_URL=http://localhost:8080/v1
165
+ export OPENAI_API_KEY=anything # ignored unless the proxy was started with --api-key
166
+ ```
167
+
168
+ ```python
169
+ from openai import OpenAI
170
+
171
+ client = OpenAI() # picks up OPENAI_BASE_URL
172
+ resp = client.chat.completions.create(
173
+ model="auto", # or "groq", or "groq/llama-3.3-70b-versatile"
174
+ messages=[{"role": "user", "content": "Say hi in French."}],
175
+ )
176
+ print(resp.choices[0].message.content)
177
+ ```
178
+
179
+ The `model` field controls routing:
180
+
181
+ | `model` value | Routes to |
182
+ |---|---|
183
+ | `auto` (or omitted) | any configured provider, least-used first |
184
+ | `groq` | any model on Groq |
185
+ | `groq/llama-3.3-70b-versatile` | that exact model |
186
+ | `llama-3.3-70b-versatile` | that model on any provider that has it |
187
+
188
+ ## Use it as the free LLM backend for your AI agent
189
+
190
+ Coding agents and agent frameworks (aider, Continue, Cline, the OpenAI Agents SDK, LangChain, ...) almost all speak the OpenAI API — so they can run on pooled free inference through `freellmpool`, with **failover when one provider rate-limits you mid-run** (exactly when long agent loops tend to die):
191
+
192
+ ```bash
193
+ freellmpool proxy --port 8080 # keep this running in its own terminal
194
+ export OPENAI_BASE_URL=http://localhost:8080/v1 OPENAI_API_KEY=anything
195
+ aider --model openai/auto # or point any OpenAI-compatible tool here
196
+ ```
197
+
198
+ The proxy supports `stream: true` (Server-Sent Events), so streaming chat UIs and agent loops work too. Full integration snippets (aider, LangChain, Continue/Cline, OpenAI Agents SDK) are in **[docs/AGENTS.md](docs/AGENTS.md)**.
199
+
200
+ ## Use it as a library
201
+
202
+ ```python
203
+ from freellmpool import Pool
204
+
205
+ pool = Pool.from_default_config()
206
+ reply = pool.ask("Summarize the plot of Hamlet in 20 words.")
207
+ print(reply.text)
208
+ print(f"served by {reply.provider_id}/{reply.model}")
209
+ ```
210
+
211
+ ## How routing works
212
+
213
+ For each request `freellmpool` builds the list of `(provider, model)` candidates that are configured (those you have keys for, plus the keyless providers), orders them **least-used-today first** (providers already over their free daily hint sink to the bottom), then tries them in order until one returns a non-empty completion. Every success is recorded to a small per-day counter at `~/.config/freellmpool/quota.json` (reset at UTC midnight). See [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) for the full picture.
214
+
215
+ ## Adding or overriding providers
216
+
217
+ The built-in catalog lives in [`src/freellmpool/providers.toml`](src/freellmpool/providers.toml). To add a provider or override a model list without forking, drop a `providers.toml` at `~/.config/freellmpool/providers.toml` (or point `FREELLMPOOL_CONFIG` at one). Same-`id` entries override the built-ins; new ids are appended. See [CONTRIBUTING.md](CONTRIBUTING.md) for the (small) anatomy of a provider.
218
+
219
+ ## Comparison
220
+
221
+ | | freellmpool | Calling each SDK by hand | A paid gateway |
222
+ |---|---|---|---|
223
+ | Free tiers pooled | ✅ 16 providers | ⚠️ you wire each one | ❌ |
224
+ | Automatic failover | ✅ | ❌ | ✅ |
225
+ | Quota tracking | ✅ per-day | ❌ | varies |
226
+ | Drop-in OpenAI proxy | ✅ | ❌ | ✅ |
227
+ | Cost | $0 | $0 | 💸 |
228
+ | Dependencies | 1 (`httpx`) | many | a service |
229
+
230
+ ## Limitations (read this)
231
+
232
+ `freellmpool` is honest about what it is — a way to pool **free tiers**, not a frontier-model service:
233
+
234
+ - **No GPT-5 / Claude-Opus-class reasoning.** Free tiers are smaller/faster models — great for triage, drafting, classification, tool-routing, and everyday coding; reach for a frontier model for the hardest reasoning.
235
+ - **Quality and capacity vary through the day** as high-cap pools exhaust; daily limits reset at UTC midnight.
236
+ - **Free tiers change without notice.** Endpoints, model ids, and limits drift — that's what the one-line `providers.toml` PRs are for.
237
+ - **Local-first, single-user.** The proxy defaults to `127.0.0.1`; if you bind it to a network interface, set a proxy key (`--api-key`). Not meant as a multi-tenant production gateway.
238
+ - **Respect the providers.** This pools *free* tiers for personal projects and experimentation — don't abuse them, or we all lose them.
239
+
240
+ ## Status
241
+
242
+ `freellmpool` is `0.3` and moving fast. Provider endpoints and free-tier limits drift — if something breaks, please [open an issue](https://github.com/0xzr/freellmpool/issues) or send a one-line PR to `providers.toml`. Contributions of new free providers are especially welcome.
243
+
244
+ ## Found this useful?
245
+
246
+ ⭐ **Star the repo** — it's the single biggest thing that helps others discover freellmpool, and it keeps the free-provider catalog maintained. New free providers and one-line limit fixes are always welcome ([CONTRIBUTING.md](CONTRIBUTING.md)).
247
+
248
+ ## License
249
+
250
+ MIT — see [LICENSE](LICENSE).
251
+