freellmpool 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- freellmpool-0.3.0/.env.example +61 -0
- freellmpool-0.3.0/.github/ISSUE_TEMPLATE/add-provider.md +23 -0
- freellmpool-0.3.0/.github/workflows/ci.yml +29 -0
- freellmpool-0.3.0/.gitignore +31 -0
- freellmpool-0.3.0/CHANGELOG.md +74 -0
- freellmpool-0.3.0/CONTRIBUTING.md +59 -0
- freellmpool-0.3.0/LICENSE +21 -0
- freellmpool-0.3.0/PKG-INFO +251 -0
- freellmpool-0.3.0/README.md +224 -0
- freellmpool-0.3.0/docs/ACCOUNTS.md +114 -0
- freellmpool-0.3.0/docs/AGENTS.md +90 -0
- freellmpool-0.3.0/docs/ARCHITECTURE.md +71 -0
- freellmpool-0.3.0/examples/agent_openai_sdk.py +38 -0
- freellmpool-0.3.0/pyproject.toml +73 -0
- freellmpool-0.3.0/src/freellmpool/__init__.py +27 -0
- freellmpool-0.3.0/src/freellmpool/__main__.py +4 -0
- freellmpool-0.3.0/src/freellmpool/cli.py +239 -0
- freellmpool-0.3.0/src/freellmpool/client.py +249 -0
- freellmpool-0.3.0/src/freellmpool/config.py +81 -0
- freellmpool-0.3.0/src/freellmpool/errors.py +37 -0
- freellmpool-0.3.0/src/freellmpool/models.py +76 -0
- freellmpool-0.3.0/src/freellmpool/providers.toml +231 -0
- freellmpool-0.3.0/src/freellmpool/proxy.py +363 -0
- freellmpool-0.3.0/src/freellmpool/quota.py +101 -0
- freellmpool-0.3.0/src/freellmpool/router.py +215 -0
- freellmpool-0.3.0/tests/conftest.py +59 -0
- freellmpool-0.3.0/tests/helpers.py +47 -0
- freellmpool-0.3.0/tests/test_cli.py +17 -0
- freellmpool-0.3.0/tests/test_client.py +63 -0
- freellmpool-0.3.0/tests/test_config.py +63 -0
- freellmpool-0.3.0/tests/test_proxy.py +201 -0
- freellmpool-0.3.0/tests/test_quota.py +47 -0
- freellmpool-0.3.0/tests/test_router.py +139 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# freellmpool — API keys for free-tier providers.
|
|
2
|
+
#
|
|
3
|
+
# Copy this file to `.env` (which is gitignored) and fill in the keys for
|
|
4
|
+
# whichever providers you want to use. You do NOT need all of them — freellmpool
|
|
5
|
+
# uses whatever you have configured and skips the rest. Every key below is for
|
|
6
|
+
# a FREE tier; no credit card is required for any of these.
|
|
7
|
+
#
|
|
8
|
+
# freellmpool reads these from the environment. Either `export` them in your
|
|
9
|
+
# shell, or use a tool like `direnv` / `python-dotenv`, or pass them inline.
|
|
10
|
+
# freellmpool itself does not parse this file — it only reads the environment.
|
|
11
|
+
|
|
12
|
+
# Groq — https://console.groq.com/keys (free, very fast)
|
|
13
|
+
GROQ_API_KEY=
|
|
14
|
+
|
|
15
|
+
# Cerebras — https://cloud.cerebras.ai (free, very fast)
|
|
16
|
+
CEREBRAS_API_KEY=
|
|
17
|
+
|
|
18
|
+
# OpenRouter — https://openrouter.ai/keys (many ':free' models)
|
|
19
|
+
OPENROUTER_API_KEY=
|
|
20
|
+
|
|
21
|
+
# Google Gemini — https://aistudio.google.com/apikey (generous free tier)
|
|
22
|
+
GEMINI_API_KEY=
|
|
23
|
+
|
|
24
|
+
# GitHub Models — https://github.com/settings/tokens (any PAT works; free)
|
|
25
|
+
GITHUB_TOKEN=
|
|
26
|
+
|
|
27
|
+
# Cloudflare Workers AI — https://dash.cloudflare.com/profile/api-tokens
|
|
28
|
+
# Needs both the token and your account id.
|
|
29
|
+
CLOUDFLARE_API_TOKEN=
|
|
30
|
+
CLOUDFLARE_ACCOUNT_ID=
|
|
31
|
+
|
|
32
|
+
# Mistral — https://console.mistral.ai/api-keys (free tier)
|
|
33
|
+
MISTRAL_API_KEY=
|
|
34
|
+
|
|
35
|
+
# Cohere — https://dashboard.cohere.com/api-keys (free trial keys)
|
|
36
|
+
COHERE_API_KEY=
|
|
37
|
+
|
|
38
|
+
# SambaNova — https://cloud.sambanova.ai/apis (free tier)
|
|
39
|
+
SAMBANOVA_API_KEY=
|
|
40
|
+
|
|
41
|
+
# NVIDIA NIM — https://build.nvidia.com (free credits, huge model catalog)
|
|
42
|
+
NVIDIA_API_KEY=
|
|
43
|
+
|
|
44
|
+
# Z.ai / Zhipu GLM — https://z.ai (free GLM flash models)
|
|
45
|
+
ZHIPU_API_KEY=
|
|
46
|
+
|
|
47
|
+
# Ollama Cloud — https://ollama.com/settings/keys
|
|
48
|
+
OLLAMA_API_KEY=
|
|
49
|
+
|
|
50
|
+
# LongCat (Meituan) — https://longcat.chat
|
|
51
|
+
LONGCAT_API_KEY=
|
|
52
|
+
|
|
53
|
+
# ---------------------------------------------------------------------------
|
|
54
|
+
# Zero-setup providers (no key needed — included automatically):
|
|
55
|
+
# • Pollinations, OVHcloud AI Endpoints (anonymous/keyless)
|
|
56
|
+
# • LLM7 (works without a key; set LLM7_API_KEY for more)
|
|
57
|
+
# So freellmpool works out of the box even with this whole file empty.
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
|
|
60
|
+
# LLM7 — optional key for higher limits: https://token.llm7.io
|
|
61
|
+
LLM7_API_KEY=
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Add a free provider
|
|
3
|
+
about: Propose adding a new free-tier LLM provider to the catalog
|
|
4
|
+
title: "Add provider: <name>"
|
|
5
|
+
labels: ["good first issue", "provider"]
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
**Provider:** <name + homepage>
|
|
9
|
+
|
|
10
|
+
**Free tier?** (link to the free-tier docs — must be usable without a credit card)
|
|
11
|
+
|
|
12
|
+
**OpenAI-compatible?** (does it expose `/v1/chat/completions`? If yes, this is a
|
|
13
|
+
one-block PR to `src/freellmpool/providers.toml`. If no, it needs a small adapter.)
|
|
14
|
+
|
|
15
|
+
**Base URL:**
|
|
16
|
+
|
|
17
|
+
**Models to include** (name + free daily request limit if known):
|
|
18
|
+
-
|
|
19
|
+
|
|
20
|
+
**Env var for the API key:** `<PROVIDER>_API_KEY`
|
|
21
|
+
|
|
22
|
+
See [CONTRIBUTING.md](https://github.com/0xzr/freellmpool/blob/main/CONTRIBUTING.md) for the (small) steps. New providers
|
|
23
|
+
are the most valuable contribution to freellmpool — thank you!
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
fail-fast: false
|
|
14
|
+
matrix:
|
|
15
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
19
|
+
uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: ${{ matrix.python-version }}
|
|
22
|
+
- name: Install
|
|
23
|
+
run: |
|
|
24
|
+
python -m pip install --upgrade pip
|
|
25
|
+
pip install -e ".[dev]"
|
|
26
|
+
- name: Lint
|
|
27
|
+
run: ruff check src tests
|
|
28
|
+
- name: Test
|
|
29
|
+
run: pytest
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
.venv/
|
|
9
|
+
venv/
|
|
10
|
+
env/
|
|
11
|
+
|
|
12
|
+
# Tooling
|
|
13
|
+
.pytest_cache/
|
|
14
|
+
.ruff_cache/
|
|
15
|
+
.mypy_cache/
|
|
16
|
+
.coverage
|
|
17
|
+
htmlcov/
|
|
18
|
+
|
|
19
|
+
# Secrets & local state — never commit these
|
|
20
|
+
.env
|
|
21
|
+
.env.*
|
|
22
|
+
!.env.example
|
|
23
|
+
*.key
|
|
24
|
+
**/api_key
|
|
25
|
+
.freellmpool/
|
|
26
|
+
freellmpool-state/
|
|
27
|
+
|
|
28
|
+
# Editor / OS
|
|
29
|
+
.idea/
|
|
30
|
+
.vscode/
|
|
31
|
+
.DS_Store
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented here. The format is based on
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/), and the project aims to follow
|
|
5
|
+
[Semantic Versioning](https://semver.org/).
|
|
6
|
+
|
|
7
|
+
## [0.3.0] — 2026-06-03
|
|
8
|
+
|
|
9
|
+
### Changed
|
|
10
|
+
- **Renamed `llmbuffet` → `freellmpool`** (clearer, keyword-rich, no name
|
|
11
|
+
collision). Python API is now `from freellmpool import Pool`; CLI is
|
|
12
|
+
`freellmpool` (with `ffp` as a short alias); config lives under
|
|
13
|
+
`~/.config/freellmpool/`; env vars are `FREELLMPOOL_*`.
|
|
14
|
+
|
|
15
|
+
### Added
|
|
16
|
+
- **Codex / Responses API shim** — the proxy now serves `POST /v1/responses`
|
|
17
|
+
(non-streaming + typed SSE events), so OpenAI Codex CLI and other
|
|
18
|
+
Responses-based agents can run on pooled free inference.
|
|
19
|
+
- **Pollinations** — a second keyless provider (16 providers / 56 models total),
|
|
20
|
+
strengthening the zero-config path.
|
|
21
|
+
- Agent docs for Codex CLI in `docs/AGENTS.md`; honest **Limitations** section
|
|
22
|
+
in the README.
|
|
23
|
+
|
|
24
|
+
## [0.2.0] — 2026-06-03
|
|
25
|
+
|
|
26
|
+
### Added
|
|
27
|
+
- **Six more providers** (15 total / 53 models): NVIDIA NIM, OVHcloud AI
|
|
28
|
+
Endpoints, LLM7, Ollama Cloud, Z.ai/Zhipu GLM, LongCat; expanded model lists
|
|
29
|
+
for Groq, Cerebras, OpenRouter, GitHub Models, SambaNova, Mistral, Gemini.
|
|
30
|
+
- **Keyless / zero-setup providers.** OVHcloud works with no API key
|
|
31
|
+
(anonymous); LLM7's key is optional. `pip install freellmpool && freellmpool ask`
|
|
32
|
+
now works with no signup at all. Catalog gains `auth` and `key_optional`.
|
|
33
|
+
- **Model selection.** New `freellmpool models` lists every `provider/model` id;
|
|
34
|
+
`ask -m provider/model` pins an exact model on an exact provider.
|
|
35
|
+
- **Streaming proxy.** The proxy honors `stream: true` with a buffered
|
|
36
|
+
OpenAI-style SSE stream, so stream-only clients (chat UIs, agents) work.
|
|
37
|
+
- **429 cooldown.** A rate-limited provider is deprioritized for a cooldown
|
|
38
|
+
window instead of being retried immediately.
|
|
39
|
+
- **Reasoning-model handling.** Thinking models get a `max_tokens` floor and
|
|
40
|
+
`<think>…</think>` blocks are stripped from output.
|
|
41
|
+
- `freellmpool ask --json` requests JSON and strips code fences.
|
|
42
|
+
|
|
43
|
+
### Hardening (post-review)
|
|
44
|
+
- Proxy now validates all request fields and returns OpenAI-style `400`s for
|
|
45
|
+
malformed input; a catch-all ensures no request can kill a server thread.
|
|
46
|
+
- Optional proxy auth: `--api-key` / `FREELLMPOOL_PROXY_KEY` requires a Bearer
|
|
47
|
+
token; a warning fires when binding to a non-loopback host without one.
|
|
48
|
+
- Quota store is now thread-safe (lock + unique temp file) and best-effort, so
|
|
49
|
+
a persistence hiccup can't abort a successful completion.
|
|
50
|
+
- A provider that returns `429` has its remaining models skipped for that
|
|
51
|
+
request; cooldowns update under a lock with `max()`.
|
|
52
|
+
- Verified live against 11 providers + the OpenAI SDK (non-streaming & SSE).
|
|
53
|
+
Fixed the LongCat model id (`LongCat-2.0-Preview`); LLM7 leads the keyless
|
|
54
|
+
pool (most reliable zero-key provider).
|
|
55
|
+
|
|
56
|
+
## [0.1.0] — 2026-06-02
|
|
57
|
+
|
|
58
|
+
Initial release.
|
|
59
|
+
|
|
60
|
+
### Added
|
|
61
|
+
- Provider catalog (`providers.toml`) covering 9 free-tier providers and 24
|
|
62
|
+
models: Groq, Cerebras, OpenRouter, Google Gemini, GitHub Models, Cloudflare
|
|
63
|
+
Workers AI, Mistral, Cohere, SambaNova.
|
|
64
|
+
- Quota-aware, least-used-first router with automatic failover across providers.
|
|
65
|
+
- Persistent per-provider/day quota tracking (`~/.config/freellmpool/quota.json`,
|
|
66
|
+
resets at UTC midnight).
|
|
67
|
+
- OpenAI-compatible proxy server (`freellmpool proxy`) exposing
|
|
68
|
+
`/v1/chat/completions` and `/v1/models` — a drop-in `OPENAI_BASE_URL`.
|
|
69
|
+
- CLI: `ask`, `providers`, `quota`, `proxy`.
|
|
70
|
+
- Python API: `from freellmpool import Pool`.
|
|
71
|
+
- Three request/response adapters (openai, gemini, cloudflare) and per-user
|
|
72
|
+
catalog overrides via `~/.config/freellmpool/providers.toml`.
|
|
73
|
+
- Full unit-test suite with a faked transport (no network) and CI on Python
|
|
74
|
+
3.11–3.13.
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Contributing to freellmpool
|
|
2
|
+
|
|
3
|
+
Thanks for helping! The two highest-value contributions are **adding free
|
|
4
|
+
providers** and **keeping the existing catalog accurate** as free tiers drift.
|
|
5
|
+
|
|
6
|
+
## Dev setup
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
git clone https://github.com/0xzr/freellmpool
|
|
10
|
+
cd freellmpool
|
|
11
|
+
python -m venv .venv && source .venv/bin/activate
|
|
12
|
+
pip install -e ".[dev]"
|
|
13
|
+
pytest # 0 network calls — everything is faked
|
|
14
|
+
ruff check src tests
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Adding a provider
|
|
18
|
+
|
|
19
|
+
The whole catalog is [`src/freellmpool/providers.toml`](src/freellmpool/providers.toml).
|
|
20
|
+
Most providers are OpenAI-compatible, so adding one is just a TOML block:
|
|
21
|
+
|
|
22
|
+
```toml
|
|
23
|
+
[[provider]]
|
|
24
|
+
id = "myprovider"
|
|
25
|
+
label = "My Provider"
|
|
26
|
+
adapter = "openai" # "openai" | "gemini" | "cloudflare"
|
|
27
|
+
base_url = "https://api.myprovider.ai/v1"
|
|
28
|
+
key_env = "MYPROVIDER_API_KEY" # env var the user sets; never a key
|
|
29
|
+
models = [
|
|
30
|
+
{ name = "some-model", rpd = 0 }, # rpd = free daily request hint, 0 = unknown
|
|
31
|
+
]
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Rules of thumb:
|
|
35
|
+
|
|
36
|
+
- **Free tier only.** freellmpool is about free pools. If a provider needs a card
|
|
37
|
+
on file to use the tier, it doesn't belong in the default catalog.
|
|
38
|
+
- **Never commit a key.** Only the *name* of the env var goes in the catalog.
|
|
39
|
+
- If the provider isn't OpenAI-compatible, it needs a small adapter in
|
|
40
|
+
[`src/freellmpool/client.py`](src/freellmpool/client.py) (see the `gemini` one for
|
|
41
|
+
a ~30-line template) and a unit test in `tests/`.
|
|
42
|
+
- Add the env var to [`.env.example`](.env.example) and the signup steps to
|
|
43
|
+
[`docs/ACCOUNTS.md`](docs/ACCOUNTS.md).
|
|
44
|
+
|
|
45
|
+
## Fixing a stale limit or endpoint
|
|
46
|
+
|
|
47
|
+
Free tiers change constantly. If a model name, base URL, or daily limit is
|
|
48
|
+
wrong, a one-line PR to `providers.toml` is perfect and very welcome.
|
|
49
|
+
|
|
50
|
+
## Tests
|
|
51
|
+
|
|
52
|
+
Every code path is unit-tested without touching the network via an injected
|
|
53
|
+
fake transport (`tests/helpers.py`). Please keep it that way — new behavior
|
|
54
|
+
should come with a fake-backed test. Run `pytest` and `ruff check` before
|
|
55
|
+
opening a PR.
|
|
56
|
+
|
|
57
|
+
## Code of conduct
|
|
58
|
+
|
|
59
|
+
Be kind. Assume good faith. We're all here to make free LLMs easier to use.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 0xzr (github.com/0xzr)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: freellmpool
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Pool the free tiers of 15+ LLM providers behind one OpenAI-compatible endpoint. Free, zero-config, with automatic failover and quota tracking.
|
|
5
|
+
Project-URL: Homepage, https://github.com/0xzr/freellmpool
|
|
6
|
+
Project-URL: Repository, https://github.com/0xzr/freellmpool
|
|
7
|
+
Project-URL: Issues, https://github.com/0xzr/freellmpool/issues
|
|
8
|
+
Author: 0xzr
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: cerebras,failover,free,gateway,gemini,groq,llm,load-balancer,openai,openrouter,proxy,router
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
20
|
+
Classifier: Topic :: Utilities
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Requires-Dist: httpx>=0.27
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
25
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# freellmpool — pool every free LLM API into one endpoint
|
|
29
|
+
|
|
30
|
+
**A free, OpenAI-compatible LLM gateway that pools the free tiers of 16 providers (Groq, Cerebras, NVIDIA NIM, Gemini, OpenRouter, GitHub Models, Cloudflare & more) behind one `/v1` endpoint — with automatic failover and quota tracking. Works out of the box with zero API keys.**
|
|
31
|
+
|
|
32
|
+
[PyPI](https://pypi.org/project/freellmpool/)
|
|
33
|
+
[CI](https://github.com/0xzr/freellmpool/actions/workflows/ci.yml)
|
|
34
|
+
[License: MIT](LICENSE)
|
|
35
|
+
[Python 3.11+](https://www.python.org)
|
|
36
|
+
|
|
37
|
+
> One free tier is a toy. **Sixteen, stacked, are tens of thousands of free requests a day.** Point your OpenAI client at `freellmpool` and stop paying for a hobby project's inference.
|
|
38
|
+
|
|
39
|
+
Groq, Cerebras, NVIDIA NIM, Google Gemini, OpenRouter, GitHub Models, Cloudflare Workers AI, Mistral, Cohere, and more each hand out a generous **free tier** — but each has its own SDK, rate limits, and daily cap. `freellmpool` puts all of them into one pool:
|
|
40
|
+
|
|
41
|
+
- 🔌 **One OpenAI-compatible endpoint.** Point any OpenAI SDK / tool at `freellmpool` and it just works — `/v1/chat/completions`, `/v1/models`, and a `/v1/responses` shim for **Codex CLI & agents**.
|
|
42
|
+
- 🟢 **Zero config.** Works with **no API keys at all** — keyless providers are built in. `pip install` → `ask` → done.
|
|
43
|
+
- 🔁 **Automatic failover.** Rate-limited or 5xx on one provider? `freellmpool` transparently rolls to the next, with a cooldown so it stops hammering a throttled pool.
|
|
44
|
+
- 📊 **Quota-aware routing.** Spreads load least-used-first and respects each free daily limit, so you squeeze the most out of every tier.
|
|
45
|
+
- 🤖 **Built for agents.** Streaming (SSE), a Codex/Responses shim, and mid-run failover — exactly where long agent loops usually die.
|
|
46
|
+
- 🪶 **Tiny.** Pure-Python, one dependency (`httpx`). The proxy runs on the standard library. No keys are ever stored in the repo.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Install
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install freellmpool # or: pipx install freellmpool
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Zero-config: it works with no keys at all
|
|
57
|
+
|
|
58
|
+
Three providers in the catalog need **no signup** (Pollinations and OVHcloud are keyless; LLM7's key is optional), so this works the moment you install:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install freellmpool
|
|
62
|
+
freellmpool ask "Explain the CAP theorem in one sentence."
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Add provider keys (below) to unlock more models, higher limits, and better failover.
|
|
66
|
+
|
|
67
|
+
## 60-second quickstart (with keys)
|
|
68
|
+
|
|
69
|
+
1. Grab one or more free API keys — **all free, no credit card**. You only need
|
|
70
|
+
**one** to start (Groq and Cerebras are the fastest to sign up for).
|
|
71
|
+
👉 **[docs/ACCOUNTS.md](docs/ACCOUNTS.md) has 1-minute, click-by-click steps for every provider.**
|
|
72
|
+
|
|
73
|
+
| Provider | Get a key |
|
|
74
|
+
|---|---|
|
|
75
|
+
| Groq | <https://console.groq.com/keys> |
|
|
76
|
+
| Cerebras | <https://cloud.cerebras.ai> |
|
|
77
|
+
| OpenRouter | <https://openrouter.ai/keys> |
|
|
78
|
+
| Google Gemini | <https://aistudio.google.com/apikey> |
|
|
79
|
+
| GitHub Models | any GitHub PAT |
|
|
80
|
+
|
|
81
|
+
2. Export the ones you have (see [`.env.example`](.env.example) for all of them):
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
export GROQ_API_KEY=gsk_...
|
|
85
|
+
export CEREBRAS_API_KEY=csk-...
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
3. Ask something:
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
freellmpool ask "Explain the CAP theorem in one sentence."
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
or pipe context in:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
cat error.log | freellmpool ask "What's the root cause here?"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Check what's wired up:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
freellmpool providers
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
```
|
|
107
|
+
freellmpool catalog: 16 providers, 56 models
|
|
108
|
+
|
|
109
|
+
✓ ovh OVHcloud AI Endpoints (keyless) 5 models [configured]
|
|
110
|
+
✓ llm7 LLM7 (key optional) 1 models [configured]
|
|
111
|
+
· groq Groq 6 models [set GROQ_API_KEY]
|
|
112
|
+
· cerebras Cerebras 4 models [set CEREBRAS_API_KEY]
|
|
113
|
+
· nvidia NVIDIA NIM 5 models [set NVIDIA_API_KEY]
|
|
114
|
+
...
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Choosing a model or provider
|
|
118
|
+
|
|
119
|
+
By default freellmpool auto-picks the least-used provider you have. To pin a choice:
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
freellmpool models # list every provider/model id
|
|
123
|
+
freellmpool ask -m groq/llama-3.3-70b-versatile "hi" # exact provider + model
|
|
124
|
+
freellmpool ask -m llama-3.3-70b-versatile "hi" # that model on any provider
|
|
125
|
+
freellmpool ask -p cerebras,groq "hi" # restrict to these providers
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
Same idea through the proxy via the OpenAI `model` field: `"auto"`, `"groq"`, or `"groq/llama-3.3-70b-versatile"`.
|
|
129
|
+
|
|
130
|
+
### Providers in the box
|
|
131
|
+
|
|
132
|
+
| Provider | Key env | Notes |
|
|
133
|
+
|---|---|---|
|
|
134
|
+
| Pollinations | — | **keyless**, works out of the box |
|
|
135
|
+
| OVHcloud AI Endpoints | — | **keyless**, works out of the box |
|
|
136
|
+
| LLM7 | `LLM7_API_KEY` | key optional |
|
|
137
|
+
| Groq | `GROQ_API_KEY` | very fast |
|
|
138
|
+
| Cerebras | `CEREBRAS_API_KEY` | very fast, large daily cap |
|
|
139
|
+
| NVIDIA NIM | `NVIDIA_API_KEY` | big model catalog (build.nvidia.com) |
|
|
140
|
+
| OpenRouter | `OPENROUTER_API_KEY` | many `:free` models |
|
|
141
|
+
| Google Gemini | `GEMINI_API_KEY` | generous free tier |
|
|
142
|
+
| GitHub Models | `GITHUB_TOKEN` | any PAT works |
|
|
143
|
+
| Cloudflare Workers AI | `CLOUDFLARE_API_TOKEN` + `CLOUDFLARE_ACCOUNT_ID` | |
|
|
144
|
+
| Mistral | `MISTRAL_API_KEY` | |
|
|
145
|
+
| Cohere | `COHERE_API_KEY` | |
|
|
146
|
+
| SambaNova | `SAMBANOVA_API_KEY` | |
|
|
147
|
+
| Z.ai / Zhipu GLM | `ZHIPU_API_KEY` | |
|
|
148
|
+
| Ollama Cloud | `OLLAMA_API_KEY` | |
|
|
149
|
+
| LongCat (Meituan) | `LONGCAT_API_KEY` | |
|
|
150
|
+
|
|
151
|
+
Full signup steps for each: **[docs/ACCOUNTS.md](docs/ACCOUNTS.md)**.
|
|
152
|
+
|
|
153
|
+
## The killer feature: a drop-in OpenAI proxy
|
|
154
|
+
|
|
155
|
+
Run the gateway:
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
freellmpool proxy --port 8080
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
Now point **any** OpenAI-compatible app or SDK at it — no other changes:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
export OPENAI_BASE_URL=http://localhost:8080/v1
|
|
165
|
+
export OPENAI_API_KEY=anything # ignored unless the proxy was started with a key (--api-key / FREELLMPOOL_PROXY_KEY)
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
from openai import OpenAI
|
|
170
|
+
|
|
171
|
+
client = OpenAI() # picks up OPENAI_BASE_URL
|
|
172
|
+
resp = client.chat.completions.create(
|
|
173
|
+
model="auto", # or "groq", or "groq/llama-3.3-70b-versatile"
|
|
174
|
+
messages=[{"role": "user", "content": "Say hi in French."}],
|
|
175
|
+
)
|
|
176
|
+
print(resp.choices[0].message.content)
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
The `model` field controls routing:
|
|
180
|
+
|
|
181
|
+
| `model` value | Routes to |
|
|
182
|
+
|---|---|
|
|
183
|
+
| `auto` (or omitted) | any configured provider, least-used first |
|
|
184
|
+
| `groq` | any model on Groq |
|
|
185
|
+
| `groq/llama-3.3-70b-versatile` | that exact model |
|
|
186
|
+
| `llama-3.3-70b-versatile` | that model on any provider that has it |
|
|
187
|
+
|
|
188
|
+
## Use it as the free LLM backend for your AI agent
|
|
189
|
+
|
|
190
|
+
Coding agents and agent frameworks (aider, Continue, Cline, the OpenAI Agents SDK, LangChain, ...) almost all speak the OpenAI API — so they can run on pooled free inference through `freellmpool`, with **failover when one provider rate-limits you mid-run** (exactly when long agent loops tend to die):
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
freellmpool proxy --port 8080
|
|
194
|
+
export OPENAI_BASE_URL=http://localhost:8080/v1 OPENAI_API_KEY=anything
|
|
195
|
+
aider --model openai/auto # or point any OpenAI-compatible tool here
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
The proxy supports `stream: true` (Server-Sent Events), so streaming chat UIs and agent loops work too. Full integration snippets (aider, LangChain, Continue/Cline, OpenAI Agents SDK) are in **[docs/AGENTS.md](docs/AGENTS.md)**.
|
|
199
|
+
|
|
200
|
+
## Use it as a library
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
from freellmpool import Pool
|
|
204
|
+
|
|
205
|
+
pool = Pool.from_default_config()
|
|
206
|
+
reply = pool.ask("Summarize the plot of Hamlet in 20 words.")
|
|
207
|
+
print(reply.text)
|
|
208
|
+
print(f"served by {reply.provider_id}/{reply.model}")
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## How routing works
|
|
212
|
+
|
|
213
|
+
For each request `freellmpool` builds the list of `(provider, model)` candidates you can use (keyless providers plus any you have keys for), orders them **least-used-today first** (providers already over their free daily hint sink to the bottom), then tries them in order until one returns a non-empty completion. Every success is recorded to a small per-day counter at `~/.config/freellmpool/quota.json` (reset at UTC midnight). See [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) for the full picture.
|
|
214
|
+
|
|
215
|
+
## Adding or overriding providers
|
|
216
|
+
|
|
217
|
+
The built-in catalog lives in [`src/freellmpool/providers.toml`](src/freellmpool/providers.toml). To add a provider or override a model list without forking, drop a `providers.toml` at `~/.config/freellmpool/providers.toml` (or point `FREELLMPOOL_CONFIG` at one). Same-`id` entries override the built-ins; new ids are appended. See [CONTRIBUTING.md](CONTRIBUTING.md) for the (small) anatomy of a provider.
|
|
218
|
+
|
|
219
|
+
## Comparison
|
|
220
|
+
|
|
221
|
+
| | freellmpool | Calling each SDK by hand | A paid gateway |
|
|
222
|
+
|---|---|---|---|
|
|
223
|
+
| Free tiers pooled | ✅ 16 providers | ⚠️ you wire each one | ❌ |
|
|
224
|
+
| Automatic failover | ✅ | ❌ | ✅ |
|
|
225
|
+
| Quota tracking | ✅ per-day | ❌ | varies |
|
|
226
|
+
| Drop-in OpenAI proxy | ✅ | ❌ | ✅ |
|
|
227
|
+
| Cost | $0 | $0 | 💸 |
|
|
228
|
+
| Dependencies | 1 (`httpx`) | many | a service |
|
|
229
|
+
|
|
230
|
+
## Limitations (read this)
|
|
231
|
+
|
|
232
|
+
`freellmpool` is honest about what it is — a way to pool **free tiers**, not a frontier-model service:
|
|
233
|
+
|
|
234
|
+
- **No GPT-5 / Claude-Opus-class reasoning.** Free tiers are smaller/faster models — great for triage, drafting, classification, tool-routing, and everyday coding; reach for a frontier model for the hardest reasoning.
|
|
235
|
+
- **Quality and capacity vary through the day** as high-cap pools exhaust; daily limits reset at UTC midnight.
|
|
236
|
+
- **Free tiers change without notice.** Endpoints, model ids, and limits drift — that's what the one-line `providers.toml` PRs are for.
|
|
237
|
+
- **Local-first, single-user.** The proxy defaults to `127.0.0.1`; if you bind it to a network interface, set a proxy key (`--api-key`). Not meant as a multi-tenant production gateway.
|
|
238
|
+
- **Respect the providers.** This pools *free* tiers for personal projects and experimentation — don't abuse them, or we all lose them.
|
|
239
|
+
|
|
240
|
+
## Status
|
|
241
|
+
|
|
242
|
+
`freellmpool` is `0.3` and moving fast. Provider endpoints and free-tier limits drift — if something breaks, please [open an issue](https://github.com/0xzr/freellmpool/issues) or send a one-line PR to `providers.toml`. Contributions of new free providers are especially welcome.
|
|
243
|
+
|
|
244
|
+
## Found this useful?
|
|
245
|
+
|
|
246
|
+
⭐ **Star the repo** — it's the single biggest thing that helps others discover freellmpool, and it keeps the free-provider catalog maintained. New free providers and one-line limit fixes are always welcome ([CONTRIBUTING.md](CONTRIBUTING.md)).
|
|
247
|
+
|
|
248
|
+
## License
|
|
249
|
+
|
|
250
|
+
MIT — see [LICENSE](LICENSE).
|
|
251
|
+
|