freelm 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- freelm-0.1.0/.github/workflows/ci.yml +26 -0
- freelm-0.1.0/.github/workflows/release.yml +30 -0
- freelm-0.1.0/.gitignore +16 -0
- freelm-0.1.0/CHANGELOG.md +24 -0
- freelm-0.1.0/LICENSE +21 -0
- freelm-0.1.0/PKG-INFO +182 -0
- freelm-0.1.0/README.md +154 -0
- freelm-0.1.0/examples/basic.py +22 -0
- freelm-0.1.0/pyproject.toml +40 -0
- freelm-0.1.0/src/freelm/__init__.py +63 -0
- freelm-0.1.0/src/freelm/_backoff.py +13 -0
- freelm-0.1.0/src/freelm/_breaker.py +41 -0
- freelm-0.1.0/src/freelm/_cache.py +62 -0
- freelm-0.1.0/src/freelm/_engine.py +87 -0
- freelm-0.1.0/src/freelm/_keys.py +94 -0
- freelm-0.1.0/src/freelm/_ratelimit.py +52 -0
- freelm-0.1.0/src/freelm/_types.py +119 -0
- freelm-0.1.0/src/freelm/client.py +272 -0
- freelm-0.1.0/src/freelm/compat/__init__.py +3 -0
- freelm-0.1.0/src/freelm/compat/openai.py +76 -0
- freelm-0.1.0/src/freelm/config.py +53 -0
- freelm-0.1.0/src/freelm/discovery.py +149 -0
- freelm-0.1.0/src/freelm/errors.py +124 -0
- freelm-0.1.0/src/freelm/providers/__init__.py +6 -0
- freelm-0.1.0/src/freelm/providers/base.py +133 -0
- freelm-0.1.0/src/freelm/providers/google.py +34 -0
- freelm-0.1.0/src/freelm/providers/nim.py +30 -0
- freelm-0.1.0/src/freelm/providers/openrouter.py +49 -0
- freelm-0.1.0/src/freelm/py.typed +0 -0
- freelm-0.1.0/src/freelm/registry.py +53 -0
- freelm-0.1.0/src/freelm/strategy.py +56 -0
- freelm-0.1.0/src/freelm/types_compat.py +83 -0
- freelm-0.1.0/tests/conftest.py +12 -0
- freelm-0.1.0/tests/test_async.py +36 -0
- freelm-0.1.0/tests/test_compat.py +22 -0
- freelm-0.1.0/tests/test_discovery.py +73 -0
- freelm-0.1.0/tests/test_router.py +115 -0
- freelm-0.1.0/tests/test_units.py +61 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
strategy:
|
|
12
|
+
fail-fast: false
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
18
|
+
uses: actions/setup-python@v5
|
|
19
|
+
with:
|
|
20
|
+
python-version: ${{ matrix.python-version }}
|
|
21
|
+
- name: Install
|
|
22
|
+
run: |
|
|
23
|
+
python -m pip install --upgrade pip
|
|
24
|
+
pip install -e ".[dev]"
|
|
25
|
+
- name: Test
|
|
26
|
+
run: pytest -q
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
# Publishes to PyPI when a v* tag is pushed.
|
|
4
|
+
# Uses PyPI Trusted Publishing (OIDC) — configure the publisher on PyPI first:
|
|
5
|
+
# https://docs.pypi.org/trusted-publishers/ (project: freelm, workflow: release.yml)
|
|
6
|
+
# No API token/secret required once trusted publishing is set up.
|
|
7
|
+
|
|
8
|
+
on:
|
|
9
|
+
push:
|
|
10
|
+
tags: ["v*"]
|
|
11
|
+
|
|
12
|
+
permissions:
|
|
13
|
+
contents: read
|
|
14
|
+
id-token: write
|
|
15
|
+
|
|
16
|
+
jobs:
|
|
17
|
+
publish:
|
|
18
|
+
runs-on: ubuntu-latest
|
|
19
|
+
environment: pypi
|
|
20
|
+
steps:
|
|
21
|
+
- uses: actions/checkout@v4
|
|
22
|
+
- uses: actions/setup-python@v5
|
|
23
|
+
with:
|
|
24
|
+
python-version: "3.12"
|
|
25
|
+
- name: Build
|
|
26
|
+
run: |
|
|
27
|
+
python -m pip install --upgrade pip build
|
|
28
|
+
python -m build
|
|
29
|
+
- name: Publish to PyPI
|
|
30
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
freelm-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to `freelm` are documented here. Format follows
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/); versioning is [SemVer](https://semver.org/).
|
|
5
|
+
|
|
6
|
+
## [0.1.0] - 2026-06-07
|
|
7
|
+
|
|
8
|
+
Initial release.
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- `FreeLLM` (sync) and `AsyncFreeLLM` (async) always-up chat clients.
|
|
12
|
+
- Providers (OpenAI-compatible HTTP): OpenRouter, Google AI Studio (Gemini), NVIDIA NIM.
|
|
13
|
+
- Fault tolerance: per-key circuit breaker, cross-provider failover, retry classification
|
|
14
|
+
(429 cooldown/rotate, 5xx/timeout backoff, 401/403 key-disable, model errors → next model).
|
|
15
|
+
- Model-scoped vs key-scoped 429 handling (OpenRouter free models throttle per-model upstream).
|
|
16
|
+
- Quota guard: per-key requests/minute token bucket + requests/day counter.
|
|
17
|
+
- Routing strategies: `priority`, `round_robin`, `quota_aware`, `latency`.
|
|
18
|
+
- Virtual models (`auto`, `chat:fast`, `chat:large`, ...) resolved per provider.
|
|
19
|
+
- Dynamic model discovery via `GET /models` with disk cache (TTL, `0600`) and
|
|
20
|
+
live → cache → hardcoded fallback. `list_free_models()` helper.
|
|
21
|
+
- OpenAI drop-in shim: `freelm.compat.OpenAI` / `AsyncOpenAI`.
|
|
22
|
+
- `FreeLLM.from_env()` config from environment; `llm.health()` introspection.
|
|
23
|
+
|
|
24
|
+
[0.1.0]: https://github.com/shihabshahrier/freelm/releases/tag/v0.1.0
|
freelm-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Shihab Shahriar Antor / Shahriar Labs
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
freelm-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: freelm
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: One always-up LLM client over free-tier providers (OpenRouter, Google AI Studio, NVIDIA NIM) with auto key-rotation, failover, circuit breaking and quota-aware routing.
|
|
5
|
+
Project-URL: Homepage, https://github.com/shihabshahrier/freelm
|
|
6
|
+
Project-URL: Issues, https://github.com/shihabshahrier/freelm/issues
|
|
7
|
+
Author-email: Shihab Shahriar Antor <shahriarlabs@gmail.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: ai,failover,free,gemini,llm,nvidia-nim,openai,openrouter,router
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Requires-Dist: httpx>=0.24
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: anyio>=4; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
26
|
+
Requires-Dist: respx>=0.20; extra == 'dev'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# freelm
|
|
30
|
+
|
|
31
|
+
**One always-up LLM client over free-tier providers.** Drop in your OpenRouter, Google AI Studio, and/or NVIDIA NIM keys, and `freelm` gives you a single chat call that auto-rotates keys, fails over across providers, paces itself to each tier's limits, and trips circuit breakers on dead keys — so your app keeps talking to an LLM even when one source rate-limits or dies.
|
|
32
|
+
|
|
33
|
+
> Python first. JS/TS and Go ports planned (the core is spec-driven for portability).
|
|
34
|
+
|
|
35
|
+
## Why
|
|
36
|
+
|
|
37
|
+
LLMs show up in nearly every project, and they cost money — but there's a lot of *free* capacity scattered across providers:
|
|
38
|
+
|
|
39
|
+
- **OpenRouter** — free models (`:free`), ~50 req/day under $10 credit, ~1000/day at ≥$10.
|
|
40
|
+
- **Google AI Studio (Gemini)** — generous free tier; Tier 1 (billing on) lifts limits hard.
|
|
41
|
+
- **NVIDIA NIM** (`build.nvidia.com`) — many models free against build credits.
|
|
42
|
+
|
|
43
|
+
`freelm` pools them behind one fault-tolerant client.
|
|
44
|
+
|
|
45
|
+
## Install
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install freelm
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Quick start
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
import freelm
|
|
55
|
+
|
|
56
|
+
llm = freelm.FreeLLM.from_env() # reads keys from environment
|
|
57
|
+
print(llm.text("Explain black holes in one sentence."))
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Explicit config:
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from freelm import FreeLLM, OpenRouter, GoogleAIStudio, NIM
|
|
64
|
+
|
|
65
|
+
llm = FreeLLM(
|
|
66
|
+
providers=[
|
|
67
|
+
OpenRouter("sk-or-...", tier="free"), # or tier="credit" if ≥ $10
|
|
68
|
+
GoogleAIStudio("AIza...", tier="free"), # or tier="tier1"
|
|
69
|
+
NIM("nvapi-..."),
|
|
70
|
+
],
|
|
71
|
+
strategy="quota_aware", # priority | round_robin | quota_aware | latency
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
resp = llm.chat(
|
|
75
|
+
[{"role": "user", "content": "Write a haiku about failover."}],
|
|
76
|
+
model="chat:fast", # virtual model, see below
|
|
77
|
+
)
|
|
78
|
+
print(resp.text, "via", resp.provider)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Async is symmetric:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from freelm import AsyncFreeLLM
|
|
85
|
+
|
|
86
|
+
async with AsyncFreeLLM.from_env() as llm:
|
|
87
|
+
print(await llm.text("hi"))
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Drop-in OpenAI shim
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
# from openai import OpenAI
|
|
94
|
+
from freelm.compat import OpenAI
|
|
95
|
+
|
|
96
|
+
client = OpenAI() # backed by FreeLLM.from_env()
|
|
97
|
+
r = client.chat.completions.create(
|
|
98
|
+
model="auto",
|
|
99
|
+
messages=[{"role": "user", "content": "hi"}],
|
|
100
|
+
)
|
|
101
|
+
print(r.choices[0].message.content)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Environment variables
|
|
105
|
+
|
|
106
|
+
| Provider | Key vars (first match wins) | Tier var |
|
|
107
|
+
|----------|------------------------------|----------|
|
|
108
|
+
| OpenRouter | `OPENROUTER_API_KEY` / `FREELM_OPENROUTER_KEYS` | `FREELM_OPENROUTER_TIER` (`free`\|`credit`) |
|
|
109
|
+
| Google AI Studio | `GEMINI_API_KEY` / `GOOGLE_API_KEY` / `GOOGLE_AI_STUDIO_KEY` / `FREELM_GOOGLE_KEYS` | `FREELM_GOOGLE_TIER` (`free`\|`tier1`) |
|
|
110
|
+
| NVIDIA NIM | `NVIDIA_API_KEY` / `NIM_API_KEY` / `FREELM_NIM_KEYS` | `FREELM_NIM_TIER` (`free`) |
|
|
111
|
+
|
|
112
|
+
Multiple keys per provider: comma-separate them.
|
|
113
|
+
|
|
114
|
+
## Virtual models
|
|
115
|
+
|
|
116
|
+
Names differ per provider, so ask by intent and `freelm` maps to a concrete model:
|
|
117
|
+
|
|
118
|
+
| Alias | Meaning |
|
|
119
|
+
|-------|---------|
|
|
120
|
+
| `auto` / `chat` | any available chat model (registry order) |
|
|
121
|
+
| `chat:large` / `large` | a larger/stronger model |
|
|
122
|
+
| `chat:fast` / `fast` | a fast/cheap model |
|
|
123
|
+
| `chat:small` / `small` | smallest model |
|
|
124
|
+
| `vendor/model-id` | passthrough — use exactly this model |
|
|
125
|
+
|
|
126
|
+
Override the table per provider with `models=[ModelSpec(...)]`.
|
|
127
|
+
|
|
128
|
+
## Dynamic model discovery
|
|
129
|
+
|
|
130
|
+
Free model IDs churn constantly, so `freelm` **doesn't trust its hardcoded list**. For OpenRouter (on by default), it queries `GET /models` on first use, derives tags (`large`/`fast`/`small`, plus `tools`/`vision`/`reasoning` from `supported_parameters`), and caches the list to disk.
|
|
131
|
+
|
|
132
|
+
Resolution order: **live API → disk cache → hardcoded fallback** (so it still works offline / key-less).
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from freelm import list_free_models
|
|
136
|
+
|
|
137
|
+
for m in list_free_models()[:5]: # live OpenRouter free models, cached
|
|
138
|
+
print(m.id, m.tags, m.ctx)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Control it:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
OpenRouter("sk-or-...", discover=True, discover_free_only=True, cache_ttl=3600)
|
|
145
|
+
GoogleAIStudio("AIza...", discover=True) # opt-in for other providers' /models
|
|
146
|
+
|
|
147
|
+
llm.refresh_models() # force re-fetch on next call
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
| Env var | Default | Meaning |
|
|
151
|
+
|---------|---------|---------|
|
|
152
|
+
| `FREELM_CACHE_DIR` | `~/.cache/freelm` | where the model cache lives (file is `0600`) |
|
|
153
|
+
| `FREELM_CACHE_TTL` | `3600` | cache lifetime in seconds |
|
|
154
|
+
|
|
155
|
+
## How "always-up" works
|
|
156
|
+
|
|
157
|
+
- **Key pool** per provider, round-robined to spread load.
|
|
158
|
+
- **Failover chain**: key → next key → next provider until one succeeds.
|
|
159
|
+
- **Circuit breaker** per key: opens after repeated failures, half-opens after a cooldown — no hammering a dead key.
|
|
160
|
+
- **Retry classification**: `429` → cool the key & rotate; `5xx`/timeout → breaker + backoff; `401/403` → disable the key; `4xx` model errors → try another model/provider; other `4xx` → surfaced as a caller bug.
|
|
161
|
+
- **Quota guard**: per-key requests/minute (token bucket) + requests/day counter, so a key predicted to be exhausted is skipped before you waste a call.
|
|
162
|
+
- **`wait=True`** (optional): briefly sleep until a key frees up instead of failing, bounded by `max_wait`.
|
|
163
|
+
|
|
164
|
+
Inspect live state any time:
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
for row in llm.health():
|
|
168
|
+
print(row) # provider, key (masked), ready, breaker, rpd_used, last_error, latency
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## Roadmap
|
|
172
|
+
|
|
173
|
+
- v1.1 — streaming (SSE normalization across providers)
|
|
174
|
+
- v1.2 — persistent quota tracking (sqlite/json) + tighter tier pacing
|
|
175
|
+
- v1.3 — tool / function-calling normalization
|
|
176
|
+
- v2 — embeddings, vision; JS/TS and Go ports
|
|
177
|
+
|
|
178
|
+
## License
|
|
179
|
+
|
|
180
|
+
MIT © Shahriar Labs
|
|
181
|
+
|
|
182
|
+
> Free-tier model lists change often — `freelm` discovers OpenRouter models live and caches them, so you rarely touch the hardcoded list. Tier **rate-limit numbers** are still heuristic defaults; override `rpm`/`rpd`/`tier` as providers evolve.
|
freelm-0.1.0/README.md
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# freelm
|
|
2
|
+
|
|
3
|
+
**One always-up LLM client over free-tier providers.** Drop in your OpenRouter, Google AI Studio, and/or NVIDIA NIM keys, and `freelm` gives you a single chat call that auto-rotates keys, fails over across providers, paces itself to each tier's limits, and trips circuit breakers on dead keys — so your app keeps talking to an LLM even when one source rate-limits or dies.
|
|
4
|
+
|
|
5
|
+
> Python first. JS/TS and Go ports planned (the core is spec-driven for portability).
|
|
6
|
+
|
|
7
|
+
## Why
|
|
8
|
+
|
|
9
|
+
LLMs show up in nearly every project, and they cost money — but there's a lot of *free* capacity scattered across providers:
|
|
10
|
+
|
|
11
|
+
- **OpenRouter** — free models (`:free`), ~50 req/day under $10 credit, ~1000/day at ≥$10.
|
|
12
|
+
- **Google AI Studio (Gemini)** — generous free tier; Tier 1 (billing enabled) raises the limits substantially.
|
|
13
|
+
- **NVIDIA NIM** (`build.nvidia.com`) — many models free against build credits.
|
|
14
|
+
|
|
15
|
+
`freelm` pools them behind one fault-tolerant client.
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install freelm
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Quick start
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
import freelm
|
|
27
|
+
|
|
28
|
+
llm = freelm.FreeLLM.from_env() # reads keys from environment
|
|
29
|
+
print(llm.text("Explain black holes in one sentence."))
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Explicit config:
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from freelm import FreeLLM, OpenRouter, GoogleAIStudio, NIM
|
|
36
|
+
|
|
37
|
+
llm = FreeLLM(
|
|
38
|
+
providers=[
|
|
39
|
+
OpenRouter("sk-or-...", tier="free"), # or tier="credit" if ≥ $10
|
|
40
|
+
GoogleAIStudio("AIza...", tier="free"), # or tier="tier1"
|
|
41
|
+
NIM("nvapi-..."),
|
|
42
|
+
],
|
|
43
|
+
strategy="quota_aware", # priority | round_robin | quota_aware | latency
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
resp = llm.chat(
|
|
47
|
+
[{"role": "user", "content": "Write a haiku about failover."}],
|
|
48
|
+
model="chat:fast", # virtual model, see below
|
|
49
|
+
)
|
|
50
|
+
print(resp.text, "via", resp.provider)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Async is symmetric:
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
from freelm import AsyncFreeLLM
|
|
57
|
+
|
|
58
|
+
async with AsyncFreeLLM.from_env() as llm:
|
|
59
|
+
print(await llm.text("hi"))
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Drop-in OpenAI shim
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
# from openai import OpenAI
|
|
66
|
+
from freelm.compat import OpenAI
|
|
67
|
+
|
|
68
|
+
client = OpenAI() # backed by FreeLLM.from_env()
|
|
69
|
+
r = client.chat.completions.create(
|
|
70
|
+
model="auto",
|
|
71
|
+
messages=[{"role": "user", "content": "hi"}],
|
|
72
|
+
)
|
|
73
|
+
print(r.choices[0].message.content)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Environment variables
|
|
77
|
+
|
|
78
|
+
| Provider | Key vars (first match wins) | Tier var |
|
|
79
|
+
|----------|------------------------------|----------|
|
|
80
|
+
| OpenRouter | `OPENROUTER_API_KEY` / `FREELM_OPENROUTER_KEYS` | `FREELM_OPENROUTER_TIER` (`free`\|`credit`) |
|
|
81
|
+
| Google AI Studio | `GEMINI_API_KEY` / `GOOGLE_API_KEY` / `GOOGLE_AI_STUDIO_KEY` / `FREELM_GOOGLE_KEYS` | `FREELM_GOOGLE_TIER` (`free`\|`tier1`) |
|
|
82
|
+
| NVIDIA NIM | `NVIDIA_API_KEY` / `NIM_API_KEY` / `FREELM_NIM_KEYS` | `FREELM_NIM_TIER` (`free`) |
|
|
83
|
+
|
|
84
|
+
Multiple keys per provider: comma-separate them.
|
|
85
|
+
|
|
86
|
+
## Virtual models
|
|
87
|
+
|
|
88
|
+
Names differ per provider, so ask by intent and `freelm` maps to a concrete model:
|
|
89
|
+
|
|
90
|
+
| Alias | Meaning |
|
|
91
|
+
|-------|---------|
|
|
92
|
+
| `auto` / `chat` | any available chat model (registry order) |
|
|
93
|
+
| `chat:large` / `large` | a larger/stronger model |
|
|
94
|
+
| `chat:fast` / `fast` | a fast/cheap model |
|
|
95
|
+
| `chat:small` / `small` | smallest model |
|
|
96
|
+
| `vendor/model-id` | passthrough — use exactly this model |
|
|
97
|
+
|
|
98
|
+
Override the table per provider with `models=[ModelSpec(...)]`.
|
|
99
|
+
|
|
100
|
+
## Dynamic model discovery
|
|
101
|
+
|
|
102
|
+
Free model IDs churn constantly, so `freelm` **doesn't trust its hardcoded list**. For OpenRouter (on by default), it queries `GET /models` on first use, derives tags (`large`/`fast`/`small`, plus `tools`/`vision`/`reasoning` from `supported_parameters`), and caches the list to disk.
|
|
103
|
+
|
|
104
|
+
Resolution order: **live API → disk cache → hardcoded fallback** (so it still works offline / key-less).
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from freelm import list_free_models
|
|
108
|
+
|
|
109
|
+
for m in list_free_models()[:5]: # live OpenRouter free models, cached
|
|
110
|
+
print(m.id, m.tags, m.ctx)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Control it:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
OpenRouter("sk-or-...", discover=True, discover_free_only=True, cache_ttl=3600)
|
|
117
|
+
GoogleAIStudio("AIza...", discover=True) # opt-in for other providers' /models
|
|
118
|
+
|
|
119
|
+
llm.refresh_models() # force re-fetch on next call
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
| Env var | Default | Meaning |
|
|
123
|
+
|---------|---------|---------|
|
|
124
|
+
| `FREELM_CACHE_DIR` | `~/.cache/freelm` | where the model cache lives (file is `0600`) |
|
|
125
|
+
| `FREELM_CACHE_TTL` | `3600` | cache lifetime in seconds |
|
|
126
|
+
|
|
127
|
+
## How "always-up" works
|
|
128
|
+
|
|
129
|
+
- **Key pool** per provider, round-robined to spread load.
|
|
130
|
+
- **Failover chain**: key → next key → next provider until one succeeds.
|
|
131
|
+
- **Circuit breaker** per key: opens after repeated failures, half-opens after a cooldown — no hammering a dead key.
|
|
132
|
+
- **Retry classification**: `429` → cool the key & rotate; `5xx`/timeout → breaker + backoff; `401/403` → disable the key; `4xx` model errors → try another model/provider; other `4xx` → surfaced as a caller bug.
|
|
133
|
+
- **Quota guard**: per-key requests/minute (token bucket) + requests/day counter, so a key predicted to be exhausted is skipped before you waste a call.
|
|
134
|
+
- **`wait=True`** (optional): briefly sleep until a key frees up instead of failing, bounded by `max_wait`.
|
|
135
|
+
|
|
136
|
+
Inspect live state any time:
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
for row in llm.health():
|
|
140
|
+
print(row) # provider, key (masked), ready, breaker, rpd_used, last_error, latency
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Roadmap
|
|
144
|
+
|
|
145
|
+
- v1.1 — streaming (SSE normalization across providers)
|
|
146
|
+
- v1.2 — persistent quota tracking (sqlite/json) + tighter tier pacing
|
|
147
|
+
- v1.3 — tool / function-calling normalization
|
|
148
|
+
- v2 — embeddings, vision; JS/TS and Go ports
|
|
149
|
+
|
|
150
|
+
## License
|
|
151
|
+
|
|
152
|
+
MIT © Shahriar Labs
|
|
153
|
+
|
|
154
|
+
> Free-tier model lists change often — `freelm` discovers OpenRouter models live and caches them, so you rarely touch the hardcoded list. Tier **rate-limit numbers** are still heuristic defaults; override `rpm`/`rpd`/`tier` as providers evolve.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Run me: python examples/basic.py
|
|
2
|
+
|
|
3
|
+
Set at least one of OPENROUTER_API_KEY / GEMINI_API_KEY / NVIDIA_API_KEY first.
|
|
4
|
+
"""
|
|
5
|
+
from freelm import FreeLLM
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def main() -> None:
    """Send one prompt through an env-configured client and print key health.

    Builds a ``FreeLLM`` from environment variables using the
    ``quota_aware`` strategy, prints the text reply for a single prompt
    sent to the ``chat:fast`` virtual model, then prints every row
    returned by ``llm.health()``.
    """
    # Reads keys + tiers from environment.
    llm = FreeLLM.from_env(strategy="quota_aware")
    try:
        print(llm.text("Explain prompt caching in one sentence.", model="chat:fast"))

        print("\n--- key health ---")
        for row in llm.health():
            print(row)
    finally:
        # Always release the client, even when the request or the health
        # dump raises; otherwise the close() call would be skipped.
        llm.close()


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "freelm"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "One always-up LLM client over free-tier providers (OpenRouter, Google AI Studio, NVIDIA NIM) with auto key-rotation, failover, circuit breaking and quota-aware routing."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Shihab Shahriar Antor", email = "shahriarlabs@gmail.com" }]
|
|
13
|
+
keywords = ["llm", "openrouter", "gemini", "nvidia-nim", "free", "failover", "router", "openai", "ai"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.9",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Programming Language :: Python :: 3.13",
|
|
24
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
25
|
+
]
|
|
26
|
+
dependencies = ["httpx>=0.24"]
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
dev = ["pytest>=7", "respx>=0.20", "anyio>=4"]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Homepage = "https://github.com/shihabshahrier/freelm"
|
|
33
|
+
Issues = "https://github.com/shihabshahrier/freelm/issues"
|
|
34
|
+
|
|
35
|
+
[tool.hatch.build.targets.wheel]
|
|
36
|
+
packages = ["src/freelm"]
|
|
37
|
+
|
|
38
|
+
[tool.pytest.ini_options]
|
|
39
|
+
testpaths = ["tests"]
|
|
40
|
+
addopts = "-q"
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""freelm — one always-up LLM client over free-tier providers.
|
|
2
|
+
|
|
3
|
+
Quick start::
|
|
4
|
+
|
|
5
|
+
import freelm
|
|
6
|
+
llm = freelm.FreeLLM.from_env()
|
|
7
|
+
print(llm.text("Explain black holes in one sentence."))
|
|
8
|
+
|
|
9
|
+
Explicit config::
|
|
10
|
+
|
|
11
|
+
from freelm import FreeLLM, OpenRouter, GoogleAIStudio, NIM
|
|
12
|
+
llm = FreeLLM(
|
|
13
|
+
providers=[OpenRouter("sk-or-..."), GoogleAIStudio("AIza..."), NIM("nvapi-...")],
|
|
14
|
+
strategy="quota_aware",
|
|
15
|
+
)
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from ._types import ChatRequest, ChatResponse, Choice, Message, Usage
|
|
20
|
+
from .client import AsyncFreeLLM, FreeLLM
|
|
21
|
+
from .config import providers_from_env
|
|
22
|
+
from .discovery import list_free_models
|
|
23
|
+
from .errors import (
|
|
24
|
+
AuthError,
|
|
25
|
+
ConfigError,
|
|
26
|
+
FreeLLMError,
|
|
27
|
+
ModelNotFound,
|
|
28
|
+
NoProvidersAvailable,
|
|
29
|
+
ProviderError,
|
|
30
|
+
RateLimited,
|
|
31
|
+
Transient,
|
|
32
|
+
)
|
|
33
|
+
from .providers import Gemini, GoogleAIStudio, NIM, OpenRouter, Provider
|
|
34
|
+
from .registry import ModelSpec
|
|
35
|
+
|
|
36
|
+
__version__ = "0.1.0"
|
|
37
|
+
|
|
38
|
+
__all__ = [
|
|
39
|
+
"FreeLLM",
|
|
40
|
+
"AsyncFreeLLM",
|
|
41
|
+
"Provider",
|
|
42
|
+
"OpenRouter",
|
|
43
|
+
"GoogleAIStudio",
|
|
44
|
+
"Gemini",
|
|
45
|
+
"NIM",
|
|
46
|
+
"ModelSpec",
|
|
47
|
+
"Message",
|
|
48
|
+
"ChatRequest",
|
|
49
|
+
"ChatResponse",
|
|
50
|
+
"Choice",
|
|
51
|
+
"Usage",
|
|
52
|
+
"providers_from_env",
|
|
53
|
+
"list_free_models",
|
|
54
|
+
"FreeLLMError",
|
|
55
|
+
"ConfigError",
|
|
56
|
+
"ProviderError",
|
|
57
|
+
"AuthError",
|
|
58
|
+
"RateLimited",
|
|
59
|
+
"Transient",
|
|
60
|
+
"ModelNotFound",
|
|
61
|
+
"NoProvidersAvailable",
|
|
62
|
+
"__version__",
|
|
63
|
+
]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Exponential backoff with full jitter."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import random
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def compute_delay(attempt: int, base: float = 0.5, factor: float = 2.0, cap: float = 30.0, jitter: bool = True) -> float:
    """Return the delay in seconds before a given retry attempt (1-based).

    The un-jittered delay is ``base * factor ** (attempt - 1)``, clamped
    to ``cap``. Attempts below 1 are treated as attempt 1.

    Args:
        attempt: Retry attempt number; values < 1 are raised to 1.
        base: Delay for the first attempt.
        factor: Multiplier applied per additional attempt.
        cap: Upper bound on the un-jittered delay.
        jitter: When true, return a uniformly random value in
            ``[0, delay]`` (full jitter) instead of the delay itself.

    Returns:
        The delay in seconds.
    """
    attempt = max(1, attempt)
    try:
        raw = min(cap, base * (factor ** (attempt - 1)))
    except OverflowError:
        # Exponentiation overflows for very large attempt counts
        # (e.g. 2.0 ** 1024). The un-capped delay is then far beyond the
        # cap for the usual positive base / factor > 1, so clamp to it
        # instead of crashing the retry loop.
        raw = cap
    if jitter:
        return random.uniform(0.0, raw)
    return raw
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Per-key circuit breaker. Time is injected (monotonic seconds) for testability."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
# Breaker states, stored as plain strings in ``CircuitBreaker.state``.
CLOSED = "closed"
OPEN = "open"
HALF_OPEN = "half_open"


@dataclass
class CircuitBreaker:
    """Three-state (closed / open / half-open) failure gate.

    The breaker never reads a clock itself: every time-dependent method
    takes ``now`` from the caller, and all comparisons are made against
    the ``now`` value recorded when the breaker last opened.
    """

    # Failure count at which a closed breaker opens.
    fail_threshold: int = 4
    # Seconds an open breaker waits before moving to half-open.
    cooldown: float = 30.0
    # Current state: CLOSED, OPEN or HALF_OPEN.
    state: str = CLOSED
    # Failures recorded since the last success.
    failures: int = 0
    # ``now`` value recorded when the breaker last opened.
    opened_at: float = 0.0

    def allow(self, now: float) -> bool:
        """Report whether a request may go through at time ``now``.

        An open breaker whose cooldown has elapsed is switched to
        half-open as a side effect and lets the request through.
        """
        if self.state != OPEN:
            return True
        cooled_down = now - self.opened_at >= self.cooldown
        if cooled_down:
            self.state = HALF_OPEN
        return cooled_down

    def on_success(self) -> None:
        """Record a success: clear the failure count and close the breaker."""
        self.state = CLOSED
        self.failures = 0

    def on_failure(self, now: float) -> None:
        """Record a failure, opening the breaker when warranted.

        The breaker opens (and remembers ``now``) when it was half-open
        or when the failure count has reached ``fail_threshold``.
        """
        self.failures += 1
        threshold_hit = self.failures >= self.fail_threshold
        if not (threshold_hit or self.state == HALF_OPEN):
            return
        self.state = OPEN
        self.opened_at = now

    def time_until_half_open(self, now: float) -> float:
        """Return the seconds left before an open breaker may half-open.

        Returns ``0.0`` when the breaker is not open or the cooldown has
        already elapsed.
        """
        if self.state == OPEN:
            elapsed = now - self.opened_at
            return max(0.0, self.cooldown - elapsed)
        return 0.0
|