freelm 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. freelm-0.1.0/.github/workflows/ci.yml +26 -0
  2. freelm-0.1.0/.github/workflows/release.yml +30 -0
  3. freelm-0.1.0/.gitignore +16 -0
  4. freelm-0.1.0/CHANGELOG.md +24 -0
  5. freelm-0.1.0/LICENSE +21 -0
  6. freelm-0.1.0/PKG-INFO +182 -0
  7. freelm-0.1.0/README.md +154 -0
  8. freelm-0.1.0/examples/basic.py +22 -0
  9. freelm-0.1.0/pyproject.toml +40 -0
  10. freelm-0.1.0/src/freelm/__init__.py +63 -0
  11. freelm-0.1.0/src/freelm/_backoff.py +13 -0
  12. freelm-0.1.0/src/freelm/_breaker.py +41 -0
  13. freelm-0.1.0/src/freelm/_cache.py +62 -0
  14. freelm-0.1.0/src/freelm/_engine.py +87 -0
  15. freelm-0.1.0/src/freelm/_keys.py +94 -0
  16. freelm-0.1.0/src/freelm/_ratelimit.py +52 -0
  17. freelm-0.1.0/src/freelm/_types.py +119 -0
  18. freelm-0.1.0/src/freelm/client.py +272 -0
  19. freelm-0.1.0/src/freelm/compat/__init__.py +3 -0
  20. freelm-0.1.0/src/freelm/compat/openai.py +76 -0
  21. freelm-0.1.0/src/freelm/config.py +53 -0
  22. freelm-0.1.0/src/freelm/discovery.py +149 -0
  23. freelm-0.1.0/src/freelm/errors.py +124 -0
  24. freelm-0.1.0/src/freelm/providers/__init__.py +6 -0
  25. freelm-0.1.0/src/freelm/providers/base.py +133 -0
  26. freelm-0.1.0/src/freelm/providers/google.py +34 -0
  27. freelm-0.1.0/src/freelm/providers/nim.py +30 -0
  28. freelm-0.1.0/src/freelm/providers/openrouter.py +49 -0
  29. freelm-0.1.0/src/freelm/py.typed +0 -0
  30. freelm-0.1.0/src/freelm/registry.py +53 -0
  31. freelm-0.1.0/src/freelm/strategy.py +56 -0
  32. freelm-0.1.0/src/freelm/types_compat.py +83 -0
  33. freelm-0.1.0/tests/conftest.py +12 -0
  34. freelm-0.1.0/tests/test_async.py +36 -0
  35. freelm-0.1.0/tests/test_compat.py +22 -0
  36. freelm-0.1.0/tests/test_discovery.py +73 -0
  37. freelm-0.1.0/tests/test_router.py +115 -0
  38. freelm-0.1.0/tests/test_units.py +61 -0
@@ -0,0 +1,26 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+
8
+ jobs:
9
+ test:
10
+ runs-on: ubuntu-latest
11
+ strategy:
12
+ fail-fast: false
13
+ matrix:
14
+ python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - name: Set up Python ${{ matrix.python-version }}
18
+ uses: actions/setup-python@v5
19
+ with:
20
+ python-version: ${{ matrix.python-version }}
21
+ - name: Install
22
+ run: |
23
+ python -m pip install --upgrade pip
24
+ pip install -e ".[dev]"
25
+ - name: Test
26
+ run: pytest -q
@@ -0,0 +1,30 @@
1
+ name: Release
2
+
3
+ # Publishes to PyPI when a v* tag is pushed.
4
+ # Uses PyPI Trusted Publishing (OIDC) — configure the publisher on PyPI first:
5
+ # https://docs.pypi.org/trusted-publishers/ (project: freelm, workflow: release.yml)
6
+ # No API token/secret required once trusted publishing is set up.
7
+
8
+ on:
9
+ push:
10
+ tags: ["v*"]
11
+
12
+ permissions:
13
+ contents: read
14
+ id-token: write
15
+
16
+ jobs:
17
+ publish:
18
+ runs-on: ubuntu-latest
19
+ environment: pypi
20
+ steps:
21
+ - uses: actions/checkout@v4
22
+ - uses: actions/setup-python@v5
23
+ with:
24
+ python-version: "3.12"
25
+ - name: Build
26
+ run: |
27
+ python -m pip install --upgrade pip build
28
+ python -m build
29
+ - name: Publish to PyPI
30
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,16 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .eggs/
5
+ build/
6
+ dist/
7
+ .venv/
8
+ venv/
9
+ .env
10
+ .pytest_cache/
11
+ .mypy_cache/
12
+ .ruff_cache/
13
+ .DS_Store
14
+ *.log
15
+ .coverage
16
+ htmlcov/
@@ -0,0 +1,24 @@
1
+ # Changelog
2
+
3
+ All notable changes to `freelm` are documented here. Format follows
4
+ [Keep a Changelog](https://keepachangelog.com/); versioning is [SemVer](https://semver.org/).
5
+
6
+ ## [0.1.0] - 2026-06-07
7
+
8
+ Initial release.
9
+
10
+ ### Added
11
+ - `FreeLLM` (sync) and `AsyncFreeLLM` (async) always-up chat clients.
12
+ - Providers (OpenAI-compatible HTTP): OpenRouter, Google AI Studio (Gemini), NVIDIA NIM.
13
+ - Fault tolerance: per-key circuit breaker, cross-provider failover, retry classification
14
+ (429 cooldown/rotate, 5xx/timeout backoff, 401 key-disable, model errors → next model).
15
+ - Model-scoped vs key-scoped 429 handling (OpenRouter free models throttle per-model upstream).
16
+ - Quota guard: per-key requests/minute token bucket + requests/day counter.
17
+ - Routing strategies: `priority`, `round_robin`, `quota_aware`, `latency`.
18
+ - Virtual models (`auto`, `chat:fast`, `chat:large`, ...) resolved per provider.
19
+ - Dynamic model discovery via `GET /models` with disk cache (TTL, `0600`) and
20
+ live → cache → hardcoded fallback. `list_free_models()` helper.
21
+ - OpenAI drop-in shim: `freelm.compat.OpenAI` / `AsyncOpenAI`.
22
+ - `FreeLLM.from_env()` config from environment; `llm.health()` introspection.
23
+
24
+ [0.1.0]: https://github.com/shihabshahrier/freelm/releases/tag/v0.1.0
freelm-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Shihab Shahriar Antor / Shahriar Labs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
freelm-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,182 @@
1
+ Metadata-Version: 2.4
2
+ Name: freelm
3
+ Version: 0.1.0
4
+ Summary: One always-up LLM client over free-tier providers (OpenRouter, Google AI Studio, NVIDIA NIM) with auto key-rotation, failover, circuit breaking and quota-aware routing.
5
+ Project-URL: Homepage, https://github.com/shihabshahrier/freelm
6
+ Project-URL: Issues, https://github.com/shihabshahrier/freelm/issues
7
+ Author-email: Shihab Shahriar Antor <shahriarlabs@gmail.com>
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Keywords: ai,failover,free,gemini,llm,nvidia-nim,openai,openrouter,router
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Requires-Python: >=3.9
22
+ Requires-Dist: httpx>=0.24
23
+ Provides-Extra: dev
24
+ Requires-Dist: anyio>=4; extra == 'dev'
25
+ Requires-Dist: pytest>=7; extra == 'dev'
26
+ Requires-Dist: respx>=0.20; extra == 'dev'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # freelm
30
+
31
+ **One always-up LLM client over free-tier providers.** Drop in your OpenRouter, Google AI Studio, and/or NVIDIA NIM keys, and `freelm` gives you a single chat call that auto-rotates keys, fails over across providers, paces itself to each tier's limits, and trips circuit breakers on dead keys — so your app keeps talking to an LLM even when one source rate-limits or dies.
32
+
33
+ > Python first. JS/TS and Go ports planned (the core is spec-driven for portability).
34
+
35
+ ## Why
36
+
37
+ LLMs show up in nearly every project, and they cost money — but there's a lot of *free* capacity scattered across providers:
38
+
39
+ - **OpenRouter** — free models (`:free`), ~50 req/day under $10 credit, ~1000/day at ≥$10.
40
+ - **Google AI Studio (Gemini)** — generous free tier; Tier 1 (billing enabled) raises the limits substantially.
41
+ - **NVIDIA NIM** (`build.nvidia.com`) — many models free against build credits.
42
+
43
+ `freelm` pools them behind one fault-tolerant client.
44
+
45
+ ## Install
46
+
47
+ ```bash
48
+ pip install freelm
49
+ ```
50
+
51
+ ## Quick start
52
+
53
+ ```python
54
+ import freelm
55
+
56
+ llm = freelm.FreeLLM.from_env() # reads keys from environment
57
+ print(llm.text("Explain black holes in one sentence."))
58
+ ```
59
+
60
+ Explicit config:
61
+
62
+ ```python
63
+ from freelm import FreeLLM, OpenRouter, GoogleAIStudio, NIM
64
+
65
+ llm = FreeLLM(
66
+ providers=[
67
+ OpenRouter("sk-or-...", tier="free"), # or tier="credit" if ≥ $10
68
+ GoogleAIStudio("AIza...", tier="free"), # or tier="tier1"
69
+ NIM("nvapi-..."),
70
+ ],
71
+ strategy="quota_aware", # priority | round_robin | quota_aware | latency
72
+ )
73
+
74
+ resp = llm.chat(
75
+ [{"role": "user", "content": "Write a haiku about failover."}],
76
+ model="chat:fast", # virtual model, see below
77
+ )
78
+ print(resp.text, "via", resp.provider)
79
+ ```
80
+
81
+ Async is symmetric:
82
+
83
+ ```python
84
+ from freelm import AsyncFreeLLM
85
+
86
+ async with AsyncFreeLLM.from_env() as llm:
87
+ print(await llm.text("hi"))
88
+ ```
89
+
90
+ ## Drop-in OpenAI shim
91
+
92
+ ```python
93
+ # from openai import OpenAI
94
+ from freelm.compat import OpenAI
95
+
96
+ client = OpenAI() # backed by FreeLLM.from_env()
97
+ r = client.chat.completions.create(
98
+ model="auto",
99
+ messages=[{"role": "user", "content": "hi"}],
100
+ )
101
+ print(r.choices[0].message.content)
102
+ ```
103
+
104
+ ## Environment variables
105
+
106
+ | Provider | Key vars (first match wins) | Tier var |
107
+ |----------|------------------------------|----------|
108
+ | OpenRouter | `OPENROUTER_API_KEY` / `FREELM_OPENROUTER_KEYS` | `FREELM_OPENROUTER_TIER` (`free`\|`credit`) |
109
+ | Google AI Studio | `GEMINI_API_KEY` / `GOOGLE_API_KEY` / `GOOGLE_AI_STUDIO_KEY` / `FREELM_GOOGLE_KEYS` | `FREELM_GOOGLE_TIER` (`free`\|`tier1`) |
110
+ | NVIDIA NIM | `NVIDIA_API_KEY` / `NIM_API_KEY` / `FREELM_NIM_KEYS` | `FREELM_NIM_TIER` (`free`) |
111
+
112
+ Multiple keys per provider: comma-separate them.
113
+
114
+ ## Virtual models
115
+
116
+ Names differ per provider, so ask by intent and `freelm` maps to a concrete model:
117
+
118
+ | Alias | Meaning |
119
+ |-------|---------|
120
+ | `auto` / `chat` | any available chat model (registry order) |
121
+ | `chat:large` / `large` | a larger/stronger model |
122
+ | `chat:fast` / `fast` | a fast/cheap model |
123
+ | `chat:small` / `small` | smallest model |
124
+ | `vendor/model-id` | passthrough — use exactly this model |
125
+
126
+ Override the table per provider with `models=[ModelSpec(...)]`.
127
+
128
+ ## Dynamic model discovery
129
+
130
+ Free model IDs churn constantly, so `freelm` **doesn't trust its hardcoded list**. For OpenRouter (on by default), it queries `GET /models` on first use, derives tags (`large`/`fast`/`small`, plus `tools`/`vision`/`reasoning` from `supported_parameters`), and caches the list to disk.
131
+
132
+ Resolution order: **live API → disk cache → hardcoded fallback** (so it still works offline / key-less).
133
+
134
+ ```python
135
+ from freelm import list_free_models
136
+
137
+ for m in list_free_models()[:5]: # live OpenRouter free models, cached
138
+ print(m.id, m.tags, m.ctx)
139
+ ```
140
+
141
+ Control it:
142
+
143
+ ```python
144
+ OpenRouter("sk-or-...", discover=True, discover_free_only=True, cache_ttl=3600)
145
+ GoogleAIStudio("AIza...", discover=True) # opt-in for other providers' /models
146
+
147
+ llm.refresh_models() # force re-fetch on next call
148
+ ```
149
+
150
+ | Env var | Default | Meaning |
151
+ |---------|---------|---------|
152
+ | `FREELM_CACHE_DIR` | `~/.cache/freelm` | where the model cache lives (file is `0600`) |
153
+ | `FREELM_CACHE_TTL` | `3600` | cache lifetime in seconds |
154
+
155
+ ## How "always-up" works
156
+
157
+ - **Key pool** per provider, round-robined to spread load.
158
+ - **Failover chain**: key → next key → next provider until one succeeds.
159
+ - **Circuit breaker** per key: opens after repeated failures, half-opens after a cooldown — no hammering a dead key.
160
+ - **Retry classification**: `429` → cool the key & rotate; `5xx`/timeout → breaker + backoff; `401/403` → disable the key; `4xx` model errors → try another model/provider; other `4xx` → surfaced as a caller bug.
161
+ - **Quota guard**: per-key requests/minute (token bucket) + requests/day counter, so a key predicted to be exhausted is skipped before you waste a call.
162
+ - **`wait=True`** (optional): briefly sleep until a key frees up instead of failing, bounded by `max_wait`.
163
+
164
+ Inspect live state any time:
165
+
166
+ ```python
167
+ for row in llm.health():
168
+ print(row) # provider, key (masked), ready, breaker, rpd_used, last_error, latency
169
+ ```
170
+
171
+ ## Roadmap
172
+
173
+ - v1.1 — streaming (SSE normalization across providers)
174
+ - v1.2 — persistent quota tracking (sqlite/json) + tighter tier pacing
175
+ - v1.3 — tool / function-calling normalization
176
+ - v2 — embeddings, vision; JS/TS and Go ports
177
+
178
+ ## License
179
+
180
+ MIT © Shahriar Labs
181
+
182
+ > Free-tier model lists change often — `freelm` discovers OpenRouter models live and caches them, so you rarely touch the hardcoded list. Tier **rate-limit numbers** are still heuristic defaults; override `rpm`/`rpd`/`tier` as providers evolve.
freelm-0.1.0/README.md ADDED
@@ -0,0 +1,154 @@
1
+ # freelm
2
+
3
+ **One always-up LLM client over free-tier providers.** Drop in your OpenRouter, Google AI Studio, and/or NVIDIA NIM keys, and `freelm` gives you a single chat call that auto-rotates keys, fails over across providers, paces itself to each tier's limits, and trips circuit breakers on dead keys — so your app keeps talking to an LLM even when one source rate-limits or dies.
4
+
5
+ > Python first. JS/TS and Go ports planned (the core is spec-driven for portability).
6
+
7
+ ## Why
8
+
9
+ LLMs show up in nearly every project, and they cost money — but there's a lot of *free* capacity scattered across providers:
10
+
11
+ - **OpenRouter** — free models (`:free`), ~50 req/day under $10 credit, ~1000/day at ≥$10.
12
+ - **Google AI Studio (Gemini)** — generous free tier; Tier 1 (billing enabled) raises the limits substantially.
13
+ - **NVIDIA NIM** (`build.nvidia.com`) — many models free against build credits.
14
+
15
+ `freelm` pools them behind one fault-tolerant client.
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ pip install freelm
21
+ ```
22
+
23
+ ## Quick start
24
+
25
+ ```python
26
+ import freelm
27
+
28
+ llm = freelm.FreeLLM.from_env() # reads keys from environment
29
+ print(llm.text("Explain black holes in one sentence."))
30
+ ```
31
+
32
+ Explicit config:
33
+
34
+ ```python
35
+ from freelm import FreeLLM, OpenRouter, GoogleAIStudio, NIM
36
+
37
+ llm = FreeLLM(
38
+ providers=[
39
+ OpenRouter("sk-or-...", tier="free"), # or tier="credit" if ≥ $10
40
+ GoogleAIStudio("AIza...", tier="free"), # or tier="tier1"
41
+ NIM("nvapi-..."),
42
+ ],
43
+ strategy="quota_aware", # priority | round_robin | quota_aware | latency
44
+ )
45
+
46
+ resp = llm.chat(
47
+ [{"role": "user", "content": "Write a haiku about failover."}],
48
+ model="chat:fast", # virtual model, see below
49
+ )
50
+ print(resp.text, "via", resp.provider)
51
+ ```
52
+
53
+ Async is symmetric:
54
+
55
+ ```python
56
+ from freelm import AsyncFreeLLM
57
+
58
+ async with AsyncFreeLLM.from_env() as llm:
59
+ print(await llm.text("hi"))
60
+ ```
61
+
62
+ ## Drop-in OpenAI shim
63
+
64
+ ```python
65
+ # from openai import OpenAI
66
+ from freelm.compat import OpenAI
67
+
68
+ client = OpenAI() # backed by FreeLLM.from_env()
69
+ r = client.chat.completions.create(
70
+ model="auto",
71
+ messages=[{"role": "user", "content": "hi"}],
72
+ )
73
+ print(r.choices[0].message.content)
74
+ ```
75
+
76
+ ## Environment variables
77
+
78
+ | Provider | Key vars (first match wins) | Tier var |
79
+ |----------|------------------------------|----------|
80
+ | OpenRouter | `OPENROUTER_API_KEY` / `FREELM_OPENROUTER_KEYS` | `FREELM_OPENROUTER_TIER` (`free`\|`credit`) |
81
+ | Google AI Studio | `GEMINI_API_KEY` / `GOOGLE_API_KEY` / `GOOGLE_AI_STUDIO_KEY` / `FREELM_GOOGLE_KEYS` | `FREELM_GOOGLE_TIER` (`free`\|`tier1`) |
82
+ | NVIDIA NIM | `NVIDIA_API_KEY` / `NIM_API_KEY` / `FREELM_NIM_KEYS` | `FREELM_NIM_TIER` (`free`) |
83
+
84
+ Multiple keys per provider: comma-separate them.
85
+
86
+ ## Virtual models
87
+
88
+ Names differ per provider, so ask by intent and `freelm` maps to a concrete model:
89
+
90
+ | Alias | Meaning |
91
+ |-------|---------|
92
+ | `auto` / `chat` | any available chat model (registry order) |
93
+ | `chat:large` / `large` | a larger/stronger model |
94
+ | `chat:fast` / `fast` | a fast/cheap model |
95
+ | `chat:small` / `small` | smallest model |
96
+ | `vendor/model-id` | passthrough — use exactly this model |
97
+
98
+ Override the table per provider with `models=[ModelSpec(...)]`.
99
+
100
+ ## Dynamic model discovery
101
+
102
+ Free model IDs churn constantly, so `freelm` **doesn't trust its hardcoded list**. For OpenRouter (on by default), it queries `GET /models` on first use, derives tags (`large`/`fast`/`small`, plus `tools`/`vision`/`reasoning` from `supported_parameters`), and caches the list to disk.
103
+
104
+ Resolution order: **live API → disk cache → hardcoded fallback** (so it still works offline / key-less).
105
+
106
+ ```python
107
+ from freelm import list_free_models
108
+
109
+ for m in list_free_models()[:5]: # live OpenRouter free models, cached
110
+ print(m.id, m.tags, m.ctx)
111
+ ```
112
+
113
+ Control it:
114
+
115
+ ```python
116
+ OpenRouter("sk-or-...", discover=True, discover_free_only=True, cache_ttl=3600)
117
+ GoogleAIStudio("AIza...", discover=True) # opt-in for other providers' /models
118
+
119
+ llm.refresh_models() # force re-fetch on next call
120
+ ```
121
+
122
+ | Env var | Default | Meaning |
123
+ |---------|---------|---------|
124
+ | `FREELM_CACHE_DIR` | `~/.cache/freelm` | where the model cache lives (file is `0600`) |
125
+ | `FREELM_CACHE_TTL` | `3600` | cache lifetime in seconds |
126
+
127
+ ## How "always-up" works
128
+
129
+ - **Key pool** per provider, round-robined to spread load.
130
+ - **Failover chain**: key → next key → next provider until one succeeds.
131
+ - **Circuit breaker** per key: opens after repeated failures, half-opens after a cooldown — no hammering a dead key.
132
+ - **Retry classification**: `429` → cool the key & rotate; `5xx`/timeout → breaker + backoff; `401/403` → disable the key; `4xx` model errors → try another model/provider; other `4xx` → surfaced as a caller bug.
133
+ - **Quota guard**: per-key requests/minute (token bucket) + requests/day counter, so a key predicted to be exhausted is skipped before you waste a call.
134
+ - **`wait=True`** (optional): briefly sleep until a key frees up instead of failing, bounded by `max_wait`.
135
+
136
+ Inspect live state any time:
137
+
138
+ ```python
139
+ for row in llm.health():
140
+ print(row) # provider, key (masked), ready, breaker, rpd_used, last_error, latency
141
+ ```
142
+
143
+ ## Roadmap
144
+
145
+ - v1.1 — streaming (SSE normalization across providers)
146
+ - v1.2 — persistent quota tracking (sqlite/json) + tighter tier pacing
147
+ - v1.3 — tool / function-calling normalization
148
+ - v2 — embeddings, vision; JS/TS and Go ports
149
+
150
+ ## License
151
+
152
+ MIT © Shahriar Labs
153
+
154
+ > Free-tier model lists change often — `freelm` discovers OpenRouter models live and caches them, so you rarely touch the hardcoded list. Tier **rate-limit numbers** are still heuristic defaults; override `rpm`/`rpd`/`tier` as providers evolve.
@@ -0,0 +1,22 @@
1
+ """Run me: python examples/basic.py
2
+
3
+ Set at least one of OPENROUTER_API_KEY / GEMINI_API_KEY / NVIDIA_API_KEY first.
4
+ """
5
+ from freelm import FreeLLM
6
+
7
+
8
+ def main() -> None:
9
+ # Reads keys + tiers from environment.
10
+ llm = FreeLLM.from_env(strategy="quota_aware")
11
+
12
+ print(llm.text("Explain prompt caching in one sentence.", model="chat:fast"))
13
+
14
+ print("\n--- key health ---")
15
+ for row in llm.health():
16
+ print(row)
17
+
18
+ llm.close()
19
+
20
+
21
+ if __name__ == "__main__":
22
+ main()
@@ -0,0 +1,40 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "freelm"
7
+ version = "0.1.0"
8
+ description = "One always-up LLM client over free-tier providers (OpenRouter, Google AI Studio, NVIDIA NIM) with auto key-rotation, failover, circuit breaking and quota-aware routing."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Shihab Shahriar Antor", email = "shahriarlabs@gmail.com" }]
13
+ keywords = ["llm", "openrouter", "gemini", "nvidia-nim", "free", "failover", "router", "openai", "ai"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.9",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Programming Language :: Python :: 3.13",
24
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
25
+ ]
26
+ dependencies = ["httpx>=0.24"]
27
+
28
+ [project.optional-dependencies]
29
+ dev = ["pytest>=7", "respx>=0.20", "anyio>=4"]
30
+
31
+ [project.urls]
32
+ Homepage = "https://github.com/shihabshahrier/freelm"
33
+ Issues = "https://github.com/shihabshahrier/freelm/issues"
34
+
35
+ [tool.hatch.build.targets.wheel]
36
+ packages = ["src/freelm"]
37
+
38
+ [tool.pytest.ini_options]
39
+ testpaths = ["tests"]
40
+ addopts = "-q"
@@ -0,0 +1,63 @@
1
+ """freelm — one always-up LLM client over free-tier providers.
2
+
3
+ Quick start::
4
+
5
+ import freelm
6
+ llm = freelm.FreeLLM.from_env()
7
+ print(llm.text("Explain black holes in one sentence."))
8
+
9
+ Explicit config::
10
+
11
+ from freelm import FreeLLM, OpenRouter, GoogleAIStudio, NIM
12
+ llm = FreeLLM(
13
+ providers=[OpenRouter("sk-or-..."), GoogleAIStudio("AIza..."), NIM("nvapi-...")],
14
+ strategy="quota_aware",
15
+ )
16
+ """
17
+ from __future__ import annotations
18
+
19
+ from ._types import ChatRequest, ChatResponse, Choice, Message, Usage
20
+ from .client import AsyncFreeLLM, FreeLLM
21
+ from .config import providers_from_env
22
+ from .discovery import list_free_models
23
+ from .errors import (
24
+ AuthError,
25
+ ConfigError,
26
+ FreeLLMError,
27
+ ModelNotFound,
28
+ NoProvidersAvailable,
29
+ ProviderError,
30
+ RateLimited,
31
+ Transient,
32
+ )
33
+ from .providers import Gemini, GoogleAIStudio, NIM, OpenRouter, Provider
34
+ from .registry import ModelSpec
35
+
36
+ __version__ = "0.1.0"
37
+
38
+ __all__ = [
39
+ "FreeLLM",
40
+ "AsyncFreeLLM",
41
+ "Provider",
42
+ "OpenRouter",
43
+ "GoogleAIStudio",
44
+ "Gemini",
45
+ "NIM",
46
+ "ModelSpec",
47
+ "Message",
48
+ "ChatRequest",
49
+ "ChatResponse",
50
+ "Choice",
51
+ "Usage",
52
+ "providers_from_env",
53
+ "list_free_models",
54
+ "FreeLLMError",
55
+ "ConfigError",
56
+ "ProviderError",
57
+ "AuthError",
58
+ "RateLimited",
59
+ "Transient",
60
+ "ModelNotFound",
61
+ "NoProvidersAvailable",
62
+ "__version__",
63
+ ]
@@ -0,0 +1,13 @@
1
+ """Exponential backoff with full jitter."""
2
+ from __future__ import annotations
3
+
4
+ import random
5
+
6
+
7
def compute_delay(attempt: int, base: float = 0.5, factor: float = 2.0, cap: float = 30.0, jitter: bool = True) -> float:
    """Return the delay in seconds for a given retry attempt (1-based).

    The un-jittered delay is ``base * factor ** (attempt - 1)`` clamped to
    ``cap``. Attempts below 1 are treated as attempt 1.

    Args:
        attempt: Retry attempt number; values < 1 are clamped to 1.
        base: Delay for the first attempt.
        factor: Multiplier applied per additional attempt.
        cap: Upper bound on the un-jittered delay.
        jitter: When true, return a uniform random value in ``[0, delay]``
            (full jitter); otherwise return the clamped delay itself.

    Returns:
        The delay in seconds.
    """
    attempt = max(1, attempt)
    try:
        raw = min(cap, base * (factor ** (attempt - 1)))
    except OverflowError:
        # Float exponentiation (e.g. 2.0 ** 1024) raises instead of returning
        # inf. With the usual positive base and factor > 1 the true value is
        # far beyond the cap, so saturate rather than crash the retry loop.
        raw = cap
    if jitter:
        return random.uniform(0.0, raw)
    return raw
@@ -0,0 +1,41 @@
1
+ """Per-key circuit breaker. Time is injected (monotonic seconds) for testability."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass
5
+
6
# Breaker states.
CLOSED = "closed"
OPEN = "open"
HALF_OPEN = "half_open"


@dataclass
class CircuitBreaker:
    """Three-state (closed / open / half-open) failure gate.

    Callers pass the current time in as ``now``; the breaker never reads a
    clock itself.
    """

    fail_threshold: int = 4   # failure count at which the breaker opens
    cooldown: float = 30.0    # seconds spent OPEN before a probe is allowed
    state: str = CLOSED
    failures: int = 0         # failures recorded since the last success
    opened_at: float = 0.0    # ``now`` value at the most recent trip

    def allow(self, now: float) -> bool:
        """Return whether a request may be attempted at time ``now``.

        An OPEN breaker whose cooldown has elapsed moves to HALF_OPEN and
        lets the request through; CLOSED and HALF_OPEN always allow.
        """
        if self.state != OPEN:
            return True
        cooled_down = now - self.opened_at >= self.cooldown
        if cooled_down:
            self.state = HALF_OPEN
        return cooled_down

    def on_success(self) -> None:
        """Record a success: close the breaker and clear the failure count."""
        self.state = CLOSED
        self.failures = 0

    def on_failure(self, now: float) -> None:
        """Record a failure at ``now``, tripping the breaker when warranted.

        The breaker opens (and ``opened_at`` is reset) when the failure
        happens in HALF_OPEN or once ``failures`` reaches ``fail_threshold``.
        """
        self.failures += 1
        probing = self.state == HALF_OPEN
        if not probing and self.failures < self.fail_threshold:
            return
        self.state = OPEN
        self.opened_at = now

    def time_until_half_open(self, now: float) -> float:
        """Return seconds left before an OPEN breaker allows a probe.

        Returns 0.0 when the breaker is not OPEN or the cooldown has passed.
        """
        if self.state == OPEN:
            remaining = self.cooldown - (now - self.opened_at)
            return max(0.0, remaining)
        return 0.0