loom-router 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loom_router-1.0.0/LICENSE +21 -0
- loom_router-1.0.0/PKG-INFO +366 -0
- loom_router-1.0.0/loom/__init__.py +91 -0
- loom_router-1.0.0/loom/_cache.py +162 -0
- loom_router-1.0.0/loom/_call_key.py +45 -0
- loom_router-1.0.0/loom/_context.py +45 -0
- loom_router-1.0.0/loom/_context_cache.py +84 -0
- loom_router-1.0.0/loom/_dedup.py +103 -0
- loom_router-1.0.0/loom/_equivalents.py +112 -0
- loom_router-1.0.0/loom/_logging.py +89 -0
- loom_router-1.0.0/loom/_loom.py +580 -0
- loom_router-1.0.0/loom/_pricing.py +108 -0
- loom_router-1.0.0/loom/_prompt_cache_rates.py +39 -0
- loom_router-1.0.0/loom/_retry.py +171 -0
- loom_router-1.0.0/loom/_router.py +244 -0
- loom_router-1.0.0/loom/batch.py +161 -0
- loom_router-1.0.0/loom/batch_providers/__init__.py +51 -0
- loom_router-1.0.0/loom/batch_providers/anthropic_batch.py +248 -0
- loom_router-1.0.0/loom/batch_providers/openai_batch.py +255 -0
- loom_router-1.0.0/loom/catalog/__init__.py +22 -0
- loom_router-1.0.0/loom/catalog/_catalog.py +109 -0
- loom_router-1.0.0/loom/catalog/_data.py +309 -0
- loom_router-1.0.0/loom/catalog/backends.py +75 -0
- loom_router-1.0.0/loom/context_cache_providers/__init__.py +44 -0
- loom_router-1.0.0/loom/context_cache_providers/gemini_context_cache.py +84 -0
- loom_router-1.0.0/loom/errors.py +26 -0
- loom_router-1.0.0/loom/observability/__init__.py +37 -0
- loom_router-1.0.0/loom/observability/dashboard.py +89 -0
- loom_router-1.0.0/loom/observability/handler.py +37 -0
- loom_router-1.0.0/loom/observability/queries.py +151 -0
- loom_router-1.0.0/loom/observability/sink.py +146 -0
- loom_router-1.0.0/loom/observability/templates/dashboard.html +225 -0
- loom_router-1.0.0/loom/providers/__init__.py +105 -0
- loom_router-1.0.0/loom/providers/_common.py +61 -0
- loom_router-1.0.0/loom/providers/_openai_compatible.py +134 -0
- loom_router-1.0.0/loom/providers/anthropic_provider.py +139 -0
- loom_router-1.0.0/loom/providers/bfl_provider.py +85 -0
- loom_router-1.0.0/loom/providers/deepseek_provider.py +38 -0
- loom_router-1.0.0/loom/providers/gemini_provider.py +185 -0
- loom_router-1.0.0/loom/providers/hunyuan_provider.py +100 -0
- loom_router-1.0.0/loom/providers/ideogram_provider.py +45 -0
- loom_router-1.0.0/loom/providers/minimax_provider.py +38 -0
- loom_router-1.0.0/loom/providers/mistral_provider.py +38 -0
- loom_router-1.0.0/loom/providers/moonshot_provider.py +43 -0
- loom_router-1.0.0/loom/providers/openai_provider.py +129 -0
- loom_router-1.0.0/loom/providers/perplexity_provider.py +38 -0
- loom_router-1.0.0/loom/providers/seedream_provider.py +57 -0
- loom_router-1.0.0/loom/providers/together_provider.py +38 -0
- loom_router-1.0.0/loom/providers/xai_provider.py +38 -0
- loom_router-1.0.0/loom/providers/zhipu_provider.py +38 -0
- loom_router-1.0.0/loom/types.py +75 -0
- loom_router-1.0.0/loom/vault.py +314 -0
- loom_router-1.0.0/loom_router.egg-info/PKG-INFO +366 -0
- loom_router-1.0.0/loom_router.egg-info/SOURCES.txt +81 -0
- loom_router-1.0.0/loom_router.egg-info/dependency_links.txt +1 -0
- loom_router-1.0.0/loom_router.egg-info/requires.txt +34 -0
- loom_router-1.0.0/loom_router.egg-info/top_level.txt +1 -0
- loom_router-1.0.0/pyproject.toml +66 -0
- loom_router-1.0.0/readme.md +296 -0
- loom_router-1.0.0/setup.cfg +4 -0
- loom_router-1.0.0/tests/test_anthropic_batch.py +272 -0
- loom_router-1.0.0/tests/test_api_keys.py +66 -0
- loom_router-1.0.0/tests/test_app_patch.py +205 -0
- loom_router-1.0.0/tests/test_async.py +80 -0
- loom_router-1.0.0/tests/test_batch.py +269 -0
- loom_router-1.0.0/tests/test_cache.py +129 -0
- loom_router-1.0.0/tests/test_call_key.py +45 -0
- loom_router-1.0.0/tests/test_catalog.py +77 -0
- loom_router-1.0.0/tests/test_catalog_backends.py +64 -0
- loom_router-1.0.0/tests/test_context_cache.py +233 -0
- loom_router-1.0.0/tests/test_cost.py +94 -0
- loom_router-1.0.0/tests/test_dedup.py +157 -0
- loom_router-1.0.0/tests/test_dispatch.py +95 -0
- loom_router-1.0.0/tests/test_failover.py +166 -0
- loom_router-1.0.0/tests/test_logging.py +57 -0
- loom_router-1.0.0/tests/test_observability.py +261 -0
- loom_router-1.0.0/tests/test_openai_smoke.py +64 -0
- loom_router-1.0.0/tests/test_prompt_caching.py +363 -0
- loom_router-1.0.0/tests/test_provider_registry.py +41 -0
- loom_router-1.0.0/tests/test_retry.py +146 -0
- loom_router-1.0.0/tests/test_routing.py +232 -0
- loom_router-1.0.0/tests/test_vault.py +276 -0
- loom_router-1.0.0/tests/test_version.py +31 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jyotiraditya Singh
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: loom-router
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: One contract for every AI vendor. Native SDKs preserved; keys, cost, retries, and routing centralised.
|
|
5
|
+
Author: Eyas Ventures
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Jyotiraditya Singh
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/jyotir07/Loom
|
|
29
|
+
Project-URL: Repository, https://github.com/jyotir07/Loom
|
|
30
|
+
Project-URL: Issues, https://github.com/jyotir07/Loom/issues
|
|
31
|
+
Project-URL: Changelog, https://github.com/jyotir07/Loom/blob/main/CHANGELOG.md
|
|
32
|
+
Keywords: llm,ai,openai,anthropic,gemini,router
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Programming Language :: Python :: 3
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
38
|
+
Classifier: Operating System :: OS Independent
|
|
39
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
40
|
+
Requires-Python: >=3.10
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
License-File: LICENSE
|
|
43
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
44
|
+
Requires-Dist: requests>=2.31.0
|
|
45
|
+
Provides-Extra: openai
|
|
46
|
+
Requires-Dist: openai>=1.50.0; extra == "openai"
|
|
47
|
+
Provides-Extra: anthropic
|
|
48
|
+
Requires-Dist: anthropic>=0.39.0; extra == "anthropic"
|
|
49
|
+
Provides-Extra: gemini
|
|
50
|
+
Requires-Dist: google-genai>=0.3.0; extra == "gemini"
|
|
51
|
+
Provides-Extra: tencent
|
|
52
|
+
Requires-Dist: tencentcloud-sdk-python>=3.0.1200; extra == "tencent"
|
|
53
|
+
Provides-Extra: yaml
|
|
54
|
+
Requires-Dist: PyYAML>=6.0; extra == "yaml"
|
|
55
|
+
Provides-Extra: redis
|
|
56
|
+
Requires-Dist: redis>=5.0; extra == "redis"
|
|
57
|
+
Provides-Extra: all
|
|
58
|
+
Requires-Dist: openai>=1.50.0; extra == "all"
|
|
59
|
+
Requires-Dist: anthropic>=0.39.0; extra == "all"
|
|
60
|
+
Requires-Dist: google-genai>=0.3.0; extra == "all"
|
|
61
|
+
Requires-Dist: tencentcloud-sdk-python>=3.0.1200; extra == "all"
|
|
62
|
+
Requires-Dist: PyYAML>=6.0; extra == "all"
|
|
63
|
+
Requires-Dist: redis>=5.0; extra == "all"
|
|
64
|
+
Provides-Extra: dev
|
|
65
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
66
|
+
Requires-Dist: pytest-mock>=3.12; extra == "dev"
|
|
67
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
68
|
+
Requires-Dist: PyYAML>=6.0; extra == "dev"
|
|
69
|
+
Dynamic: license-file
|
|
70
|
+
|
|
71
|
+
# Loom
|
|
72
|
+
|
|
73
|
+
*One API for every AI provider. Built once, used everywhere.*
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## The idea
|
|
78
|
+
|
|
79
|
+
Every team that builds something with AI ends up writing the same plumbing: pick a provider, learn their SDK, manage their API keys, handle their errors, track their costs, repeat for the next provider. Multiply this across a dozen projects and the company is paying for the same integration work over and over — while still losing out on cost optimizations that no single project has the time to build.
|
|
80
|
+
|
|
81
|
+
Loom is a Python framework that sits between your projects and the AI providers. Projects make one kind of call. Loom handles the rest: which vendor, which model, which SDK, which retry policy, which cache, which batch endpoint. The provider list grows in one place. The optimizations land in one place. Every project benefits the moment they upgrade.
|
|
82
|
+
|
|
83
|
+
It is not an aggregator. Each vendor is integrated with its own native SDK so vendor-specific features (prompt caching, grounding, image polling, streaming, structured output) are preserved instead of flattened to a lowest common denominator.
|
|
84
|
+
|
|
85
|
+
## What problem it solves
|
|
86
|
+
|
|
87
|
+
In a typical org without something like Loom:
|
|
88
|
+
|
|
89
|
+
- **Every project re-integrates the same vendors.** Five projects calling OpenAI means five sets of retry logic, five places where the key lives, five different ways of handling rate limits.
|
|
90
|
+
- **Switching models requires a code change in each project.** When a cheaper model launches, or a vendor deprecates a model ID, somebody has to file PRs across every repo.
|
|
91
|
+
- **Cost optimizations get skipped.** Prompt caching, batch APIs, smart routing to cheaper models — these all require real engineering effort that no single project can justify on its own.
|
|
92
|
+
- **API keys live in too many places.** Each project repo has its own `.env`, its own secrets manager entry, its own potential leak surface.
|
|
93
|
+
- **There's no unified view of cost.** Finance asks "what are we spending on AI?" and the answer is "we'll get back to you."
|
|
94
|
+
- **Vendor breaking changes hurt N times.** When a provider changes their response shape, every consuming project breaks.
|
|
95
|
+
|
|
96
|
+
Loom collapses all of this into **ONE** library.
|
|
97
|
+
|
|
98
|
+
## What Loom provides
|
|
99
|
+
|
|
100
|
+
### A single, stable contract
|
|
101
|
+
|
|
102
|
+
Every call goes through one function:
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from loom import generate
|
|
106
|
+
|
|
107
|
+
result = generate(
|
|
108
|
+
provider="anthropic",
|
|
109
|
+
model="claude-opus-4-7",
|
|
110
|
+
prompt="Summarize this contract in three bullets.",
|
|
111
|
+
params={"max_tokens": 500},
|
|
112
|
+
)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
The return shape is consistent across providers:
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
# Text response
|
|
119
|
+
{"kind": "text", "text": "..."}
|
|
120
|
+
|
|
121
|
+
# Image response
|
|
122
|
+
{"kind": "image", "images": [{"mime_type": "...", "data_b64": "..."}]}
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
Sync and async are both supported. Type-hinted responses are available for IDE autocomplete.
|
|
126
|
+
|
|
127
|
+
### A pluggable provider registry
|
|
128
|
+
|
|
129
|
+
Loom ships with 14+ providers wired up:
|
|
130
|
+
|
|
131
|
+
- **Text and image:** OpenAI, Google Gemini
|
|
132
|
+
- **Text only:** Anthropic, xAI (Grok), Mistral, DeepSeek, MiniMax, Z.AI (GLM), Perplexity, Together AI
|
|
133
|
+
- **Image only:** Black Forest Labs (Flux), ByteDance Seedream, Tencent Hunyuan, Ideogram
|
|
134
|
+
|
|
135
|
+
Any OpenAI-compatible provider can be added in roughly ten lines of code via a shared adapter. Native-SDK providers follow a documented `generate(modality, model, params, prompt) -> dict` contract.
|
|
136
|
+
|
|
137
|
+
### A catalog of models
|
|
138
|
+
|
|
139
|
+
Models are registered as data, not code. Each entry carries:
|
|
140
|
+
|
|
141
|
+
- Stable ID and display name
|
|
142
|
+
- Upstream model ID (the one the vendor expects)
|
|
143
|
+
- Default parameters
|
|
144
|
+
- Pricing (per 1M input/output tokens for text, per image for image)
|
|
145
|
+
- Free-tier flag
|
|
146
|
+
|
|
147
|
+
Adding a new model is a one-line catalog entry. The catalog can be backed by an in-memory dict, a YAML file, or Postgres — pick what fits the consuming project.
|
|
148
|
+
|
|
149
|
+
### A cost optimization layer
|
|
150
|
+
|
|
151
|
+
This is where Loom pays for itself. These optimizations are built once, in the framework, and every consuming project inherits them on upgrade.
|
|
152
|
+
|
|
153
|
+
- **Response caching.** Identical `(provider, model, prompt, params)` calls hit a cache instead of the API. Realistic savings of 20–60% on workloads with repeated queries.
|
|
154
|
+
- **Vendor-native prompt caching.** Anthropic, OpenAI, Gemini, and DeepSeek all offer 50–90% discounts on cached prefix tokens. Loom wires this up automatically for repeated system prompts and few-shot examples.
|
|
155
|
+
- **Smart model routing.** Try a cheap model first (Haiku, GPT-4o-mini, Gemini Flash); escalate to expensive ones only when confidence is low or validation fails. Realistic savings of 50–80% on mixed workloads.
|
|
156
|
+
- **Batch API usage.** OpenAI, Anthropic, and Gemini all offer 50% discounts on batch endpoints with 24-hour turnaround. Loom can auto-batch non-urgent calls.
|
|
157
|
+
- **Centralized retry and failover.** Exponential backoff done correctly once. If one vendor is down or rate-limited, fall back to an equivalent model on another vendor instead of failing.
|
|
158
|
+
- **Request deduplication.** When the same call fires from multiple places within a short window, collapse to one upstream request.
|
|
159
|
+
|
|
160
|
+
These numbers are ceilings, not guarantees — actual savings depend on workload. A project making 100% unique real-time calls won't benefit much from caching. The point is: the headroom exists, and projects don't have to build any of it themselves.
|
|
161
|
+
|
|
162
|
+
### Centralized key management
|
|
163
|
+
|
|
164
|
+
API keys live in one place — the Loom deployment — not in each consuming project's repo or environment. Projects authenticate to Loom with their own credentials and never see vendor keys.
|
|
165
|
+
|
|
166
|
+
### Observability
|
|
167
|
+
|
|
168
|
+
Every call is logged with provider, model, latency, token counts, and cost (in both USD and the configured local currency). Projects get a per-call cost field on every response. Finance gets a unified dashboard. Engineering gets to find the prompts that are burning the budget.
|
|
169
|
+
|
|
170
|
+
## Architecture
|
|
171
|
+
|
|
172
|
+
### Where Loom sits
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
176
|
+
│ Company projects │
|
|
177
|
+
│ Support bot │ Analytics │ Marketing │ Doc search │ Sales │
|
|
178
|
+
└──────────────────────────┬──────────────────────────────────┘
|
|
179
|
+
│
|
|
180
|
+
│ generate(provider, model, prompt)
|
|
181
|
+
▼
|
|
182
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
183
|
+
│ Loom │
|
|
184
|
+
│ │
|
|
185
|
+
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
|
186
|
+
│ │ Auth + keys │ │ Cache │ │ Router │ │
|
|
187
|
+
│ └─────────────┘ └─────────────┘ └─────────────┘ │
|
|
188
|
+
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
|
189
|
+
│ │ Batcher │ │ Retry │ │ Cost logs │ │
|
|
190
|
+
│ └─────────────┘ └─────────────┘ └─────────────┘ │
|
|
191
|
+
│ │
|
|
192
|
+
│ ┌─────────────────────────────────────────────────────┐ │
|
|
193
|
+
│ │ Catalog + Provider registry + Adapters │ │
|
|
194
|
+
│ └─────────────────────────────────────────────────────┘ │
|
|
195
|
+
└──────────────────────────┬──────────────────────────────────┘
|
|
196
|
+
│
|
|
197
|
+
▼
|
|
198
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
199
|
+
│ AI providers │
|
|
200
|
+
│ OpenAI │ Anthropic │ Gemini │ BFL │ + 10 more vendors │
|
|
201
|
+
└─────────────────────────────────────────────────────────────┘
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Internal layers
|
|
205
|
+
|
|
206
|
+
1. **Public API.** The single `generate(...)` function (sync and async). Stable across versions.
|
|
207
|
+
2. **Optimization layer.** Cache, router, batcher, retry, dedup, logging. Each is independently toggleable per call.
|
|
208
|
+
3. **Core services.** Catalog (what models exist), provider registry (who knows how to call them), observability (what happened).
|
|
209
|
+
4. **Provider adapters.** One module per vendor. OpenAI-compatible vendors share a single ~12-line adapter; native-SDK vendors get their own module.
|
|
210
|
+
5. **Upstream.** The actual vendor APIs. Out of our control, but their churn is absorbed inside Loom.
|
|
211
|
+
|
|
212
|
+
## Integration
|
|
213
|
+
|
|
214
|
+
### Installation
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
pip install loom-router
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### Minimum viable usage
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
from loom import Loom
|
|
224
|
+
|
|
225
|
+
c = Loom.from_env() # picks up keys from environment variables
|
|
226
|
+
|
|
227
|
+
response = c.generate(
|
|
228
|
+
provider="anthropic",
|
|
229
|
+
model="claude-sonnet-4-6",
|
|
230
|
+
prompt="Write three subject lines for a launch email.",
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
print(response["text"])
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
### Programmatic configuration
|
|
237
|
+
|
|
238
|
+
For projects that don't want to use environment variables:
|
|
239
|
+
|
|
240
|
+
```python
|
|
241
|
+
from loom import Loom, Catalog
|
|
242
|
+
|
|
243
|
+
catalog = Catalog()
|
|
244
|
+
catalog.register_model(
|
|
245
|
+
provider="openai",
|
|
246
|
+
model_id="gpt-5",
|
|
247
|
+
upstream_model="gpt-5-2026-01",
|
|
248
|
+
input_cost_per_1m=2.50,
|
|
249
|
+
output_cost_per_1m=10.00,
|
|
250
|
+
)
|
|
251
|
+
|
|
252
|
+
c = Loom(
|
|
253
|
+
catalog=catalog,
|
|
254
|
+
api_keys={"openai": "sk-...", "anthropic": "sk-ant-..."},
|
|
255
|
+
cache_backend="redis://localhost:6379",
|
|
256
|
+
)
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
### Async usage
|
|
260
|
+
|
|
261
|
+
```python
|
|
262
|
+
import asyncio
|
|
263
|
+
from loom import Loom
|
|
264
|
+
|
|
265
|
+
async def main():
|
|
266
|
+
c = Loom.from_env()
|
|
267
|
+
response = await c.agenerate(
|
|
268
|
+
provider="gemini",
|
|
269
|
+
model="gemini-2.5-pro",
|
|
270
|
+
prompt="...",
|
|
271
|
+
)
|
|
272
|
+
return response
|
|
273
|
+
|
|
274
|
+
asyncio.run(main())
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
### Adding a new provider
|
|
278
|
+
|
|
279
|
+
Most new providers are OpenAI-compatible. For these, registration is a one-liner:
|
|
280
|
+
|
|
281
|
+
```python
|
|
282
|
+
c.register_openai_compatible(
|
|
283
|
+
key="newco",
|
|
284
|
+
label="NewCo AI",
|
|
285
|
+
base_url="https://api.newco.ai/v1",
|
|
286
|
+
api_key_env="NEWCO_API_KEY",
|
|
287
|
+
)
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
For providers with native SDKs or async polling patterns (like BFL or Hunyuan), implement the contract:
|
|
291
|
+
|
|
292
|
+
```python
|
|
293
|
+
# providers/newco_provider.py
|
|
294
|
+
def generate(modality: str, model: str, params: dict, prompt: str) -> dict:
|
|
295
|
+
...
|
|
296
|
+
return {"kind": "text", "text": "..."}
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
Register it once in `providers/__init__.py` and it's available everywhere.
|
|
300
|
+
|
|
301
|
+
### Framework-agnostic by design
|
|
302
|
+
|
|
303
|
+
Loom doesn't care what's calling it. It works inside:
|
|
304
|
+
|
|
305
|
+
- Flask, FastAPI, or Django apps
|
|
306
|
+
- Celery workers and background jobs
|
|
307
|
+
- CLI scripts
|
|
308
|
+
- Jupyter notebooks
|
|
309
|
+
- AWS Lambda or other serverless runtimes
|
|
310
|
+
|
|
311
|
+
There is no web framework lock-in. Loom is a library, not a service — though it can be deployed as a service if a team wants to centralize it behind an internal HTTP API.
|
|
312
|
+
|
|
313
|
+
## Migration path for existing projects
|
|
314
|
+
|
|
315
|
+
Loom is designed for incremental adoption. A project doesn't have to rewrite anything to start using it.
|
|
316
|
+
|
|
317
|
+
**Step 1 — Install and replace the simplest call site.**
|
|
318
|
+
Pick one place where the project currently calls a vendor SDK directly. Replace that call with `loom.generate(...)`. Ship it. Verify cost logging shows up in the dashboard.
|
|
319
|
+
|
|
320
|
+
**Step 2 — Migrate the remaining call sites at the team's own pace.**
|
|
321
|
+
There's no "big bang" cutover. Old direct-SDK calls and new Loom calls coexist fine.
|
|
322
|
+
|
|
323
|
+
**Step 3 — Remove vendor SDKs from the project's dependencies.**
|
|
324
|
+
Once all call sites are migrated, the project can drop `openai`, `anthropic`, `google-genai`, etc. from its requirements. Loom owns those dependencies now.
|
|
325
|
+
|
|
326
|
+
**Step 4 — Opt into optimization features.**
|
|
327
|
+
Enable caching, smart routing, or batching per-call or globally. These are off by default to preserve exact behavior during migration, then turned on once the team is comfortable.
|
|
328
|
+
|
|
329
|
+
## What this delivers
|
|
330
|
+
|
|
331
|
+
For **engineering**:
|
|
332
|
+
|
|
333
|
+
- One API to learn, not fourteen
|
|
334
|
+
- One place to upgrade SDKs, not N project repos
|
|
335
|
+
- One place where retry logic, error handling, and timeouts live
|
|
336
|
+
- New AI projects go from "spec" to "first call" in hours
|
|
337
|
+
|
|
338
|
+
For **finance**:
|
|
339
|
+
|
|
340
|
+
- Per-project, per-model, per-day cost reporting
|
|
341
|
+
- Budget alerts before a runaway prompt empties the account
|
|
342
|
+
- Visibility into which optimizations are actually saving money
|
|
343
|
+
|
|
344
|
+
For **security**:
|
|
345
|
+
|
|
346
|
+
- API keys live in one audited location
|
|
347
|
+
- No vendor credentials in project repos or developer laptops
|
|
348
|
+
- Centralized rate limiting prevents one bug from burning the entire org's quota
|
|
349
|
+
|
|
350
|
+
For **the org**:
|
|
351
|
+
|
|
352
|
+
- Lower per-call cost via centralized optimization
|
|
353
|
+
- Faster product velocity on anything AI-touching
|
|
354
|
+
- Insulation from vendor lock-in — switching providers is a config change
|
|
355
|
+
|
|
356
|
+
## Status
|
|
357
|
+
|
|
358
|
+
Loom is built on top of the existing Models Catalog project, which already has the provider abstraction, native SDK adapters, unified catalog, and `generate(...)` contract working in production. The remaining work is packaging it as an installable library, extracting the engine from the Flask app, adding the optimization layer, and writing documentation.
|
|
359
|
+
|
|
360
|
+
Estimated timeline:
|
|
361
|
+
|
|
362
|
+
- **Stage 1 (1–2 weeks):** Extract into a proper Python package. Flask app keeps working, now importing from the new library.
|
|
363
|
+
- **Stage 2 (3–4 weeks):** Programmatic configuration, optional Postgres, typed responses, async support. Internal release.
|
|
364
|
+
- **Stage 3 (ongoing):** Optimization layer, observability dashboard, semver stability, public docs.
|
|
365
|
+
|
|
366
|
+
Each stage delivers value independently. The org can stop at Stage 2 and have a useful internal library, or continue to Stage 3 for the full cost-optimization story.
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Loom — one contract for every AI vendor.
|
|
2
|
+
|
|
3
|
+
Quick start:
|
|
4
|
+
|
|
5
|
+
import loom
|
|
6
|
+
|
|
7
|
+
result = loom.generate(
|
|
8
|
+
provider="openai",
|
|
9
|
+
modality="text",
|
|
10
|
+
model="gpt-4o-mini",
|
|
11
|
+
prompt="Say hi in five words.",
|
|
12
|
+
)
|
|
13
|
+
print(result["text"])
|
|
14
|
+
|
|
15
|
+
Same thing, explicit instance (useful when you want a non-default config):
|
|
16
|
+
|
|
17
|
+
from loom import Loom
|
|
18
|
+
|
|
19
|
+
client = Loom.from_env()
|
|
20
|
+
result = client.generate(
|
|
21
|
+
provider="openai",
|
|
22
|
+
modality="text",
|
|
23
|
+
model="gpt-4o-mini",
|
|
24
|
+
prompt="Say hi in five words.",
|
|
25
|
+
)
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from loom._cache import CacheBackend, InMemoryCache, RedisCache
|
|
29
|
+
from loom._context_cache import ContextCacheHandle
|
|
30
|
+
from loom._equivalents import EquivalenceMap
|
|
31
|
+
from loom._loom import AsyncLoom, Loom, agenerate, generate
|
|
32
|
+
from loom._retry import RetryPolicy
|
|
33
|
+
from loom._router import Candidate, Router
|
|
34
|
+
from loom.batch import BatchHandle, BatchRequest
|
|
35
|
+
from loom.catalog import Catalog
|
|
36
|
+
from loom.errors import (
|
|
37
|
+
AuthError,
|
|
38
|
+
LoomError,
|
|
39
|
+
ModelNotFoundError,
|
|
40
|
+
ProviderError,
|
|
41
|
+
RateLimitError,
|
|
42
|
+
)
|
|
43
|
+
from loom.types import (
|
|
44
|
+
Cost,
|
|
45
|
+
ImagePayload,
|
|
46
|
+
ImageResponse,
|
|
47
|
+
TextResponse,
|
|
48
|
+
Usage,
|
|
49
|
+
)
|
|
50
|
+
from loom.vault import (
|
|
51
|
+
AWSSecretsManagerVault,
|
|
52
|
+
GCPSecretManagerVault,
|
|
53
|
+
HashiCorpVaultBackend,
|
|
54
|
+
InMemoryVault,
|
|
55
|
+
KeyVault,
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
__all__ = [
|
|
59
|
+
"Loom",
|
|
60
|
+
"AsyncLoom",
|
|
61
|
+
"Catalog",
|
|
62
|
+
"generate",
|
|
63
|
+
"agenerate",
|
|
64
|
+
"RetryPolicy",
|
|
65
|
+
"CacheBackend",
|
|
66
|
+
"InMemoryCache",
|
|
67
|
+
"RedisCache",
|
|
68
|
+
"BatchRequest",
|
|
69
|
+
"BatchHandle",
|
|
70
|
+
"ContextCacheHandle",
|
|
71
|
+
"Router",
|
|
72
|
+
"Candidate",
|
|
73
|
+
"EquivalenceMap",
|
|
74
|
+
"LoomError",
|
|
75
|
+
"ProviderError",
|
|
76
|
+
"AuthError",
|
|
77
|
+
"RateLimitError",
|
|
78
|
+
"ModelNotFoundError",
|
|
79
|
+
"TextResponse",
|
|
80
|
+
"ImageResponse",
|
|
81
|
+
"ImagePayload",
|
|
82
|
+
"Usage",
|
|
83
|
+
"Cost",
|
|
84
|
+
"KeyVault",
|
|
85
|
+
"InMemoryVault",
|
|
86
|
+
"AWSSecretsManagerVault",
|
|
87
|
+
"GCPSecretManagerVault",
|
|
88
|
+
"HashiCorpVaultBackend",
|
|
89
|
+
]
|
|
90
|
+
|
|
91
|
+
__version__ = "1.0.0"
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Response cache — pluggable backends.
|
|
2
|
+
|
|
3
|
+
Cache key is the SHA-256 of the canonical (provider, modality, model,
|
|
4
|
+
prompt, params) tuple — see loom._call_key. Hits return a deep copy of
|
|
5
|
+
the cached value so callers can't mutate the cached payload.
|
|
6
|
+
|
|
7
|
+
Backends:
|
|
8
|
+
|
|
9
|
+
InMemoryCache(maxsize, ttl) — LRU + TTL, single-process. Default.
|
|
10
|
+
RedisCache(url=..., ttl=...) — Redis. Requires `pip install redis`.
|
|
11
|
+
|
|
12
|
+
Wire on the client:
|
|
13
|
+
|
|
14
|
+
Loom(cache=InMemoryCache(maxsize=10_000, ttl=3600))
|
|
15
|
+
|
|
16
|
+
Per-call opt-out:
|
|
17
|
+
|
|
18
|
+
Loom(...).generate(..., use_cache=False)
|
|
19
|
+
|
|
20
|
+
Image responses are cached too — they're just bytes-as-base64 in the
|
|
21
|
+
unified shape, no streaming, so they round-trip cleanly.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import copy
|
|
27
|
+
import json
|
|
28
|
+
import logging
|
|
29
|
+
import threading
|
|
30
|
+
import time
|
|
31
|
+
from collections import OrderedDict
|
|
32
|
+
from typing import Any, Protocol
|
|
33
|
+
|
|
34
|
+
_logger = logging.getLogger("loom.cache")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class CacheBackend(Protocol):
    """Structural interface every response-cache backend implements.

    Backends MUST be thread-safe: the same instance may be shared by
    concurrent callers.
    """

    def get(self, key: str) -> dict[str, Any] | None:
        """Look up `key`; return the cached response dict, or None on a miss."""

    def set(self, key: str, value: dict[str, Any], ttl: float | None = None) -> None:
        """Store `value` under `key`.

        `ttl` is an optional per-entry lifetime in seconds. The bundled
        backends fall back to their own default when it is None.
        """
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class InMemoryCache:
    """LRU + TTL cache in a single process. Default backend.

    `maxsize=0` (or any non-positive value) makes it unbounded.
    `ttl=None` or `ttl=0` keeps entries indefinitely; a positive `ttl`
    (seconds) expires entries lazily, on read. A negative `ttl` produces
    an entry that is already expired and is dropped on the next read.

    All access to the underlying mapping is serialised by one lock. Deep
    copies are made outside that lock: stored payloads are private
    snapshots that are only ever replaced, never mutated in place, so
    copying them does not need mutual exclusion, and large payloads
    (e.g. base64 images) no longer stall other threads.
    """

    def __init__(self, *, maxsize: int = 1024, ttl: float | None = 3600.0) -> None:
        self.maxsize = maxsize
        self.ttl = ttl
        # key -> (expires_at, payload); expires_at == 0.0 means "never expires".
        self._data: OrderedDict[str, tuple[float, dict[str, Any]]] = OrderedDict()
        self._lock = threading.Lock()

    def get(self, key: str) -> dict[str, Any] | None:
        """Return a deep copy of the live entry for `key`, or None.

        A hit marks the entry as most recently used. An expired entry is
        removed and reported as a miss.
        """
        with self._lock:
            entry = self._data.get(key)
            if entry is None:
                return None
            expires_at, value = entry
            if expires_at > 0 and time.time() > expires_at:
                self._data.pop(key, None)
                return None
            # Mark as recently used.
            self._data.move_to_end(key)
        # Copy after releasing the lock; `value` is never mutated in place.
        return copy.deepcopy(value)

    def set(
        self, key: str, value: dict[str, Any], ttl: float | None = None
    ) -> None:
        """Store a deep copy of `value` under `key`.

        `ttl` overrides the instance default when it is not None. After
        the insert, least-recently-used entries are evicted until the
        cache fits within `maxsize` (when `maxsize` is positive).
        """
        effective_ttl = ttl if ttl is not None else self.ttl
        expires_at = time.time() + effective_ttl if effective_ttl else 0.0
        # Snapshot before taking the lock so a large payload does not
        # block concurrent readers and writers.
        snapshot = copy.deepcopy(value)
        with self._lock:
            self._data[key] = (expires_at, snapshot)
            self._data.move_to_end(key)
            if self.maxsize > 0:
                while len(self._data) > self.maxsize:
                    self._data.popitem(last=False)

    def clear(self) -> None:
        """Drop every entry."""
        with self._lock:
            self._data.clear()

    def __len__(self) -> int:
        """Return the number of stored entries, including expired ones not yet read."""
        with self._lock:
            return len(self._data)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class RedisCache:
|
|
95
|
+
"""Redis-backed cache.
|
|
96
|
+
|
|
97
|
+
Lazy-imports `redis` so the dependency stays optional. The connection
|
|
98
|
+
is built on first use; failures during get/set are logged and swallowed
|
|
99
|
+
(treat as a miss / no-op) so a Redis outage degrades to no-cache rather
|
|
100
|
+
than failing every request.
|
|
101
|
+
"""
|
|
102
|
+
|
|
103
|
+
def __init__(
|
|
104
|
+
self,
|
|
105
|
+
*,
|
|
106
|
+
url: str = "redis://localhost:6379/0",
|
|
107
|
+
ttl: float | None = 3600.0,
|
|
108
|
+
prefix: str = "loom:",
|
|
109
|
+
) -> None:
|
|
110
|
+
self.url = url
|
|
111
|
+
self.ttl = ttl
|
|
112
|
+
self.prefix = prefix
|
|
113
|
+
self._client: Any | None = None
|
|
114
|
+
|
|
115
|
+
def _conn(self) -> Any:
|
|
116
|
+
if self._client is None:
|
|
117
|
+
try:
|
|
118
|
+
import redis # type: ignore[import-not-found]
|
|
119
|
+
except ImportError as exc:
|
|
120
|
+
raise ImportError(
|
|
121
|
+
"loom.RedisCache requires the `redis` package. "
|
|
122
|
+
"Install with `pip install loom-router[redis]` or `pip install redis`."
|
|
123
|
+
) from exc
|
|
124
|
+
self._client = redis.Redis.from_url(self.url, decode_responses=False)
|
|
125
|
+
return self._client
|
|
126
|
+
|
|
127
|
+
def _k(self, key: str) -> str:
|
|
128
|
+
return self.prefix + key
|
|
129
|
+
|
|
130
|
+
def get(self, key: str) -> dict[str, Any] | None:
|
|
131
|
+
try:
|
|
132
|
+
blob = self._conn().get(self._k(key))
|
|
133
|
+
except Exception as exc: # noqa: BLE001 — log and degrade
|
|
134
|
+
_logger.warning("redis cache get failed: %s", exc)
|
|
135
|
+
return None
|
|
136
|
+
if blob is None:
|
|
137
|
+
return None
|
|
138
|
+
try:
|
|
139
|
+
return json.loads(blob)
|
|
140
|
+
except Exception as exc: # noqa: BLE001
|
|
141
|
+
_logger.warning("redis cache payload not JSON: %s", exc)
|
|
142
|
+
return None
|
|
143
|
+
|
|
144
|
+
def set(
|
|
145
|
+
self, key: str, value: dict[str, Any], ttl: float | None = None
|
|
146
|
+
) -> None:
|
|
147
|
+
effective_ttl = ttl if ttl is not None else self.ttl
|
|
148
|
+
try:
|
|
149
|
+
blob = json.dumps(value)
|
|
150
|
+
except Exception as exc: # noqa: BLE001 — unserializable payload
|
|
151
|
+
_logger.warning("redis cache set skipped (not JSON-serialisable): %s", exc)
|
|
152
|
+
return
|
|
153
|
+
try:
|
|
154
|
+
if effective_ttl and effective_ttl > 0:
|
|
155
|
+
self._conn().set(self._k(key), blob, ex=int(effective_ttl))
|
|
156
|
+
else:
|
|
157
|
+
self._conn().set(self._k(key), blob)
|
|
158
|
+
except Exception as exc: # noqa: BLE001
|
|
159
|
+
_logger.warning("redis cache set failed: %s", exc)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# Names exported by a star-import of this module.
__all__ = ["CacheBackend", "InMemoryCache", "RedisCache"]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Stable cache/dedup key for a generate() call.
|
|
2
|
+
|
|
3
|
+
The same inputs must always produce the same key, across runs and
|
|
4
|
+
across processes — so we hash a canonical JSON form of the inputs.
|
|
5
|
+
Param ordering is not stable across callers, so we sort recursively.
|
|
6
|
+
|
|
7
|
+
Used by:
|
|
8
|
+
- loom._cache : key into the response cache
|
|
9
|
+
- loom._dedup : key for single-flight coalescing of concurrent calls
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import hashlib
|
|
15
|
+
import json
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _canonicalize(obj: Any) -> Any:
|
|
20
|
+
if isinstance(obj, dict):
|
|
21
|
+
return {k: _canonicalize(obj[k]) for k in sorted(obj.keys())}
|
|
22
|
+
if isinstance(obj, (list, tuple)):
|
|
23
|
+
return [_canonicalize(x) for x in obj]
|
|
24
|
+
return obj
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def call_key(
    *,
    provider: str,
    modality: str,
    model: str,
    prompt: str,
    params: dict[str, Any] | None,
) -> str:
    """Return a stable 64-char hex key for a generate() call.

    The inputs are packed into a versioned payload, serialised as compact
    JSON with ``params`` canonicalized (``None`` is treated as an empty
    dict), and hashed with SHA-256.

    Raises:
        TypeError: If ``params`` contains a value ``json.dumps`` cannot
            serialise.
    """
    payload = {
        "v": 1,  # bump if the key shape ever changes
        "provider": provider,
        "modality": modality,
        "model": model,
        "prompt": prompt,
        "params": _canonicalize(params or {}),
    }
    blob = json.dumps(payload, ensure_ascii=False, separators=(",", ":"))
    # ``ensure_ascii=False`` leaves lone surrogates unescaped, and a strict
    # UTF-8 encode would raise UnicodeEncodeError on them. ``surrogatepass``
    # keeps such input hashable and yields identical bytes for valid text.
    return hashlib.sha256(blob.encode("utf-8", "surrogatepass")).hexdigest()
|