loom-router 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. loom_router-1.0.0/LICENSE +21 -0
  2. loom_router-1.0.0/PKG-INFO +366 -0
  3. loom_router-1.0.0/loom/__init__.py +91 -0
  4. loom_router-1.0.0/loom/_cache.py +162 -0
  5. loom_router-1.0.0/loom/_call_key.py +45 -0
  6. loom_router-1.0.0/loom/_context.py +45 -0
  7. loom_router-1.0.0/loom/_context_cache.py +84 -0
  8. loom_router-1.0.0/loom/_dedup.py +103 -0
  9. loom_router-1.0.0/loom/_equivalents.py +112 -0
  10. loom_router-1.0.0/loom/_logging.py +89 -0
  11. loom_router-1.0.0/loom/_loom.py +580 -0
  12. loom_router-1.0.0/loom/_pricing.py +108 -0
  13. loom_router-1.0.0/loom/_prompt_cache_rates.py +39 -0
  14. loom_router-1.0.0/loom/_retry.py +171 -0
  15. loom_router-1.0.0/loom/_router.py +244 -0
  16. loom_router-1.0.0/loom/batch.py +161 -0
  17. loom_router-1.0.0/loom/batch_providers/__init__.py +51 -0
  18. loom_router-1.0.0/loom/batch_providers/anthropic_batch.py +248 -0
  19. loom_router-1.0.0/loom/batch_providers/openai_batch.py +255 -0
  20. loom_router-1.0.0/loom/catalog/__init__.py +22 -0
  21. loom_router-1.0.0/loom/catalog/_catalog.py +109 -0
  22. loom_router-1.0.0/loom/catalog/_data.py +309 -0
  23. loom_router-1.0.0/loom/catalog/backends.py +75 -0
  24. loom_router-1.0.0/loom/context_cache_providers/__init__.py +44 -0
  25. loom_router-1.0.0/loom/context_cache_providers/gemini_context_cache.py +84 -0
  26. loom_router-1.0.0/loom/errors.py +26 -0
  27. loom_router-1.0.0/loom/observability/__init__.py +37 -0
  28. loom_router-1.0.0/loom/observability/dashboard.py +89 -0
  29. loom_router-1.0.0/loom/observability/handler.py +37 -0
  30. loom_router-1.0.0/loom/observability/queries.py +151 -0
  31. loom_router-1.0.0/loom/observability/sink.py +146 -0
  32. loom_router-1.0.0/loom/observability/templates/dashboard.html +225 -0
  33. loom_router-1.0.0/loom/providers/__init__.py +105 -0
  34. loom_router-1.0.0/loom/providers/_common.py +61 -0
  35. loom_router-1.0.0/loom/providers/_openai_compatible.py +134 -0
  36. loom_router-1.0.0/loom/providers/anthropic_provider.py +139 -0
  37. loom_router-1.0.0/loom/providers/bfl_provider.py +85 -0
  38. loom_router-1.0.0/loom/providers/deepseek_provider.py +38 -0
  39. loom_router-1.0.0/loom/providers/gemini_provider.py +185 -0
  40. loom_router-1.0.0/loom/providers/hunyuan_provider.py +100 -0
  41. loom_router-1.0.0/loom/providers/ideogram_provider.py +45 -0
  42. loom_router-1.0.0/loom/providers/minimax_provider.py +38 -0
  43. loom_router-1.0.0/loom/providers/mistral_provider.py +38 -0
  44. loom_router-1.0.0/loom/providers/moonshot_provider.py +43 -0
  45. loom_router-1.0.0/loom/providers/openai_provider.py +129 -0
  46. loom_router-1.0.0/loom/providers/perplexity_provider.py +38 -0
  47. loom_router-1.0.0/loom/providers/seedream_provider.py +57 -0
  48. loom_router-1.0.0/loom/providers/together_provider.py +38 -0
  49. loom_router-1.0.0/loom/providers/xai_provider.py +38 -0
  50. loom_router-1.0.0/loom/providers/zhipu_provider.py +38 -0
  51. loom_router-1.0.0/loom/types.py +75 -0
  52. loom_router-1.0.0/loom/vault.py +314 -0
  53. loom_router-1.0.0/loom_router.egg-info/PKG-INFO +366 -0
  54. loom_router-1.0.0/loom_router.egg-info/SOURCES.txt +81 -0
  55. loom_router-1.0.0/loom_router.egg-info/dependency_links.txt +1 -0
  56. loom_router-1.0.0/loom_router.egg-info/requires.txt +34 -0
  57. loom_router-1.0.0/loom_router.egg-info/top_level.txt +1 -0
  58. loom_router-1.0.0/pyproject.toml +66 -0
  59. loom_router-1.0.0/readme.md +296 -0
  60. loom_router-1.0.0/setup.cfg +4 -0
  61. loom_router-1.0.0/tests/test_anthropic_batch.py +272 -0
  62. loom_router-1.0.0/tests/test_api_keys.py +66 -0
  63. loom_router-1.0.0/tests/test_app_patch.py +205 -0
  64. loom_router-1.0.0/tests/test_async.py +80 -0
  65. loom_router-1.0.0/tests/test_batch.py +269 -0
  66. loom_router-1.0.0/tests/test_cache.py +129 -0
  67. loom_router-1.0.0/tests/test_call_key.py +45 -0
  68. loom_router-1.0.0/tests/test_catalog.py +77 -0
  69. loom_router-1.0.0/tests/test_catalog_backends.py +64 -0
  70. loom_router-1.0.0/tests/test_context_cache.py +233 -0
  71. loom_router-1.0.0/tests/test_cost.py +94 -0
  72. loom_router-1.0.0/tests/test_dedup.py +157 -0
  73. loom_router-1.0.0/tests/test_dispatch.py +95 -0
  74. loom_router-1.0.0/tests/test_failover.py +166 -0
  75. loom_router-1.0.0/tests/test_logging.py +57 -0
  76. loom_router-1.0.0/tests/test_observability.py +261 -0
  77. loom_router-1.0.0/tests/test_openai_smoke.py +64 -0
  78. loom_router-1.0.0/tests/test_prompt_caching.py +363 -0
  79. loom_router-1.0.0/tests/test_provider_registry.py +41 -0
  80. loom_router-1.0.0/tests/test_retry.py +146 -0
  81. loom_router-1.0.0/tests/test_routing.py +232 -0
  82. loom_router-1.0.0/tests/test_vault.py +276 -0
  83. loom_router-1.0.0/tests/test_version.py +31 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Jyotiraditya Singh
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,366 @@
1
+ Metadata-Version: 2.4
2
+ Name: loom-router
3
+ Version: 1.0.0
4
+ Summary: One contract for every AI vendor. Native SDKs preserved; keys, cost, retries, and routing centralised.
5
+ Author: Eyas Ventures
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Jyotiraditya Singh
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/jyotir07/Loom
29
+ Project-URL: Repository, https://github.com/jyotir07/Loom
30
+ Project-URL: Issues, https://github.com/jyotir07/Loom/issues
31
+ Project-URL: Changelog, https://github.com/jyotir07/Loom/blob/main/CHANGELOG.md
32
+ Keywords: llm,ai,openai,anthropic,gemini,router
33
+ Classifier: License :: OSI Approved :: MIT License
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: Programming Language :: Python :: 3.10
36
+ Classifier: Programming Language :: Python :: 3.11
37
+ Classifier: Programming Language :: Python :: 3.12
38
+ Classifier: Operating System :: OS Independent
39
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
40
+ Requires-Python: >=3.10
41
+ Description-Content-Type: text/markdown
42
+ License-File: LICENSE
43
+ Requires-Dist: python-dotenv>=1.0.0
44
+ Requires-Dist: requests>=2.31.0
45
+ Provides-Extra: openai
46
+ Requires-Dist: openai>=1.50.0; extra == "openai"
47
+ Provides-Extra: anthropic
48
+ Requires-Dist: anthropic>=0.39.0; extra == "anthropic"
49
+ Provides-Extra: gemini
50
+ Requires-Dist: google-genai>=0.3.0; extra == "gemini"
51
+ Provides-Extra: tencent
52
+ Requires-Dist: tencentcloud-sdk-python>=3.0.1200; extra == "tencent"
53
+ Provides-Extra: yaml
54
+ Requires-Dist: PyYAML>=6.0; extra == "yaml"
55
+ Provides-Extra: redis
56
+ Requires-Dist: redis>=5.0; extra == "redis"
57
+ Provides-Extra: all
58
+ Requires-Dist: openai>=1.50.0; extra == "all"
59
+ Requires-Dist: anthropic>=0.39.0; extra == "all"
60
+ Requires-Dist: google-genai>=0.3.0; extra == "all"
61
+ Requires-Dist: tencentcloud-sdk-python>=3.0.1200; extra == "all"
62
+ Requires-Dist: PyYAML>=6.0; extra == "all"
63
+ Requires-Dist: redis>=5.0; extra == "all"
64
+ Provides-Extra: dev
65
+ Requires-Dist: pytest>=8.0; extra == "dev"
66
+ Requires-Dist: pytest-mock>=3.12; extra == "dev"
67
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
68
+ Requires-Dist: PyYAML>=6.0; extra == "dev"
69
+ Dynamic: license-file
70
+
71
+ # Loom
72
+
73
+ *One API for every AI provider. Built once, used everywhere.*
74
+
75
+ ---
76
+
77
+ ## The idea
78
+
79
+ Every team that builds something with AI ends up writing the same plumbing: pick a provider, learn their SDK, manage their API keys, handle their errors, track their costs, repeat for the next provider. Multiply this across a dozen projects and the company is paying for the same integration work over and over — while still losing out on cost optimizations that no single project has the time to build.
80
+
81
+ Loom is a Python framework that sits between your projects and the AI providers. Projects make one kind of call. Loom handles the rest: which vendor, which model, which SDK, which retry policy, which cache, which batch endpoint. The provider list grows in one place. The optimizations land in one place. Every project benefits the moment they upgrade.
82
+
83
+ It is not an aggregator. Each vendor is integrated with its own native SDK so vendor-specific features (prompt caching, grounding, image polling, streaming, structured output) are preserved instead of flattened to a lowest common denominator.
84
+
85
+ ## What problem it solves
86
+
87
+ In a typical org without something like Loom:
88
+
89
+ - **Every project re-integrates the same vendors.** Five projects calling OpenAI means five sets of retry logic, five places where the key lives, five different ways of handling rate limits.
90
+ - **Switching models requires a code change in each project.** When a cheaper model launches, or a vendor deprecates a model ID, somebody has to file PRs across every repo.
91
+ - **Cost optimizations get skipped.** Prompt caching, batch APIs, smart routing to cheaper models — these all require real engineering effort that no single project can justify on its own.
92
+ - **API keys live in too many places.** Each project repo has its own `.env`, its own secrets manager entry, its own potential leak surface.
93
+ - **There's no unified view of cost.** Finance asks "what are we spending on AI?" and the answer is "we'll get back to you."
94
+ - **Vendor breaking changes hurt N times.** When a provider changes their response shape, every consuming project breaks.
95
+
96
+ Loom collapses all of this into **ONE** library.
97
+
98
+ ## What Loom provides
99
+
100
+ ### A single, stable contract
101
+
102
+ Every call goes through one function:
103
+
104
+ ```python
105
+ from loom import generate
106
+
107
+ result = generate(
108
+ provider="anthropic",
109
+ model="claude-opus-4-7",
110
+ prompt="Summarize this contract in three bullets.",
111
+ params={"max_tokens": 500},
112
+ )
113
+ ```
114
+
115
+ The return shape is consistent across providers:
116
+
117
+ ```python
118
+ # Text response
119
+ {"kind": "text", "text": "..."}
120
+
121
+ # Image response
122
+ {"kind": "image", "images": [{"mime_type": "...", "data_b64": "..."}]}
123
+ ```
124
+
125
+ Sync and async both supported. Type-hinted responses available for IDE autocomplete.
126
+
127
+ ### A pluggable provider registry
128
+
129
+ Loom ships with 14+ providers wired up:
130
+
131
+ - **Text and image:** OpenAI, Google Gemini
132
+ - **Text only:** Anthropic, xAI (Grok), Mistral, DeepSeek, MiniMax, Z.AI (GLM), Perplexity, Together AI
133
+ - **Image only:** Black Forest Labs (Flux), ByteDance Seedream, Tencent Hunyuan, Ideogram
134
+
135
+ Any OpenAI-compatible provider can be added in roughly ten lines of code via a shared adapter. Native-SDK providers follow a documented `generate(modality, model, params, prompt) -> dict` contract.
136
+
137
+ ### A catalog of models
138
+
139
+ Models are registered as data, not code. Each entry carries:
140
+
141
+ - Stable ID and display name
142
+ - Upstream model ID (the one the vendor expects)
143
+ - Default parameters
144
+ - Pricing (per 1M input/output tokens for text, per image for image)
145
+ - Free-tier flag
146
+
147
+ Adding a new model is a one-line catalog entry. The catalog can be backed by an in-memory dict, a YAML file, or Postgres — pick what fits the consuming project.
148
+
149
+ ### A cost optimization layer
150
+
151
+ This is where Loom pays for itself. These optimizations are built once, in the framework, and every consuming project inherits them on upgrade.
152
+
153
+ - **Response caching.** Identical `(provider, modality, model, prompt, params)` calls hit a cache instead of the API. Realistic savings of 20–60% on workloads with repeated queries.
154
+ - **Vendor-native prompt caching.** Anthropic, OpenAI, Gemini, and DeepSeek all offer 50–90% discounts on cached prefix tokens. Loom wires this up automatically for repeated system prompts and few-shot examples.
155
+ - **Smart model routing.** Try a cheap model first (Haiku, GPT-4o-mini, Gemini Flash); escalate to expensive ones only when confidence is low or validation fails. Realistic savings of 50–80% on mixed workloads.
156
+ - **Batch API usage.** OpenAI, Anthropic, and Gemini all offer 50% discounts on batch endpoints with 24-hour turnaround. Loom can auto-batch non-urgent calls.
157
+ - **Centralized retry and failover.** Exponential backoff done correctly once. If one vendor is down or rate-limited, fall back to an equivalent model on another vendor instead of failing.
158
+ - **Request deduplication.** When the same call fires from multiple places within a short window, collapse to one upstream request.
159
+
160
+ These numbers are ceilings, not guarantees — actual savings depend on workload. A project making 100% unique real-time calls won't benefit much from caching. The point is: the headroom exists, and projects don't have to build any of it themselves.
161
+
162
+ ### Centralized key management
163
+
164
+ API keys live in one place — the Loom deployment — not in each consuming project's repo or environment. Projects authenticate to Loom with their own credentials and never see vendor keys.
165
+
166
+ ### Observability
167
+
168
+ Every call is logged with provider, model, latency, token counts, and cost (in both USD and the configured local currency). Projects get a per-call cost field on every response. Finance gets a unified dashboard. Engineering gets to find the prompts that are burning the budget.
169
+
170
+ ## Architecture
171
+
172
+ ### Where Loom sits
173
+
174
+ ```
175
+ ┌─────────────────────────────────────────────────────────────┐
176
+ │ Company projects │
177
+ │ Support bot │ Analytics │ Marketing │ Doc search │ Sales │
178
+ └──────────────────────────┬──────────────────────────────────┘
179
+
180
+ │ generate(provider, model, prompt)
181
+
182
+ ┌─────────────────────────────────────────────────────────────┐
183
+ │ Loom │
184
+ │ │
185
+ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
186
+ │ │ Auth + keys │ │ Cache │ │ Router │ │
187
+ │ └─────────────┘ └─────────────┘ └─────────────┘ │
188
+ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
189
+ │ │ Batcher │ │ Retry │ │ Cost logs │ │
190
+ │ └─────────────┘ └─────────────┘ └─────────────┘ │
191
+ │ │
192
+ │ ┌─────────────────────────────────────────────────────┐ │
193
+ │ │ Catalog + Provider registry + Adapters │ │
194
+ │ └─────────────────────────────────────────────────────┘ │
195
+ └──────────────────────────┬──────────────────────────────────┘
196
+
197
+
198
+ ┌─────────────────────────────────────────────────────────────┐
199
+ │ AI providers │
200
+ │ OpenAI │ Anthropic │ Gemini │ BFL │ + 10 more vendors │
201
+ └─────────────────────────────────────────────────────────────┘
202
+ ```
203
+
204
+ ### Internal layers
205
+
206
+ 1. **Public API.** The single `generate(...)` function (sync and async). Stable across versions.
207
+ 2. **Optimization layer.** Cache, router, batcher, retry, dedup, logging. Each is independently toggleable per call.
208
+ 3. **Core services.** Catalog (what models exist), provider registry (who knows how to call them), observability (what happened).
209
+ 4. **Provider adapters.** One module per vendor. OpenAI-compatible vendors share a single ~12-line adapter; native-SDK vendors get their own module.
210
+ 5. **Upstream.** The actual vendor APIs. Out of our control, but their churn is absorbed inside Loom.
211
+
212
+ ## Integration
213
+
214
+ ### Installation
215
+
216
+ ```bash
217
+ pip install loom-router
218
+ ```
219
+
220
+ ### Minimum viable usage
221
+
222
+ ```python
223
+ from loom import Loom
224
+
225
+ c = Loom.from_env() # picks up keys from environment variables
226
+
227
+ response = c.generate(
228
+ provider="anthropic",
229
+ model="claude-sonnet-4-6",
230
+ prompt="Write three subject lines for a launch email.",
231
+ )
232
+
233
+ print(response["text"])
234
+ ```
235
+
236
+ ### Programmatic configuration
237
+
238
+ For projects that don't want to use environment variables:
239
+
240
+ ```python
241
+ from loom import Loom, Catalog
242
+
243
+ catalog = Catalog()
244
+ catalog.register_model(
245
+ provider="openai",
246
+ model_id="gpt-5",
247
+ upstream_model="gpt-5-2026-01",
248
+ input_cost_per_1m=2.50,
249
+ output_cost_per_1m=10.00,
250
+ )
251
+
252
+ c = Loom(
253
+ catalog=catalog,
254
+ api_keys={"openai": "sk-...", "anthropic": "sk-ant-..."},
255
+ cache_backend="redis://localhost:6379",
256
+ )
257
+ ```
258
+
259
+ ### Async usage
260
+
261
+ ```python
262
+ import asyncio
263
+ from loom import Loom
264
+
265
+ async def main():
266
+ c = Loom.from_env()
267
+ response = await c.agenerate(
268
+ provider="gemini",
269
+ model="gemini-2.5-pro",
270
+ prompt="...",
271
+ )
272
+ return response
273
+
274
+ asyncio.run(main())
275
+ ```
276
+
277
+ ### Adding a new provider
278
+
279
+ Most new providers are OpenAI-compatible. For these, registration is a one-liner:
280
+
281
+ ```python
282
+ c.register_openai_compatible(
283
+ key="newco",
284
+ label="NewCo AI",
285
+ base_url="https://api.newco.ai/v1",
286
+ api_key_env="NEWCO_API_KEY",
287
+ )
288
+ ```
289
+
290
+ For providers with native SDKs or async polling patterns (like BFL or Hunyuan), implement the contract:
291
+
292
+ ```python
293
+ # providers/newco_provider.py
294
+ def generate(modality: str, model: str, params: dict, prompt: str) -> dict:
295
+ ...
296
+ return {"kind": "text", "text": "..."}
297
+ ```
298
+
299
+ Register it once in `providers/__init__.py` and it's available everywhere.
300
+
301
+ ### Framework-agnostic by design
302
+
303
+ Loom doesn't care what's calling it. It works inside:
304
+
305
+ - Flask, FastAPI, or Django apps
306
+ - Celery workers and background jobs
307
+ - CLI scripts
308
+ - Jupyter notebooks
309
+ - AWS Lambda or other serverless runtimes
310
+
311
+ There is no web framework lock-in. Loom is a library, not a service — though it can be deployed as a service if a team wants to centralize it behind an internal HTTP API.
312
+
313
+ ## Migration path for existing projects
314
+
315
+ Loom is designed for incremental adoption. A project doesn't have to rewrite anything to start using it.
316
+
317
+ **Step 1 — Install and replace the simplest call site.**
318
+ Pick one place where the project currently calls a vendor SDK directly. Replace that call with `loom.generate(...)`. Ship it. Verify cost logging shows up in the dashboard.
319
+
320
+ **Step 2 — Migrate the remaining call sites at the team's own pace.**
321
+ There's no "big bang" cutover. Old direct-SDK calls and new Loom calls coexist fine.
322
+
323
+ **Step 3 — Remove vendor SDKs from the project's dependencies.**
324
+ Once all call sites are migrated, the project can drop `openai`, `anthropic`, `google-genai`, etc. from its requirements. Loom owns those dependencies now.
325
+
326
+ **Step 4 — Opt into optimization features.**
327
+ Enable caching, smart routing, or batching per-call or globally. These are off by default to preserve exact behavior during migration, then turned on once the team is comfortable.
328
+
329
+ ## What this delivers
330
+
331
+ For **engineering**:
332
+
333
+ - One API to learn, not fourteen
334
+ - One place to upgrade SDKs, not N project repos
335
+ - One place where retry logic, error handling, and timeouts live
336
+ - New AI projects go from "spec" to "first call" in hours
337
+
338
+ For **finance**:
339
+
340
+ - Per-project, per-model, per-day cost reporting
341
+ - Budget alerts before a runaway prompt empties the account
342
+ - Visibility into which optimizations are actually saving money
343
+
344
+ For **security**:
345
+
346
+ - API keys live in one audited location
347
+ - No vendor credentials in project repos or developer laptops
348
+ - Centralized rate limiting prevents one bug from burning the entire org's quota
349
+
350
+ For **the org**:
351
+
352
+ - Lower per-call cost via centralized optimization
353
+ - Faster product velocity on anything AI-touching
354
+ - Insulation from vendor lock-in — switching providers is a config change
355
+
356
+ ## Status
357
+
358
+ Loom is built on top of the existing Models Catalog project, which already has the provider abstraction, native SDK adapters, unified catalog, and `generate(...)` contract working in production. The remaining work is packaging it as an installable library, extracting the engine from the Flask app, adding the optimization layer, and writing documentation.
359
+
360
+ Estimated timeline:
361
+
362
+ - **Stage 1 (1–2 weeks):** Extract into a proper Python package. Flask app keeps working, now importing from the new library.
363
+ - **Stage 2 (3–4 weeks):** Programmatic configuration, optional Postgres, typed responses, async support. Internal release.
364
+ - **Stage 3 (ongoing):** Optimization layer, observability dashboard, semver stability, public docs.
365
+
366
+ Each stage delivers value independently. The org can stop at Stage 2 and have a useful internal library, or continue to Stage 3 for the full cost-optimization story.
@@ -0,0 +1,91 @@
1
+ """Loom — one contract for every AI vendor.
2
+
3
+ Quick start:
4
+
5
+ import loom
6
+
7
+ result = loom.generate(
8
+ provider="openai",
9
+ modality="text",
10
+ model="gpt-4o-mini",
11
+ prompt="Say hi in five words.",
12
+ )
13
+ print(result["text"])
14
+
15
+ Same thing, explicit instance (useful when you want a non-default config):
16
+
17
+ from loom import Loom
18
+
19
+ client = Loom.from_env()
20
+ result = client.generate(
21
+ provider="openai",
22
+ modality="text",
23
+ model="gpt-4o-mini",
24
+ prompt="Say hi in five words.",
25
+ )
26
+ """
27
+
28
+ from loom._cache import CacheBackend, InMemoryCache, RedisCache
29
+ from loom._context_cache import ContextCacheHandle
30
+ from loom._equivalents import EquivalenceMap
31
+ from loom._loom import AsyncLoom, Loom, agenerate, generate
32
+ from loom._retry import RetryPolicy
33
+ from loom._router import Candidate, Router
34
+ from loom.batch import BatchHandle, BatchRequest
35
+ from loom.catalog import Catalog
36
+ from loom.errors import (
37
+ AuthError,
38
+ LoomError,
39
+ ModelNotFoundError,
40
+ ProviderError,
41
+ RateLimitError,
42
+ )
43
+ from loom.types import (
44
+ Cost,
45
+ ImagePayload,
46
+ ImageResponse,
47
+ TextResponse,
48
+ Usage,
49
+ )
50
+ from loom.vault import (
51
+ AWSSecretsManagerVault,
52
+ GCPSecretManagerVault,
53
+ HashiCorpVaultBackend,
54
+ InMemoryVault,
55
+ KeyVault,
56
+ )
57
+
58
+ __all__ = [
59
+ "Loom",
60
+ "AsyncLoom",
61
+ "Catalog",
62
+ "generate",
63
+ "agenerate",
64
+ "RetryPolicy",
65
+ "CacheBackend",
66
+ "InMemoryCache",
67
+ "RedisCache",
68
+ "BatchRequest",
69
+ "BatchHandle",
70
+ "ContextCacheHandle",
71
+ "Router",
72
+ "Candidate",
73
+ "EquivalenceMap",
74
+ "LoomError",
75
+ "ProviderError",
76
+ "AuthError",
77
+ "RateLimitError",
78
+ "ModelNotFoundError",
79
+ "TextResponse",
80
+ "ImageResponse",
81
+ "ImagePayload",
82
+ "Usage",
83
+ "Cost",
84
+ "KeyVault",
85
+ "InMemoryVault",
86
+ "AWSSecretsManagerVault",
87
+ "GCPSecretManagerVault",
88
+ "HashiCorpVaultBackend",
89
+ ]
90
+
91
+ __version__ = "1.0.0"
@@ -0,0 +1,162 @@
1
+ """Response cache — pluggable backends.
2
+
3
+ Cache key is the SHA-256 of the canonical (provider, modality, model,
4
+ prompt, params) tuple — see loom._call_key. Hits return a deep copy of
5
+ the cached value so callers can't mutate the cached payload.
6
+
7
+ Backends:
8
+
9
+ InMemoryCache(maxsize, ttl) — LRU + TTL, single-process. Default.
10
+ RedisCache(url=..., ttl=...) — Redis. Requires `pip install redis`.
11
+
12
+ Wire on the client:
13
+
14
+ Loom(cache=InMemoryCache(maxsize=10_000, ttl=3600))
15
+
16
+ Per-call opt-out:
17
+
18
+ Loom(...).generate(..., use_cache=False)
19
+
20
+ Image responses are cached too — they're just bytes-as-base64 in the
21
+ unified shape, no streaming, so they round-trip cleanly.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import copy
27
+ import json
28
+ import logging
29
+ import threading
30
+ import time
31
+ from collections import OrderedDict
32
+ from typing import Any, Protocol
33
+
34
+ _logger = logging.getLogger("loom.cache")
35
+
36
+
37
class CacheBackend(Protocol):
    """Structural interface shared by every response-cache backend.

    Any object exposing these two methods can be used as a cache.
    Implementations MUST be thread-safe.
    """

    def get(self, key: str) -> dict[str, Any] | None:
        """Look up ``key`` and return its stored payload dict, if any."""

    def set(self, key: str, value: dict[str, Any], ttl: float | None = None) -> None:
        """Store ``value`` under ``key``; ``ttl`` is an optional lifetime in seconds."""
45
+
46
+
47
class InMemoryCache:
    """LRU + TTL cache in a single process. Default backend.

    Entries live in an ``OrderedDict`` as ``(expires_at, value)`` pairs,
    ordered from least to most recently used. ``expires_at == 0.0`` marks an
    entry that never expires.

    Args:
        maxsize: Maximum number of entries. Once exceeded, the least recently
            used entries are evicted. ``0`` (or any non-positive value) makes
            the cache unbounded.
        ttl: Default time-to-live in seconds. ``None`` or ``0`` keeps entries
            indefinitely. Expired entries are removed lazily, when read.
    """

    def __init__(self, *, maxsize: int = 1024, ttl: float | None = 3600.0) -> None:
        self.maxsize = maxsize
        self.ttl = ttl
        self._data: OrderedDict[str, tuple[float, dict[str, Any]]] = OrderedDict()
        self._lock = threading.Lock()

    def get(self, key: str) -> dict[str, Any] | None:
        """Return a deep copy of the value stored under ``key``.

        Returns ``None`` when the key is absent or its entry has expired (an
        expired entry is dropped as a side effect). A hit marks the entry as
        most recently used.
        """
        with self._lock:
            entry = self._data.get(key)
            if entry is None:
                return None
            expires_at, value = entry
            if expires_at > 0 and time.time() > expires_at:
                self._data.pop(key, None)
                return None
            # Mark as recently used.
            self._data.move_to_end(key)
        # Stored values are private copies that are only ever replaced, never
        # mutated in place, so the (potentially large) copy can be made after
        # the lock is released instead of stalling every other cache user.
        return copy.deepcopy(value)

    def set(
        self, key: str, value: dict[str, Any], ttl: float | None = None
    ) -> None:
        """Store a deep copy of ``value`` under ``key``.

        Args:
            key: Cache key.
            value: Payload to store; copied so later caller-side mutation
                cannot leak into the cache.
            ttl: Per-entry lifetime in seconds. ``None`` falls back to the
                instance default; a resulting ``0``/``None`` means "no expiry".
        """
        effective_ttl = ttl if ttl is not None else self.ttl
        expires_at = (time.time() + effective_ttl) if effective_ttl else 0.0
        # Copy before taking the lock so the critical section stays short.
        snapshot = copy.deepcopy(value)
        with self._lock:
            self._data[key] = (expires_at, snapshot)
            self._data.move_to_end(key)
            if self.maxsize > 0:
                # Evict from the least-recently-used end until within bounds.
                while len(self._data) > self.maxsize:
                    self._data.popitem(last=False)

    def clear(self) -> None:
        """Remove every entry."""
        with self._lock:
            self._data.clear()

    def __len__(self) -> int:
        """Return the number of stored entries, including not-yet-purged expired ones."""
        with self._lock:
            return len(self._data)
92
+
93
+
94
class RedisCache:
    """Redis-backed cache.

    Lazy-imports `redis` so the dependency stays optional. The connection
    is built on first use; failures during get/set are logged and swallowed
    (treat as a miss / no-op) so a Redis outage degrades to no-cache rather
    than failing every request.

    NOTE(review): `_conn()` is called inside the same try blocks, so a missing
    `redis` package also surfaces only as a logged warning from get/set.

    Args:
        url: Redis connection URL passed to ``redis.Redis.from_url``.
        ttl: Default time-to-live in seconds; ``None`` or ``0`` stores
            entries without an expiry.
        prefix: String prepended to every key written to Redis.
    """

    def __init__(
        self,
        *,
        url: str = "redis://localhost:6379/0",
        ttl: float | None = 3600.0,
        prefix: str = "loom:",
    ) -> None:
        self.url = url
        self.ttl = ttl
        self.prefix = prefix
        self._client: Any | None = None

    def _conn(self) -> Any:
        """Return the Redis client, creating it on first use.

        Raises:
            ImportError: if the optional `redis` package is not installed.
        """
        if self._client is None:
            try:
                import redis  # type: ignore[import-not-found]
            except ImportError as exc:
                raise ImportError(
                    "loom.RedisCache requires the `redis` package. "
                    "Install with `pip install loom-router[redis]` or `pip install redis`."
                ) from exc
            self._client = redis.Redis.from_url(self.url, decode_responses=False)
        return self._client

    def _k(self, key: str) -> str:
        """Return ``key`` with the configured prefix applied."""
        return self.prefix + key

    def get(self, key: str) -> dict[str, Any] | None:
        """Return the JSON-decoded payload for ``key``, or ``None``.

        ``None`` is returned on a miss, on any Redis error, and when the
        stored blob is not valid JSON; the latter two are logged as warnings.
        """
        try:
            blob = self._conn().get(self._k(key))
        except Exception as exc:  # noqa: BLE001 — log and degrade
            _logger.warning("redis cache get failed: %s", exc)
            return None
        if blob is None:
            return None
        try:
            return json.loads(blob)
        except Exception as exc:  # noqa: BLE001
            _logger.warning("redis cache payload not JSON: %s", exc)
            return None

    def set(
        self, key: str, value: dict[str, Any], ttl: float | None = None
    ) -> None:
        """Store ``value`` as JSON under ``key``.

        Args:
            key: Cache key (the prefix is added automatically).
            value: Payload; silently skipped (with a warning) when it is not
                JSON-serialisable.
            ttl: Per-entry lifetime in seconds. ``None`` falls back to the
                instance default; a non-positive or missing TTL stores the
                entry without an expiry.
        """
        effective_ttl = ttl if ttl is not None else self.ttl
        try:
            blob = json.dumps(value)
        except Exception as exc:  # noqa: BLE001 — unserializable payload
            _logger.warning("redis cache set skipped (not JSON-serialisable): %s", exc)
            return
        try:
            if effective_ttl and effective_ttl > 0:
                # Send the expiry in milliseconds (PX). Truncating to whole
                # seconds turned any TTL below 1s into `EX 0`, which Redis
                # rejects, so the entry was never written.
                ttl_ms = max(1, int(round(effective_ttl * 1000)))
                self._conn().set(self._k(key), blob, px=ttl_ms)
            else:
                self._conn().set(self._k(key), blob)
        except Exception as exc:  # noqa: BLE001
            _logger.warning("redis cache set failed: %s", exc)
160
+
161
+
162
+ __all__ = ["CacheBackend", "InMemoryCache", "RedisCache"]
@@ -0,0 +1,45 @@
1
+ """Stable cache/dedup key for a generate() call.
2
+
3
+ The same inputs must always produce the same key, across runs and
4
+ across processes — so we hash a canonical JSON form of the inputs.
5
+ Param ordering is not stable across callers, so we sort recursively.
6
+
7
+ Used by:
8
+ - loom._cache : key into the response cache
9
+ - loom._dedup : key for single-flight coalescing of concurrent calls
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import hashlib
15
+ import json
16
+ from typing import Any
17
+
18
+
19
def _canonicalize(obj: Any) -> Any:
    """Return ``obj`` rebuilt so that equal inputs serialise identically.

    Dicts are rebuilt with their keys in sorted order, lists and tuples both
    become lists, and both are processed recursively. Any other value is
    returned unchanged.
    """
    if isinstance(obj, dict):
        return {k: _canonicalize(obj[k]) for k in sorted(obj.keys())}
    if isinstance(obj, (list, tuple)):
        return [_canonicalize(x) for x in obj]
    return obj


def call_key(
    *,
    provider: str,
    modality: str,
    model: str,
    prompt: str,
    params: dict[str, Any] | None,
) -> str:
    """Return a stable 64-char hex key for a generate() call.

    The key is the SHA-256 of a compact JSON document holding the call's
    inputs. ``params`` is canonicalised first, so the caller's dict ordering
    does not influence the key, and ``None`` is treated as an empty dict.

    Raises:
        TypeError: if ``params`` contains values ``json.dumps`` cannot encode.
    """
    payload = {
        "v": 1,  # bump if the key shape ever changes
        "provider": provider,
        "modality": modality,
        "model": model,
        "prompt": prompt,
        "params": _canonicalize(params or {}),
    }
    blob = json.dumps(payload, ensure_ascii=False, separators=(",", ":"))
    # With ensure_ascii=False a lone surrogate in the prompt/params survives
    # json.dumps and would make a strict UTF-8 encode raise UnicodeEncodeError.
    # "surrogatepass" hashes such text instead and yields byte-identical
    # output (hence identical keys) for all well-formed strings.
    return hashlib.sha256(blob.encode("utf-8", "surrogatepass")).hexdigest()