llm-spendguard 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_spendguard-0.2.6/LICENSE +21 -0
- llm_spendguard-0.2.6/PKG-INFO +374 -0
- llm_spendguard-0.2.6/README.md +334 -0
- llm_spendguard-0.2.6/pyproject.toml +62 -0
- llm_spendguard-0.2.6/setup.cfg +4 -0
- llm_spendguard-0.2.6/src/llm_spendguard.egg-info/PKG-INFO +374 -0
- llm_spendguard-0.2.6/src/llm_spendguard.egg-info/SOURCES.txt +88 -0
- llm_spendguard-0.2.6/src/llm_spendguard.egg-info/dependency_links.txt +1 -0
- llm_spendguard-0.2.6/src/llm_spendguard.egg-info/entry_points.txt +2 -0
- llm_spendguard-0.2.6/src/llm_spendguard.egg-info/requires.txt +23 -0
- llm_spendguard-0.2.6/src/llm_spendguard.egg-info/top_level.txt +1 -0
- llm_spendguard-0.2.6/src/spendguard/__init__.py +18 -0
- llm_spendguard-0.2.6/src/spendguard/adapters.py +75 -0
- llm_spendguard-0.2.6/src/spendguard/advise.py +89 -0
- llm_spendguard-0.2.6/src/spendguard/advisor.py +260 -0
- llm_spendguard-0.2.6/src/spendguard/audit.py +64 -0
- llm_spendguard-0.2.6/src/spendguard/backfill.py +108 -0
- llm_spendguard-0.2.6/src/spendguard/bootstrap.py +94 -0
- llm_spendguard-0.2.6/src/spendguard/brief.py +132 -0
- llm_spendguard-0.2.6/src/spendguard/budget.py +252 -0
- llm_spendguard-0.2.6/src/spendguard/cacheaudit.py +156 -0
- llm_spendguard-0.2.6/src/spendguard/cachetest.py +164 -0
- llm_spendguard-0.2.6/src/spendguard/callio.py +298 -0
- llm_spendguard-0.2.6/src/spendguard/calls.py +221 -0
- llm_spendguard-0.2.6/src/spendguard/cascade.py +95 -0
- llm_spendguard-0.2.6/src/spendguard/cli.py +204 -0
- llm_spendguard-0.2.6/src/spendguard/compare.py +53 -0
- llm_spendguard-0.2.6/src/spendguard/config.py +265 -0
- llm_spendguard-0.2.6/src/spendguard/config_schema.py +146 -0
- llm_spendguard-0.2.6/src/spendguard/conv.py +382 -0
- llm_spendguard-0.2.6/src/spendguard/emit.py +143 -0
- llm_spendguard-0.2.6/src/spendguard/equivalence.py +121 -0
- llm_spendguard-0.2.6/src/spendguard/estimate.py +124 -0
- llm_spendguard-0.2.6/src/spendguard/experiment.py +348 -0
- llm_spendguard-0.2.6/src/spendguard/gate.py +684 -0
- llm_spendguard-0.2.6/src/spendguard/guard.py +80 -0
- llm_spendguard-0.2.6/src/spendguard/history.py +252 -0
- llm_spendguard-0.2.6/src/spendguard/learn.py +157 -0
- llm_spendguard-0.2.6/src/spendguard/ledger_sync.py +248 -0
- llm_spendguard-0.2.6/src/spendguard/models.py +156 -0
- llm_spendguard-0.2.6/src/spendguard/notify.py +94 -0
- llm_spendguard-0.2.6/src/spendguard/prices.json +205 -0
- llm_spendguard-0.2.6/src/spendguard/pricing.py +241 -0
- llm_spendguard-0.2.6/src/spendguard/py.typed +0 -0
- llm_spendguard-0.2.6/src/spendguard/reconcile_anthropic.py +154 -0
- llm_spendguard-0.2.6/src/spendguard/reconcile_openai.py +110 -0
- llm_spendguard-0.2.6/src/spendguard/refresh.py +93 -0
- llm_spendguard-0.2.6/src/spendguard/report.py +185 -0
- llm_spendguard-0.2.6/src/spendguard/resources.py +235 -0
- llm_spendguard-0.2.6/src/spendguard/review.py +152 -0
- llm_spendguard-0.2.6/src/spendguard/saas.py +503 -0
- llm_spendguard-0.2.6/src/spendguard/semcache.py +250 -0
- llm_spendguard-0.2.6/src/spendguard/setup.py +497 -0
- llm_spendguard-0.2.6/src/spendguard/share.py +143 -0
- llm_spendguard-0.2.6/src/spendguard/signal.py +129 -0
- llm_spendguard-0.2.6/src/spendguard/submit.py +124 -0
- llm_spendguard-0.2.6/src/spendguard/sync.py +89 -0
- llm_spendguard-0.2.6/src/spendguard/tag.py +65 -0
- llm_spendguard-0.2.6/src/spendguard/validate.py +130 -0
- llm_spendguard-0.2.6/src/spendguard/workdone.py +126 -0
- llm_spendguard-0.2.6/tests/test_adapters.py +239 -0
- llm_spendguard-0.2.6/tests/test_advise.py +149 -0
- llm_spendguard-0.2.6/tests/test_advisor.py +31 -0
- llm_spendguard-0.2.6/tests/test_audit.py +118 -0
- llm_spendguard-0.2.6/tests/test_backfill.py +177 -0
- llm_spendguard-0.2.6/tests/test_bootstrap.py +128 -0
- llm_spendguard-0.2.6/tests/test_brief.py +18 -0
- llm_spendguard-0.2.6/tests/test_cacheaudit.py +18 -0
- llm_spendguard-0.2.6/tests/test_cascade.py +35 -0
- llm_spendguard-0.2.6/tests/test_conv.py +32 -0
- llm_spendguard-0.2.6/tests/test_equivalence.py +25 -0
- llm_spendguard-0.2.6/tests/test_experiment.py +26 -0
- llm_spendguard-0.2.6/tests/test_gate.py +138 -0
- llm_spendguard-0.2.6/tests/test_gate_failclosed.py +90 -0
- llm_spendguard-0.2.6/tests/test_history.py +42 -0
- llm_spendguard-0.2.6/tests/test_learn.py +40 -0
- llm_spendguard-0.2.6/tests/test_ledger.py +35 -0
- llm_spendguard-0.2.6/tests/test_ledger_sync.py +280 -0
- llm_spendguard-0.2.6/tests/test_models.py +28 -0
- llm_spendguard-0.2.6/tests/test_pricing.py +44 -0
- llm_spendguard-0.2.6/tests/test_reconcile.py +26 -0
- llm_spendguard-0.2.6/tests/test_reconcile_anthropic.py +172 -0
- llm_spendguard-0.2.6/tests/test_reconcile_openai.py +98 -0
- llm_spendguard-0.2.6/tests/test_runner.py +67 -0
- llm_spendguard-0.2.6/tests/test_saas.py +85 -0
- llm_spendguard-0.2.6/tests/test_saas_rollup.py +44 -0
- llm_spendguard-0.2.6/tests/test_semcache.py +48 -0
- llm_spendguard-0.2.6/tests/test_setup.py +44 -0
- llm_spendguard-0.2.6/tests/test_submit.py +56 -0
- llm_spendguard-0.2.6/tests/test_workdone.py +219 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ash Damle
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: llm-spendguard
|
|
3
|
+
Version: 0.2.6
|
|
4
|
+
Summary: A pre-spend GATE + learning advisor for LLM API cost: caps every call, prices from a verified table, and learns the cheapest config that keeps quality.
|
|
5
|
+
Author: Ash Damle
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://llmspendguard.com
|
|
8
|
+
Project-URL: Documentation, https://docs.llmspendguard.com/
|
|
9
|
+
Project-URL: Repository, https://github.com/llmspendguard/llm-spendguard
|
|
10
|
+
Project-URL: Changelog, https://github.com/llmspendguard/llm-spendguard/blob/main/CHANGELOG.md
|
|
11
|
+
Keywords: llm,openai,anthropic,cost,budget,finops,tokens,prompt-caching,observability
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
17
|
+
Classifier: Topic :: System :: Monitoring
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Provides-Extra: openai
|
|
22
|
+
Requires-Dist: openai>=1.0; extra == "openai"
|
|
23
|
+
Requires-Dist: tiktoken; extra == "openai"
|
|
24
|
+
Provides-Extra: anthropic
|
|
25
|
+
Requires-Dist: anthropic>=0.40; extra == "anthropic"
|
|
26
|
+
Provides-Extra: otel
|
|
27
|
+
Requires-Dist: opentelemetry-sdk; extra == "otel"
|
|
28
|
+
Provides-Extra: all
|
|
29
|
+
Requires-Dist: openai>=1.0; extra == "all"
|
|
30
|
+
Requires-Dist: tiktoken; extra == "all"
|
|
31
|
+
Requires-Dist: anthropic>=0.40; extra == "all"
|
|
32
|
+
Requires-Dist: opentelemetry-sdk; extra == "all"
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
35
|
+
Requires-Dist: build; extra == "dev"
|
|
36
|
+
Requires-Dist: twine; extra == "dev"
|
|
37
|
+
Requires-Dist: ruff>=0.6; extra == "dev"
|
|
38
|
+
Requires-Dist: coverage[toml]>=7; extra == "dev"
|
|
39
|
+
Dynamic: license-file
|
|
40
|
+
|
|
41
|
+
# llm-spendguard
|
|
42
|
+
|
|
43
|
+
A pre-spend **governor** for LLM API cost (OpenAI + Anthropic): it caps every call before the spend,
|
|
44
|
+
prices from a verified table, and **learns the cheapest config that still keeps quality** — then proves
|
|
45
|
+
and enforces it. Zero required dependencies; install is one line; it never breaks a job (fail-open).
|
|
46
|
+
Learn more at https://llmspendguard.com · **[Docs & quickstart →](https://docs.llmspendguard.com/)**
|
|
47
|
+
|
|
48
|
+
## Why llm-spendguard?
|
|
49
|
+
Cost overruns don't announce themselves — they slip in silently: a hardcoded price that drifted from the
|
|
50
|
+
real rate, a forgotten model swap, under-batching that re-bills a shared prompt every request, a job
|
|
51
|
+
cancelled "to save money" that still bills for completed work, an ungated script in some other venv quietly
|
|
52
|
+
leaking spend. spendguard stops those before the spend (the gate hard-stops over a cap, prices from a
|
|
53
|
+
verified table, finds the leaks) **and** learns what was actually worth it — so "cheaper" never quietly
|
|
54
|
+
costs you quality.
|
|
55
|
+
|
|
56
|
+
Born from a real incident: a "cost-conscious" day meant to cost ~$33 actually cost **$149.76** — a price
|
|
57
|
+
constant was hardcoded wrong (GPT-5.5 at the old GPT-5 rate) and jobs ran 1 item/request (the shared prompt
|
|
58
|
+
re-billed every call). spendguard makes those mistakes impossible to ship silently — and goes further:
|
|
59
|
+
it reconstructs *what* you should do cheaper, and won't let "cheaper" cost you quality.
|
|
60
|
+
|
|
61
|
+
## What it does
|
|
62
|
+
**Enforce → see → plan → prove → learn.**
|
|
63
|
+
- **gate** — overlay on the OpenAI/Anthropic SDKs (auto-installs via `sitecustomize.py`): estimates every
|
|
64
|
+
batch/real-time call, **hard-stops** over a cap (per-batch + cross-process daily/monthly) — then *asks* if interactive.
|
|
65
|
+
- **pricing** — one canonical, verifiable table (layered from LiteLLM + curated + override), cross-checked
|
|
66
|
+
vs OpenRouter; an `audit` fails CI if any code hardcodes a disagreeing price.
|
|
67
|
+
- **reconcile** — actual $ from real billed tokens; **`reconcile-ledger`** compares the local ledger to
|
|
68
|
+
provider billing to find **leaks** (ungoverned spend from a non-gated venv/repo).
|
|
69
|
+
- **report** — daily/weekly/monthly email with spend totals + a leak alert + the advisor's top learnings.
|
|
70
|
+
- **learning advisor** — a per-call cost+quality corpus → confidence-scored, lifecycle-tracked **insights**;
|
|
71
|
+
`brief` pre-fills a plan, `optimize` recommends the cheapest config that held quality, `experiment` proves
|
|
72
|
+
it (cost↓ **and** same-output), `promote` runs it and keeps the output. Cost-per-**good**-result, not per-token.
|
|
73
|
+
- **cost levers** — prompt-caching audit/test, semantic cache + batch dedup, cost-aware cascade routing.
|
|
74
|
+
- **observability** — emits OpenTelemetry GenAI-convention metrics+spans → Langfuse / Helicone / Phoenix / any OTLP backend.
|
|
75
|
+
|
|
76
|
+
The advisor's own LLM use is itself **caged** (a separate `caps.meta` budget, tagged `spendguard:*`, excluded
|
|
77
|
+
from the corpus it analyzes) so the governor can't overspend governing.
|
|
78
|
+
|
|
79
|
+
**Docs:** [Architecture + diagrams](docs/ARCHITECTURE.md) · [Use with Claude/Cursor](docs/USING-WITH-CLAUDE.md) · [Methodology](docs/README.md) · [Roadmap (teams/orgs/SaaS)](docs/ROADMAP.md) · [Module map](src/spendguard/README.md) · [Contributing](CONTRIBUTING.md) · [Changelog](CHANGELOG.md) · [Setup](SETUP.md)
|
|
80
|
+
|
|
81
|
+
**Use with an AI assistant:** `spendguard install-rule --global` writes a rule into `CLAUDE.md` so **every** Claude/Cursor conversation routes the LLM code it builds through spendguard — then `spendguard install-skills` adds `/spend` (status) and `/spendguard-learn` (advisor) as slash-commands. See [Use with Claude](docs/USING-WITH-CLAUDE.md).
|
|
82
|
+
**Teams & orgs:** each user keeps their own ledger + sets their own caps (partner, not supervisor); opt-in roll-up for shared visibility + pooled learnings via the SaaS (separate repo). The client (this package) is **production-ready and fully standalone**. The team/org dashboard (a separate server) is **in development** — see [ROADMAP.md](docs/ROADMAP.md).
|
|
83
|
+
|
|
84
|
+
## Quickstart
|
|
85
|
+
|
|
86
|
+
**A) Set up with Claude (recommended).** Point Claude Code / the desktop app at this repo and say:
|
|
87
|
+
> *Install spendguard from this repo and run the guided setup in `SETUP.md`.*
|
|
88
|
+
|
|
89
|
+
Or just run `spendguard init` — it reads the **config registry** (`src/spendguard/config_schema.py` — the
|
|
90
|
+
single source of truth for every setting, its default, valid options, and whether it's secret) and walks you
|
|
91
|
+
through caps, projects, and providers **conversationally**, one question at a time, then writes your config.
|
|
92
|
+
Pointed at this repo, Claude does the same end-to-end: installs the package, runs the interview off that same
|
|
93
|
+
registry, and wires up the gate. Details: [SETUP.md](SETUP.md).
|
|
94
|
+
|
|
95
|
+
**B) pip + code.**
|
|
96
|
+
```
|
|
97
|
+
pip install llm-spendguard # from PyPI
|
|
98
|
+
# or, from a clone of this repo:
|
|
99
|
+
pip install -e .
|
|
100
|
+
```
|
|
101
|
+
```python
|
|
102
|
+
import spendguard
|
|
103
|
+
spendguard.install(cap=75) # gate every batch submission in this process
|
|
104
|
+
```
|
|
105
|
+
Or auto-install for every process in a venv — drop this in `sitecustomize.py`:
|
|
106
|
+
```python
|
|
107
|
+
import spendguard; spendguard.install()
|
|
108
|
+
```
|
|
109
|
+
Configure with `spendguard init` (interactive) / `spendguard config` (show current); see [Configuration](#configuration-prices-providers-models).
|
|
110
|
+
|
|
111
|
+
## CLI — full command reference
|
|
112
|
+
```
|
|
113
|
+
# enforce / control
|
|
114
|
+
spendguard status | on | off # kill switch (persistent flag)
|
|
115
|
+
spendguard doctor # is the gate ENFORCING in THIS interpreter? (+ ledger-leak check)
|
|
116
|
+
spendguard install-hook --venv <path> # gate every process in ANOTHER venv/repo (--uninstall to remove)
|
|
117
|
+
spendguard install-hook --user [--python P] # gate a python's per-USER site (system-python bypass; PEP668-safe, no pip)
|
|
118
|
+
spendguard install-rule [--global|--project DIR] # drop the spendguard rule into CLAUDE.md → every AI chat wires it in
|
|
119
|
+
spendguard install-skills # deploy /spend + /spendguard-learn as Claude slash-commands
|
|
120
|
+
spendguard coverage # across ALL pythons (3.11/3.14/…): which can call LLMs & which are GATED
|
|
121
|
+
# in code, fail-closed: import spendguard; spendguard.require() # refuses to run if NOT actually gated
|
|
122
|
+
|
|
123
|
+
# teams / orgs (client seam → future server repo, llmseg.ai)
|
|
124
|
+
spendguard saas [status|ping|push|pull] # opt-in roll-up; partner not supervisor; private until you enable it
|
|
125
|
+
|
|
126
|
+
# see the money
|
|
127
|
+
spendguard report [--alert-threshold 150] [--email] # daily/weekly/monthly + ledger-leak alert + top learnings
|
|
128
|
+
spendguard reconcile openai|anthropic [--by-day] # actual billed batch spend from the provider
|
|
129
|
+
spendguard reconcile-ledger [--since DATE] # local gate ledger vs provider billing → find LEAKS
|
|
130
|
+
spendguard calls [--intent X] # per-intent cost + good% + $/good (opt-in corpus)
|
|
131
|
+
spendguard estimate --items N --from-sample f.jsonl --packs 1,30
|
|
132
|
+
spendguard pricing | cross-check | check-prices | sync-prices # canonical table · OpenRouter drift · freshness · LiteLLM sync
|
|
133
|
+
spendguard audit [--ci] # fail if a script hardcodes a price ≠ the table
|
|
134
|
+
|
|
135
|
+
# plan / decide (the briefing + advisor loop)
|
|
136
|
+
spendguard brief --task "..." # "what we need to do" → pre-filled confirm-or-correct plan
|
|
137
|
+
spendguard advise [--intent X] [--plan M] # deterministic per-intent ranking by $/good (no spend)
|
|
138
|
+
spendguard optimize --intent X [--plan M] # caged LLM recommendation (cheapest config that holds quality)
|
|
139
|
+
spendguard models [show <model>] # per-model learnings, auto-applied (reasoning/cache/tokens)
|
|
140
|
+
spendguard insights list|export|import # living insights; opt-in scrubbed collective learning
|
|
141
|
+
spendguard backtest --as-of DATE # replay advise as of a past date
|
|
142
|
+
|
|
143
|
+
# prove / run cheaper (estimate-first, caged by caps.meta)
|
|
144
|
+
spendguard experiment --intent X --model M... [--semantic embed|rubric] [--run] # A/B cost↓ + same-output, graduated
|
|
145
|
+
spendguard promote --intent X --model M [--input chunk.jsonl] [--batch] [--run] # run the winner + KEEP output
|
|
146
|
+
spendguard cache-audit | cache-test --script f.py [--run] # prompt-caching: find + prove savings
|
|
147
|
+
spendguard cascade --ladder cheap,…,strong --intent X [--prompt …] --run # cheap→verify→escalate
|
|
148
|
+
spendguard cache-stats | dedup --input f.jsonl --out u.jsonl | dedup-populate # response cache + batch dedup
|
|
149
|
+
|
|
150
|
+
# cold start / corpus
|
|
151
|
+
spendguard bootstrap [--repo] [--transcripts] # mine ALL history → corpus + insights (free, then estimate)
|
|
152
|
+
spendguard fetch-io [--cap 50] # recover real prompt+output from providers (free)
|
|
153
|
+
spendguard backfill [--intent-map …] # seed corpus + graph from the batch ledgers (free)
|
|
154
|
+
spendguard mine-history {intents,graph,git} [--apply] # reconstruct intents/edges from the repo (free)
|
|
155
|
+
spendguard mine-conv {index,synth} [--run] # mine session transcripts for the cost playbook
|
|
156
|
+
spendguard validate # re-check learnings vs the current corpus (lifecycle)
|
|
157
|
+
|
|
158
|
+
# setup
|
|
159
|
+
spendguard init | config # guided setup / show resolved config
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### The workflow it's built around
|
|
163
|
+
**brief** (pre-filled plan) → **experiment** (prove the cheapest config that holds quality, graduated) →
|
|
164
|
+
**promote** (run it + keep the output) → the gate **enforces** caps → **reconcile-ledger** (catch leaks vs
|
|
165
|
+
provider billing) → **report** (daily email: totals + leak alert + top learnings) → **validate** (learnings
|
|
166
|
+
stay true as data grows) → those learnings feed the next **brief**.
|
|
167
|
+
|
|
168
|
+
### Gate another repo
|
|
169
|
+
The gate auto-installs per venv via a `sitecustomize.py` hook. To gate another project:
|
|
170
|
+
```
|
|
171
|
+
spendguard install-hook --venv /path/to/that-repo/.venv # pip-installs spendguard + writes the hook
|
|
172
|
+
```
|
|
173
|
+
Then every process in that venv is gated (kill switch: `GATE_DISABLE=1` or `spendguard off`). Until a repo
|
|
174
|
+
is gated, its provider spend shows up in `reconcile-ledger` as a **leak** (billed but ungoverned).
|
|
175
|
+
|
|
176
|
+
## Knobs (env)
|
|
177
|
+
`GATE_CAP=<$>` (default 75) · `GATE_ALLOW=1` (permit one over-cap run) · `GATE_DISABLE=1` (off for one run)
|
|
178
|
+
· `SPENDGUARD_HOME=<dir>` (data/flag/log location, default `~/.spendguard`) · `SPENDGUARD_ENV=<path>` (.env for keys)
|
|
179
|
+
|
|
180
|
+
## Caps by resource class (LLM · compute · total)
|
|
181
|
+
Beyond the per-batch cap, spendguard tracks **cumulative** spend caps split by *what's spending* — so you can
|
|
182
|
+
set a tight LLM sub-limit under a higher overall ceiling. Each class has a `daily` and a `monthly` window
|
|
183
|
+
(`null` = off), stored in `config.json` under `caps`, with an env override for every one:
|
|
184
|
+
|
|
185
|
+
| Cap | Config (nested or flat) | Env | Behaviour |
|
|
186
|
+
|---|---|---|---|
|
|
187
|
+
| **LLM** daily / monthly | `caps.llm.{daily,monthly}` | `GATE_LLM_DAILY` · `GATE_LLM_MONTHLY` | **HARD — gate-enforced** (OpenAI + Anthropic calls hit the gate) |
|
|
188
|
+
| **Compute** daily / monthly | `caps.compute.{daily,monthly}` | `GATE_COMPUTE_DAILY` · `GATE_COMPUTE_MONTHLY` | **alert-only** (remote-compute / vast.ai launches don't pass through the gate — surfaced in the report + dashboard) |
|
|
189
|
+
| **Total** daily / monthly | `caps.total.{daily,monthly}` | `GATE_TOTAL_DAILY` · `GATE_TOTAL_MONTHLY` | overall ceiling (LLM + compute) |
|
|
190
|
+
|
|
191
|
+
These need `budget.backend = sqlite` (the cross-process ledger). The **legacy flat `caps.daily` / `caps.monthly`**
|
|
192
|
+
still work and are honored as the **total** ceiling. (Config storage accepts either the nested `caps.llm.daily`
|
|
193
|
+
or the flat `caps["llm.daily"]` form — see `config.class_cap` / `config_schema.py`.)
|
|
194
|
+
|
|
195
|
+
## Pricing: layered, broad, low-maintenance
|
|
196
|
+
Prices load in layers, lowest→highest precedence — so you get **2,700+ models across all providers** for free,
|
|
197
|
+
your hand-verified rates always win, and you can override anything:
|
|
198
|
+
|
|
199
|
+
1. **LiteLLM community dataset** (breadth + freshness) — `spendguard sync-prices` fetches
|
|
200
|
+
[LiteLLM's CI-maintained `model_prices_and_context_window.json`](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)
|
|
201
|
+
(~2,300 priced models, 80+ providers), validates it (refuses an empty/bad fetch), and caches it to
|
|
202
|
+
`~/.spendguard/litellm_prices.json`. Read from cache only — **no network at import**.
|
|
203
|
+
2. **Curated `prices.json`** (shipped in the package) — your verified models (gpt-5.5, opus-4.8, …) override LiteLLM.
|
|
204
|
+
3. **User override** — `~/.spendguard/prices.json` / `.yaml` / `$SPENDGUARD_PRICES` wins over everything.
|
|
205
|
+
|
|
206
|
+
If nothing loads, a built-in table in `pricing.py` is the final fallback (never breaks). Run `spendguard sync-prices`
|
|
207
|
+
once (and periodically) to refresh; that's the primary freshness mechanism — `check-prices`/`refresh-prices` are backups.
|
|
208
|
+
|
|
209
|
+
## Configuration (prices, providers, models)
|
|
210
|
+
The curated/override files use this structure (`src/spendguard/prices.json`, `~/.spendguard/prices.json`, or `$SPENDGUARD_PRICES`):
|
|
211
|
+
```json
|
|
212
|
+
{ "_meta": {"verified": "2026-06-13", "source": "https://…", "stale_after_days": 45},
|
|
213
|
+
"providers": {
|
|
214
|
+
"openai": {"models": {"gpt-5.5": {"in_": 5.0, "out": 30.0, "cached_in": 0.5, "batch_in": 2.5, "batch_out": 15.0}}},
|
|
215
|
+
"anthropic": {"models": {"claude-opus-4-8": {"in_": 5.0, "out": 25.0, "cached_in": 0.5, "batch_in": 2.5, "batch_out": 12.5}}}
|
|
216
|
+
}}
|
|
217
|
+
```
|
|
218
|
+
Add a provider/model by adding an entry. A user-override file only needs the models it changes. `spendguard providers`
|
|
219
|
+
lists what's configured. If the config can't load, the built-in table in `pricing.py` is the fallback (never breaks).
|
|
220
|
+
|
|
221
|
+
## Pricing freshness
|
|
222
|
+
Prices drift, and a wrong price is the bug that started this project. `spendguard check-prices` shows the
|
|
223
|
+
`verified` date and flags the table **STALE** once it's older than `stale_after_days` (default 45); the daily
|
|
224
|
+
`spendguard report` prints the same warning. To refresh: re-verify against the `source` URL and bump the
|
|
225
|
+
`verified` date in `prices.json`. (A live fetch-and-diff against provider pricing pages is a planned addition.)
|
|
226
|
+
|
|
227
|
+
## Real-time budget
|
|
228
|
+
Batch cost is known before submit; real-time isn't (output tokens). So the real-time layer **accounts actual
|
|
229
|
+
usage after each call** (and logs it, so real-time spend shows in `report`) and **hard-stops before the next call**
|
|
230
|
+
once per-process cumulative spend crosses `GATE_RT_BUDGET` (default $50) — the runaway-loop guard.
|
|
231
|
+
|
|
232
|
+
## Email the report
|
|
233
|
+
`spendguard report --email` (or `--email-to addr`) emails the report so a scheduled run isn't missed.
|
|
234
|
+
Config lives in `~/.spendguard/email.json` (gitignored — safe for the secret) or env.
|
|
235
|
+
|
|
236
|
+
**Email needs an *authenticated* sender — this is universal, not a spendguard limitation.** Mail servers reject
|
|
237
|
+
unauthenticated senders, so every provider makes you prove ownership *somehow* before sending. Pick whichever
|
|
238
|
+
is least friction for you:
|
|
239
|
+
|
|
240
|
+
| Backend | What it takes (one-time) | DNS? | config |
|
|
241
|
+
|---|---|---|---|
|
|
242
|
+
| **Gmail / Workspace SMTP** | a 16-char app password (Google authenticates the send) | no | `{"host":"smtp.gmail.com","port":587,"user":"you@co.com","password":"<app pw>","to":"you@co.com"}` |
|
|
243
|
+
| **SendGrid (Twilio)** | "Single Sender Verification" — click a link in a confirm email | no | SMTP host `smtp.sendgrid.net`, or add a SendGrid backend |
|
|
244
|
+
| **Resend** | verify a domain (SPF/DKIM DNS records) for arbitrary recipients; or send only to your Resend signup email via `onboarding@resend.dev` | yes (for arbitrary recipients) | `{"provider":"resend","to":"you@co.com","from_":"reports@your-verified-domain","api_key":"re_…"}` |
|
|
245
|
+
|
|
246
|
+
**If it isn't configured, it gracefully no-ops** — `report` still prints (and the scheduled task still delivers in-app);
|
|
247
|
+
you'll just see `email not configured — skipping`. A *configured* backend that errors prints `EMAIL FAILED: <reason>`
|
|
248
|
+
(e.g. Resend's "verify a domain" message) without affecting the report. So leaving email unset is a fine default.
|
|
249
|
+
|
|
250
|
+
> **⚠️ Deliverability (shared senders land in spam).** Sending from a provider's *shared* address
|
|
251
|
+
> (e.g. Resend's `onboarding@resend.dev`) **sends fine but frequently lands in Gmail/Workspace Spam** — the
|
|
252
|
+
> domain has no alignment with yours, so receivers distrust it. The report *is* delivered; it's just filtered.
|
|
253
|
+
> Fixes, simplest first: **(1)** in Gmail, "Report as not spam" + a filter on the sender/subject set to
|
|
254
|
+
> *Never send to Spam*; **(2)** use **Gmail/Workspace SMTP** so it sends *as you* from inside Google (inbox, no DNS);
|
|
255
|
+
> **(3)** verify your own domain on the provider and send from it. Also note `api.resend.com` is behind Cloudflare,
|
|
256
|
+
> which 403s the default `urllib` User-Agent — spendguard sets one (don't strip it).
|
|
257
|
+
|
|
258
|
+
## Compare models (cost-per-result)
|
|
259
|
+
Run one prompt across providers and tabulate **cost + latency + output** — spendguard's angle is
|
|
260
|
+
*cost-per-result* (for deep evals, use promptfoo). Real calls, metered by the gate:
|
|
261
|
+
```
|
|
262
|
+
spendguard compare --prompt "Summarize X in 3 bullets" \
|
|
263
|
+
--models gpt-5.5,claude-opus-4-8,gemini-2.5-flash,deepseek-chat,qwen-max --show
|
|
264
|
+
```
|
|
265
|
+
Built-in providers: **openai, anthropic, gemini, deepseek, qwen** (most via their OpenAI-compatible
|
|
266
|
+
endpoints, so the gate already meters them). Keys resolve per provider from env / `~/.spendguard` / `./.env`
|
|
267
|
+
(`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GEMINI_API_KEY`, `DEEPSEEK_API_KEY`, `DASHSCOPE_API_KEY` for Qwen).
|
|
268
|
+
**Add another in one line:**
|
|
269
|
+
```python
|
|
270
|
+
from spendguard.adapters import register_provider
|
|
271
|
+
register_provider("together", "https://api.together.xyz/v1", "TOGETHER_API_KEY", ("meta-llama", "mistralai"))
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## Call context & cost-per-good-result (opt-in)
|
|
275
|
+
Beyond *cost*, spendguard can record per-call **context** to build a cost+**quality** corpus. Off by default
|
|
276
|
+
(it can store prompts/outputs, which is a privacy concern). Enable `calls.enabled` (+ `calls.store_prompts` for snippets and the
|
|
277
|
+
implicit signal).
|
|
278
|
+
- **Tag intent:** `with spendguard.context(intent="loinc-typing", chain="run-42"): ...`
|
|
279
|
+
- **Quality is deferred** — you can't judge an output when it's made, but the *next* call reveals it:
|
|
280
|
+
- *automatic ("used"):* a later call in the same chain that reuses an output marks it good.
|
|
281
|
+
- *explicit / judge:* `spendguard.feedback(call_id, ok=True, source="judge")` — capture the verdicts you already produce.
|
|
282
|
+
- **`spendguard calls`** → per intent: calls, $, good%, and **$/good (cost-per-good-result)** — the efficiency metric.
|
|
283
|
+
|
|
284
|
+
Real-time calls are recorded automatically (caller, prompt/output snippets, latency); batches are recorded at the job level.
|
|
285
|
+
|
|
286
|
+
### Smart attribution (a clean P&L, no manual bookkeeping)
|
|
287
|
+
Every charge is tagged on **two orthogonal dimensions**, so you can slice spend by either without bookkeeping:
|
|
288
|
+
- **WHO** — `org → team → contributor`, which **rolls up** the hierarchy. The contributor is set per install
|
|
289
|
+
(default: git `user.email`); the org/team is resolved server-side from the connection key.
|
|
290
|
+
- **WHAT** — `project · intent · resource` (the repo/work, the labeled task, and whether it's LLM or
|
|
291
|
+
remote-compute GPU).
|
|
292
|
+
|
|
293
|
+
Tagging is automatic: a project is inferred from the repo/cwd, refined by the call corpus's intent/caller and
|
|
294
|
+
the conversation that ran each batch; remote-compute rows route by instance label. The still-ambiguous
|
|
295
|
+
remainder can be resolved by a small, **capped, estimate-first** LLM pass (never auto-run). The result is a
|
|
296
|
+
clean P&L by team / project / intent with no manual entry. (Mechanism: `tag.py` cascade, `signal.py` per
|
|
297
|
+
project·intent·model roll-up, `conv.py` batch→conversation attribution, `saas.py` `org→team→user` push.)
|
|
298
|
+
|
|
299
|
+
## Learning advisor — *recommend considering history* (Layer 1 deterministic · Layer 2 LLM)
|
|
300
|
+
- **`spendguard advise [--intent X] [--plan MODEL]`** — pure-SQL ranking of your corpus by `$/good` (or `$/M out`
|
|
301
|
+
when quality isn't labeled yet), confidence-weighted, with caveats. No LLM, no spend.
|
|
302
|
+
- **`spendguard backtest --as-of DATE`** — replays `advise` as of a past date (would it have caught known-good calls?).
|
|
303
|
+
- **`spendguard backfill`** — seeds the corpus + learning graph from your real batch ledgers (no spend).
|
|
304
|
+
- **Layer 2 (its own, *caged*, LLM use)** — every op is **estimate-only by default**; `--run` spends, and each paid
|
|
305
|
+
call is tagged `intent=spendguard:*` so it hits a **separate meta budget** (`caps.meta`, default **$2/day**), is kept
|
|
306
|
+
out of your workload budget, and is excluded from the corpus it analyzes:
|
|
307
|
+
- **`spendguard mine`** — synthesize confidence-scored **insights** + learning-graph nodes from the evidence (reasoner).
|
|
308
|
+
- **`spendguard optimize [--intent X] [--plan MODEL]`** — an actionable recommendation citing evidence + insights (reasoner).
|
|
309
|
+
- **`spendguard reconstruct`** — judge a bounded sample of recovered call I/O for quality → real `good%`/`$/good`.
|
|
310
|
+
- **`spendguard review`** — **practice audit**: judges whether usage was *smart*, not just what it cost. Assembles a
|
|
311
|
+
context bundle (cost + quality + token-ratio + sample I/O + linked chat notes) and emits **conditional** insights
|
|
312
|
+
(IF task_class/regime THEN action BECAUSE mechanism) — needs no ground truth, so it's robust where output-judging isn't.
|
|
313
|
+
- **Models are configurable:** `advisor.model` (reasoner, default Opus 4.8) · `advisor.judge_model` (judge, default
|
|
314
|
+
Haiku 4.5) — any priced model / provider. Run any op without `--run` to see the projected cost first.
|
|
315
|
+
|
|
316
|
+
### Cold start, quality corpus, living insights, collective learning
|
|
317
|
+
- **`spendguard bootstrap`** — the cold-start process: mine **all** history (ledgers → intents → graph → provider I/O →
|
|
318
|
+
conversation) for free, then estimate the caged reasoning. One command, history → corpus → insights.
|
|
319
|
+
- **`spendguard fetch-io`** — recover the **real prompts+outputs** from the providers (OpenAI batch input/output files,
|
|
320
|
+
streamed with early-stop; Anthropic results within 29 days) into a bounded `call_io` sample. **Zero token cost.**
|
|
321
|
+
- **`spendguard validate`** — **living insights**: re-checks each learning against the current corpus and moves it through
|
|
322
|
+
its lifecycle (corroborated → `active` + confidence up; cited model gone / gap inverted → `refuted`/`superseded`). The
|
|
323
|
+
advisor weights by *current* confidence + status, so stale advice sinks as data grows.
|
|
324
|
+
- **`spendguard insights {list,export,import}`** — **collective learning, opt-in + scrubbed**. Export *abstracts* insights
|
|
325
|
+
into generalizable rules (keeps task_class/regime, model names, ratios; strips `$` amounts, intent names, evidence) and
|
|
326
|
+
**previews exactly what would leave**. Import brings community rules in as **low-trust priors** that must be locally
|
|
327
|
+
corroborated by `validate` before they sway the advisor.
|
|
328
|
+
|
|
329
|
+
> **On quality:** a cheap call that fails quality is wasted money, so cost-per-**good**-result is the metric. Two signals are
|
|
330
|
+
> trustworthy: **approach-quality** (`review` — needs no ground truth) and **outcome** (the conversation showing an output was
|
|
331
|
+
> used or redone). Judging output *correctness* in isolation is **not** reliable (an LLM can't verify a value it has no ground
|
|
332
|
+
> truth for) — spendguard quarantines such labels rather than trusting them.
|
|
333
|
+
- **Post-event mining (deterministic, zero spend)** — recover what the live recorder missed:
|
|
334
|
+
- **`spendguard mine-history {intents,graph,git}`** — reconstruct each batch's **intent** from repo artifacts
|
|
335
|
+
(`*batch_id*.json` + a size-bounded content scan of `data/`), add causal graph edges (`preceded`,
|
|
336
|
+
`derived_from`), and read git history for cost/fix signals. `--apply` writes; report-only otherwise.
|
|
337
|
+
- **`spendguard mine-conv {index,synth}`** — mine session transcripts for cost decisions. `index` is cached
|
|
338
|
+
(deterministic); `synth` is the caged reasoner turning the top decision snippets into `source='conversation'`
|
|
339
|
+
insights (estimate-first). Reconstructs your actual playbook (packing, never-cancel, price-basis errors, …).
|
|
340
|
+
|
|
341
|
+
## Observability (feed your existing stack)
|
|
342
|
+
spendguard emits an event per gated call — it's the *enforcement* layer, not another dashboard; route the
|
|
343
|
+
events to whatever you already run. Three sinks, all optional; none of them ever blocks or breaks the gate:
|
|
344
|
+
- **In-process callback:** `spendguard.on_event(lambda e: log(e))`
|
|
345
|
+
- **Webhook:** `emit.webhook` in `~/.spendguard/config.json` or `$SPENDGUARD_WEBHOOK` — POSTs the event JSON (Slack, your collector, …)
|
|
346
|
+
- **OpenTelemetry:** `emit.otel: true` / `$SPENDGUARD_OTEL` — a `spendguard.cost_usd` counter (needs `opentelemetry-sdk`)
|
|
347
|
+
|
|
348
|
+
Event shape: `{ts, kind: batch|realtime, provider, model, cost, decision}`. Webhook/OTel run on a background
|
|
349
|
+
daemon thread (drop-if-flooded), so even high-volume real-time calls aren't slowed; callbacks run inline (keep them fast).
|
|
350
|
+
|
|
351
|
+
## Extend to any SDK (zero required deps, fail-open)
|
|
352
|
+
spendguard ships with the OpenAI + Anthropic overlays, but the gate is generic — you can put **any** SDK under
|
|
353
|
+
it without adding a dependency:
|
|
354
|
+
1. **Intercept it:** `spendguard.register(module_path, ClassName, method, gate_fn)` patches that SDK's call
|
|
355
|
+
method (e.g. `register("cohere", "Client", "chat", gate_fn)`). Write a small `gate_fn` that reads the request
|
|
356
|
+
shape and estimates cost; add the model's prices to the table (`prices.json` / your override).
|
|
357
|
+
2. **Add an OpenAI-compatible provider in one line** (for `compare` + metering — most providers expose one):
|
|
358
|
+
`from spendguard.adapters import register_provider; register_provider("together", "https://api.together.xyz/v1", "TOGETHER_API_KEY", ("meta-llama", "mistralai"))`.
|
|
359
|
+
3. **Emit anywhere:** route the per-call event to a webhook, OpenTelemetry, or an in-process callback
|
|
360
|
+
(`spendguard.on_event(...)`) — see [Observability](#observability-feed-your-existing-stack).
|
|
361
|
+
|
|
362
|
+
All of it is **fail-open** (an estimation/patch error logs a warning and lets the call proceed) and needs **no required
|
|
363
|
+
dependencies** — the SDKs and OTel are optional extras.
|
|
364
|
+
|
|
365
|
+
## Safety
|
|
366
|
+
Fail-**open**: any estimation or patch error logs a warning and lets the call proceed — the gate
|
|
367
|
+
never breaks a job by accident. Only the deliberate over-cap stop blocks. Disable instantly with
|
|
368
|
+
`spendguard off` (checked per-call, live) — and the kill switch is honored even if the gate itself errors.
|
|
369
|
+
|
|
370
|
+
## Getting help
|
|
371
|
+
- **Website:** https://llmspendguard.com
|
|
372
|
+
- **Bugs / feature requests:** [GitHub Issues](https://github.com/llmspendguard/llm-spendguard/issues)
|
|
373
|
+
- **Questions / ideas / show-and-tell:** [GitHub Discussions](https://github.com/llmspendguard/llm-spendguard/discussions)
|
|
374
|
+
- **Contributing:** see [CONTRIBUTING.md](CONTRIBUTING.md).
|