llm-spendguard 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. llm_spendguard-0.2.6/LICENSE +21 -0
  2. llm_spendguard-0.2.6/PKG-INFO +374 -0
  3. llm_spendguard-0.2.6/README.md +334 -0
  4. llm_spendguard-0.2.6/pyproject.toml +62 -0
  5. llm_spendguard-0.2.6/setup.cfg +4 -0
  6. llm_spendguard-0.2.6/src/llm_spendguard.egg-info/PKG-INFO +374 -0
  7. llm_spendguard-0.2.6/src/llm_spendguard.egg-info/SOURCES.txt +88 -0
  8. llm_spendguard-0.2.6/src/llm_spendguard.egg-info/dependency_links.txt +1 -0
  9. llm_spendguard-0.2.6/src/llm_spendguard.egg-info/entry_points.txt +2 -0
  10. llm_spendguard-0.2.6/src/llm_spendguard.egg-info/requires.txt +23 -0
  11. llm_spendguard-0.2.6/src/llm_spendguard.egg-info/top_level.txt +1 -0
  12. llm_spendguard-0.2.6/src/spendguard/__init__.py +18 -0
  13. llm_spendguard-0.2.6/src/spendguard/adapters.py +75 -0
  14. llm_spendguard-0.2.6/src/spendguard/advise.py +89 -0
  15. llm_spendguard-0.2.6/src/spendguard/advisor.py +260 -0
  16. llm_spendguard-0.2.6/src/spendguard/audit.py +64 -0
  17. llm_spendguard-0.2.6/src/spendguard/backfill.py +108 -0
  18. llm_spendguard-0.2.6/src/spendguard/bootstrap.py +94 -0
  19. llm_spendguard-0.2.6/src/spendguard/brief.py +132 -0
  20. llm_spendguard-0.2.6/src/spendguard/budget.py +252 -0
  21. llm_spendguard-0.2.6/src/spendguard/cacheaudit.py +156 -0
  22. llm_spendguard-0.2.6/src/spendguard/cachetest.py +164 -0
  23. llm_spendguard-0.2.6/src/spendguard/callio.py +298 -0
  24. llm_spendguard-0.2.6/src/spendguard/calls.py +221 -0
  25. llm_spendguard-0.2.6/src/spendguard/cascade.py +95 -0
  26. llm_spendguard-0.2.6/src/spendguard/cli.py +204 -0
  27. llm_spendguard-0.2.6/src/spendguard/compare.py +53 -0
  28. llm_spendguard-0.2.6/src/spendguard/config.py +265 -0
  29. llm_spendguard-0.2.6/src/spendguard/config_schema.py +146 -0
  30. llm_spendguard-0.2.6/src/spendguard/conv.py +382 -0
  31. llm_spendguard-0.2.6/src/spendguard/emit.py +143 -0
  32. llm_spendguard-0.2.6/src/spendguard/equivalence.py +121 -0
  33. llm_spendguard-0.2.6/src/spendguard/estimate.py +124 -0
  34. llm_spendguard-0.2.6/src/spendguard/experiment.py +348 -0
  35. llm_spendguard-0.2.6/src/spendguard/gate.py +684 -0
  36. llm_spendguard-0.2.6/src/spendguard/guard.py +80 -0
  37. llm_spendguard-0.2.6/src/spendguard/history.py +252 -0
  38. llm_spendguard-0.2.6/src/spendguard/learn.py +157 -0
  39. llm_spendguard-0.2.6/src/spendguard/ledger_sync.py +248 -0
  40. llm_spendguard-0.2.6/src/spendguard/models.py +156 -0
  41. llm_spendguard-0.2.6/src/spendguard/notify.py +94 -0
  42. llm_spendguard-0.2.6/src/spendguard/prices.json +205 -0
  43. llm_spendguard-0.2.6/src/spendguard/pricing.py +241 -0
  44. llm_spendguard-0.2.6/src/spendguard/py.typed +0 -0
  45. llm_spendguard-0.2.6/src/spendguard/reconcile_anthropic.py +154 -0
  46. llm_spendguard-0.2.6/src/spendguard/reconcile_openai.py +110 -0
  47. llm_spendguard-0.2.6/src/spendguard/refresh.py +93 -0
  48. llm_spendguard-0.2.6/src/spendguard/report.py +185 -0
  49. llm_spendguard-0.2.6/src/spendguard/resources.py +235 -0
  50. llm_spendguard-0.2.6/src/spendguard/review.py +152 -0
  51. llm_spendguard-0.2.6/src/spendguard/saas.py +503 -0
  52. llm_spendguard-0.2.6/src/spendguard/semcache.py +250 -0
  53. llm_spendguard-0.2.6/src/spendguard/setup.py +497 -0
  54. llm_spendguard-0.2.6/src/spendguard/share.py +143 -0
  55. llm_spendguard-0.2.6/src/spendguard/signal.py +129 -0
  56. llm_spendguard-0.2.6/src/spendguard/submit.py +124 -0
  57. llm_spendguard-0.2.6/src/spendguard/sync.py +89 -0
  58. llm_spendguard-0.2.6/src/spendguard/tag.py +65 -0
  59. llm_spendguard-0.2.6/src/spendguard/validate.py +130 -0
  60. llm_spendguard-0.2.6/src/spendguard/workdone.py +126 -0
  61. llm_spendguard-0.2.6/tests/test_adapters.py +239 -0
  62. llm_spendguard-0.2.6/tests/test_advise.py +149 -0
  63. llm_spendguard-0.2.6/tests/test_advisor.py +31 -0
  64. llm_spendguard-0.2.6/tests/test_audit.py +118 -0
  65. llm_spendguard-0.2.6/tests/test_backfill.py +177 -0
  66. llm_spendguard-0.2.6/tests/test_bootstrap.py +128 -0
  67. llm_spendguard-0.2.6/tests/test_brief.py +18 -0
  68. llm_spendguard-0.2.6/tests/test_cacheaudit.py +18 -0
  69. llm_spendguard-0.2.6/tests/test_cascade.py +35 -0
  70. llm_spendguard-0.2.6/tests/test_conv.py +32 -0
  71. llm_spendguard-0.2.6/tests/test_equivalence.py +25 -0
  72. llm_spendguard-0.2.6/tests/test_experiment.py +26 -0
  73. llm_spendguard-0.2.6/tests/test_gate.py +138 -0
  74. llm_spendguard-0.2.6/tests/test_gate_failclosed.py +90 -0
  75. llm_spendguard-0.2.6/tests/test_history.py +42 -0
  76. llm_spendguard-0.2.6/tests/test_learn.py +40 -0
  77. llm_spendguard-0.2.6/tests/test_ledger.py +35 -0
  78. llm_spendguard-0.2.6/tests/test_ledger_sync.py +280 -0
  79. llm_spendguard-0.2.6/tests/test_models.py +28 -0
  80. llm_spendguard-0.2.6/tests/test_pricing.py +44 -0
  81. llm_spendguard-0.2.6/tests/test_reconcile.py +26 -0
  82. llm_spendguard-0.2.6/tests/test_reconcile_anthropic.py +172 -0
  83. llm_spendguard-0.2.6/tests/test_reconcile_openai.py +98 -0
  84. llm_spendguard-0.2.6/tests/test_runner.py +67 -0
  85. llm_spendguard-0.2.6/tests/test_saas.py +85 -0
  86. llm_spendguard-0.2.6/tests/test_saas_rollup.py +44 -0
  87. llm_spendguard-0.2.6/tests/test_semcache.py +48 -0
  88. llm_spendguard-0.2.6/tests/test_setup.py +44 -0
  89. llm_spendguard-0.2.6/tests/test_submit.py +56 -0
  90. llm_spendguard-0.2.6/tests/test_workdone.py +219 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ash Damle
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,374 @@
1
+ Metadata-Version: 2.4
2
+ Name: llm-spendguard
3
+ Version: 0.2.6
4
+ Summary: A pre-spend GATE + learning advisor for LLM API cost: caps every call, prices from a verified table, and learns the cheapest config that keeps quality.
5
+ Author: Ash Damle
6
+ License: MIT
7
+ Project-URL: Homepage, https://llmspendguard.com
8
+ Project-URL: Documentation, https://docs.llmspendguard.com/
9
+ Project-URL: Repository, https://github.com/llmspendguard/llm-spendguard
10
+ Project-URL: Changelog, https://github.com/llmspendguard/llm-spendguard/blob/main/CHANGELOG.md
11
+ Keywords: llm,openai,anthropic,cost,budget,finops,tokens,prompt-caching,observability
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
17
+ Classifier: Topic :: System :: Monitoring
18
+ Requires-Python: >=3.9
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Provides-Extra: openai
22
+ Requires-Dist: openai>=1.0; extra == "openai"
23
+ Requires-Dist: tiktoken; extra == "openai"
24
+ Provides-Extra: anthropic
25
+ Requires-Dist: anthropic>=0.40; extra == "anthropic"
26
+ Provides-Extra: otel
27
+ Requires-Dist: opentelemetry-sdk; extra == "otel"
28
+ Provides-Extra: all
29
+ Requires-Dist: openai>=1.0; extra == "all"
30
+ Requires-Dist: tiktoken; extra == "all"
31
+ Requires-Dist: anthropic>=0.40; extra == "all"
32
+ Requires-Dist: opentelemetry-sdk; extra == "all"
33
+ Provides-Extra: dev
34
+ Requires-Dist: pytest>=7; extra == "dev"
35
+ Requires-Dist: build; extra == "dev"
36
+ Requires-Dist: twine; extra == "dev"
37
+ Requires-Dist: ruff>=0.6; extra == "dev"
38
+ Requires-Dist: coverage[toml]>=7; extra == "dev"
39
+ Dynamic: license-file
40
+
41
+ # llm-spendguard
42
+
43
+ A pre-spend **governor** for LLM API cost (OpenAI + Anthropic): it caps every call before the spend,
44
+ prices from a verified table, and **learns the cheapest config that still keeps quality** — then proves
45
+ and enforces it. Zero required dependencies; install is one line; it never breaks a job (fail-open).
46
+ Learn more at https://llmspendguard.com · **[Docs & quickstart →](https://docs.llmspendguard.com/)**
47
+
48
+ ## Why llm-spendguard?
49
+ Cost overruns don't announce themselves — they slip in silently: a hardcoded price that drifted from the
50
+ real rate, a forgotten model swap, under-batching that re-bills a shared prompt every request, a job
51
+ cancelled "to save money" that still bills for completed work, an ungated script in some other venv quietly
52
+ leaking spend. spendguard stops those before the spend (the gate hard-stops over a cap, prices from a
53
+ verified table, finds the leaks) **and** learns what was actually worth it — so "cheaper" never quietly
54
+ costs you quality.
55
+
56
+ Born from a real incident: a "cost-conscious" day meant to cost ~$33 actually cost **$149.76** — a price
57
+ constant was hardcoded wrong (GPT-5.5 at the old GPT-5 rate) and jobs ran 1 item/request (the shared prompt
58
+ re-billed every call). spendguard makes those mistakes impossible to ship silently — and goes further:
59
+ it reconstructs *what* you should do cheaper, and won't let "cheaper" cost you quality.
60
+
61
+ ## What it does
62
+ **Enforce → see → plan → prove → learn.**
63
+ - **gate** — overlay on the OpenAI/Anthropic SDKs (auto-installs via `sitecustomize.py`): estimates every
64
+ batch/real-time call, **hard-stops** over a cap (per-batch + cross-process daily/monthly) — then *asks* if interactive.
65
+ - **pricing** — one canonical, verifiable table (layered from LiteLLM + curated + override), cross-checked
66
+ vs OpenRouter; an `audit` fails CI if any code hardcodes a disagreeing price.
67
+ - **reconcile** — actual $ from real billed tokens; **`reconcile-ledger`** compares the local ledger to
68
+ provider billing to find **leaks** (ungoverned spend from a non-gated venv/repo).
69
+ - **report** — daily/weekly/monthly email with spend totals + a leak alert + the advisor's top learnings.
70
+ - **learning advisor** — a per-call cost+quality corpus → confidence-scored, lifecycle-tracked **insights**;
71
+ `brief` pre-fills a plan, `optimize` recommends the cheapest config that held quality, `experiment` proves
72
+ it (cost↓ **and** same-output), `promote` runs it and keeps the output. Cost-per-**good**-result, not per-token.
73
+ - **cost levers** — prompt-caching audit/test, semantic cache + batch dedup, cost-aware cascade routing.
74
+ - **observability** — emits OpenTelemetry GenAI-convention metrics+spans → Langfuse / Helicone / Phoenix / any OTLP backend.
75
+
76
+ The advisor's own LLM use is itself **caged** (a separate `caps.meta` budget, tagged `spendguard:*`, excluded
77
+ from the corpus it analyzes) so the governor can't overspend governing.
78
+
79
+ **Docs:** [Architecture + diagrams](docs/ARCHITECTURE.md) · [Use with Claude/Cursor](docs/USING-WITH-CLAUDE.md) · [Methodology](docs/README.md) · [Roadmap (teams/orgs/SaaS)](docs/ROADMAP.md) · [Module map](src/spendguard/README.md) · [Contributing](CONTRIBUTING.md) · [Changelog](CHANGELOG.md) · [Setup](SETUP.md)
80
+
81
+ **Use with an AI assistant:** `spendguard install-rule --global` writes a rule into `CLAUDE.md` so **every** Claude/Cursor conversation routes the LLM code it builds through spendguard — then `spendguard install-skills` adds `/spend` (status) and `/spendguard-learn` (advisor) as slash-commands. See [Use with Claude](docs/USING-WITH-CLAUDE.md).
82
+ **Teams & orgs:** each user keeps their own ledger + sets their own caps (partner, not supervisor); opt-in roll-up for shared visibility + pooled learnings via the SaaS (separate repo). The client (this package) is **production-ready and fully standalone**. The team/org dashboard (a separate server) is **in development** — see [ROADMAP.md](docs/ROADMAP.md).
83
+
84
+ ## Quickstart
85
+
86
+ **A) Set up with Claude (recommended).** Point Claude Code / the desktop app at this repo and say:
87
+ > *Install spendguard from this repo and run the guided setup in `SETUP.md`.*
88
+
89
+ Or just run `spendguard init` — it reads the **config registry** (`src/spendguard/config_schema.py` — the
90
+ single source of truth for every setting, its default, valid options, and whether it's secret) and walks you
91
+ through caps, projects, and providers **conversationally**, one question at a time, then writes your config.
92
+ Pointed at this repo, Claude does the same end-to-end: installs the package, runs the interview off that same
93
+ registry, and wires up the gate. Details: [SETUP.md](SETUP.md).
94
+
95
+ **B) pip + code.**
96
+ ```
97
+ pip install llm-spendguard # from PyPI
98
+ # or, from a clone of this repo:
99
+ pip install -e .
100
+ ```
101
+ ```python
102
+ import spendguard
103
+ spendguard.install(cap=75) # gate every batch submission in this process
104
+ ```
105
+ Or auto-install for every process in a venv — drop this in `sitecustomize.py`:
106
+ ```python
107
+ import spendguard; spendguard.install()
108
+ ```
109
+ Configure with `spendguard init` (interactive) / `spendguard config` (show current); see [Configuration](#configuration-prices-providers-models).
110
+
111
+ ## CLI — full command reference
112
+ ```
113
+ # enforce / control
114
+ spendguard status | on | off # kill switch (persistent flag)
115
+ spendguard doctor # is the gate ENFORCING in THIS interpreter? (+ ledger-leak check)
116
+ spendguard install-hook --venv <path> # gate every process in ANOTHER venv/repo (--uninstall to remove)
117
+ spendguard install-hook --user [--python P] # gate a python's per-USER site (system-python bypass; PEP668-safe, no pip)
118
+ spendguard install-rule [--global|--project DIR] # drop the spendguard rule into CLAUDE.md → every AI chat wires it in
119
+ spendguard install-skills # deploy /spend + /spendguard-learn as Claude slash-commands
120
+ spendguard coverage # across ALL pythons (3.11/3.14/…): which can call LLMs & which are GATED
121
+ # in code, fail-closed: import spendguard; spendguard.require() # refuses to run if NOT actually gated
122
+
123
+ # teams / orgs (client seam → future server repo, llmseg.ai)
124
+ spendguard saas [status|ping|push|pull] # opt-in roll-up; partner not supervisor; private until you enable it
125
+
126
+ # see the money
127
+ spendguard report [--alert-threshold 150] [--email] # daily/weekly/monthly + ledger-leak alert + top learnings
128
+ spendguard reconcile openai|anthropic [--by-day] # actual billed batch spend from the provider
129
+ spendguard reconcile-ledger [--since DATE] # local gate ledger vs provider billing → find LEAKS
130
+ spendguard calls [--intent X] # per-intent cost + good% + $/good (opt-in corpus)
131
+ spendguard estimate --items N --from-sample f.jsonl --packs 1,30
132
+ spendguard pricing | cross-check | check-prices | sync-prices # canonical table · OpenRouter drift · freshness · LiteLLM sync
133
+ spendguard audit [--ci] # fail if a script hardcodes a price ≠ the table
134
+
135
+ # plan / decide (the briefing + advisor loop)
136
+ spendguard brief --task "..." # "what we need to do" → pre-filled confirm-or-correct plan
137
+ spendguard advise [--intent X] [--plan M] # deterministic per-intent ranking by $/good (no spend)
138
+ spendguard optimize --intent X [--plan M] # caged LLM recommendation (cheapest config that holds quality)
139
+ spendguard models [show <model>] # per-model learnings, auto-applied (reasoning/cache/tokens)
140
+ spendguard insights list|export|import # living insights; opt-in scrubbed collective learning
141
+ spendguard backtest --as-of DATE # replay advise as of a past date
142
+
143
+ # prove / run cheaper (estimate-first, caged by caps.meta)
144
+ spendguard experiment --intent X --model M... [--semantic embed|rubric] [--run] # A/B cost↓ + same-output, graduated
145
+ spendguard promote --intent X --model M [--input chunk.jsonl] [--batch] [--run] # run the winner + KEEP output
146
+ spendguard cache-audit | cache-test --script f.py [--run] # prompt-caching: find + prove savings
147
+ spendguard cascade --ladder cheap,…,strong --intent X [--prompt …] --run # cheap→verify→escalate
148
+ spendguard cache-stats | dedup --input f.jsonl --out u.jsonl | dedup-populate # response cache + batch dedup
149
+
150
+ # cold start / corpus
151
+ spendguard bootstrap [--repo] [--transcripts] # mine ALL history → corpus + insights (free, then estimate)
152
+ spendguard fetch-io [--cap 50] # recover real prompt+output from providers (free)
153
+ spendguard backfill [--intent-map …] # seed corpus + graph from the batch ledgers (free)
154
+ spendguard mine-history {intents,graph,git} [--apply] # reconstruct intents/edges from the repo (free)
155
+ spendguard mine-conv {index,synth} [--run] # mine session transcripts for the cost playbook
156
+ spendguard validate # re-check learnings vs the current corpus (lifecycle)
157
+
158
+ # setup
159
+ spendguard init | config # guided setup / show resolved config
160
+ ```
161
+
162
+ ### The workflow it's built around
163
+ **brief** (pre-filled plan) → **experiment** (prove the cheapest config that holds quality, graduated) →
164
+ **promote** (run it + keep the output) → the gate **enforces** caps → **reconcile-ledger** (catch leaks vs
165
+ provider billing) → **report** (daily email: totals + leak alert + top learnings) → **validate** (learnings
166
+ stay true as data grows) → those learnings feed the next **brief**.
167
+
168
+ ### Gate another repo
169
+ The gate auto-installs per venv via a `sitecustomize.py` hook. To gate another project:
170
+ ```
171
+ spendguard install-hook --venv /path/to/that-repo/.venv # pip-installs spendguard + writes the hook
172
+ ```
173
+ Then every process in that venv is gated (kill switch: `GATE_DISABLE=1` or `spendguard off`). Until a repo
174
+ is gated, its provider spend shows up in `reconcile-ledger` as a **leak** (billed but ungoverned).
175
+
176
+ ## Knobs (env)
177
+ `GATE_CAP=<$>` (default 75) · `GATE_ALLOW=1` (permit one over-cap run) · `GATE_DISABLE=1` (off for one run)
178
+ · `SPENDGUARD_HOME=<dir>` (data/flag/log location, default `~/.spendguard`) · `SPENDGUARD_ENV=<path>` (.env for keys)
179
+
180
+ ## Caps by resource class (LLM · compute · total)
181
+ Beyond the per-batch cap, spendguard tracks **cumulative** spend caps split by *what's spending* — so you can
182
+ set a tight LLM sub-limit under a higher overall ceiling. Each class has a `daily` and a `monthly` window
183
+ (`null` = off), stored in `config.json` under `caps`, with an env override for every one:
184
+
185
+ | Cap | Config (nested or flat) | Env | Behaviour |
186
+ |---|---|---|---|
187
+ | **LLM** daily / monthly | `caps.llm.{daily,monthly}` | `GATE_LLM_DAILY` · `GATE_LLM_MONTHLY` | **HARD — gate-enforced** (OpenAI + Anthropic calls hit the gate) |
188
+ | **Compute** daily / monthly | `caps.compute.{daily,monthly}` | `GATE_COMPUTE_DAILY` · `GATE_COMPUTE_MONTHLY` | **alert-only** (remote-compute / vast.ai launches don't pass through the gate — surfaced in the report + dashboard) |
189
+ | **Total** daily / monthly | `caps.total.{daily,monthly}` | `GATE_TOTAL_DAILY` · `GATE_TOTAL_MONTHLY` | overall ceiling (LLM + compute) |
190
+
191
+ These need `budget.backend = sqlite` (the cross-process ledger). The **legacy flat `caps.daily` / `caps.monthly`**
192
+ still work and are honored as the **total** ceiling. (Config storage accepts either the nested `caps.llm.daily`
193
+ or the flat `caps["llm.daily"]` form — see `config.class_cap` / `config_schema.py`.)
194
+
195
+ ## Pricing: layered, broad, low-maintenance
196
+ Prices load in layers, lowest→highest precedence — so you get **2,700+ models across all providers** for free,
197
+ your hand-verified rates always win, and you can override anything:
198
+
199
+ 1. **LiteLLM community dataset** (breadth + freshness) — `spendguard sync-prices` fetches
200
+ [LiteLLM's CI-maintained `model_prices_and_context_window.json`](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)
201
+ (~2,300 priced models, 80+ providers), validates it (refuses an empty/bad fetch), and caches it to
202
+ `~/.spendguard/litellm_prices.json`. Read from cache only — **no network at import**.
203
+ 2. **Curated `prices.json`** (shipped in the package) — your verified models (gpt-5.5, opus-4.8, …) override LiteLLM.
204
+ 3. **User override** — `~/.spendguard/prices.json` / `.yaml` / `$SPENDGUARD_PRICES` wins over everything.
205
+
206
+ If nothing loads, a built-in table in `pricing.py` is the final fallback (never breaks). Run `spendguard sync-prices`
207
+ once (and periodically) to refresh; that's the primary freshness mechanism — `check-prices`/`refresh-prices` are backups.
208
+
209
+ ## Configuration (prices, providers, models)
210
+ The curated/override files use this structure (`src/spendguard/prices.json`, `~/.spendguard/prices.json`, or `$SPENDGUARD_PRICES`):
211
+ ```json
212
+ { "_meta": {"verified": "2026-06-13", "source": "https://…", "stale_after_days": 45},
213
+ "providers": {
214
+ "openai": {"models": {"gpt-5.5": {"in_": 5.0, "out": 30.0, "cached_in": 0.5, "batch_in": 2.5, "batch_out": 15.0}}},
215
+ "anthropic": {"models": {"claude-opus-4-8": {"in_": 5.0, "out": 25.0, "cached_in": 0.5, "batch_in": 2.5, "batch_out": 12.5}}}
216
+ }}
217
+ ```
218
+ Add a provider/model by adding an entry. A user-override file only needs the models it changes. `spendguard providers`
219
+ lists what's configured. If the config can't load, the built-in table in `pricing.py` is the fallback (never breaks).
220
+
221
+ ## Pricing freshness
222
+ Prices drift, and a wrong price is the bug that started this project. `spendguard check-prices` shows the
223
+ `verified` date and flags the table **STALE** once it's older than `stale_after_days` (default 45); the daily
224
+ `spendguard report` prints the same warning. To refresh: re-verify against the `source` URL and bump the
225
+ `verified` date in `prices.json`. (A live fetch-and-diff against provider pricing pages is a planned addition.)
226
+
227
+ ## Real-time budget
228
+ Batch cost is known before submit; real-time cost isn't (the output-token count isn't known up front). So the real-time layer **accounts for actual
229
+ usage after each call** (and logs it, so real-time spend shows in `report`) and **hard-stops before the next call**
230
+ once the process's cumulative spend crosses `GATE_RT_BUDGET` (default $50) — this is the runaway-loop guard.
231
+
232
+ ## Email the report
233
+ `spendguard report --email` (or `--email-to addr`) emails the report so a scheduled run isn't missed.
234
+ Config lives in `~/.spendguard/email.json` (gitignored — safe for the secret) or env.
235
+
236
+ **Email needs an *authenticated* sender — this is universal, not a spendguard limitation.** Mail servers reject
237
+ unauthenticated senders, so every provider makes you prove ownership *somehow* before sending. Pick whichever
238
+ is least friction for you:
239
+
240
+ | Backend | What it takes (one-time) | DNS? | config |
241
+ |---|---|---|---|
242
+ | **Gmail / Workspace SMTP** | a 16-char app password (Google authenticates the send) | no | `{"host":"smtp.gmail.com","port":587,"user":"you@co.com","password":"<app pw>","to":"you@co.com"}` |
243
+ | **SendGrid (Twilio)** | "Single Sender Verification" — click a link in a confirm email | no | SMTP host `smtp.sendgrid.net`, or add a SendGrid backend |
244
+ | **Resend** | verify a domain (SPF/DKIM DNS records) for arbitrary recipients; or send only to your Resend signup email via `onboarding@resend.dev` | yes (for arbitrary recipients) | `{"provider":"resend","to":"you@co.com","from_":"reports@your-verified-domain","api_key":"re_…"}` |
245
+
246
+ **If it isn't configured, it gracefully no-ops** — `report` still prints (and the scheduled task still delivers in-app);
247
+ you'll just see `email not configured — skipping`. A *configured* backend that errors prints `EMAIL FAILED: <reason>`
248
+ (e.g. Resend's "verify a domain" message) without affecting the report. So leaving email unset is a fine default.
249
+
250
+ > **⚠️ Deliverability (shared senders land in spam).** Sending from a provider's *shared* address
251
+ > (e.g. Resend's `onboarding@resend.dev`) **sends fine but frequently lands in Gmail/Workspace Spam** — the
252
+ > domain has no alignment with yours, so receivers distrust it. The report *is* delivered; it's just filtered.
253
+ > Fixes, simplest first: **(1)** in Gmail, "Report as not spam" + a filter on the sender/subject set to
254
+ > *Never send to Spam*; **(2)** use **Gmail/Workspace SMTP** so it sends *as you* from inside Google (inbox, no DNS);
255
+ > **(3)** verify your own domain on the provider and send from it. Also note `api.resend.com` is behind Cloudflare,
256
+ > which 403s the default `urllib` User-Agent — spendguard sets one (don't strip it).
257
+
258
+ ## Compare models (cost-per-result)
259
+ Run one prompt across providers and table **cost + latency + output** — spendguard's angle is
260
+ *cost-per-result* (for deep evals, use promptfoo). Real calls, metered by the gate:
261
+ ```
262
+ spendguard compare --prompt "Summarize X in 3 bullets" \
263
+ --models gpt-5.5,claude-opus-4-8,gemini-2.5-flash,deepseek-chat,qwen-max --show
264
+ ```
265
+ Built-in providers: **openai, anthropic, gemini, deepseek, qwen** (most via their OpenAI-compatible
266
+ endpoints, so the gate already meters them). Keys resolve per provider from env / `~/.spendguard` / `./.env`
267
+ (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GEMINI_API_KEY`, `DEEPSEEK_API_KEY`, `DASHSCOPE_API_KEY` for Qwen).
268
+ **Add another in one line:**
269
+ ```python
270
+ from spendguard.adapters import register_provider
271
+ register_provider("together", "https://api.together.xyz/v1", "TOGETHER_API_KEY", ("meta-llama", "mistralai"))
272
+ ```
273
+
274
+ ## Call context & cost-per-good-result (opt-in)
275
+ Beyond *cost*, spendguard can record per-call **context** to build a cost+**quality** corpus. Off by default
276
+ (it can store prompts/outputs — privacy). Enable `calls.enabled` (+ `calls.store_prompts` for snippets and the
277
+ implicit signal).
278
+ - **Tag intent:** `with spendguard.context(intent="loinc-typing", chain="run-42"): ...`
279
+ - **Quality is deferred** — you can't judge an output when it's made, but the *next* call reveals it:
280
+ - *automatic ("used"):* a later call in the same chain that reuses an output marks it good.
281
+ - *explicit / judge:* `spendguard.feedback(call_id, ok=True, source="judge")` — capture the verdicts you already produce.
282
+ - **`spendguard calls`** → per intent: calls, $, good%, and **$/good (cost-per-good-result)** — the efficiency metric.
283
+
284
+ Real-time calls are recorded automatically (caller, prompt/output snippets, latency); batches record job-level.
285
+
286
+ ### Smart attribution (a clean P&L, no manual bookkeeping)
287
+ Every charge is tagged on **two orthogonal dimensions**, so you can slice spend by either without bookkeeping:
288
+ - **WHO** — `org → team → contributor`, which **rolls up** the hierarchy. The contributor is set per install
289
+ (default: git `user.email`); the org/team is resolved server-side from the connection key.
290
+ - **WHAT** — `project · intent · resource` (the repo/work, the labeled task, and whether it's LLM or
291
+ remote-compute GPU).
292
+
293
+ Tagging is automatic: a project is inferred from the repo/cwd, refined by the call corpus's intent/caller and
294
+ the conversation that ran each batch; remote-compute rows route by instance label. The still-ambiguous
295
+ remainder can be resolved by a small, **capped, estimate-first** LLM pass (never auto-run). The result is a
296
+ clean P&L by team / project / intent with no manual entry. (Mechanism: `tag.py` cascade, `signal.py` per
297
+ project·intent·model roll-up, `conv.py` batch→conversation attribution, `saas.py` `org→team→user` push.)
298
+
299
+ ## Learning advisor — *recommend considering history* (Layer 1 deterministic · Layer 2 LLM)
300
+ - **`spendguard advise [--intent X] [--plan MODEL]`** — pure-SQL ranking of your corpus by `$/good` (or `$/M out`
301
+ when quality isn't labeled yet), confidence-weighted, with caveats. No LLM, no spend.
302
+ - **`spendguard backtest --as-of DATE`** — replays `advise` as of a past date (would it have caught known-good calls?).
303
+ - **`spendguard backfill`** — seeds the corpus + learning graph from your real batch ledgers (no spend).
304
+ - **Layer 2 (its own, *caged*, LLM use)** — every op is **estimate-only by default**; `--run` spends, and each paid
305
+ call is tagged `intent=spendguard:*` so it hits a **separate meta budget** (`caps.meta`, default **$2/day**), is kept
306
+ out of your workload budget, and is excluded from the corpus it analyzes:
307
+ - **`spendguard mine`** — synthesize confidence-scored **insights** + learning-graph nodes from the evidence (reasoner).
308
+ - **`spendguard optimize [--intent X] [--plan MODEL]`** — an actionable recommendation citing evidence + insights (reasoner).
309
+ - **`spendguard reconstruct`** — judge a bounded sample of recovered call I/O for quality → real `good%`/`$/good`.
310
+ - **`spendguard review`** — **practice audit**: judges whether usage was *smart*, not just what it cost. Assembles a
311
+ context bundle (cost + quality + token-ratio + sample I/O + linked chat notes) and emits **conditional** insights
312
+ (IF task_class/regime THEN action BECAUSE mechanism) — needs no ground truth, so it's robust where output-judging isn't.
313
+ - **Models are configurable:** `advisor.model` (reasoner, default Opus 4.8) · `advisor.judge_model` (judge, default
314
+ Haiku 4.5) — any priced model / provider. Run any op without `--run` to see the projected cost first.
315
+
316
+ ### Cold start, quality corpus, living insights, collective learning
317
+ - **`spendguard bootstrap`** — the cold-start process: mine **all** history (ledgers → intents → graph → provider I/O →
318
+ conversation) for free, then estimate the caged reasoning. One command, history → corpus → insights.
319
+ - **`spendguard fetch-io`** — recover the **real prompts+outputs** from the providers (OpenAI batch input/output files,
320
+ streamed with early-stop; Anthropic results within 29 days) into a bounded `call_io` sample. **Zero token cost.**
321
+ - **`spendguard validate`** — **living insights**: re-checks each learning against the current corpus and moves it through
322
+ its lifecycle (corroborated → `active` + confidence up; cited model gone / gap inverted → `refuted`/`superseded`). The
323
+ advisor weights by *current* confidence + status, so stale advice sinks as data grows.
324
+ - **`spendguard insights {list,export,import}`** — **collective learning, opt-in + scrubbed**. Export *abstracts* insights
325
+ into generalizable rules (keeps task_class/regime, model names, ratios; strips `$` amounts, intent names, evidence) and
326
+ **previews exactly what would leave**. Import brings community rules in as **low-trust priors** that must be locally
327
+ corroborated by `validate` before they sway the advisor.
328
+
329
+ > **On quality:** a cheap call that fails quality is wasted money, so cost-per-**good**-result is the metric. Two signals are
330
+ > trustworthy: **approach-quality** (`review` — needs no ground truth) and **outcome** (the conversation showing an output was
331
+ > used or redone). Judging output *correctness* in isolation is **not** reliable (an LLM can't verify a value it has no ground
332
+ > truth for) — spendguard quarantines such labels rather than trusting them.
333
+ - **Post-event mining (deterministic, zero spend)** — recover what the live recorder missed:
334
+ - **`spendguard mine-history {intents,graph,git}`** — reconstruct each batch's **intent** from repo artifacts
335
+ (`*batch_id*.json` + a size-bounded content scan of `data/`), add causal graph edges (`preceded`,
336
+ `derived_from`), and read git history for cost/fix signals. `--apply` writes; report-only otherwise.
337
+ - **`spendguard mine-conv {index,synth}`** — mine session transcripts for cost decisions. `index` is cached
338
+ (deterministic); `synth` is the caged reasoner turning the top decision snippets into `source='conversation'`
339
+ insights (estimate-first). Reconstructs your actual playbook (packing, never-cancel, price-basis errors, …).
340
+
341
+ ## Observability (feed your existing stack)
342
+ spendguard emits an event per gated call — it's the *enforcement* layer, not another dashboard; route the
343
+ events to whatever you already run. Three sinks, all optional, none ever block or break the gate:
344
+ - **In-process callback:** `spendguard.on_event(lambda e: log(e))`
345
+ - **Webhook:** `emit.webhook` in `~/.spendguard/config.json` or `$SPENDGUARD_WEBHOOK` — POSTs the event JSON (Slack, your collector, …)
346
+ - **OpenTelemetry:** `emit.otel: true` / `$SPENDGUARD_OTEL` — a `spendguard.cost_usd` counter (needs `opentelemetry-sdk`)
347
+
348
+ Event shape: `{ts, kind: batch|realtime, provider, model, cost, decision}`. Webhook/OTel run on a background
349
+ daemon thread (drop-if-flooded), so even high-volume real-time calls aren't slowed; callbacks run inline (keep them fast).
350
+
351
+ ## Extend to any SDK (zero required deps, fail-open)
352
+ spendguard ships with the OpenAI + Anthropic overlays, but the gate is generic — you can put **any** SDK under
353
+ it without adding a dependency:
354
+ 1. **Intercept it:** `spendguard.register(module_path, ClassName, method, gate_fn)` patches that SDK's call
355
+ method (e.g. `register("cohere", "Client", "chat", gate_fn)`). Write a small `gate_fn` that reads the request
356
+ shape and estimates cost; add the model's prices to the table (`prices.json` / your override).
357
+ 2. **Add an OpenAI-compatible provider in one line** (for `compare` + metering — most providers expose an OpenAI-compatible endpoint):
358
+ `from spendguard.adapters import register_provider; register_provider("together", "https://api.together.xyz/v1", "TOGETHER_API_KEY", ("meta-llama", "mistralai"))`.
359
+ 3. **Emit anywhere:** route the per-call event to a webhook, OpenTelemetry, or an in-process callback
360
+ (`spendguard.on_event(...)`) — see [Observability](#observability-feed-your-existing-stack).
361
+
362
+ All of it is **fail-open** (an estimation/patch error logs a warning and lets the call proceed) and has **no required
363
+ dependencies** — the SDKs and OTel are optional extras.
364
+
365
+ ## Safety
366
+ Fail-**open**: any estimation or patch error logs a warning and lets the call proceed — the gate
367
+ never breaks a job by accident. Only the deliberate over-cap stop blocks. Disable instantly with
368
+ `spendguard off` (checked per-call, live) — and the kill switch is honored even if the gate itself errors.
369
+
370
+ ## Getting help
371
+ - **Website:** https://llmspendguard.com
372
+ - **Bugs / feature requests:** [GitHub Issues](https://github.com/llmspendguard/llm-spendguard/issues)
373
+ - **Questions / ideas / show-and-tell:** [GitHub Discussions](https://github.com/llmspendguard/llm-spendguard/discussions)
374
+ - **Contributing:** see [CONTRIBUTING.md](CONTRIBUTING.md).