cheap-llm-mcp 0.1.0

package/.env.example ADDED
@@ -0,0 +1,10 @@
+ CHEAP_LLM_API_KEY=sk-...
+ CHEAP_LLM_BASE_URL=https://api.deepseek.com
+ CHEAP_LLM_MODEL=deepseek-chat
+ CHEAP_LLM_CHAT_PATH=/chat/completions
+
+ SIMPLE_LLM_CHINESE_DEFAULT=true
+ SIMPLE_LLM_STABILITY_DEFAULT=true
+ SIMPLE_LLM_MAX_PROMPT_CHARS=12000
+ SIMPLE_LLM_TIMEOUT_MS=60000
+ SIMPLE_LLM_USAGE_LOG=
package/CHANGELOG.md ADDED
@@ -0,0 +1,9 @@
+ # Changelog
+
+ ## 0.1.0
+
+ - Initial productized MCP server.
+ - Added DeepSeek, Qwen, MiMo, and custom OpenAI-compatible provider support.
+ - Added CLI modes: stdio server, `setup`, `doctor`, and `config`.
+ - Added Chinese-first default system prompt.
+ - Added safety gates for external API approval, data classification, secret scanning, HTTPS, prompt length, timeouts, and error redaction.
package/CONTRIBUTING.md ADDED
@@ -0,0 +1,21 @@
+ # Contributing
+
+ Thanks for improving `cheap-llm-mcp`.
+
+ ## Development
+
+ ```bash
+ npm install
+ npm run ci
+ ```
+
+ ## Pull Requests
+
+ - Keep provider additions OpenAI-compatible unless there is a strong reason not to.
+ - Preserve safe defaults.
+ - Add tests for provider parsing, safety gates, and CLI output.
+ - Do not commit real API keys or provider credentials.
+
+ ## Release
+
+ Releases are published from tags by GitHub Actions after `npm run ci` passes.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2026 cheap-llm-mcp contributors
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,243 @@
+ # cheap-llm-mcp
+
+ [![npm version](https://img.shields.io/npm/v/cheap-llm-mcp.svg)](https://www.npmjs.com/package/cheap-llm-mcp)
+ [![CI](https://github.com/stBlackCat/cheap-llm-mcp/actions/workflows/ci.yml/badge.svg)](https://github.com/stBlackCat/cheap-llm-mcp/actions/workflows/ci.yml)
+ [![Node.js >=20](https://img.shields.io/badge/node-%3E%3D20-339933)](https://nodejs.org/)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
+
+ Still worried about GPT Plus limits? Still watching your Claude subscription tokens burn on tiny chores?
+
+ `cheap-llm-mcp` solves a big chunk of that pain: use cheap AI for cheap work, while your premium model stays in charge.
+
+ [Chinese documentation](README.zh-CN.md)
+
+ This is a local stdio MCP server for Claude Code, Codex, and other MCP clients. It routes simple, low-risk, self-contained tasks to DeepSeek, Qwen, MiMo, or any OpenAI-compatible chat completions API. Your main AI still plans, reviews, edits, and decides. The cheap model just handles small drafts.
+
+ ## Quickstart
+
+ Install the MCP server first:
+
+ ```bash
+ npx -y cheap-llm-mcp@latest setup
+ ```
+
+ Then fill in one OpenAI-compatible endpoint:
+
+ ```bash
+ CHEAP_LLM_BASE_URL=https://api.deepseek.com
+ CHEAP_LLM_MODEL=deepseek-chat
+ CHEAP_LLM_API_KEY=sk-...
+ CHEAP_LLM_CHAT_PATH=/chat/completions
+ ```
+
+ That is the whole model setup. DeepSeek, Qwen, MiMo, SiliconFlow, OpenRouter, local OpenAI-compatible gateways, and most other providers work the same way as long as they expose a chat-completions-compatible endpoint.
+
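+ If you want to sanity-check an endpoint by hand first, note that these four values describe a plain OpenAI-compatible chat completions request. A minimal curl sketch, assuming the DeepSeek values above are exported in your shell:
+
+ ```bash
+ curl -s "$CHEAP_LLM_BASE_URL$CHEAP_LLM_CHAT_PATH" \
+   -H "Authorization: Bearer $CHEAP_LLM_API_KEY" \
+   -H "Content-Type: application/json" \
+   -d '{"model": "deepseek-chat", "messages": [{"role": "user", "content": "Say hello."}]}'
+ ```
+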
+ Check your setup:
+
+ ```bash
+ npx -y cheap-llm-mcp@latest doctor
+ ```
+
+ Print manual config:
+
+ ```bash
+ npx -y cheap-llm-mcp@latest config
+ ```
+
+ ## Claude Code
+
+ The setup wizard can run this after confirmation:
+
+ ```bash
+ claude mcp add --transport stdio --scope user \
+   --env CHEAP_LLM_API_KEY=sk-... \
+   --env CHEAP_LLM_BASE_URL=https://api.deepseek.com \
+   --env CHEAP_LLM_MODEL=deepseek-chat \
+   --env CHEAP_LLM_CHAT_PATH=/chat/completions \
+   --env SIMPLE_LLM_CHINESE_DEFAULT=true \
+   --env SIMPLE_LLM_STABILITY_DEFAULT=true \
+   cheap-llm -- npx -y cheap-llm-mcp@latest
+ ```
+
+ Restart Claude Code and run:
+
+ ```text
+ /mcp
+ ```
+
+ ## Codex
+
+ The setup wizard can run this after confirmation:
+
+ ```bash
+ codex mcp add cheap-llm \
+   --env CHEAP_LLM_API_KEY=sk-... \
+   --env CHEAP_LLM_BASE_URL=https://api.deepseek.com \
+   --env CHEAP_LLM_MODEL=deepseek-chat \
+   --env CHEAP_LLM_CHAT_PATH=/chat/completions \
+   --env SIMPLE_LLM_CHINESE_DEFAULT=true \
+   --env SIMPLE_LLM_STABILITY_DEFAULT=true \
+   -- npx -y cheap-llm-mcp@latest
+ ```
+
+ Restart Codex and verify:
+
+ ```bash
+ codex mcp list
+ ```
+
+ If `codex mcp add` is unavailable, run:
+
+ ```bash
+ npx -y cheap-llm-mcp@latest config
+ ```
+
+ Then paste the printed TOML into `~/.codex/config.toml`.
+
+ ## What should be delegated?
+
+ Good cheap-model tasks:
+
+ - summarize a short note
+ - translate or rewrite text
+ - classify a small snippet
+ - extract fields into JSON
+ - draft a regex
+ - explain a short command
+ - produce a tiny isolated code snippet
+
+ Bad cheap-model tasks:
+
+ - decide architecture
+ - edit your repo directly
+ - review security-sensitive code
+ - reason over a full private codebase
+ - handle secrets or sensitive data
+ - debug complex cross-file behavior
+
+ ## Stability without wasting tokens
+
+ Cheap models are useful, but they are not the boss.
+
+ `cheap-llm-mcp` adds a compact default instruction that tells the cheap model to:
+
+ - return a concise draft only
+ - avoid final decisions
+ - avoid pretending it edited files
+ - avoid guessing missing facts
+ - say `UNCERTAIN` when the task is ambiguous
+
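+ A rough paraphrase of that injected constraint (illustrative wording, not the literal prompt text shipped by the server):
+
+ ```text
+ Return a concise draft only. Do not make final decisions. Do not claim
+ to have edited files. Do not guess missing facts. If the task is
+ ambiguous, reply UNCERTAIN and state what is missing.
+ ```
+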
+ The MCP tool description also tells the host AI to lightly review the result against the original task before using it. This keeps the premium model in control without asking the cheap model to produce long self-review reports.
+
+ Disable this default only if you know what you are doing:
+
+ ```bash
+ SIMPLE_LLM_STABILITY_DEFAULT=false
+ ```
+
+ ## 30-second demo
+
+ 1. Run `npx -y cheap-llm-mcp@latest setup`.
+ 2. Restart Claude Code or Codex.
+ 3. Ask: "Use the cheap LLM MCP to summarize this short text."
+ 4. Your host AI delegates the small task, then checks the draft before using it.
+
+ Available tools:
+
+ - `ask_simple_model`: call a configured cheap model for a self-contained task.
+ - `list_simple_model_providers`: show configured providers without leaking API keys.
+ - `check_simple_model_setup`: validate local provider configuration without making a model request.
+ - `get_token_savings`: show how many provider-reported tokens were routed to cheap models.
+
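+ For instance, a host AI might invoke `ask_simple_model` with arguments along these lines (a sketch: `approvedForExternalApi` and `dataClassification` are the documented safety fields, while the `prompt` field name and the `public` classification value are assumptions for illustration):
+
+ ```json
+ {
+   "prompt": "Draft a regex that matches ISO 8601 dates.",
+   "approvedForExternalApi": true,
+   "dataClassification": "public"
+ }
+ ```
+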
+ ## OpenAI-compatible config
+
+ The recommended config is intentionally boring:
+
+ ```bash
+ CHEAP_LLM_BASE_URL=https://your-provider.example/v1
+ CHEAP_LLM_MODEL=your-cheap-model
+ CHEAP_LLM_API_KEY=your-api-key
+ CHEAP_LLM_CHAT_PATH=/chat/completions
+ ```
+
+ Examples:
+
+ ```bash
+ # DeepSeek
+ CHEAP_LLM_BASE_URL=https://api.deepseek.com
+ CHEAP_LLM_MODEL=deepseek-chat
+
+ # Qwen compatible mode
+ CHEAP_LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
+ CHEAP_LLM_MODEL=qwen-plus
+
+ # Any other OpenAI-compatible gateway
+ CHEAP_LLM_BASE_URL=https://example.com/v1
+ CHEAP_LLM_MODEL=model-id
+ ```
+
+ Advanced users can still configure multiple named providers with `SIMPLE_LLM_PROVIDERS`, but the default path is one cheap OpenAI-compatible endpoint.
+
+ ## Token savings
+
+ Run the MCP tool `get_token_savings` to see how many tokens were actually handled by cheap models during the current MCP server session.
+
+ It reports prompt, completion, and total cheap-model tokens, plus a provider/model breakdown and a rough `estimatedPremiumTokensAvoided` token-volume proxy. This is token-based by design; provider prices change often, so `cheap-llm-mcp` does not hardcode pricing tables.
+
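+ The summary mirrors the `UsageSummary` shape declared in `dist/accounting.d.ts`. An illustrative session (made-up numbers; the provider name depends on your configuration) might return:
+
+ ```json
+ {
+   "startedAt": "2026-02-01T09:00:00.000Z",
+   "calls": 3,
+   "callsWithUsage": 3,
+   "callsWithoutUsage": 0,
+   "cheapModelPromptTokens": 1200,
+   "cheapModelCompletionTokens": 450,
+   "cheapModelTotalTokens": 1650,
+   "estimatedPremiumTokensAvoided": 1650,
+   "byProviderModel": [
+     {
+       "provider": "default",
+       "model": "deepseek-chat",
+       "calls": 3,
+       "callsWithUsage": 3,
+       "callsWithoutUsage": 0,
+       "promptTokens": 1200,
+       "completionTokens": 450,
+       "totalTokens": 1650
+     }
+   ],
+   "notes": [
+     "estimatedPremiumTokensAvoided is a rough token-volume proxy, not a billing guarantee."
+   ]
+ }
+ ```
+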
+ For a persistent audit trail, set:
+
+ ```bash
+ SIMPLE_LLM_USAGE_LOG=/path/to/cheap-llm-usage.jsonl
+ ```
+
+ The usage log records provider, model, token counts, and timestamp only. It does not record prompts or model outputs.
+
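+ Each line of the log is a single JSON object whose fields match the `UsageEntry` record written by `dist/accounting.js` (the values shown here are illustrative):
+
+ ```json
+ {"timestamp":"2026-02-01T09:00:00.000Z","provider":"default","model":"deepseek-chat","promptTokens":400,"completionTokens":150,"totalTokens":550,"hasUsage":true}
+ ```
+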
+ ## Safety defaults
+
+ `cheap-llm-mcp` is for low-risk delegation, not blind outsourcing.
+
+ - Calls require `approvedForExternalApi=true`.
+ - Calls require `dataClassification`.
+ - `dataClassification=sensitive` is rejected.
+ - Common API key, token, password, AWS key, and private key patterns are rejected.
+ - HTTP providers are rejected unless `SIMPLE_LLM_ALLOW_HTTP=true`.
+ - Prompt size is capped by `SIMPLE_LLM_MAX_PROMPT_CHARS` (default: `12000`).
+ - Requests time out via `SIMPLE_LLM_TIMEOUT_MS` (default: `60000`).
+ - Provider errors are redacted before being returned.
+ - Optional usage logs contain provider/model/token counts only, not prompts or outputs.
+
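+ The secret scan is pattern-based, not semantic, so treat it as a safety net rather than a guarantee. A minimal TypeScript sketch of the idea, using hypothetical patterns (the shipped pattern list is the server's own and differs in detail):
+
+ ```typescript
+ // Illustrative only: not the server's actual pattern list.
+ const SECRET_PATTERNS: RegExp[] = [
+   /sk-[A-Za-z0-9]{20,}/,                // OpenAI-style API keys
+   /AKIA[0-9A-Z]{16}/,                   // AWS access key IDs
+   /-----BEGIN [A-Z ]*PRIVATE KEY-----/, // PEM private key headers
+   /\b(?:password|token)\s*[:=]\s*\S+/i  // inline password/token assignments
+ ];
+
+ function looksLikeSecret(prompt: string): boolean {
+   return SECRET_PATTERNS.some((pattern) => pattern.test(prompt));
+ }
+ ```
+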
+ ## Chinese default
+
+ Chinese-first output is enabled by default:
+
+ ```bash
+ SIMPLE_LLM_CHINESE_DEFAULT=true
+ ```
+
+ The server asks the cheap model to answer in Simplified Chinese while preserving code, commands, paths, API names, model names, error messages, config keys, and English technical terms. Disable it with:
+
+ ```bash
+ SIMPLE_LLM_CHINESE_DEFAULT=false
+ ```
+
+ ## Why not just use a smaller main model?
+
+ A smaller main model saves tokens, but it also becomes responsible for planning, safety, code changes, and tool orchestration. `cheap-llm-mcp` keeps your strongest model in charge and only delegates small, self-contained work. That preserves judgment while cutting premium-model spend.
+
+ ## Development
+
+ ```bash
+ npm install
+ npm run ci
+ ```
+
+ Start the MCP server locally:
+
+ ```bash
+ npm run build
+ node dist/index.js
+ ```
+
+ ## License
+
+ MIT
package/README.zh-CN.md ADDED
@@ -0,0 +1,161 @@
+ # cheap-llm-mcp
+
+ Still worried about GPT Plus limits? Still wincing at how expensive your Claude subscription tokens are?
+
+ This MCP solves most of your small-task cost problem: use cheap AI for cheap work, and let the expensive main model stay in charge of orchestration, review, and final decisions.
+
+ `cheap-llm-mcp` is a local stdio MCP server for Claude Code, Codex, and other MCP clients. It hands low-risk tasks such as summarization, translation, classification, extraction, and small code snippets to any OpenAI-compatible API.
+
+ ## Quickstart
+
+ Install the MCP server first:
+
+ ```bash
+ npx -y cheap-llm-mcp@latest setup
+ ```
+
+ Next, fill in a single OpenAI-compatible endpoint:
+
+ ```bash
+ CHEAP_LLM_BASE_URL=https://api.deepseek.com
+ CHEAP_LLM_MODEL=deepseek-chat
+ CHEAP_LLM_API_KEY=sk-...
+ CHEAP_LLM_CHAT_PATH=/chat/completions
+ ```
+
+ That is all there is to it. DeepSeek, Qwen, MiMo, SiliconFlow, OpenRouter, and local OpenAI-compatible gateways all use this same configuration, as long as they expose a chat-completions-compatible endpoint.
+
+ Check the configuration:
+
+ ```bash
+ npx -y cheap-llm-mcp@latest doctor
+ ```
+
+ Print the manual configuration:
+
+ ```bash
+ npx -y cheap-llm-mcp@latest config
+ ```
+
+ ## Claude Code
+
+ After you confirm, the wizard runs a command like this:
+
+ ```bash
+ claude mcp add --transport stdio --scope user \
+   --env CHEAP_LLM_API_KEY=sk-... \
+   --env CHEAP_LLM_BASE_URL=https://api.deepseek.com \
+   --env CHEAP_LLM_MODEL=deepseek-chat \
+   --env CHEAP_LLM_CHAT_PATH=/chat/completions \
+   --env SIMPLE_LLM_CHINESE_DEFAULT=true \
+   --env SIMPLE_LLM_STABILITY_DEFAULT=true \
+   cheap-llm -- npx -y cheap-llm-mcp@latest
+ ```
+
+ Restart Claude Code, then run:
+
+ ```text
+ /mcp
+ ```
+
+ ## Codex
+
+ After you confirm, the wizard runs a command like this:
+
+ ```bash
+ codex mcp add cheap-llm \
+   --env CHEAP_LLM_API_KEY=sk-... \
+   --env CHEAP_LLM_BASE_URL=https://api.deepseek.com \
+   --env CHEAP_LLM_MODEL=deepseek-chat \
+   --env CHEAP_LLM_CHAT_PATH=/chat/completions \
+   --env SIMPLE_LLM_CHINESE_DEFAULT=true \
+   --env SIMPLE_LLM_STABILITY_DEFAULT=true \
+   -- npx -y cheap-llm-mcp@latest
+ ```
+
+ If the command is unavailable, run `npx -y cheap-llm-mcp@latest config` and paste the printed TOML into `~/.codex/config.toml`.
+
+ ## Configuration format
+
+ The recommended path is just these four settings:
+
+ ```bash
+ CHEAP_LLM_BASE_URL=https://your-provider.example/v1
+ CHEAP_LLM_MODEL=your-cheap-model
+ CHEAP_LLM_API_KEY=your-api-key
+ CHEAP_LLM_CHAT_PATH=/chat/completions
+ ```
+
+ Common examples:
+
+ ```bash
+ # DeepSeek
+ CHEAP_LLM_BASE_URL=https://api.deepseek.com
+ CHEAP_LLM_MODEL=deepseek-chat
+
+ # Qwen compatible mode
+ CHEAP_LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
+ CHEAP_LLM_MODEL=qwen-plus
+
+ # Any other OpenAI-compatible gateway
+ CHEAP_LLM_BASE_URL=https://example.com/v1
+ CHEAP_LLM_MODEL=model-id
+ ```
+
+ Advanced users can still configure multiple named providers with `SIMPLE_LLM_PROVIDERS`, but the default experience is one cheap OpenAI-compatible endpoint.
+
+ ## What tasks are worth delegating?
+
+ Good fits: short-text summaries, translation and polishing, simple classification, extracting small snippets into JSON, regex drafts, short command explanations, and small self-contained code snippets.
+
+ Poor fits: architecture decisions, direct repository edits, security-sensitive code review, reasoning over a full private codebase, secrets or sensitive data, and complex cross-file debugging.
+
+ ## Stability control without wasting tokens
+
+ Cheap models can do the work, but they cannot be in charge.
+
+ By default, `cheap-llm-mcp` adds a very short stability constraint for the cheap model: return a concise draft only, make no final decisions, never pretend files were edited, never guess missing facts, and flag ambiguous tasks with `UNCERTAIN`.
+
+ At the same time, the MCP tool description asks the main AI in Codex or Claude Code to review the result lightly: check it against the original task only, without sending large extra context or asking the cheap model to self-review by default. This improves stability without spending the saved tokens all over again.
+
+ ## Token savings
+
+ Run the MCP tool `get_token_savings` to see how many tokens were actually handled by the cheap model during the current MCP server session.
+
+ It tracks the cheap model's prompt, completion, and total tokens, grouped by provider/model, and gives a rough `estimatedPremiumTokensAvoided`. Only tokens are counted by default; no pricing table is hardcoded, because provider prices change often.
+
+ ## Chinese-first default
+
+ Enabled by default:
+
+ ```bash
+ SIMPLE_LLM_CHINESE_DEFAULT=true
+ ```
+
+ The MCP automatically injects a Chinese-first system prompt: answer in Simplified Chinese by default, but keep code, commands, file paths, API names, model names, error messages, config keys, and English technical terms verbatim.
+
+ ## Safety boundaries
+
+ This MCP is only for low-risk, self-contained small tasks:
+
+ - `approvedForExternalApi=true` must be confirmed explicitly
+ - `dataClassification` must be provided
+ - `sensitive` data is rejected outright
+ - common API key, token, password, AWS key, and private key patterns are scanned automatically
+ - only HTTPS providers are allowed by default
+ - the default prompt cap is 12000 characters
+ - the default request timeout is 60000 ms
+ - provider errors are redacted before being returned
+
+ Do not hand secrets, sensitive customer data, full private repository context, security judgments, complex architecture decisions, or large-scale refactors to an external cheap model.
+
+ ## Why not just switch to a smaller model?
+
+ Swapping in a smaller main model saves tokens, but planning, judgment, safety boundaries, and tool orchestration all get weaker. The idea behind `cheap-llm-mcp` is that the strong model stays in charge and only small, well-defined tasks get handed off.
+
+ ## Development
+
+ ```bash
+ npm install
+ npm run ci
+ ```
package/SECURITY.md ADDED
@@ -0,0 +1,17 @@
+ # Security Policy
+
+ ## Supported Versions
+
+ Security fixes target the latest published version of `cheap-llm-mcp`.
+
+ ## Reporting a Vulnerability
+
+ Please report security issues privately through GitHub Security Advisories once the repository is created. If advisories are not available, open an issue with minimal reproduction details and avoid posting secrets, tokens, or private data.
+
+ ## Security Model
+
+ `cheap-llm-mcp` sends selected prompts to third-party model APIs. It does not make that safe automatically.
+
+ The server rejects sensitive classifications, scans for common secret patterns, requires external API approval, enforces HTTPS by default, caps prompt size, times out provider requests, and redacts provider errors.
+
+ Users remain responsible for provider trust, API key handling, and deciding what data may leave their machine.
@@ -0,0 +1,16 @@
+ [mcp_servers.cheap-llm]
+ command = "npx"
+ args = ["-y", "cheap-llm-mcp@latest"]
+
+ [mcp_servers.cheap-llm.env]
+ CHEAP_LLM_BASE_URL = "https://api.deepseek.com"
+ CHEAP_LLM_MODEL = "deepseek-chat"
+ CHEAP_LLM_CHAT_PATH = "/chat/completions"
+ SIMPLE_LLM_CHINESE_DEFAULT = "true"
+ SIMPLE_LLM_STABILITY_DEFAULT = "true"
+ SIMPLE_LLM_MAX_PROMPT_CHARS = "12000"
+ SIMPLE_LLM_TIMEOUT_MS = "60000"
+ # SIMPLE_LLM_USAGE_LOG = "C:\\path\\to\\cheap-llm-usage.jsonl"
+
+ # Add an API key here, or set it in your system environment.
+ # CHEAP_LLM_API_KEY = "sk-..."
package/dist/accounting.d.ts ADDED
@@ -0,0 +1,39 @@
+ import type { ChatCompletionResponse } from "./types.js";
+ export type UsageEntry = {
+     timestamp: string;
+     provider: string;
+     model: string;
+     promptTokens: number;
+     completionTokens: number;
+     totalTokens: number;
+     hasUsage: boolean;
+ };
+ export type UsageSummary = {
+     startedAt: string;
+     calls: number;
+     callsWithUsage: number;
+     callsWithoutUsage: number;
+     cheapModelPromptTokens: number;
+     cheapModelCompletionTokens: number;
+     cheapModelTotalTokens: number;
+     estimatedPremiumTokensAvoided: number;
+     byProviderModel: Array<{
+         provider: string;
+         model: string;
+         calls: number;
+         callsWithUsage: number;
+         callsWithoutUsage: number;
+         promptTokens: number;
+         completionTokens: number;
+         totalTokens: number;
+     }>;
+     notes: string[];
+ };
+ export declare function recordUsage(input: {
+     provider: string;
+     requestedModel: string;
+     response: ChatCompletionResponse;
+     source?: NodeJS.ProcessEnv;
+ }): void;
+ export declare function resetUsage(): void;
+ export declare function getUsageSummary(): UsageSummary;
package/dist/accounting.js ADDED
@@ -0,0 +1,79 @@
+ import { appendFile } from "node:fs/promises";
+ import { env } from "./env.js";
+ const startedAt = new Date().toISOString();
+ const entries = [];
+ function numberOrZero(value) {
+     return typeof value === "number" && Number.isFinite(value) ? value : 0;
+ }
+ export function recordUsage(input) {
+     const usage = input.response.usage;
+     const entry = {
+         timestamp: new Date().toISOString(),
+         provider: input.provider,
+         model: input.response.model ?? input.requestedModel,
+         promptTokens: numberOrZero(usage?.prompt_tokens),
+         completionTokens: numberOrZero(usage?.completion_tokens),
+         totalTokens: numberOrZero(usage?.total_tokens),
+         hasUsage: Boolean(usage)
+     };
+     entries.push(entry);
+     const logPath = env("SIMPLE_LLM_USAGE_LOG", input.source);
+     if (logPath) {
+         appendFile(logPath, `${JSON.stringify(entry)}\n`, "utf8").catch(() => {
+             // Usage logging must never break the MCP tool response.
+         });
+     }
+ }
+ export function resetUsage() {
+     entries.length = 0;
+ }
+ export function getUsageSummary() {
+     const byKey = new Map();
+     const totals = {
+         calls: 0,
+         callsWithUsage: 0,
+         callsWithoutUsage: 0,
+         cheapModelPromptTokens: 0,
+         cheapModelCompletionTokens: 0,
+         cheapModelTotalTokens: 0
+     };
+     for (const entry of entries) {
+         totals.calls += 1;
+         totals.callsWithUsage += entry.hasUsage ? 1 : 0;
+         totals.callsWithoutUsage += entry.hasUsage ? 0 : 1;
+         totals.cheapModelPromptTokens += entry.promptTokens;
+         totals.cheapModelCompletionTokens += entry.completionTokens;
+         totals.cheapModelTotalTokens += entry.totalTokens;
+         const key = `${entry.provider}\u0000${entry.model}`;
+         const bucket = byKey.get(key) ??
+             {
+                 provider: entry.provider,
+                 model: entry.model,
+                 calls: 0,
+                 callsWithUsage: 0,
+                 callsWithoutUsage: 0,
+                 promptTokens: 0,
+                 completionTokens: 0,
+                 totalTokens: 0
+             };
+         bucket.calls += 1;
+         bucket.callsWithUsage += entry.hasUsage ? 1 : 0;
+         bucket.callsWithoutUsage += entry.hasUsage ? 0 : 1;
+         bucket.promptTokens += entry.promptTokens;
+         bucket.completionTokens += entry.completionTokens;
+         bucket.totalTokens += entry.totalTokens;
+         byKey.set(key, bucket);
+     }
+     return {
+         startedAt,
+         ...totals,
+         estimatedPremiumTokensAvoided: totals.cheapModelTotalTokens,
+         byProviderModel: [...byKey.values()].sort((a, b) => b.totalTokens - a.totalTokens),
+         notes: [
+             "Only provider-reported usage is counted as actual cheap-model tokens.",
+             "estimatedPremiumTokensAvoided is a rough token-volume proxy, not a billing guarantee.",
+             "Host AI review tokens are not tracked by this MCP server."
+         ]
+     };
+ }
+ //# sourceMappingURL=accounting.js.map
package/dist/accounting.js.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"accounting.js","sourceRoot":"","sources":["../src/accounting.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAC9C,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAmC/B,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;AAC3C,MAAM,OAAO,GAAiB,EAAE,CAAC;AAEjC,SAAS,YAAY,CAAC,KAAyB;IAC7C,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;AACzE,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,KAK3B;IACC,MAAM,KAAK,GAAG,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC;IACnC,MAAM,KAAK,GAAe;QACxB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,KAAK,EAAE,KAAK,CAAC,QAAQ,CAAC,KAAK,IAAI,KAAK,CAAC,cAAc;QACnD,YAAY,EAAE,YAAY,CAAC,KAAK,EAAE,aAAa,CAAC;QAChD,gBAAgB,EAAE,YAAY,CAAC,KAAK,EAAE,iBAAiB,CAAC;QACxD,WAAW,EAAE,YAAY,CAAC,KAAK,EAAE,YAAY,CAAC;QAC9C,QAAQ,EAAE,OAAO,CAAC,KAAK,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAEpB,MAAM,OAAO,GAAG,GAAG,CAAC,sBAAsB,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC1D,IAAI,OAAO,EAAE,CAAC;QACZ,UAAU,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;YACnE,wDAAwD;QAC1D,CAAC,CAAC,CAAC;IACL,CAAC;AACH,CAAC;AAED,MAAM,UAAU,UAAU;IACxB,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;AACrB,CAAC;AAED,MAAM,UAAU,eAAe;IAC7B,MAAM,KAAK,GAAG,IAAI,GAAG,EAAmD,CAAC;IACzE,MAAM,MAAM,GAAG;QACb,KAAK,EAAE,CAAC;QACR,cAAc,EAAE,CAAC;QACjB,iBAAiB,EAAE,CAAC;QACpB,sBAAsB,EAAE,CAAC;QACzB,0BAA0B,EAAE,CAAC;QAC7B,qBAAqB,EAAE,CAAC;KACzB,CAAC;IAEF,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,CAAC,KAAK,IAAI,CAAC,CAAC;QAClB,MAAM,CAAC,cAAc,IAAI,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAChD,MAAM,CAAC,iBAAiB,IAAI,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACnD,MAAM,CAAC,sBAAsB,IAAI,KAAK,CAAC,YAAY,CAAC;QACpD,MAAM,CAAC,0BAA0B,IAAI,KAAK,CAAC,gBAAgB,CAAC;QAC5D,MAAM,CAAC,qBAAqB,IAAI,KAAK,CAAC,WAAW,CAAC;QAElD,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,QAAQ,SAAS,KAAK,CAAC,KAAK,EAAE,CAAC;QACpD,MAAM,MAAM,GACV,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC;YACd;gBACE,QAAQ,EAAE,KAAK,CAAC,QAAQ;gBACxB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,KAAK,EAAE,CAAC;gBACR,cAAc,EAAE,CAAC;gBACjB,iBAAiB,EAAE,CAAC;gBACpB,YAAY,EAAE,CAAC;gBACf,gBAAgB,EAAE,CAAC;gBACnB,WAAW,EAAE,CAAC;aACf,CAAC;QACJ,MAAM,CAAC,KAAK,IAAI,CAAC,CAAC;QAClB,MAAM,CAAC,cAAc,IAAI,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAChD,MAAM,CAAC,iBAAiB,IAAI,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACnD,MAAM,CAAC,YAAY,IAAI,KAAK,CAAC,YAAY,CAAC;QAC1C,MAAM,CAAC,gBAAgB,IAAI,KAAK,CAAC,gBAAgB,CAAC;QAClD,MAAM,CAAC,WAAW,IAAI,KAAK,CAAC,WAAW,CAAC;QACxC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IACzB,CAAC;IAED,OAAO;QACL,SAAS;QACT,GAAG,MAAM;QACT,6BAA6B,EAAE,MAAM,CAAC,qBAAqB;QAC3D,eAAe,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,GAAG,CAAC,CAAC,WAAW,CAAC;QAClF,KAAK,EAAE;YACL,uEAAuE;YACvE,uFAAuF;YACvF,2DAA2D;SAC5D;KACF,CAAC;AACJ,CAAC"}
package/dist/chat.d.ts ADDED
@@ -0,0 +1,4 @@
+ import type { AskSimpleModelInput, ChatCompletionResponse } from "./types.js";
+ export declare function buildRequestBody(input: AskSimpleModelInput, source?: NodeJS.ProcessEnv): Record<string, unknown>;
+ export declare function stringifyResult(response: ChatCompletionResponse, includeUsage: boolean): string;
+ export declare function callChatCompletion(input: AskSimpleModelInput, source?: NodeJS.ProcessEnv): Promise<string>;