@ghl-ai/aw 0.1.55 → 0.1.56-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,306 @@
1
+ /**
2
+ * AW Pricing — dynamic LLM cost estimation.
3
+ *
4
+ * Resolution order:
5
+ * 1. In-process memory cache
6
+ * 2. Disk cache (~/.aw/telemetry/pricing-cache.json, 24h TTL)
7
+ * 3. OpenRouter /api/v1/models fetch (3s timeout, no auth)
8
+ * 4. Hardcoded FALLBACK_PRICING (last resort)
9
+ * 5. null (model not found anywhere)
10
+ *
11
+ * CJS module — consistent with existing aw-ecc hook ecosystem.
12
+ */
13
+
14
+ 'use strict';
15
+
16
+ const fs = require('fs');
17
+ const path = require('path');
18
+ const os = require('os');
19
+ const https = require('https');
20
+
21
// On-disk pricing snapshot: <home>/.aw/telemetry/pricing-cache.json
const CACHE_PATH = path.join(os.homedir(), '.aw', 'telemetry', 'pricing-cache.json');

// Age (in ms) after which a pricing snapshot counts as stale: 24 hours.
const CACHE_TTL = 1000 * 60 * 60 * 24;

// Value passed as the `timeout` option of the OpenRouter request: 3 seconds, in ms.
const FETCH_TIMEOUT = 3 * 1000;

// Endpoint the pricing data is fetched from.
const OPENROUTER_URL = 'https://openrouter.ai/api/v1/models';
25
+
26
+ // ── Hardcoded fallback (last resort when API + cache both fail) ──────
27
+
28
// Static rate table used when no cached OpenRouter rate matches a model.
// Each row is [model key, input rate, output rate]; rates are USD per 1M tokens.
// The resulting object lists its keys in the same order as the rows below.
const FALLBACK_PRICING = Object.fromEntries(
  [
    // Anthropic Claude (latest generation pricing)
    ['haiku', 1.00, 5.00],
    ['sonnet', 3.00, 15.00],
    ['opus', 5.00, 25.00],
    // OpenAI — GPT
    ['gpt-5', 1.25, 10.00],
    ['gpt-5-mini', 0.25, 2.00],
    ['gpt-4o', 2.50, 10.00],
    ['gpt-4o-mini', 0.15, 0.60],
    ['gpt-4.1', 2.00, 8.00],
    ['gpt-4.1-mini', 0.40, 1.60],
    ['gpt-4.1-nano', 0.10, 0.40],
    // OpenAI — reasoning
    ['o1', 15.00, 60.00],
    ['o1-mini', 1.10, 4.40],
    ['o3', 2.00, 8.00],
    ['o3-mini', 1.10, 4.40],
    ['o4-mini', 1.10, 4.40],
    // OpenAI — Codex CLI
    ['codex-mini', 1.50, 6.00],
    ['codex-1', 1.50, 6.00],
    // OpenAI — GPT Codex (agentic coding)
    ['gpt-5.1-codex-mini', 0.25, 2.00],
    ['gpt-5.1-codex', 1.25, 10.00],
    ['gpt-5.2-codex', 1.75, 14.00],
    ['gpt-5.3-codex', 1.75, 14.00],
    // Google Gemini
    ['gemini-2.5-pro', 1.25, 10.00],
    ['gemini-2.5-flash', 0.30, 2.50],
    ['gemini-2.5-flash-lite', 0.10, 0.40],
  ].map(([modelKey, inputRate, outputRate]) => [modelKey, { in: inputRate, out: outputRate }])
);
60
+
61
+ // ── Helpers ──────────────────────────────────────────────────────────
62
+
63
/**
 * Coerce a value to a finite number.
 *
 * @param {*} value - Anything accepted by `Number()`.
 * @returns {number} `Number(value)` when that is finite, otherwise 0
 *   (covers NaN and +/-Infinity).
 */
function toNumber(value) {
  const parsed = Number(value);
  if (Number.isFinite(parsed)) {
    return parsed;
  }
  return 0;
}
67
+
68
+ // ── Memory cache ─────────────────────────────────────────────────────
69
+
70
// Pricing snapshot held in memory for this process; null until one is loaded.
// Shape when set: { models: { [key]: { in, out } }, fetched_at: string }
let _pricingCache = null;
71
+
72
+ // ── Disk cache ───────────────────────────────────────────────────────
73
+
74
/**
 * Load the pricing snapshot stored at CACHE_PATH.
 *
 * @returns {object|null} The parsed JSON value when it has truthy `models`
 *   and `fetched_at` fields; null when the file cannot be read, is not valid
 *   JSON, or lacks either field.
 */
function readDiskCache() {
  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(CACHE_PATH, 'utf8'));
  } catch {
    // Missing or corrupt file — handled the same as having no cache.
    return null;
  }

  const hasRequiredFields = Boolean(parsed && parsed.models && parsed.fetched_at);
  return hasRequiredFields ? parsed : null;
}
84
+
85
/**
 * Persist a pricing snapshot to CACHE_PATH as pretty-printed JSON
 * (2-space indent, trailing newline), creating the parent directory first.
 *
 * Best effort: any filesystem or serialization error is swallowed.
 *
 * @param {object} data - Snapshot to serialize.
 * @returns {void}
 */
function writeDiskCache(data) {
  try {
    const cacheDir = path.dirname(CACHE_PATH);
    fs.mkdirSync(cacheDir, { recursive: true });

    const serialized = `${JSON.stringify(data, null, 2)}\n`;
    fs.writeFileSync(CACHE_PATH, serialized);
  } catch {
    // Deliberately ignored — failing to persist must not affect the caller.
  }
}
91
+
92
/**
 * Decide whether a pricing snapshot is too old to trust.
 *
 * @param {{ fetched_at?: string }|null|undefined} cacheData - Snapshot to check.
 * @returns {boolean} true when the snapshot is missing, has no `fetched_at`,
 *   has a `fetched_at` that does not parse as a date, or is older than
 *   CACHE_TTL milliseconds; false otherwise.
 */
function isCacheStale(cacheData) {
  if (!cacheData || !cacheData.fetched_at) return true;

  const fetchedAtMs = new Date(cacheData.fetched_at).getTime();
  // An unparseable timestamp yields NaN, and `NaN > CACHE_TTL` is false, which
  // would make a corrupt snapshot look fresh forever. Treat it as stale.
  if (!Number.isFinite(fetchedAtMs)) return true;

  return Date.now() - fetchedAtMs > CACHE_TTL;
}
97
+
98
+ // ── OpenRouter fetch ─────────────────────────────────────────────────
99
+
100
/**
 * Convert the `data` array of an OpenRouter models response into a pricing
 * map of per-1M-token rates.
 *
 * Each usable entry is stored under its full id (e.g.
 * "anthropic/claude-3.5-sonnet") and, when the id contains a slash, also under
 * the part after the first slash (e.g. "claude-3.5-sonnet"). For slugs, the
 * first entry that produces a given slug wins.
 *
 * Entries are skipped (never thrown on) when they are not objects, lack a
 * non-empty string `id`, lack `pricing`, or carry a prompt/completion price
 * that is non-numeric or negative.
 *
 * @param {*} data - Expected to be an array of `{ id, pricing: { prompt, completion } }`.
 * @returns {Object<string, {in: number, out: number}>} Pricing map; empty when
 *   `data` is not an array or holds no usable entries.
 */
function normalizeOpenRouterResponse(data) {
  const models = {};
  if (!Array.isArray(data)) return models;

  for (const entry of data) {
    // A single malformed element must not abort the whole response.
    if (!entry || typeof entry !== 'object') continue;

    const { id, pricing } = entry;
    if (typeof id !== 'string' || id === '' || !pricing) continue;

    const prompt = parseFloat(pricing.prompt);
    const completion = parseFloat(pricing.completion);
    if (!Number.isFinite(prompt) || !Number.isFinite(completion)) continue;
    // A negative price cannot be a real per-token rate and would turn into a
    // negative cost estimate downstream.
    if (prompt < 0 || completion < 0) continue;

    // Per-token → per-1M-token
    const inRate = prompt * 1_000_000;
    const outRate = completion * 1_000_000;

    models[id] = { in: inRate, out: outRate };

    const slash = id.indexOf('/');
    if (slash > 0) {
      const slug = id.substring(slash + 1);
      // Own-property check so inherited names (e.g. "constructor") are not
      // mistaken for an already-registered slug.
      if (!Object.prototype.hasOwnProperty.call(models, slug)) {
        models[slug] = { in: inRate, out: outRate };
      }
    }
  }

  return models;
}
130
+
131
/**
 * Fetch pricing from OpenRouter (GET OPENROUTER_URL, no auth headers).
 *
 * The returned promise never rejects and settles exactly once:
 *  - `{ fetched_at, model_count, models }` on a 200 response whose body parses
 *    and normalizes to at least one model;
 *  - `null` on a non-200 status, request error, socket timeout
 *    (FETCH_TIMEOUT ms), a response stream that errors or closes before it
 *    ends, unparseable JSON, or an empty model list.
 *
 * @returns {Promise<{fetched_at: string, model_count: number, models: object}|null>}
 */
function fetchPricing() {
  return new Promise((resolve) => {
    // Several events can fire for one failure (e.g. 'timeout' followed by the
    // 'error' that destroy() triggers); only the first outcome is reported.
    let settled = false;
    const finish = (value) => {
      if (settled) return;
      settled = true;
      resolve(value);
    };

    const req = https.get(OPENROUTER_URL, { timeout: FETCH_TIMEOUT }, (res) => {
      if (res.statusCode !== 200) {
        res.resume(); // drain
        finish(null);
        return;
      }

      let body = '';
      res.setEncoding('utf8');
      res.on('data', (chunk) => { body += chunk; });
      res.on('end', () => {
        try {
          const json = JSON.parse(body);
          const models = normalizeOpenRouterResponse(json.data);
          const modelCount = Object.keys(models).length;
          if (modelCount === 0) {
            finish(null);
            return;
          }
          finish({
            fetched_at: new Date().toISOString(),
            model_count: modelCount,
            models,
          });
        } catch {
          finish(null);
        }
      });
      // If the connection drops mid-body, 'end' never fires. Without these
      // handlers the promise would stay pending forever. After a normal 'end'
      // the later 'close' is a no-op because the promise is already settled.
      res.on('error', () => finish(null));
      res.on('close', () => finish(null));
    });

    req.on('error', () => finish(null));
    req.on('timeout', () => {
      // The 'timeout' event only signals idleness; the request must be torn
      // down explicitly.
      req.destroy();
      finish(null);
    });
  });
}
171
+
172
// True while a background refresh started by refreshPricingAsync() is pending.
let _refreshInFlight = false;

/**
 * Non-blocking background refresh. Fetches pricing and, when the fetch yields
 * data, updates both the disk cache and the in-memory cache.
 *
 * Calls made while a refresh is already pending are ignored, so callers that
 * invoke this repeatedly (e.g. once per cost estimate while the cache is
 * missing or stale) start at most one request at a time.
 *
 * Never throws, never blocks the caller.
 *
 * @returns {void}
 */
function refreshPricingAsync() {
  if (_refreshInFlight) return;
  _refreshInFlight = true;

  fetchPricing()
    .then((data) => {
      if (data) {
        writeDiskCache(data);
        _pricingCache = data;
      }
    })
    .catch(() => { /* swallow — non-blocking */ })
    .finally(() => {
      // Allow a later call to retry, whether this attempt succeeded or not.
      _refreshInFlight = false;
    });
}
184
+
185
+ // ── Load pricing (sync) ─────────────────────────────────────────────
186
+
187
/**
 * Return the best pricing snapshot available without waiting on the network.
 *
 * Lookup order:
 *  1. the in-memory snapshot, if present and not stale;
 *  2. the disk snapshot, which also becomes the in-memory snapshot; if it is
 *     stale it is still returned, and a background refresh is started;
 *  3. otherwise a background refresh is started and null is returned.
 *
 * @returns {object|null} Pricing snapshot, or null when nothing is cached yet.
 */
function loadPricingSync() {
  const memoryIsFresh = Boolean(_pricingCache) && !isCacheStale(_pricingCache);
  if (memoryIsFresh) {
    return _pricingCache;
  }

  const fromDisk = readDiskCache();
  if (!fromDisk) {
    // Nothing cached anywhere: kick off a fetch so a later call can succeed.
    refreshPricingAsync();
    return null;
  }

  _pricingCache = fromDisk;
  if (isCacheStale(fromDisk)) {
    // Serve the stale snapshot now; fresher data arrives for later calls.
    refreshPricingAsync();
  }
  return _pricingCache;
}
212
+
213
+ // ── Model matching ───────────────────────────────────────────────────
214
+
215
/**
 * Fuzzy-match a model string against a pricing map (case-insensitive).
 *
 * Match priority:
 *  1. exact own-property match on the lower-cased model string;
 *  2. the LONGEST key that is a substring of the model string, so that
 *     "gpt-4o-mini-2024-07-18" resolves to "gpt-4o-mini" rather than "gpt-4o";
 *  3. the SHORTEST key that contains the model string (closest superset).
 * Ties within a tier go to the key that appears first in the map. Empty keys
 * are ignored.
 *
 * @param {string} model - Model identifier to look up.
 * @param {Object<string, {in: number, out: number}>} pricingMap - Rates by key.
 * @returns {{in: number, out: number}|null} Matching rates, or null when
 *   `model` or `pricingMap` is falsy or nothing matches.
 */
function findRates(model, pricingMap) {
  if (!model || !pricingMap) return null;
  const normalized = String(model).toLowerCase();

  // Own-property check: names such as "constructor" must not resolve to
  // members inherited from Object.prototype.
  if (
    Object.prototype.hasOwnProperty.call(pricingMap, normalized) &&
    pricingMap[normalized]
  ) {
    return pricingMap[normalized];
  }

  let keyInModel = null; // rates of the longest key contained in the model string
  let keyInModelLength = 0;
  let modelInKey = null; // rates of the shortest key containing the model string
  let modelInKeyLength = Infinity;

  for (const [key, rates] of Object.entries(pricingMap)) {
    const lowerKey = key.toLowerCase();
    if (lowerKey === '') continue; // '' is a substring of everything

    if (normalized.includes(lowerKey)) {
      if (lowerKey.length > keyInModelLength) {
        keyInModel = rates;
        keyInModelLength = lowerKey.length;
      }
    } else if (lowerKey.includes(normalized) && lowerKey.length < modelInKeyLength) {
      modelInKey = rates;
      modelInKeyLength = lowerKey.length;
    }
  }

  if (keyInModelLength > 0) return keyInModel;
  if (modelInKeyLength !== Infinity) return modelInKey;
  return null;
}
236
+
237
+ // ── estimateCost ─────────────────────────────────────────────────────
238
+
239
/**
 * Estimate the cost in USD for a model call.
 *
 * Rates are looked up first in the cached OpenRouter pricing (via
 * loadPricingSync) and then in FALLBACK_PRICING. Rates are per 1M tokens.
 * Token counts are coerced with toNumber, so missing or non-numeric counts
 * count as 0 instead of producing NaN.
 *
 * When cache token counts are given, input is split three ways:
 * non-cached tokens at the input rate, cache reads at 10% of it, and cache
 * writes at 125% of it. The non-cached share is clamped at 0.
 *
 * @param {string} model - Model identifier (e.g. "claude-sonnet-4-20250514", "gpt-4o")
 * @param {number} inputTokens - Total input/prompt token count (includes cache tokens)
 * @param {number} outputTokens - Output/completion token count
 * @param {object} [opts] - Optional cache token breakdown
 * @param {number} [opts.cacheReadTokens] - Tokens served from cache
 * @param {number} [opts.cacheWriteTokens] - Tokens written to cache
 * @returns {number|null} Estimated cost in USD rounded to 6 decimals, or null
 *   if the model is unknown or both token counts are zero/missing
 */
function estimateCost(model, inputTokens, outputTokens, opts) {
  const input = toNumber(inputTokens);
  const output = toNumber(outputTokens);
  if (!input && !output) return null;

  const cacheRead = toNumber(opts && opts.cacheReadTokens);
  const cacheWrite = toNumber(opts && opts.cacheWriteTokens);

  function computeCost(rates) {
    if (!rates) return null;
    const inRate = rates.in;
    const outRate = rates.out;
    // A malformed rate entry is treated as "no rates" so the caller can fall
    // through to the next pricing source rather than return NaN.
    if (!Number.isFinite(inRate) || !Number.isFinite(outRate)) return null;

    const hasCacheBreakdown = Boolean(cacheRead || cacheWrite);
    const nonCached = hasCacheBreakdown
      ? Math.max(0, input - cacheRead - cacheWrite)
      : input;

    const cost =
      (nonCached / 1_000_000) * inRate +
      (cacheRead / 1_000_000) * (inRate * 0.1) +
      (cacheWrite / 1_000_000) * (inRate * 1.25) +
      (output / 1_000_000) * outRate;

    // Round to micro-dollars to hide floating-point noise.
    return Math.round(cost * 1e6) / 1e6;
  }

  // Try dynamic pricing first (OpenRouter cache)
  const cached = loadPricingSync();
  if (cached && cached.models) {
    const dynamicCost = computeCost(findRates(model, cached.models));
    if (dynamicCost !== null) return dynamicCost;
  }

  // Fallback to hardcoded pricing; null when the model is not found anywhere
  return computeCost(findRates(model, FALLBACK_PRICING));
}
289
+
290
+ module.exports = {
291
+ estimateCost,
292
+ toNumber,
293
+ loadPricingSync,
294
+ refreshPricingAsync,
295
+ FALLBACK_PRICING,
296
+ // Exposed for testing
297
+ _test: {
298
+ readDiskCache,
299
+ writeDiskCache,
300
+ isCacheStale,
301
+ normalizeOpenRouterResponse,
302
+ findRates,
303
+ CACHE_PATH,
304
+ CACHE_TTL,
305
+ },
306
+ };