vibeostheog 0.20.15 → 0.20.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ ## 0.20.16
2
+ - fix: skip cache savings for free models + add modelCostPerTurn fallback + regression tests
3
+ - fix: wire incrementTurnCounter into onToolExecuteAfter so session compaction fires at turn 7+
4
+ - fix: make tests resilient in CI environment
5
+ - perf: add MODEL_PRICING_PER_1M with per-provider input/output rates
6
+ - perf: provider-aware cache savings with isModelFree gate + regression tests
7
+ - perf: dynamic cache savings rate from per-model input pricing
8
+ - perf: record cache savings for compressed tool outputs (write path)
9
+ - ci: retrigger checks for merge
10
+ Merge pull request #92 from DrunkkToys/pr/regression-tests-cache-savings
11
+ Merge pull request #91 from DrunkkToys/pr/cache-write-savings
12
+
13
+
1
14
  ## 0.20.15
2
15
  - feat: dashboard blackbox telemetry — bidirectional BE/FE sync
3
16
  - fix: mock auth and clear OPENCODE_MODEL in bootstrap test, commit blackbox .js for CI
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vibeostheog",
3
- "version": "0.20.15",
3
+ "version": "0.20.16",
4
4
  "description": "Cost-aware delegation enforcer for OpenCode. Tracks model usage, routes Task subagents to cheaper tiers, surfaces cumulative savings in chat. Includes research audit, reporting framework, project memory, progressive scratchpad decadence, and trinity CLI for brain/medium/cheap slot switching.",
5
5
  "scripts": {
6
6
  "release": "node scripts/release.mjs",
@@ -2,8 +2,8 @@
2
2
  import { readFileSync, writeFileSync, appendFileSync, existsSync, mkdirSync } from "node:fs";
3
3
  import { join, basename } from "node:path";
4
4
  import { createHash } from "node:crypto";
5
- import { currentModel, currentProjectFingerprint, currentProjectName, _blackboxEnabled, loadSelection, writeSelection, safeJsonParse, applyDecadence, getSessionScratchpadDir, ensureSessionScratchpadDirs, indexAppend, briefedProjects, getActiveJobForProject, loadTodos, promotedProjectPatterns, detectTechStack, projectFingerprint, TRINITY_OPENCODE_CONFIG, TIERS_FILE, loadGlobalLearning, setCurrentProjectFingerprint, setCurrentProjectName, stableJson, TOOL_NAME_NORMALIZE, _cacheDb, } from "../state.js";
6
- import { applySlot, TRINITY_CHEAP, TRINITY_MEDIUM, } from "../pricing.js";
5
+ import { currentModel, currentProjectFingerprint, currentProjectName, _blackboxEnabled, loadSelection, writeSelection, safeJsonParse, applyDecadence, getSessionScratchpadDir, ensureSessionScratchpadDirs, indexAppend, briefedProjects, getActiveJobForProject, loadTodos, promotedProjectPatterns, detectTechStack, projectFingerprint, TRINITY_OPENCODE_CONFIG, TIERS_FILE, loadGlobalLearning, setCurrentProjectFingerprint, setCurrentProjectName, stableJson, TOOL_NAME_NORMALIZE, _cacheDb, recordCacheSaving, } from "../state.js";
6
+ import { applySlot, TRINITY_CHEAP, TRINITY_MEDIUM, cacheSavePer1MInputTokens, } from "../pricing.js";
7
7
  import { scoreStress, classifyTurnSimple, loadOptimizationMode, saveOptimizationMode, selectOptimizationModeRemote, computeControlVector, getBlackboxTracker, loadBlackboxState as loadBlackboxStateFromCtx, saveBlackboxState as saveBlackboxStateToCtx, extractLastUserText, isLikelyOffTopic, fetchBlackboxEnrichment, estimateContextBudget, buildControlHistoryEntry, } from "../turn-classify.js";
8
8
  import { applyBudgetFirstMode, peekBudgetFirstMode } from "../mode-policy.js";
9
9
  import { addCacheEntry, extractRecentCacheOutputs } from "../../vibeOS-lib/smart-cache.js";
@@ -14,6 +14,7 @@ import { noteProjectPattern } from "../index-helpers.js";
14
14
  import { saveSessionStress } from "../index-helpers.js";
15
15
  import { COMPRESS_THRESHOLD, KEEP_HOT, COMPRESS_MARKER, PROTOCOL_MARKER, PROTOCOL_TEXT } from "../constants.js";
16
16
  import { TEMPLATES, DEFAULT_TEMPLATE, resolveTemplate, shouldInjectTemplate } from "../templates.js";
17
+ const BYTES_PER_TOKEN = 4;
17
18
  function getVibeOSHome() {
18
19
  return process.env.VIBEOS_HOME || join(process.env.HOME || "", ".claude");
19
20
  }
@@ -308,6 +309,13 @@ function compressToolOutputs(messages) {
308
309
  `[summary] ${summary}`;
309
310
  state.output = ref;
310
311
  compressedBytes += raw.length - ref.length;
312
+ const toolKey = TOOL_NAME_NORMALIZE[part.tool] || part.tool;
313
+ const rate = cacheSavePer1MInputTokens(currentModel);
314
+ if (rate > 0) {
315
+ const inputTokens = Math.max(1, Math.round((raw.length - ref.length) / BYTES_PER_TOKEN));
316
+ const saveEst = Math.max(0.0001, Math.round(inputTokens * rate / 1_000_000 * 10000) / 10000);
317
+ recordCacheSaving(toolKey, saveEst, { hash });
318
+ }
311
319
  console.error(`[vibeOS] ctx-compress: ${raw.length}\u2192${ref.length} chars (hash: ${hash})`);
312
320
  }
313
321
  }
@@ -3,9 +3,9 @@ import { writeFileSync, appendFileSync, existsSync, mkdirSync } from "node:fs";
3
3
  import { join, dirname, basename } from "node:path";
4
4
  import { createHash } from "node:crypto";
5
5
  import { currentTier, currentModel, setCurrentModel, setCurrentTier, _OC_SID, _modelLocked, loadSelection, readLifetimeSavings, recordCacheSaving, recordMissedContext7, getScratchpadHit, recordScratchpadObservation, recordPrivacyTelemetry, updateState, getSessionScratchpadDir, ensureSessionScratchpadDirs, SAVINGS_LEDGER_FILE, CONTEXT7_INSTALL_FLAG, SOFT_QUOTA_LIMIT, upsertTodo, ML_ENABLED, _mlGraph, _cacheDb, _mlSavePending, ML_CONFIDENCE_THRESHOLD, setMlSavePending, saveMLState, SCRATCHPAD_TOOLS, SCRATCHPAD_GLOBAL_DIR, TOOL_NAME_NORMALIZE, stableJson, applyDecadence, } from "../state.js";
6
- import { classify, modelCostPerTurn, isModelFree, detectContext7, isDocsTarget, shortModelName, formatUsd, _refreshModel, readConfig, resolveDisplayModelId, TRINITY_CHEAP, TRINITY_MEDIUM, trendDisplay, modelToSlotLabel, resolveExecutionIdentity, formatProviderName, formatQualityName, } from "../pricing.js";
6
+ import { classify, modelCostPerTurn, isModelFree, detectContext7, isDocsTarget, shortModelName, formatUsd, _refreshModel, readConfig, resolveDisplayModelId, TRINITY_CHEAP, TRINITY_MEDIUM, cacheSavePer1MInputTokens, trendDisplay, modelToSlotLabel, resolveExecutionIdentity, formatProviderName, formatQualityName, } from "../pricing.js";
7
7
  import { latestUserIntent } from "./chat-transform.js";
8
- import { scoreStress, extractFirstWordFromArgs, shouldLogWarn, isUserAskingForTests, resolveEnforcementMode, getLearnedExploratoryWords, noteTaskRoutingLearning, } from "../turn-classify.js";
8
+ import { scoreStress, extractFirstWordFromArgs, shouldLogWarn, isUserAskingForTests, resolveEnforcementMode, getLearnedExploratoryWords, noteTaskRoutingLearning, incrementTurnCounter, } from "../turn-classify.js";
9
9
  import { saveReport } from "../reporting.js";
10
10
  import { loadCredit } from "../credit-api.js";
11
11
  import { remoteCall, VIBEOS_API_ENABLED } from "../api-client.js";
@@ -17,7 +17,6 @@ import { setActiveJobFromTaskPrompt, observeToolPattern, compressText, recordSav
17
17
  import { scoreTaskQuality, readRewardSignals } from "./footer.js";
18
18
  import { SAVE_EST, WARN_ON_DIRECT, SOFT_QUOTA, FREE, MONITOR } from "../constants.js";
19
19
  const BYTES_PER_TOKEN = 4;
20
- const CACHE_SAVED_PER_1M_INPUT_TOKENS = 0.10;
21
20
  const DEBUG_INTERNALS = process.env.VIBEOS_DEBUG_INTERNALS === "1";
22
21
  const IS_CLI_RUNTIME = Boolean(process.stdout?.isTTY || process.stderr?.isTTY || process.stdin?.isTTY);
23
22
  function getVibeOSHome() {
@@ -247,8 +246,12 @@ export const onToolExecuteBefore = async (input, output) => {
247
246
  // Persist cache savings as a first-class savings type.
248
247
  // Compute from actual scratchpad file size: inputs that would
249
248
  // have been charged at miss rate are served from cache.
250
- const _inputTokens = Math.max(1, Math.round(hit.sizeBytes / BYTES_PER_TOKEN));
251
- _cacheSave = Math.max(0.0001, Math.round(_inputTokens * CACHE_SAVED_PER_1M_INPUT_TOKENS / 1_000_000 * 10000) / 10000);
249
+ const rate = cacheSavePer1MInputTokens(currentModel);
250
+ _cacheSave = 0;
251
+ if (rate > 0) {
252
+ const _inputTokens = Math.max(1, Math.round(hit.sizeBytes / BYTES_PER_TOKEN));
253
+ _cacheSave = Math.max(0.0001, Math.round(_inputTokens * rate / 1_000_000 * 10000) / 10000);
254
+ }
252
255
  const cacheSaved = recordCacheSaving(t, _cacheSave, { hash: hit.hash });
253
256
  const sumNote = hit.summaryPath ? ` (summary: ${hit.summaryPath})` : "";
254
257
  const cacheNote = cacheSaved ? `, cache+$${(cacheSaved.lifetime || 0).toFixed(3)} lt` : "";
@@ -604,6 +607,11 @@ export const onToolExecuteAfter = async (input, output) => {
604
607
  }
605
608
  }
606
609
  catch { }
610
+ // ── Increment turn counter for compaction trigger ──
611
+ try {
612
+ incrementTurnCounter();
613
+ }
614
+ catch { }
607
615
  // ── Generate footer alert (prepended to tool result, visible in chat) ──
608
616
  let _footerText = "";
609
617
  try {
@@ -683,6 +691,11 @@ export const onToolExecuteAfter = async (input, output) => {
683
691
  }
684
692
  }
685
693
  catch { }
694
+ // ── Increment turn counter for compaction trigger ──
695
+ try {
696
+ incrementTurnCounter();
697
+ }
698
+ catch { }
686
699
  // ── End footer ──
687
700
  const t = input?.tool ?? "";
688
701
  if (t === "trinity") {
@@ -260,6 +260,50 @@ export function trendDisplay(sesTrend) {
260
260
  const CACHE_SAVED_PER_1M_INPUT_TOKENS = 0.10;
261
261
  // Approximate bytes per token for JSON/text content (varies 3-6, use 4 as safe estimate).
262
262
  const BYTES_PER_TOKEN = 4;
263
+ export function parseOpenRouterInputPer1M(modelRow) {
264
+ const p = modelRow?.pricing || {};
265
+ const inTok = Number(p.prompt ?? p.input ?? p.request);
266
+ if (Number.isFinite(inTok) && inTok > 0) {
267
+ return Math.round(inTok * 1_000_000 * 10000) / 10000;
268
+ }
269
+ return null;
270
+ }
271
+ export function cacheSavePer1MInputTokens(model) {
272
+ if (!model)
273
+ return CACHE_SAVED_PER_1M_INPUT_TOKENS;
274
+ if (isModelFree(model))
275
+ return 0;
276
+ const rawKey = String(model || "");
277
+ const key = normalizeModelId(model);
278
+ const rawNoPrefix = rawKey.includes("/") ? rawKey.split("/")[rawKey.split("/").length - 1] : rawKey;
279
+ try {
280
+ const cache = _loadDynamicPricingCache();
281
+ for (const candidate of [rawKey, key, rawNoPrefix]) {
282
+ const entry = cache[candidate];
283
+ const rate = parseOpenRouterInputPer1M(entry);
284
+ if (rate !== null)
285
+ return rate;
286
+ }
287
+ for (const [ck, cv] of Object.entries(cache)) {
288
+ if (ck.endsWith("/" + rawNoPrefix)) {
289
+ const rate = parseOpenRouterInputPer1M(cv);
290
+ if (rate !== null)
291
+ return rate;
292
+ }
293
+ }
294
+ }
295
+ catch { }
296
+ for (const candidate of [rawKey, key, rawNoPrefix]) {
297
+ const known = MODEL_PRICING_PER_1M[candidate];
298
+ if (known && Number.isFinite(known.input))
299
+ return known.input;
300
+ }
301
+ const turnCost = modelCostPerTurn(model);
302
+ if (Number.isFinite(turnCost) && turnCost > 0) {
303
+ return Math.round(turnCost * 375 * 100) / 100;
304
+ }
305
+ return CACHE_SAVED_PER_1M_INPUT_TOKENS;
306
+ }
263
307
  export function roundUsd(v, precision = 6) {
264
308
  const n = Number(v ?? 0);
265
309
  if (!Number.isFinite(n))
@@ -284,6 +328,89 @@ export function formatUsd(v) {
284
328
  // deepseek-chat is free with a DeepSeek API token — priced at $1e-12 (near-zero).
285
329
  const FREE_MODEL_TURN_USD = 1e-10;
286
330
  const FREE_MODELS = new Set([]);
331
+ // Actual input / output pricing per 1M tokens, sourced from provider API pages
332
+ // and OpenRouter /api/v1/models. Format: USD per 1 million tokens.
333
+ // Entries with provider/ prefix = OpenRouter route; without prefix = native provider.
334
+ const MODEL_PRICING_PER_1M = {
335
+ // ── Anthropic (native + OpenRouter) ─────────────────────
336
+ "anthropic/claude-opus-4-8-fast": { input: 10.0, output: 50.0 },
337
+ "anthropic/claude-opus-4-8": { input: 5.0, output: 25.0 },
338
+ "anthropic/claude-opus-4-7-fast": { input: 30.0, output: 150.0 },
339
+ "anthropic/claude-opus-4-7": { input: 5.0, output: 25.0 },
340
+ "anthropic/claude-opus-4-6-fast": { input: 30.0, output: 150.0 },
341
+ "anthropic/claude-opus-4-6": { input: 5.0, output: 25.0 },
342
+ "anthropic/claude-opus-4-5": { input: 5.0, output: 25.0 },
343
+ "anthropic/claude-opus-4.1": { input: 15.0, output: 75.0 },
344
+ "anthropic/claude-opus-4": { input: 15.0, output: 75.0 },
345
+ "anthropic/claude-sonnet-4-6": { input: 3.0, output: 15.0 },
346
+ "anthropic/claude-sonnet-4-5": { input: 3.0, output: 15.0 },
347
+ "anthropic/claude-sonnet-4": { input: 3.0, output: 15.0 },
348
+ "anthropic/claude-haiku-4-5": { input: 1.0, output: 5.0 },
349
+ "anthropic/claude-3.5-haiku": { input: 0.80, output: 4.0 },
350
+ "anthropic/claude-3-haiku": { input: 0.25, output: 1.25 },
351
+ "haiku": { input: 0.80, output: 4.0 },
352
+ // ── DeepSeek (native — free for chat, paid for pro/flash/r1) ──
353
+ "deepseek-chat": { input: 0, output: 0 }, // native → free
354
+ "deepseek-reasoner": { input: 0.55, output: 2.19 }, // native r1
355
+ // ── DeepSeek (OpenRouter route) ────────────────────────
356
+ "deepseek/deepseek-v4-pro": { input: 0.435, output: 0.870 },
357
+ "deepseek/deepseek-v4-flash": { input: 0.098, output: 0.197 },
358
+ "deepseek/deepseek-chat": { input: 0.229, output: 0.914 },
359
+ "deepseek/deepseek-v3.2": { input: 0.252, output: 0.378 },
360
+ "deepseek/deepseek-v3.2-exp": { input: 0.270, output: 0.410 },
361
+ "deepseek/deepseek-chat-v3.1": { input: 0.210, output: 0.790 },
362
+ "deepseek/deepseek-chat-v3-0324": { input: 0.200, output: 0.770 },
363
+ "deepseek/deepseek-v3.1-terminus": { input: 0.270, output: 0.950 },
364
+ "deepseek/deepseek-r1-0528": { input: 0.500, output: 2.150 },
365
+ "deepseek/deepseek-r1": { input: 0.700, output: 2.500 },
366
+ "deepseek/deepseek-r1-distill-qwen-32b": { input: 0.290, output: 0.290 },
367
+ "deepseek/deepseek-r1-distill-llama-70b": { input: 0.70, output: 0.80 },
368
+ "deepseek/deepseek-v3": { input: 0.252, output: 0.378 },
369
+ "deepseek/haiku": { input: 0.80, output: 4.0 },
370
+ // ── Google Gemini (OpenRouter route) ──────────────────
371
+ "google/gemini-2.5-pro": { input: 1.25, output: 10.0 },
372
+ "google/gemini-2.5-flash": { input: 0.30, output: 2.50 },
373
+ "google/gemini-2.5-flash-lite": { input: 0.10, output: 0.40 },
374
+ "google/gemini-2.0-flash-001": { input: 0.10, output: 0.40 },
375
+ "google/gemini-2.0-flash-lite-001": { input: 0.075, output: 0.30 },
376
+ "google/gemma-4-31b-it": { input: 0.12, output: 0.37 },
377
+ "google/gemma-4-26b-a4b-it": { input: 0.06, output: 0.33 },
378
+ // ── OpenAI (OpenRouter route) ─────────────────────────
379
+ "openai/gpt-5.5-pro": { input: 30.0, output: 180.0 },
380
+ "openai/gpt-5.5": { input: 5.0, output: 30.0 },
381
+ "openai/gpt-5.4-pro": { input: 30.0, output: 180.0 },
382
+ "openai/gpt-5.4": { input: 2.50, output: 15.0 },
383
+ "openai/gpt-5.4-mini": { input: 0.75, output: 4.50 },
384
+ "openai/gpt-5.4-nano": { input: 0.20, output: 1.25 },
385
+ "openai/gpt-5.3-chat": { input: 1.75, output: 14.0 },
386
+ "openai/gpt-5.3-codex": { input: 1.75, output: 14.0 },
387
+ "openai/gpt-5.2": { input: 1.75, output: 14.0 },
388
+ "openai/gpt-5.2-pro": { input: 21.0, output: 168.0 },
389
+ "openai/gpt-5.1": { input: 1.25, output: 10.0 },
390
+ "openai/gpt-5": { input: 1.25, output: 10.0 },
391
+ "openai/gpt-5-mini": { input: 0.25, output: 2.00 },
392
+ "openai/gpt-5-nano": { input: 0.05, output: 0.40 },
393
+ "openai/gpt-4o": { input: 2.50, output: 10.0 },
394
+ "openai/gpt-4o-mini": { input: 0.15, output: 0.60 },
395
+ "openai/gpt-4.1": { input: 2.00, output: 8.00 },
396
+ "openai/gpt-4.1-mini": { input: 0.40, output: 1.60 },
397
+ "openai/gpt-4.1-nano": { input: 0.10, output: 0.40 },
398
+ "openai/o4-mini": { input: 1.10, output: 4.40 },
399
+ "openai/o4-mini-high": { input: 1.10, output: 4.40 },
400
+ "openai/o3-pro": { input: 20.0, output: 80.0 },
401
+ "openai/o3": { input: 2.00, output: 8.00 },
402
+ "openai/o3-mini": { input: 1.10, output: 4.40 },
403
+ "openai/o1-pro": { input: 150.0, output: 600.0 },
404
+ "openai/o1": { input: 15.0, output: 60.0 },
405
+ "openai/gpt-4-turbo": { input: 10.0, output: 30.0 },
406
+ "openai/gpt-4": { input: 30.0, output: 60.0 },
407
+ "openai/gpt-3.5-turbo": { input: 0.50, output: 1.50 },
408
+ // ── Mistral (OpenRouter route) ────────────────────────
409
+ "mistralai/mistral-medium-3-5": { input: 1.50, output: 7.50 },
410
+ "mistralai/mistral-large-2512": { input: 0.50, output: 1.50 },
411
+ "mistralai/mistral-small-2603": { input: 0.15, output: 0.60 },
412
+ "mistralai/mistral-nemo": { input: 0.02, output: 0.03 },
413
+ };
287
414
  // Approximate USD per typical ~1 K-token turn (blended input+output).
288
415
  // Blend: 700 input + 300 output tokens per turn (line 272-273).
289
416
  // Sources: provider API pricing pages, OpenRouter /api/v1/models.
@@ -518,7 +645,14 @@ export function modelCostPerTurn(model) {
518
645
  if (key.startsWith(k) && /-\d+$/.test(k) && key.charAt(k.length) === "-")
519
646
  return v;
520
647
  }
521
- // Log unknown models so we can add entries
648
+ // Fallback: derive blended turn cost from MODEL_PRICING_PER_1M input/output rates
649
+ for (const candidate of [model, key, bare]) {
650
+ const pricing = MODEL_PRICING_PER_1M[candidate];
651
+ if (pricing && Number.isFinite(pricing.input) && Number.isFinite(pricing.output)) {
652
+ const blended = (pricing.input * 700 + pricing.output * 300) / 1_000_000;
653
+ return Number.isFinite(blended) ? blended : FREE_MODEL_TURN_USD;
654
+ }
655
+ }
522
656
  console.error(`[vibeOS] modelCostPerTurn: unknown model '${model}' (normalized: '${key}') — add to MODEL_USD_PER_TURN`);
523
657
  return FREE_MODEL_TURN_USD;
524
658
  }
@@ -3,11 +3,11 @@
3
3
  // @ts-nocheck
4
4
  // Blackbox — theWay decision core ported to TypeScript.
5
5
  // Barrel export for all blackbox modules.
6
- export { buildAdvice, buildDecisionBlock, computeModality, humanReadableAction, compressMetrics, compressUncertainty, compressEntropy, enforceClosure, stabilityScore, shouldUseFastPath, buildCautionNote, scoreUsefulness, getFallbackPlan, getActionSuggestion, getCuriosityPrompt } from "./advice-layer.js";
7
- export { classifySituation, getActions, recommendAction, getSituationTypes } from "./taxonomy.js";
6
+ export { buildAdvice, buildDecisionBlock, computeModality, humanReadableAction, compressMetrics, compressUncertainty, compressEntropy, enforceClosure, stabilityScore, shouldUseFastPath, buildCautionNote, scoreUsefulness, getFallbackPlan, getActionSuggestion, getCuriosityPrompt, } from "./advice-layer.js";
7
+ export { classifySituation, getActions, recommendAction, getSituationTypes, } from "./taxonomy.js";
8
8
  export { ResolutionTracker } from "./resolution-tracker.js";
9
9
  export { ExposureModel } from "./exposure-model.js";
10
- export { ACTION_TARGET, ACTION_TYPE, FALLBACK_PLANS, ACTION_SUGGESTIONS, CURIOSITY_PROMPTS } from "./crew-constants.js";
11
- export { computeControlVector, buildControlHistoryEntry, REGIME_CONTROL_TABLE } from "./meta-controller.js";
10
+ export { ACTION_TARGET, ACTION_TYPE, FALLBACK_PLANS, ACTION_SUGGESTIONS, CURIOSITY_PROMPTS, } from "./crew-constants.js";
11
+ export { computeControlVector, buildControlHistoryEntry, REGIME_CONTROL_TABLE, } from "./meta-controller.js";
12
12
  export { vibemaxSelectMode, vibemaxPipeline, predictVibeMaX, trainVibeMaXModelFromTelemetry, getVibeMaXModelMeta, resetVibeMaXPipeline } from "./vibemax.js";
13
13
  export { PivotCache } from "./pivot-cache.js";