npm - vibeostheog - Versions diffs - 0.20.15 → 0.20.16 - Mend

vibeostheog 0.20.15 → 0.20.16

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (6)

package/CHANGELOG.md +13 -0
package/package.json +1 -1
package/src/lib/hooks/chat-transform.js +10 -2
package/src/lib/hooks/tool-execute.js +18 -5
package/src/lib/pricing.js +135 -1
package/src/vibeOS-lib/blackbox/index.js +4 -4

package/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,16 @@
+## 0.20.16
+- fix: skip cache savings for free models + add modelCostPerTurn fallback + regression tests
+- fix: wire incrementTurnCounter into onToolExecuteAfter so session compaction fires at turn 7+
+- fix: make tests resilient in CI environment
+- perf: add MODEL_PRICING_PER_1M with per-provider input/output rates
+- perf: provider-aware cache savings with isModelFree gate + regression tests
+- perf: dynamic cache savings rate from per-model input pricing
+- perf: record cache savings for compressed tool outputs (write path)
+- ci: retrigger checks for merge
+Merge pull request #92 from DrunkkToys/pr/regression-tests-cache-savings
+Merge pull request #91 from DrunkkToys/pr/cache-write-savings
 ## 0.20.15
 - feat: dashboard blackbox telemetry — bidirectional BE/FE sync
 - fix: mock auth and clear OPENCODE_MODEL in bootstrap test, commit blackbox .js for CI

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "vibeostheog",
-  "version": "0.20.15",
+  "version": "0.20.16",
   "description": "Cost-aware delegation enforcer for OpenCode. Tracks model usage, routes Task subagents to cheaper tiers, surfaces cumulative savings in chat. Includes research audit, reporting framework, project memory, progressive scratchpad decadence, and trinity CLI for brain/medium/cheap slot switching.",
   "scripts": {
     "release": "node scripts/release.mjs",

package/src/lib/hooks/chat-transform.js CHANGED Viewed

@@ -2,8 +2,8 @@
 import { readFileSync, writeFileSync, appendFileSync, existsSync, mkdirSync } from "node:fs";
 import { join, basename } from "node:path";
 import { createHash } from "node:crypto";
-import { currentModel, currentProjectFingerprint, currentProjectName, _blackboxEnabled, loadSelection, writeSelection, safeJsonParse, applyDecadence, getSessionScratchpadDir, ensureSessionScratchpadDirs, indexAppend, briefedProjects, getActiveJobForProject, loadTodos, promotedProjectPatterns, detectTechStack, projectFingerprint, TRINITY_OPENCODE_CONFIG, TIERS_FILE, loadGlobalLearning, setCurrentProjectFingerprint, setCurrentProjectName, stableJson, TOOL_NAME_NORMALIZE, _cacheDb, } from "../state.js";
-import { applySlot, TRINITY_CHEAP, TRINITY_MEDIUM, } from "../pricing.js";
+import { currentModel, currentProjectFingerprint, currentProjectName, _blackboxEnabled, loadSelection, writeSelection, safeJsonParse, applyDecadence, getSessionScratchpadDir, ensureSessionScratchpadDirs, indexAppend, briefedProjects, getActiveJobForProject, loadTodos, promotedProjectPatterns, detectTechStack, projectFingerprint, TRINITY_OPENCODE_CONFIG, TIERS_FILE, loadGlobalLearning, setCurrentProjectFingerprint, setCurrentProjectName, stableJson, TOOL_NAME_NORMALIZE, _cacheDb, recordCacheSaving, } from "../state.js";
+import { applySlot, TRINITY_CHEAP, TRINITY_MEDIUM, cacheSavePer1MInputTokens, } from "../pricing.js";
 import { scoreStress, classifyTurnSimple, loadOptimizationMode, saveOptimizationMode, selectOptimizationModeRemote, computeControlVector, getBlackboxTracker, loadBlackboxState as loadBlackboxStateFromCtx, saveBlackboxState as saveBlackboxStateToCtx, extractLastUserText, isLikelyOffTopic, fetchBlackboxEnrichment, estimateContextBudget, buildControlHistoryEntry, } from "../turn-classify.js";
 import { applyBudgetFirstMode, peekBudgetFirstMode } from "../mode-policy.js";
 import { addCacheEntry, extractRecentCacheOutputs } from "../../vibeOS-lib/smart-cache.js";
@@ -14,6 +14,7 @@ import { noteProjectPattern } from "../index-helpers.js";
 import { saveSessionStress } from "../index-helpers.js";
 import { COMPRESS_THRESHOLD, KEEP_HOT, COMPRESS_MARKER, PROTOCOL_MARKER, PROTOCOL_TEXT } from "../constants.js";
 import { TEMPLATES, DEFAULT_TEMPLATE, resolveTemplate, shouldInjectTemplate } from "../templates.js";
+const BYTES_PER_TOKEN = 4;
 function getVibeOSHome() {
     return process.env.VIBEOS_HOME || join(process.env.HOME || "", ".claude");
 }
@@ -308,6 +309,13 @@ function compressToolOutputs(messages) {
                 `[summary] ${summary}`;
             state.output = ref;
             compressedBytes += raw.length - ref.length;
+            const toolKey = TOOL_NAME_NORMALIZE[part.tool] || part.tool;
+            const rate = cacheSavePer1MInputTokens(currentModel);
+            if (rate > 0) {
+                const inputTokens = Math.max(1, Math.round((raw.length - ref.length) / BYTES_PER_TOKEN));
+                const saveEst = Math.max(0.0001, Math.round(inputTokens * rate / 1_000_000 * 10000) / 10000);
+                recordCacheSaving(toolKey, saveEst, { hash });
+            }
             console.error(`[vibeOS] ctx-compress: ${raw.length}\u2192${ref.length} chars (hash: ${hash})`);
         }
     }

package/src/lib/hooks/tool-execute.js CHANGED Viewed

@@ -3,9 +3,9 @@ import { writeFileSync, appendFileSync, existsSync, mkdirSync } from "node:fs";
 import { join, dirname, basename } from "node:path";
 import { createHash } from "node:crypto";
 import { currentTier, currentModel, setCurrentModel, setCurrentTier, _OC_SID, _modelLocked, loadSelection, readLifetimeSavings, recordCacheSaving, recordMissedContext7, getScratchpadHit, recordScratchpadObservation, recordPrivacyTelemetry, updateState, getSessionScratchpadDir, ensureSessionScratchpadDirs, SAVINGS_LEDGER_FILE, CONTEXT7_INSTALL_FLAG, SOFT_QUOTA_LIMIT, upsertTodo, ML_ENABLED, _mlGraph, _cacheDb, _mlSavePending, ML_CONFIDENCE_THRESHOLD, setMlSavePending, saveMLState, SCRATCHPAD_TOOLS, SCRATCHPAD_GLOBAL_DIR, TOOL_NAME_NORMALIZE, stableJson, applyDecadence, } from "../state.js";
-import { classify, modelCostPerTurn, isModelFree, detectContext7, isDocsTarget, shortModelName, formatUsd, _refreshModel, readConfig, resolveDisplayModelId, TRINITY_CHEAP, TRINITY_MEDIUM, trendDisplay, modelToSlotLabel, resolveExecutionIdentity, formatProviderName, formatQualityName, } from "../pricing.js";
+import { classify, modelCostPerTurn, isModelFree, detectContext7, isDocsTarget, shortModelName, formatUsd, _refreshModel, readConfig, resolveDisplayModelId, TRINITY_CHEAP, TRINITY_MEDIUM, cacheSavePer1MInputTokens, trendDisplay, modelToSlotLabel, resolveExecutionIdentity, formatProviderName, formatQualityName, } from "../pricing.js";
 import { latestUserIntent } from "./chat-transform.js";
-import { scoreStress, extractFirstWordFromArgs, shouldLogWarn, isUserAskingForTests, resolveEnforcementMode, getLearnedExploratoryWords, noteTaskRoutingLearning, } from "../turn-classify.js";
+import { scoreStress, extractFirstWordFromArgs, shouldLogWarn, isUserAskingForTests, resolveEnforcementMode, getLearnedExploratoryWords, noteTaskRoutingLearning, incrementTurnCounter, } from "../turn-classify.js";
 import { saveReport } from "../reporting.js";
 import { loadCredit } from "../credit-api.js";
 import { remoteCall, VIBEOS_API_ENABLED } from "../api-client.js";
@@ -17,7 +17,6 @@ import { setActiveJobFromTaskPrompt, observeToolPattern, compressText, recordSav
 import { scoreTaskQuality, readRewardSignals } from "./footer.js";
 import { SAVE_EST, WARN_ON_DIRECT, SOFT_QUOTA, FREE, MONITOR } from "../constants.js";
 const BYTES_PER_TOKEN = 4;
-const CACHE_SAVED_PER_1M_INPUT_TOKENS = 0.10;
 const DEBUG_INTERNALS = process.env.VIBEOS_DEBUG_INTERNALS === "1";
 const IS_CLI_RUNTIME = Boolean(process.stdout?.isTTY || process.stderr?.isTTY || process.stdin?.isTTY);
 function getVibeOSHome() {
@@ -247,8 +246,12 @@ export const onToolExecuteBefore = async (input, output) => {
             // Persist cache savings as a first-class savings type.
             // Compute from actual scratchpad file size: inputs that would
             // have been charged at miss rate are served from cache.
-            const _inputTokens = Math.max(1, Math.round(hit.sizeBytes / BYTES_PER_TOKEN));
-            _cacheSave = Math.max(0.0001, Math.round(_inputTokens * CACHE_SAVED_PER_1M_INPUT_TOKENS / 1_000_000 * 10000) / 10000);
+            const rate = cacheSavePer1MInputTokens(currentModel);
+            _cacheSave = 0;
+            if (rate > 0) {
+                const _inputTokens = Math.max(1, Math.round(hit.sizeBytes / BYTES_PER_TOKEN));
+                _cacheSave = Math.max(0.0001, Math.round(_inputTokens * rate / 1_000_000 * 10000) / 10000);
+            }
             const cacheSaved = recordCacheSaving(t, _cacheSave, { hash: hit.hash });
             const sumNote = hit.summaryPath ? ` (summary: ${hit.summaryPath})` : "";
             const cacheNote = cacheSaved ? `, cache+$${(cacheSaved.lifetime || 0).toFixed(3)} lt` : "";
@@ -604,6 +607,11 @@ export const onToolExecuteAfter = async (input, output) => {
         }
     }
     catch { }
+    // ── Increment turn counter for compaction trigger ──
+    try {
+        incrementTurnCounter();
+    }
+    catch { }
     // ── Generate footer alert (prepended to tool result, visible in chat) ──
     let _footerText = "";
     try {
@@ -683,6 +691,11 @@ export const onToolExecuteAfter = async (input, output) => {
         }
     }
     catch { }
+    // ── Increment turn counter for compaction trigger ──
+    try {
+        incrementTurnCounter();
+    }
+    catch { }
     // ── End footer ──
     const t = input?.tool ?? "";
     if (t === "trinity") {

package/src/lib/pricing.js CHANGED Viewed

@@ -260,6 +260,50 @@ export function trendDisplay(sesTrend) {
 const CACHE_SAVED_PER_1M_INPUT_TOKENS = 0.10;
 // Approximate bytes per token for JSON/text content (varies 3-6, use 4 as safe estimate).
 const BYTES_PER_TOKEN = 4;
+export function parseOpenRouterInputPer1M(modelRow) {
+    const p = modelRow?.pricing || {};
+    const inTok = Number(p.prompt ?? p.input ?? p.request);
+    if (Number.isFinite(inTok) && inTok > 0) {
+        return Math.round(inTok * 1_000_000 * 10000) / 10000;
+    }
+    return null;
+}
+export function cacheSavePer1MInputTokens(model) {
+    if (!model)
+        return CACHE_SAVED_PER_1M_INPUT_TOKENS;
+    if (isModelFree(model))
+        return 0;
+    const rawKey = String(model || "");
+    const key = normalizeModelId(model);
+    const rawNoPrefix = rawKey.includes("/") ? rawKey.split("/")[rawKey.split("/").length - 1] : rawKey;
+    try {
+        const cache = _loadDynamicPricingCache();
+        for (const candidate of [rawKey, key, rawNoPrefix]) {
+            const entry = cache[candidate];
+            const rate = parseOpenRouterInputPer1M(entry);
+            if (rate !== null)
+                return rate;
+        }
+        for (const [ck, cv] of Object.entries(cache)) {
+            if (ck.endsWith("/" + rawNoPrefix)) {
+                const rate = parseOpenRouterInputPer1M(cv);
+                if (rate !== null)
+                    return rate;
+            }
+        }
+    }
+    catch { }
+    for (const candidate of [rawKey, key, rawNoPrefix]) {
+        const known = MODEL_PRICING_PER_1M[candidate];
+        if (known && Number.isFinite(known.input))
+            return known.input;
+    }
+    const turnCost = modelCostPerTurn(model);
+    if (Number.isFinite(turnCost) && turnCost > 0) {
+        return Math.round(turnCost * 375 * 100) / 100;
+    }
+    return CACHE_SAVED_PER_1M_INPUT_TOKENS;
+}
 export function roundUsd(v, precision = 6) {
     const n = Number(v ?? 0);
     if (!Number.isFinite(n))
@@ -284,6 +328,89 @@ export function formatUsd(v) {
 // deepseek-chat is free with a DeepSeek API token — priced at $1e-12 (near-zero).
 const FREE_MODEL_TURN_USD = 1e-10;
 const FREE_MODELS = new Set([]);
+// Actual input / output pricing per 1M tokens, sourced from provider API pages
+// and OpenRouter /api/v1/models. Format: USD per 1 million tokens.
+// Entries with provider/ prefix = OpenRouter route; without prefix = native provider.
+const MODEL_PRICING_PER_1M = {
+    // ── Anthropic (native + OpenRouter) ─────────────────────
+    "anthropic/claude-opus-4-8-fast": { input: 10.0, output: 50.0 },
+    "anthropic/claude-opus-4-8": { input: 5.0, output: 25.0 },
+    "anthropic/claude-opus-4-7-fast": { input: 30.0, output: 150.0 },
+    "anthropic/claude-opus-4-7": { input: 5.0, output: 25.0 },
+    "anthropic/claude-opus-4-6-fast": { input: 30.0, output: 150.0 },
+    "anthropic/claude-opus-4-6": { input: 5.0, output: 25.0 },
+    "anthropic/claude-opus-4-5": { input: 5.0, output: 25.0 },
+    "anthropic/claude-opus-4.1": { input: 15.0, output: 75.0 },
+    "anthropic/claude-opus-4": { input: 15.0, output: 75.0 },
+    "anthropic/claude-sonnet-4-6": { input: 3.0, output: 15.0 },
+    "anthropic/claude-sonnet-4-5": { input: 3.0, output: 15.0 },
+    "anthropic/claude-sonnet-4": { input: 3.0, output: 15.0 },
+    "anthropic/claude-haiku-4-5": { input: 1.0, output: 5.0 },
+    "anthropic/claude-3.5-haiku": { input: 0.80, output: 4.0 },
+    "anthropic/claude-3-haiku": { input: 0.25, output: 1.25 },
+    "haiku": { input: 0.80, output: 4.0 },
+    // ── DeepSeek (native — free for chat, paid for pro/flash/r1) ──
+    "deepseek-chat": { input: 0, output: 0 }, // native → free
+    "deepseek-reasoner": { input: 0.55, output: 2.19 }, // native r1
+    // ── DeepSeek (OpenRouter route) ────────────────────────
+    "deepseek/deepseek-v4-pro": { input: 0.435, output: 0.870 },
+    "deepseek/deepseek-v4-flash": { input: 0.098, output: 0.197 },
+    "deepseek/deepseek-chat": { input: 0.229, output: 0.914 },
+    "deepseek/deepseek-v3.2": { input: 0.252, output: 0.378 },
+    "deepseek/deepseek-v3.2-exp": { input: 0.270, output: 0.410 },
+    "deepseek/deepseek-chat-v3.1": { input: 0.210, output: 0.790 },
+    "deepseek/deepseek-chat-v3-0324": { input: 0.200, output: 0.770 },
+    "deepseek/deepseek-v3.1-terminus": { input: 0.270, output: 0.950 },
+    "deepseek/deepseek-r1-0528": { input: 0.500, output: 2.150 },
+    "deepseek/deepseek-r1": { input: 0.700, output: 2.500 },
+    "deepseek/deepseek-r1-distill-qwen-32b": { input: 0.290, output: 0.290 },
+    "deepseek/deepseek-r1-distill-llama-70b": { input: 0.70, output: 0.80 },
+    "deepseek/deepseek-v3": { input: 0.252, output: 0.378 },
+    "deepseek/haiku": { input: 0.80, output: 4.0 },
+    // ── Google Gemini (OpenRouter route) ──────────────────
+    "google/gemini-2.5-pro": { input: 1.25, output: 10.0 },
+    "google/gemini-2.5-flash": { input: 0.30, output: 2.50 },
+    "google/gemini-2.5-flash-lite": { input: 0.10, output: 0.40 },
+    "google/gemini-2.0-flash-001": { input: 0.10, output: 0.40 },
+    "google/gemini-2.0-flash-lite-001": { input: 0.075, output: 0.30 },
+    "google/gemma-4-31b-it": { input: 0.12, output: 0.37 },
+    "google/gemma-4-26b-a4b-it": { input: 0.06, output: 0.33 },
+    // ── OpenAI (OpenRouter route) ─────────────────────────
+    "openai/gpt-5.5-pro": { input: 30.0, output: 180.0 },
+    "openai/gpt-5.5": { input: 5.0, output: 30.0 },
+    "openai/gpt-5.4-pro": { input: 30.0, output: 180.0 },
+    "openai/gpt-5.4": { input: 2.50, output: 15.0 },
+    "openai/gpt-5.4-mini": { input: 0.75, output: 4.50 },
+    "openai/gpt-5.4-nano": { input: 0.20, output: 1.25 },
+    "openai/gpt-5.3-chat": { input: 1.75, output: 14.0 },
+    "openai/gpt-5.3-codex": { input: 1.75, output: 14.0 },
+    "openai/gpt-5.2": { input: 1.75, output: 14.0 },
+    "openai/gpt-5.2-pro": { input: 21.0, output: 168.0 },
+    "openai/gpt-5.1": { input: 1.25, output: 10.0 },
+    "openai/gpt-5": { input: 1.25, output: 10.0 },
+    "openai/gpt-5-mini": { input: 0.25, output: 2.00 },
+    "openai/gpt-5-nano": { input: 0.05, output: 0.40 },
+    "openai/gpt-4o": { input: 2.50, output: 10.0 },
+    "openai/gpt-4o-mini": { input: 0.15, output: 0.60 },
+    "openai/gpt-4.1": { input: 2.00, output: 8.00 },
+    "openai/gpt-4.1-mini": { input: 0.40, output: 1.60 },
+    "openai/gpt-4.1-nano": { input: 0.10, output: 0.40 },
+    "openai/o4-mini": { input: 1.10, output: 4.40 },
+    "openai/o4-mini-high": { input: 1.10, output: 4.40 },
+    "openai/o3-pro": { input: 20.0, output: 80.0 },
+    "openai/o3": { input: 2.00, output: 8.00 },
+    "openai/o3-mini": { input: 1.10, output: 4.40 },
+    "openai/o1-pro": { input: 150.0, output: 600.0 },
+    "openai/o1": { input: 15.0, output: 60.0 },
+    "openai/gpt-4-turbo": { input: 10.0, output: 30.0 },
+    "openai/gpt-4": { input: 30.0, output: 60.0 },
+    "openai/gpt-3.5-turbo": { input: 0.50, output: 1.50 },
+    // ── Mistral (OpenRouter route) ────────────────────────
+    "mistralai/mistral-medium-3-5": { input: 1.50, output: 7.50 },
+    "mistralai/mistral-large-2512": { input: 0.50, output: 1.50 },
+    "mistralai/mistral-small-2603": { input: 0.15, output: 0.60 },
+    "mistralai/mistral-nemo": { input: 0.02, output: 0.03 },
+};
 // Approximate USD per typical ~1 K-token turn (blended input+output).
 // Blend: 700 input + 300 output tokens per turn (line 272-273).
 // Sources: provider API pricing pages, OpenRouter /api/v1/models.
@@ -518,7 +645,14 @@ export function modelCostPerTurn(model) {
         if (key.startsWith(k) && /-\d+$/.test(k) && key.charAt(k.length) === "-")
             return v;
     }
-    // Log unknown models so we can add entries
+    // Fallback: derive blended turn cost from MODEL_PRICING_PER_1M input/output rates
+    for (const candidate of [model, key, bare]) {
+        const pricing = MODEL_PRICING_PER_1M[candidate];
+        if (pricing && Number.isFinite(pricing.input) && Number.isFinite(pricing.output)) {
+            const blended = (pricing.input * 700 + pricing.output * 300) / 1_000_000;
+            return Number.isFinite(blended) ? blended : FREE_MODEL_TURN_USD;
+        }
+    }
     console.error(`[vibeOS] modelCostPerTurn: unknown model '${model}' (normalized: '${key}') — add to MODEL_USD_PER_TURN`);
     return FREE_MODEL_TURN_USD;
 }

package/src/vibeOS-lib/blackbox/index.js CHANGED Viewed

@@ -3,11 +3,11 @@
 // @ts-nocheck
 // Blackbox — theWay decision core ported to TypeScript.
 // Barrel export for all blackbox modules.
-export { buildAdvice, buildDecisionBlock, computeModality, humanReadableAction, compressMetrics, compressUncertainty, compressEntropy, enforceClosure, stabilityScore, shouldUseFastPath, buildCautionNote, scoreUsefulness, getFallbackPlan, getActionSuggestion, getCuriosityPrompt } from "./advice-layer.js";
-export { classifySituation, getActions, recommendAction, getSituationTypes } from "./taxonomy.js";
+export { buildAdvice, buildDecisionBlock, computeModality, humanReadableAction, compressMetrics, compressUncertainty, compressEntropy, enforceClosure, stabilityScore, shouldUseFastPath, buildCautionNote, scoreUsefulness, getFallbackPlan, getActionSuggestion, getCuriosityPrompt, } from "./advice-layer.js";
+export { classifySituation, getActions, recommendAction, getSituationTypes, } from "./taxonomy.js";
 export { ResolutionTracker } from "./resolution-tracker.js";
 export { ExposureModel } from "./exposure-model.js";
-export { ACTION_TARGET, ACTION_TYPE, FALLBACK_PLANS, ACTION_SUGGESTIONS, CURIOSITY_PROMPTS } from "./crew-constants.js";
-export { computeControlVector, buildControlHistoryEntry, REGIME_CONTROL_TABLE } from "./meta-controller.js";
+export { ACTION_TARGET, ACTION_TYPE, FALLBACK_PLANS, ACTION_SUGGESTIONS, CURIOSITY_PROMPTS, } from "./crew-constants.js";
+export { computeControlVector, buildControlHistoryEntry, REGIME_CONTROL_TABLE, } from "./meta-controller.js";
 export { vibemaxSelectMode, vibemaxPipeline, predictVibeMaX, trainVibeMaXModelFromTelemetry, getVibeMaXModelMeta, resetVibeMaXPipeline } from "./vibemax.js";
 export { PivotCache } from "./pivot-cache.js";