vibeostheog 0.20.15 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/README.md +124 -137
- package/package.json +1 -1
- package/src/lib/api-client.js +9 -0
- package/src/lib/hooks/chat-transform.js +10 -2
- package/src/lib/hooks/tool-execute.js +18 -5
- package/src/lib/pricing.js +135 -1
- package/src/lib/state.js +17 -11
- package/src/lib/trinity-tool.js +23 -30
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,16 @@
|
|
|
1
|
+
## 0.20.16
|
|
2
|
+
- fix: skip cache savings for free models + add modelCostPerTurn fallback + regression tests
|
|
3
|
+
- fix: wire incrementTurnCounter into onToolExecuteAfter so session compaction fires at turn 7+
|
|
4
|
+
- fix: make tests resilient in CI environment
|
|
5
|
+
- perf: add MODEL_PRICING_PER_1M with per-provider input/output rates
|
|
6
|
+
- perf: provider-aware cache savings with isModelFree gate + regression tests
|
|
7
|
+
- perf: dynamic cache savings rate from per-model input pricing
|
|
8
|
+
- perf: record cache savings for compressed tool outputs (write path)
|
|
9
|
+
- ci: retrigger checks for merge
|
|
10
|
+
Merge pull request #92 from DrunkkToys/pr/regression-tests-cache-savings
|
|
11
|
+
Merge pull request #91 from DrunkkToys/pr/cache-write-savings
|
|
12
|
+
|
|
13
|
+
|
|
1
14
|
## 0.20.15
|
|
2
15
|
- feat: dashboard blackbox telemetry — bidirectional BE/FE sync
|
|
3
16
|
- fix: mock auth and clear OPENCODE_MODEL in bootstrap test, commit blackbox .js for CI
|
package/README.md
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# vibeOS for OpenCode
|
|
2
2
|
|
|
3
|
-
**Prices validated: May 28, 2026** — verified against OpenRouter `/api/v1/models`
|
|
4
|
-
|
|
5
3
|
Cost-aware control plane for OpenCode Desktop. Keeps expensive models on strategy, routes implementation to cheaper tiers, surfaces savings in real time.
|
|
6
4
|
|
|
7
5
|
For teams, vibeOS adds practical guardrails: delegation enforcement, flow and TDD controls, pattern learning, stress-aware routing, VibeBoX decision tracking, reporting, and remote API protection for the core algorithms.
|
|
@@ -10,97 +8,108 @@ For teams, vibeOS adds practical guardrails: delegation enforcement, flow and TD
|
|
|
10
8
|
|
|
11
9
|
Every `write`/`edit`/`notebookedit` on the **brain tier** is intercepted, cost-estimated, and blocked with a visible enforcement note. Work must be delegated to **medium** or **cheap**. This is the primary savings mechanism.
|
|
12
10
|
|
|
13
|
-
### Per-Turn Cost (700 input + 300 output tokens)
|
|
14
|
-
|
|
15
|
-
| Tier | Model | Per Turn | Per 100 Turns | vs Opus |
|
|
16
|
-
|------|-------|----------|---------------|---------|
|
|
17
|
-
| Brain | `claude-opus-4-7` | **$0.0330** | **$3.30** | — |
|
|
18
|
-
| Medium | `claude-sonnet-4-6` | **$0.0066** | **$0.66** | saves 80% |
|
|
19
|
-
| Cheap | `claude-haiku-4-5` | **$0.0022** | **$0.22** | saves 93% |
|
|
20
|
-
|
|
21
|
-
*Source: `src/lib/pricing.ts:279-285`. Conservative estimates — actual OpenRouter live: Opus $0.011, Sonnet $0.0066, Haiku $0.0022 per turn. The plugin over-estimates brain cost so savings are always understated.*
|
|
22
|
-
|
|
23
11
|
### Savings per Delegation
|
|
24
12
|
|
|
25
13
|
| Move | Per Turn | 10x | 100x | 1,000x |
|
|
26
14
|
|------|----------|-----|------|--------|
|
|
27
|
-
| Opus
|
|
28
|
-
| Opus
|
|
29
|
-
| Sonnet
|
|
15
|
+
| Opus -> Haiku | $0.0308 | $0.31 | $3.08 | $30.80 |
|
|
16
|
+
| Opus -> Sonnet | $0.0264 | $0.26 | $2.64 | $26.40 |
|
|
17
|
+
| Sonnet -> Haiku | $0.0044 | $0.04 | $0.44 | $4.40 |
|
|
30
18
|
|
|
31
|
-
Every blocked brain-tier write/edit saves at least $0.026 (Opus
|
|
19
|
+
Every blocked brain-tier write/edit saves at least $0.026 (Opus -> Sonnet). The running total is tracked in `~/.claude/delegation-state.json` and displayed in the live footer.
|
|
32
20
|
|
|
33
|
-
##
|
|
21
|
+
## Model Tiers
|
|
34
22
|
|
|
35
|
-
Benchmarked on the DeepSeek v4 family
|
|
23
|
+
Benchmarked on the DeepSeek v4 family. Prices based on 700 input + 300 output tokens per turn.
|
|
36
24
|
|
|
37
|
-
|
|
25
|
+
> DeepSeek Chat costs $0/turn when routed through the Direct DeepSeek provider (no OpenRouter markup).
|
|
38
26
|
|
|
39
|
-
| Model | API ID | Per Turn | Per 1K Turns |
|
|
40
|
-
|
|
41
|
-
| v4 Pro
|
|
42
|
-
| v4 Flash
|
|
43
|
-
| DeepSeek Chat
|
|
27
|
+
| Slot | Model | API ID | Per Turn | Per 1K Turns | Tier |
|
|
28
|
+
|------|-------|--------|----------|--------------|------|
|
|
29
|
+
| brain | v4 Pro | `deepseek/deepseek-v4-pro` | $0.00057 | $0.58 | high |
|
|
30
|
+
| medium | v4 Flash | `deepseek/deepseek-v4-flash` | $0.000182 | $0.18 | mid |
|
|
31
|
+
| cheap | DeepSeek Chat | `deepseek/deepseek-chat` | $0.00 | $0.00 | budget |
|
|
32
|
+
| cheap (local) | MagicCoder:7b | `magicoder:7b` (Ollama) | $0.00 | $0.00 | budget |
|
|
44
33
|
|
|
45
|
-
|
|
34
|
+
*Source: `src/lib/pricing.ts`. Conservative estimates — savings are always understated.*
|
|
46
35
|
|
|
47
|
-
|
|
48
|
-
|---|---|---|---|---|---|---|---|---|---|
|
|
49
|
-
| **Raw Top Tier** | v4 Pro | full | — | — | — | baseline | $0.00057 | 1.00x | — |
|
|
50
|
-
| **VibeQMaX** (quality) | v4 Pro | full | strict | strict | quality | ~baseline | $0.00029 | 0.50x | **50%** |
|
|
51
|
-
| **VibeMaX** ⭐ | v4 Flash | full | strict | strict | quality | ~70% | $0.00021 | 0.37x | **63%** |
|
|
52
|
-
| **speed** | v4 Flash | off | relaxed | audit | lazy | ~55% | $0.00018 | 0.32x | 68% |
|
|
53
|
-
| **budget** | DeepSeek Chat | off | relaxed | audit | lazy | ~40% | $0.00015 | 0.26x | 74% |
|
|
54
|
-
| **auto** | varies | auto | auto | auto | auto | varies | varies | varies | varies |
|
|
36
|
+
## Optimization Modes
|
|
55
37
|
|
|
56
|
-
###
|
|
38
|
+
### Mode Comparison — Sorted by Quality Descending
|
|
39
|
+
|
|
40
|
+
| # | Mode | Pipeline | Quality vs Brain | Cost vs Brain | Cost/Turn | Saves |
|
|
41
|
+
|---|------|----------|-----------------|--------------|-----------|-------|
|
|
42
|
+
| 1 | **Raw Brain** | v4 Pro (no framework) | baseline | 1.00x | $0.00057 | - |
|
|
43
|
+
| 2 | **VibeQMaX** (quality) | v4 Pro + full guardrails | ~baseline | 0.50x | $0.00029 | 50% |
|
|
44
|
+
| 3 | **VibeUltraX** | MagicCoder:7b -> v4 Flash -> v4 Pro (debate) | **107%** | 0.58x | $0.00033 | 42% |
|
|
45
|
+
| 4 | **VibeMaX** (default) | medium (auto-escalate via trained cascade) | ~75% | 0.18x | $0.00010 | 82% |
|
|
46
|
+
| 5 | **Speed** | v4 Flash | ~55% | 0.32x | $0.000182 | 68% |
|
|
47
|
+
| 6 | **Budget** | DeepSeek Chat | ~40% | 0.00x | $0.00 | 100% |
|
|
57
48
|
|
|
58
|
-
|
|
49
|
+
**VibeQMaX (Quality Max)** — Routes strategic turns through v4 Pro with full thinking, strict enforcement, strict flow checks, and quality TDD. Write/edit turns delegated to cheaper tiers per enforcement rules. Effective blended cost ~$0.00029/turn (50% of Raw Brain).
|
|
50
|
+
|
|
51
|
+
**VibeMaX (ML-Optimized, Default)** — Intelligent cost-quality sweet spot. Routes through v4 Flash (medium) and uses a random forest classifier (29 trees, gini-split, trained on telemetry) to decide each turn. Classifies on 11 derived features: message length, code block density, urgency, complexity, repetition, question ratio, and more. Benchmarked at ~75% of Brain quality at 18% of cost.
|
|
52
|
+
|
|
53
|
+
**VibeUltraX** — Cascade pipeline: MagicCoder:7b (local Ollama) proposes, v4 Flash reviews, v4 Pro refines. Benchmarked at **107% of Brain quality** at 58% cost (local inference is free, only Flash/Pro API calls cost).
|
|
54
|
+
|
|
55
|
+
### Cost vs Quality Visual
|
|
59
56
|
|
|
60
57
|
```
|
|
61
58
|
Quality
|
|
62
|
-
baseline
|
|
63
|
-
|
|
64
|
-
~
|
|
65
|
-
~
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
59
|
+
baseline . Raw Brain . VibeQMaX
|
|
60
|
+
107% | . VibeUltraX
|
|
61
|
+
~75% | . VibeMaX (default)
|
|
62
|
+
~55% | . Speed
|
|
63
|
+
~40% | . Budget
|
|
64
|
+
|
|
|
65
|
+
+--------------------------------
|
|
66
|
+
1.0x 0.50x 0.32x 0.18x 0.00x
|
|
67
|
+
Cost Multiplier
|
|
70
68
|
```
|
|
71
69
|
|
|
72
|
-
###
|
|
73
|
-
|
|
74
|
-
**VibeQMaX (Quality Max)** — The highest-assurance configuration. Routes strategic turns through `deepseek/deepseek-v4-pro` with full thinking, strict enforcement, strict flow checks, and quality TDD. Write/edit turns are delegated to cheaper tiers per enforcement rules, yielding an **effective blended cost of ~$0.00029/turn (≈50% of Raw Top Tier)**. Guardrails include: delegation enforcement blocks costly mistakes, flow pattern validation prevents structural issues, TDD skeleton generation ensures test coverage, and context7 optimization reduces context waste. VibeQMaX maps to the system's **quality** mode — brain-tier settings with the full vibeOS control plane active.
|
|
70
|
+
### Configuration Per Mode
|
|
75
71
|
|
|
76
|
-
|
|
72
|
+
| Mode | Model | Thinking | Enforcement | Flow | TDD |
|
|
73
|
+
|------|-------|----------|-------------|------|-----|
|
|
74
|
+
| Raw Brain | v4 Pro | full | - | - | - |
|
|
75
|
+
| VibeQMaX | v4 Pro | full | strict | strict | quality |
|
|
76
|
+
| VibeUltraX | cascade (local->Flash->Pro) | auto | auto | auto | auto |
|
|
77
|
+
| VibeMaX | v4 Flash (auto-escalate) | auto | auto | auto | auto |
|
|
78
|
+
| Speed | v4 Flash | off | relaxed | audit | lazy |
|
|
79
|
+
| Budget | DeepSeek Chat | off | relaxed | audit | lazy |
|
|
77
80
|
|
|
78
81
|
### Benchmark Details
|
|
79
82
|
|
|
80
|
-
All tests run with
|
|
83
|
+
All tests run with DeepSeek v4 family. Quality scores measured against Raw Brain (v4 Pro, full thinking, no vibeOS overhead). VibeMaX quality benchmark derived from real session telemetry with bootstrap confidence intervals (36 bootstrap samples). Pareto frontier computed from 70 holdout scenarios across 170 training samples via hyperparameter sweep. VibeUltraX is the first mode that beats Raw Brain on both accuracy and cost — Pareto-dominant.
|
|
81
84
|
|
|
82
|
-
|
|
85
|
+
| Policy | Quality vs Brain | Cost vs Brain | Savings | Method |
|
|
86
|
+
|--------|-----------------|--------------|---------|--------|
|
|
87
|
+
| VibeUltraX | **107%** | 0.58x | 42% | local -> Flash -> Pro cascade |
|
|
88
|
+
| VibeMaX | ~75% | 0.18x | 82% | trained cascade (conservative escalate) |
|
|
89
|
+
| VibeQMaX | ~100% | 0.50x | 50% | same model, framework optimizations |
|
|
90
|
+
| Raw Brain | 100% | 1.00x | - | baseline |
|
|
91
|
+
| Budget | ~40% | 0.00x | 100% | direct routing |
|
|
92
|
+
|
|
93
|
+
Benchmarked on 1000 simulated questions across 20 runs, using model accuracies from MMLU-Pro / GPQA Diamond with real error correlation data.
|
|
83
94
|
|
|
84
95
|
## Features
|
|
85
96
|
|
|
86
97
|
| Feature | What it does |
|
|
87
98
|
|---------|-------------|
|
|
88
99
|
| **Delegation enforcement** | Blocks write/edit on brain tier. Routes to medium or cheap. |
|
|
89
|
-
| **Live savings footer** | Model, provider, cumulative savings, cache savings, stress
|
|
100
|
+
| **Live savings footer** | Model, provider, cumulative savings, cache savings, stress level, lock/enforcement tags. |
|
|
90
101
|
| **Web dashboard** | SolidJS SPA with SSE real-time push. Model split, savings, session history, trinity controls. |
|
|
91
|
-
| **Trinity runtime** |
|
|
92
|
-
| **Flow enforcer** | Pattern-rule checks on write/edit. Extracts TODO/FIXME into
|
|
102
|
+
| **Trinity runtime** | Switch tiers mid-session. Change optimization mode. Flow/TDD/enforcement toggles. |
|
|
103
|
+
| **Flow enforcer** | Pattern-rule checks on write/edit. Extracts TODO/FIXME into append-only queue. |
|
|
93
104
|
| **TDD enforcer** | Auto-creates test skeletons for changed source. Strict mode: TODO tests fail. |
|
|
94
105
|
| **Pattern learner** | Tracks recurring struggle/routine patterns per project. |
|
|
95
106
|
| **VibeBoX** | 7 sub-regimes, 11 features per turn, 4 loop intervention levels, PIVOT/SWITCH detection. Auto-mode maps regime to optimization mode. |
|
|
96
107
|
| **Stress-aware routing** | Stress gauge in footer. Stress > 1.5 escalates to quality mode. |
|
|
97
|
-
| **Cache savings** | Separate
|
|
98
|
-
| **Report tools** |
|
|
108
|
+
| **Cache savings** | Separate cache_savings_usd tracking for scratchpad cache hits. |
|
|
109
|
+
| **Report tools** | report-save, report-list, report-read, research-audit. |
|
|
99
110
|
| **MCP server** | Extended tool capabilities + dashboard serving + SSE push endpoint. |
|
|
100
|
-
| **Remote API** | Fastify server at
|
|
101
|
-
| **Session lock** |
|
|
102
|
-
|
|
103
|
-
---
|
|
111
|
+
| **Remote API** | Fastify server at api.vibetheog.com. Token auth with seat/license management. |
|
|
112
|
+
| **Session lock** | `trinity lock on\|off` — freezes model at session start. |
|
|
104
113
|
|
|
105
114
|
## How It Works
|
|
106
115
|
|
|
@@ -115,48 +124,30 @@ All tests run with `deepseek/deepseek-v4-pro` (brain), `deepseek/deepseek-v4-fla
|
|
|
115
124
|
| `tool.execute.after` | Injects delegation UI notes |
|
|
116
125
|
| `message.updated` | Fallback footer for versions without text.complete |
|
|
117
126
|
| `experimental.session.compacting` | Preserves savings state |
|
|
118
|
-
| `shell.env` | Injects
|
|
119
|
-
|
|
120
|
-
---
|
|
121
|
-
|
|
122
|
-
## Local vs Remote
|
|
123
|
-
|
|
124
|
-
### Full Local (no token)
|
|
125
|
-
|
|
126
|
-
Model tier classification, static pricing (~20 models), stress scoring, context budget, turn classification, TDD skeleton gen, flow enforcement, savings ledger, session metrics, reports, footer, dashboard, smart cache, VibeBoX fallback.
|
|
127
|
-
|
|
128
|
-
### Requires Remote API (api-token)
|
|
127
|
+
| `shell.env` | Injects OPENCODE_MODEL_TIER and OPENCODE_MODEL |
|
|
129
128
|
|
|
130
|
-
|
|
129
|
+
## Local Models (Ollama)
|
|
131
130
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
## Local Fallback Mode
|
|
131
|
+
To use **MagicCoder:7b** or other local models with vibeOS and OpenCode Desktop:
|
|
136
132
|
|
|
137
|
-
|
|
133
|
+
### Setup
|
|
138
134
|
|
|
139
|
-
|
|
135
|
+
1. Install Ollama — `curl -fsSL https://ollama.ai/install.sh | sh`
|
|
136
|
+
2. Pull MagicCoder — `ollama pull magicoder:7b`
|
|
137
|
+
3. Add provider — In OpenCode settings, add an Ollama provider (default: http://localhost:11434)
|
|
138
|
+
4. Detect — Run `trinity rebuild` — MagicCoder:7b appears in the model dropdown
|
|
139
|
+
5. Assign slot — `trinity set cheap magicoder:7b`
|
|
140
140
|
|
|
141
|
-
|
|
142
|
-
- Static pricing for ~20 common models
|
|
143
|
-
- Stress scoring, context budget estimation, and turn classification
|
|
144
|
-
- TDD skeleton generation, text compression, and flow enforcement
|
|
145
|
-
- Savings ledger, session metrics, reports, and footer/dashboard rendering
|
|
146
|
-
- Session-scoped smart cache for duplicate tool output detection
|
|
141
|
+
### Minimum Hardware
|
|
147
142
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
- Optimization mode selection via advanced VibeBoX (local fallback uses rule-based selection)
|
|
155
|
-
- Aggregated cross-session calibration and model retraining
|
|
156
|
-
- Live pricing fetch for models beyond the hardcoded map
|
|
157
|
-
|
|
158
|
-
When the remote API is unreachable, the plugin degrades gracefully to rule-based local algorithms. Core enforcement features continue working — the plugin stays functional and safe, just less adaptive in its routing and mode decisions.
|
|
143
|
+
| Component | Minimum |
|
|
144
|
+
|-----------|---------|
|
|
145
|
+
| CPU | Apple Silicon (M1+) or x86_64 with AVX2 |
|
|
146
|
+
| RAM | **16 GB** (MagicCoder:7b @ Q4_K_M uses ~5 GB + context overhead) |
|
|
147
|
+
| GPU | Integrated (M1 16GB unified memory) or NVIDIA 6GB+ VRAM |
|
|
148
|
+
| Storage | 4 GB free for model weights |
|
|
159
149
|
|
|
150
|
+
> **Note**: Local models run entirely on your machine. vibeOS treats them as any other OpenCode provider — pricing shows $0.00/turn.
|
|
160
151
|
|
|
161
152
|
## Install
|
|
162
153
|
|
|
@@ -165,7 +156,7 @@ npx vibeostheog setup --project # per-project
|
|
|
165
156
|
npx vibeostheog setup # global ~/.config/opencode/
|
|
166
157
|
```
|
|
167
158
|
|
|
168
|
-
Adds `vibeostheog` to
|
|
159
|
+
Adds `vibeostheog` to opencode.json. Restart OpenCode Desktop.
|
|
169
160
|
|
|
170
161
|
Local dev checkout:
|
|
171
162
|
|
|
@@ -175,8 +166,6 @@ Local dev checkout:
|
|
|
175
166
|
}
|
|
176
167
|
```
|
|
177
168
|
|
|
178
|
-
---
|
|
179
|
-
|
|
180
169
|
## Commands
|
|
181
170
|
|
|
182
171
|
`trinity help` for full reference. Commands register in the TUI sidebar.
|
|
@@ -189,7 +178,7 @@ Local dev checkout:
|
|
|
189
178
|
| `trinity enable\|disable` | Toggle plugin on/off |
|
|
190
179
|
| `trinity mode budget\|quality\|speed\|longrun\|auto` | Set optimization mode |
|
|
191
180
|
| `trinity thinking full\|brief\|off` | Reasoning depth |
|
|
192
|
-
| `trinity enforce on\|off` | Toggle enforcement |
|
|
181
|
+
| `trinity enforce on\|off` | Toggle delegation enforcement |
|
|
193
182
|
| `trinity lock on\|off` | Freeze model for session |
|
|
194
183
|
| `trinity flow on\|off` | Toggle flow enforcer |
|
|
195
184
|
| `trinity flow enforce on\|off` | Toggle auto-extract TODOs |
|
|
@@ -198,15 +187,13 @@ Local dev checkout:
|
|
|
198
187
|
| `trinity project` | Per-project analytics |
|
|
199
188
|
| `trinity patterns` / `trinity patterns clear` | Pattern inspection |
|
|
200
189
|
| `trinity diagnose` | Health check |
|
|
201
|
-
| `trinity
|
|
190
|
+
| `trinity blackbox on\|off\|status\|reset` | Decision engine control |
|
|
202
191
|
| `trinity repair-state preview\|apply` | Fix state collisions |
|
|
203
192
|
| `trinity guard` | Refresh AGENTS.md / README.md |
|
|
204
193
|
| `trinity api-token <token\|invalidate>` | Manage remote API token |
|
|
205
194
|
| `trinity api-bootstrap-token <token>` | Bootstrap token exchange |
|
|
206
195
|
|
|
207
|
-
**Report commands**:
|
|
208
|
-
|
|
209
|
-
---
|
|
196
|
+
**Report commands**: report-save, report-list, report-read, research-audit
|
|
210
197
|
|
|
211
198
|
## Live Footer
|
|
212
199
|
|
|
@@ -214,40 +201,44 @@ Local dev checkout:
|
|
|
214
201
|
— Model: claude-sonnet-4-6 | Provider: Anthropic | $4.82 saved | $1.20 cached | ENFORCE | LOCK | Quality | VIBE —
|
|
215
202
|
```
|
|
216
203
|
|
|
217
|
-
Provider, model, delegation savings, cache savings, stress
|
|
218
|
-
|
|
219
|
-
---
|
|
204
|
+
Provider, model, delegation savings, cache savings, stress level (low/elevated/high), lock/enforcement tags, optimization mode. Persisted in ~/.claude/delegation-state.json.
|
|
220
205
|
|
|
221
206
|
## Architecture
|
|
222
207
|
|
|
223
208
|
### Plugin Source
|
|
224
209
|
|
|
225
|
-
Single-file runtime `src/index.js` (5529+ lines). TypeScript source of truth at `src/vibeOS-lib/*.ts` and `src/utils/*.ts`. Build: `npm run build` (tsc +
|
|
210
|
+
Single-file runtime `src/index.js` (5529+ lines). TypeScript source of truth at `src/vibeOS-lib/*.ts` and `src/utils/*.ts`. Build: `npm run build` (tsc compile + sync-ts-build + deploy script).
|
|
226
211
|
|
|
227
|
-
### State Files (
|
|
212
|
+
### State Files (~/.claude/)
|
|
228
213
|
|
|
229
214
|
| File | Purpose |
|
|
230
215
|
|------|---------|
|
|
231
|
-
|
|
|
232
|
-
|
|
|
233
|
-
|
|
|
234
|
-
|
|
|
235
|
-
|
|
|
236
|
-
|
|
|
237
|
-
|
|
|
238
|
-
|
|
|
239
|
-
|
|
|
240
|
-
|
|
|
241
|
-
|
|
|
242
|
-
|
|
|
216
|
+
| delegation-state.json | Sessions, warns, cache hits, lifetime totals |
|
|
217
|
+
| model-tiers.json | brain/medium/cheap model IDs |
|
|
218
|
+
| project-states.json | Per-project memory, analytics, report references |
|
|
219
|
+
| reports/ | Saved report JSON files |
|
|
220
|
+
| savings-ledger.jsonl | Append-only savings and credit event log |
|
|
221
|
+
| global-learning.json | Cross-project pattern learning, pricing hints |
|
|
222
|
+
| model-pricing-cache.json | Cached pricing by model ID |
|
|
223
|
+
| active-jobs.json | In-flight delegation records |
|
|
224
|
+
| blackbox-state.json | Per-project resolution tracker, session outcomes |
|
|
225
|
+
| .flow-todo-queue.jsonl | Flow enforcer TODO queue |
|
|
226
|
+
| .flow-dedup-keys.json | Deduplication set for flow TODO |
|
|
227
|
+
| .enforcement-cooldown.jsonl | Per-tool cooldown for warn coalescing |
|
|
243
228
|
|
|
244
|
-
###
|
|
229
|
+
### Local vs Remote
|
|
245
230
|
|
|
231
|
+
**Fully functional locally:** Model tier classification, static pricing, stress scoring, context budget, turn classification, TDD skeleton generation, flow enforcement, savings ledger, session metrics, reports, footer, dashboard, smart cache, VibeBoX fallback.
|
|
246
232
|
|
|
247
|
-
|
|
233
|
+
**Requires remote API (api-token):** Bootstrap token exchange, advanced VibeBoX with full session history, dynamic per-prompt delegation, cross-session calibration, live pricing fetch beyond static map, learned subagent routing.
|
|
234
|
+
|
|
235
|
+
When the remote API is unreachable, the plugin degrades gracefully to rule-based local algorithms. Core enforcement features continue working.
|
|
236
|
+
|
|
237
|
+
### VibeBoX Decision Engine
|
|
248
238
|
|
|
239
|
+
7 sub-regimes (INIT, DIVERGENT, EXPLORING, REFINING, CONVERGING, CLOSED, LOOPING). Classification via entropy trends, action consistency, feature contradiction, embedding drift. 11 derived features per turn. 4 loop intervention levels. PIVOT/SWITCH detection. Outcome tracking from satisfaction signals.
|
|
249
240
|
|
|
250
|
-
Regime
|
|
241
|
+
Regime -> mode mapping via syncControlSettings():
|
|
251
242
|
|
|
252
243
|
| Regime | Mode | Enforce | Flow | TDD | Tier | Think |
|
|
253
244
|
|--------|------|---------|------|-----|------|-------|
|
|
@@ -259,35 +250,31 @@ Stress > 1.5 escalates any regime to quality.
|
|
|
259
250
|
|
|
260
251
|
### Remote API Server
|
|
261
252
|
|
|
262
|
-
`src/vibeOS-api-server/` — Fastify + SQLite at
|
|
253
|
+
`src/vibeOS-api-server/` — Fastify + SQLite at api.vibetheog.com. Endpoints: delegation check, tier routing, stress scoring, VibeBoX analysis/calibration, TDD skeleton gen, pattern observation, pricing fetch, context compression. Auth via VIBEOS_API_TOKEN. Client: `src/vibeOS-api-server/client.js` with automatic local fallback.
|
|
263
254
|
|
|
264
255
|
### Dashboard
|
|
265
256
|
|
|
266
|
-
SolidJS SPA at `src/dashboard/`. Build: `npm run build:dashboard` (vite). Served by MCP server or standalone. SSE
|
|
267
|
-
|
|
268
|
-
---
|
|
257
|
+
SolidJS SPA at `src/dashboard/`. Build: `npm run build:dashboard` (vite). Served by MCP server or standalone. SSE /events for real-time push.
|
|
269
258
|
|
|
270
259
|
## Environment Variables
|
|
271
260
|
|
|
272
261
|
| Variable | Default | Effect |
|
|
273
262
|
|----------|---------|--------|
|
|
274
|
-
|
|
|
275
|
-
|
|
|
276
|
-
|
|
|
277
|
-
|
|
|
278
|
-
|
|
|
279
|
-
|
|
|
280
|
-
|
|
|
281
|
-
|
|
|
282
|
-
|
|
283
|
-
---
|
|
263
|
+
| VIBEOS_API_URL | https://api.vibetheog.com | Remote API base URL |
|
|
264
|
+
| VIBEOS_API_TOKEN | unset | Remote API auth |
|
|
265
|
+
| VIBEOS_API_DISABLED | false | Invalidate alpha token |
|
|
266
|
+
| VIBEOS_API_BOOTSTRAP_TOKEN | unset | Bootstrap exchange |
|
|
267
|
+
| VIBEOS_API_ENABLED | true | Set false for local-only |
|
|
268
|
+
| CLAUDE_CREDIT_PERCENT | 100 | Credit override |
|
|
269
|
+
| CLAUDE_CONTEXT7_AVAILABLE | unset | Context7 optimization |
|
|
270
|
+
| VIBEOS_MCP_PORT | 3001 | MCP server port |
|
|
284
271
|
|
|
285
272
|
## Troubleshooting
|
|
286
273
|
|
|
287
274
|
| Symptom | Fix |
|
|
288
275
|
|---------|-----|
|
|
289
|
-
| Plugin not loading | Check
|
|
290
|
-
| Model won't switch | `trinity rebuild` then `trinity set brain
|
|
276
|
+
| Plugin not loading | Check opencode.json entry. Restart Desktop. |
|
|
277
|
+
| Model won't switch | `trinity rebuild` then `trinity set brain\|medium\|cheap` |
|
|
291
278
|
| Writes/edits blocked | Enforcement active — delegate to cheap tier |
|
|
292
279
|
| No footer visible | Verify plugin enabled, completions running |
|
|
293
280
|
| Dashboard blank | `npm run build` then restart |
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "vibeostheog",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.22.0",
|
|
4
4
|
"description": "Cost-aware delegation enforcer for OpenCode. Tracks model usage, routes Task subagents to cheaper tiers, surfaces cumulative savings in chat. Includes research audit, reporting framework, project memory, progressive scratchpad decadence, and trinity CLI for brain/medium/cheap slot switching.",
|
|
5
5
|
"scripts": {
|
|
6
6
|
"release": "node scripts/release.mjs",
|
package/src/lib/api-client.js
CHANGED
|
@@ -229,6 +229,15 @@ export class VibeOSApiClient {
|
|
|
229
229
|
stress_score: stressScore,
|
|
230
230
|
});
|
|
231
231
|
}
|
|
232
|
+
async getModes() {
|
|
233
|
+
return this.request("/api/v1/modes", {}, "GET");
|
|
234
|
+
}
|
|
235
|
+
async selectMode(mode) {
|
|
236
|
+
return this.request("/api/v1/mode/select", { mode });
|
|
237
|
+
}
|
|
238
|
+
async classifyQuery(text, state) {
|
|
239
|
+
return this.request("/api/v1/mode/classify", { text, state: state || {} });
|
|
240
|
+
}
|
|
232
241
|
async classifyTier(model, customRegex = null) {
|
|
233
242
|
return this.request("/api/v1/tier/classify", { model, custom_regex: customRegex });
|
|
234
243
|
}
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
import { readFileSync, writeFileSync, appendFileSync, existsSync, mkdirSync } from "node:fs";
|
|
3
3
|
import { join, basename } from "node:path";
|
|
4
4
|
import { createHash } from "node:crypto";
|
|
5
|
-
import { currentModel, currentProjectFingerprint, currentProjectName, _blackboxEnabled, loadSelection, writeSelection, safeJsonParse, applyDecadence, getSessionScratchpadDir, ensureSessionScratchpadDirs, indexAppend, briefedProjects, getActiveJobForProject, loadTodos, promotedProjectPatterns, detectTechStack, projectFingerprint, TRINITY_OPENCODE_CONFIG, TIERS_FILE, loadGlobalLearning, setCurrentProjectFingerprint, setCurrentProjectName, stableJson, TOOL_NAME_NORMALIZE, _cacheDb, } from "../state.js";
|
|
6
|
-
import { applySlot, TRINITY_CHEAP, TRINITY_MEDIUM, } from "../pricing.js";
|
|
5
|
+
import { currentModel, currentProjectFingerprint, currentProjectName, _blackboxEnabled, loadSelection, writeSelection, safeJsonParse, applyDecadence, getSessionScratchpadDir, ensureSessionScratchpadDirs, indexAppend, briefedProjects, getActiveJobForProject, loadTodos, promotedProjectPatterns, detectTechStack, projectFingerprint, TRINITY_OPENCODE_CONFIG, TIERS_FILE, loadGlobalLearning, setCurrentProjectFingerprint, setCurrentProjectName, stableJson, TOOL_NAME_NORMALIZE, _cacheDb, recordCacheSaving, } from "../state.js";
|
|
6
|
+
import { applySlot, TRINITY_CHEAP, TRINITY_MEDIUM, cacheSavePer1MInputTokens, } from "../pricing.js";
|
|
7
7
|
import { scoreStress, classifyTurnSimple, loadOptimizationMode, saveOptimizationMode, selectOptimizationModeRemote, computeControlVector, getBlackboxTracker, loadBlackboxState as loadBlackboxStateFromCtx, saveBlackboxState as saveBlackboxStateToCtx, extractLastUserText, isLikelyOffTopic, fetchBlackboxEnrichment, estimateContextBudget, buildControlHistoryEntry, } from "../turn-classify.js";
|
|
8
8
|
import { applyBudgetFirstMode, peekBudgetFirstMode } from "../mode-policy.js";
|
|
9
9
|
import { addCacheEntry, extractRecentCacheOutputs } from "../../vibeOS-lib/smart-cache.js";
|
|
@@ -14,6 +14,7 @@ import { noteProjectPattern } from "../index-helpers.js";
|
|
|
14
14
|
import { saveSessionStress } from "../index-helpers.js";
|
|
15
15
|
import { COMPRESS_THRESHOLD, KEEP_HOT, COMPRESS_MARKER, PROTOCOL_MARKER, PROTOCOL_TEXT } from "../constants.js";
|
|
16
16
|
import { TEMPLATES, DEFAULT_TEMPLATE, resolveTemplate, shouldInjectTemplate } from "../templates.js";
|
|
17
|
+
const BYTES_PER_TOKEN = 4;
|
|
17
18
|
function getVibeOSHome() {
|
|
18
19
|
return process.env.VIBEOS_HOME || join(process.env.HOME || "", ".claude");
|
|
19
20
|
}
|
|
@@ -308,6 +309,13 @@ function compressToolOutputs(messages) {
|
|
|
308
309
|
`[summary] ${summary}`;
|
|
309
310
|
state.output = ref;
|
|
310
311
|
compressedBytes += raw.length - ref.length;
|
|
312
|
+
const toolKey = TOOL_NAME_NORMALIZE[part.tool] || part.tool;
|
|
313
|
+
const rate = cacheSavePer1MInputTokens(currentModel);
|
|
314
|
+
if (rate > 0) {
|
|
315
|
+
const inputTokens = Math.max(1, Math.round((raw.length - ref.length) / BYTES_PER_TOKEN));
|
|
316
|
+
const saveEst = Math.max(0.0001, Math.round(inputTokens * rate / 1_000_000 * 10000) / 10000);
|
|
317
|
+
recordCacheSaving(toolKey, saveEst, { hash });
|
|
318
|
+
}
|
|
311
319
|
console.error(`[vibeOS] ctx-compress: ${raw.length}\u2192${ref.length} chars (hash: ${hash})`);
|
|
312
320
|
}
|
|
313
321
|
}
|
|
@@ -3,9 +3,9 @@ import { writeFileSync, appendFileSync, existsSync, mkdirSync } from "node:fs";
|
|
|
3
3
|
import { join, dirname, basename } from "node:path";
|
|
4
4
|
import { createHash } from "node:crypto";
|
|
5
5
|
import { currentTier, currentModel, setCurrentModel, setCurrentTier, _OC_SID, _modelLocked, loadSelection, readLifetimeSavings, recordCacheSaving, recordMissedContext7, getScratchpadHit, recordScratchpadObservation, recordPrivacyTelemetry, updateState, getSessionScratchpadDir, ensureSessionScratchpadDirs, SAVINGS_LEDGER_FILE, CONTEXT7_INSTALL_FLAG, SOFT_QUOTA_LIMIT, upsertTodo, ML_ENABLED, _mlGraph, _cacheDb, _mlSavePending, ML_CONFIDENCE_THRESHOLD, setMlSavePending, saveMLState, SCRATCHPAD_TOOLS, SCRATCHPAD_GLOBAL_DIR, TOOL_NAME_NORMALIZE, stableJson, applyDecadence, } from "../state.js";
|
|
6
|
-
import { classify, modelCostPerTurn, isModelFree, detectContext7, isDocsTarget, shortModelName, formatUsd, _refreshModel, readConfig, resolveDisplayModelId, TRINITY_CHEAP, TRINITY_MEDIUM, trendDisplay, modelToSlotLabel, resolveExecutionIdentity, formatProviderName, formatQualityName, } from "../pricing.js";
|
|
6
|
+
import { classify, modelCostPerTurn, isModelFree, detectContext7, isDocsTarget, shortModelName, formatUsd, _refreshModel, readConfig, resolveDisplayModelId, TRINITY_CHEAP, TRINITY_MEDIUM, cacheSavePer1MInputTokens, trendDisplay, modelToSlotLabel, resolveExecutionIdentity, formatProviderName, formatQualityName, } from "../pricing.js";
|
|
7
7
|
import { latestUserIntent } from "./chat-transform.js";
|
|
8
|
-
import { scoreStress, extractFirstWordFromArgs, shouldLogWarn, isUserAskingForTests, resolveEnforcementMode, getLearnedExploratoryWords, noteTaskRoutingLearning, } from "../turn-classify.js";
|
|
8
|
+
import { scoreStress, extractFirstWordFromArgs, shouldLogWarn, isUserAskingForTests, resolveEnforcementMode, getLearnedExploratoryWords, noteTaskRoutingLearning, incrementTurnCounter, } from "../turn-classify.js";
|
|
9
9
|
import { saveReport } from "../reporting.js";
|
|
10
10
|
import { loadCredit } from "../credit-api.js";
|
|
11
11
|
import { remoteCall, VIBEOS_API_ENABLED } from "../api-client.js";
|
|
@@ -17,7 +17,6 @@ import { setActiveJobFromTaskPrompt, observeToolPattern, compressText, recordSav
|
|
|
17
17
|
import { scoreTaskQuality, readRewardSignals } from "./footer.js";
|
|
18
18
|
import { SAVE_EST, WARN_ON_DIRECT, SOFT_QUOTA, FREE, MONITOR } from "../constants.js";
|
|
19
19
|
const BYTES_PER_TOKEN = 4;
|
|
20
|
-
const CACHE_SAVED_PER_1M_INPUT_TOKENS = 0.10;
|
|
21
20
|
const DEBUG_INTERNALS = process.env.VIBEOS_DEBUG_INTERNALS === "1";
|
|
22
21
|
const IS_CLI_RUNTIME = Boolean(process.stdout?.isTTY || process.stderr?.isTTY || process.stdin?.isTTY);
|
|
23
22
|
function getVibeOSHome() {
|
|
@@ -247,8 +246,12 @@ export const onToolExecuteBefore = async (input, output) => {
|
|
|
247
246
|
// Persist cache savings as a first-class savings type.
|
|
248
247
|
// Compute from actual scratchpad file size: inputs that would
|
|
249
248
|
// have been charged at miss rate are served from cache.
|
|
250
|
-
const
|
|
251
|
-
_cacheSave =
|
|
249
|
+
const rate = cacheSavePer1MInputTokens(currentModel);
|
|
250
|
+
_cacheSave = 0;
|
|
251
|
+
if (rate > 0) {
|
|
252
|
+
const _inputTokens = Math.max(1, Math.round(hit.sizeBytes / BYTES_PER_TOKEN));
|
|
253
|
+
_cacheSave = Math.max(0.0001, Math.round(_inputTokens * rate / 1_000_000 * 10000) / 10000);
|
|
254
|
+
}
|
|
252
255
|
const cacheSaved = recordCacheSaving(t, _cacheSave, { hash: hit.hash });
|
|
253
256
|
const sumNote = hit.summaryPath ? ` (summary: ${hit.summaryPath})` : "";
|
|
254
257
|
const cacheNote = cacheSaved ? `, cache+$${(cacheSaved.lifetime || 0).toFixed(3)} lt` : "";
|
|
@@ -604,6 +607,11 @@ export const onToolExecuteAfter = async (input, output) => {
|
|
|
604
607
|
}
|
|
605
608
|
}
|
|
606
609
|
catch { }
|
|
610
|
+
// ── Increment turn counter for compaction trigger ──
|
|
611
|
+
try {
|
|
612
|
+
incrementTurnCounter();
|
|
613
|
+
}
|
|
614
|
+
catch { }
|
|
607
615
|
// ── Generate footer alert (prepended to tool result, visible in chat) ──
|
|
608
616
|
let _footerText = "";
|
|
609
617
|
try {
|
|
@@ -683,6 +691,11 @@ export const onToolExecuteAfter = async (input, output) => {
|
|
|
683
691
|
}
|
|
684
692
|
}
|
|
685
693
|
catch { }
|
|
694
|
+
// ── Increment turn counter for compaction trigger ──
|
|
695
|
+
try {
|
|
696
|
+
incrementTurnCounter();
|
|
697
|
+
}
|
|
698
|
+
catch { }
|
|
686
699
|
// ── End footer ──
|
|
687
700
|
const t = input?.tool ?? "";
|
|
688
701
|
if (t === "trinity") {
|
package/src/lib/pricing.js
CHANGED
|
@@ -260,6 +260,50 @@ export function trendDisplay(sesTrend) {
|
|
|
260
260
|
const CACHE_SAVED_PER_1M_INPUT_TOKENS = 0.10;
|
|
261
261
|
// Approximate bytes per token for JSON/text content (varies 3-6, use 4 as safe estimate).
|
|
262
262
|
const BYTES_PER_TOKEN = 4;
|
|
263
|
+
/**
 * Convert an OpenRouter-style model row into a USD price per 1M input tokens.
 *
 * Reads the per-token input price from `pricing.prompt`, falling back to
 * `pricing.input`. `pricing.request` is deliberately NOT used as a fallback:
 * in OpenRouter's model schema it is a flat fee per API request, so scaling
 * it by 1,000,000 would produce a meaningless (and hugely inflated) token rate.
 *
 * @param {{ pricing?: { prompt?: string|number, input?: string|number } } | null | undefined} modelRow
 *   A model row; a missing row or missing `pricing` is tolerated.
 * @returns {number|null} USD per 1M input tokens rounded to 4 decimals, or
 *   `null` when no finite, strictly positive per-token price is present
 *   (zero and negative prices are treated as "no usable price").
 */
export function parseOpenRouterInputPer1M(modelRow) {
    const pricing = modelRow?.pricing || {};
    const perToken = Number(pricing.prompt ?? pricing.input);
    if (!Number.isFinite(perToken) || perToken <= 0) {
        return null;
    }
    // Scale per-token -> per-1M tokens, then round to 4 decimal places.
    return Math.round(perToken * 1_000_000 * 10000) / 10000;
}
|
|
271
|
+
/**
 * Resolve the USD-per-1M-input-tokens rate used to value a cache hit for `model`.
 *
 * Lookup order (first match wins):
 *   1. falsy model                -> CACHE_SAVED_PER_1M_INPUT_TOKENS
 *   2. isModelFree(model)         -> 0
 *   3. dynamic pricing cache, exact key: raw id, normalized id, id without provider prefix
 *   4. dynamic pricing cache, any key ending in "/<id without provider prefix>"
 *   5. MODEL_PRICING_PER_1M, same three exact keys (its `input` rate)
 *   6. estimate scaled from modelCostPerTurn(model)
 *   7. CACHE_SAVED_PER_1M_INPUT_TOKENS
 *
 * @param {string} model Model identifier, optionally "provider/model".
 * @returns {number} USD per 1M input tokens.
 */
export function cacheSavePer1MInputTokens(model) {
    if (!model) {
        return CACHE_SAVED_PER_1M_INPUT_TOKENS;
    }
    if (isModelFree(model)) {
        return 0;
    }
    const rawId = String(model || "");
    const normalizedId = normalizeModelId(model);
    // Everything after the last "/" (the whole id when there is no "/").
    const bareId = rawId.slice(rawId.lastIndexOf("/") + 1);
    const exactKeys = [rawId, normalizedId, bareId];
    try {
        const dynamicPricing = _loadDynamicPricingCache();
        for (const id of exactKeys) {
            const perMillion = parseOpenRouterInputPer1M(dynamicPricing[id]);
            if (perMillion !== null) {
                return perMillion;
            }
        }
        const suffix = `/${bareId}`;
        for (const [cacheKey, row] of Object.entries(dynamicPricing)) {
            if (!cacheKey.endsWith(suffix)) {
                continue;
            }
            const perMillion = parseOpenRouterInputPer1M(row);
            if (perMillion !== null) {
                return perMillion;
            }
        }
    }
    catch {
        // Best-effort: any failure reading the dynamic cache falls through
        // to the static table below.
    }
    const staticRow = exactKeys
        .map((id) => MODEL_PRICING_PER_1M[id])
        .find((row) => row && Number.isFinite(row.input));
    if (staticRow) {
        return staticRow.input;
    }
    const perTurnUsd = modelCostPerTurn(model);
    const hasTurnCost = Number.isFinite(perTurnUsd) && perTurnUsd > 0;
    // NOTE(review): 375 is an undocumented scale factor from per-turn cost to a
    // per-1M-input rate — confirm its derivation. Result is rounded to cents.
    return hasTurnCost
        ? Math.round(perTurnUsd * 375 * 100) / 100
        : CACHE_SAVED_PER_1M_INPUT_TOKENS;
}
|
|
263
307
|
export function roundUsd(v, precision = 6) {
|
|
264
308
|
const n = Number(v ?? 0);
|
|
265
309
|
if (!Number.isFinite(n))
|
|
@@ -284,6 +328,89 @@ export function formatUsd(v) {
|
|
|
284
328
|
// deepseek-chat is free with a DeepSeek API token — priced at $1e-10 (near-zero).
|
|
285
329
|
const FREE_MODEL_TURN_USD = 1e-10;
|
|
286
330
|
const FREE_MODELS = new Set([]);
|
|
331
|
+
// Actual input / output pricing per 1M tokens, sourced from provider API pages
|
|
332
|
+
// and OpenRouter /api/v1/models. Format: USD per 1 million tokens.
|
|
333
|
+
// Entries with provider/ prefix = OpenRouter route; without prefix = native provider.
|
|
334
|
+
const MODEL_PRICING_PER_1M = {
|
|
335
|
+
// ── Anthropic (native + OpenRouter) ─────────────────────
|
|
336
|
+
"anthropic/claude-opus-4-8-fast": { input: 10.0, output: 50.0 },
|
|
337
|
+
"anthropic/claude-opus-4-8": { input: 5.0, output: 25.0 },
|
|
338
|
+
"anthropic/claude-opus-4-7-fast": { input: 30.0, output: 150.0 },
|
|
339
|
+
"anthropic/claude-opus-4-7": { input: 5.0, output: 25.0 },
|
|
340
|
+
"anthropic/claude-opus-4-6-fast": { input: 30.0, output: 150.0 },
|
|
341
|
+
"anthropic/claude-opus-4-6": { input: 5.0, output: 25.0 },
|
|
342
|
+
"anthropic/claude-opus-4-5": { input: 5.0, output: 25.0 },
|
|
343
|
+
"anthropic/claude-opus-4.1": { input: 15.0, output: 75.0 },
|
|
344
|
+
"anthropic/claude-opus-4": { input: 15.0, output: 75.0 },
|
|
345
|
+
"anthropic/claude-sonnet-4-6": { input: 3.0, output: 15.0 },
|
|
346
|
+
"anthropic/claude-sonnet-4-5": { input: 3.0, output: 15.0 },
|
|
347
|
+
"anthropic/claude-sonnet-4": { input: 3.0, output: 15.0 },
|
|
348
|
+
"anthropic/claude-haiku-4-5": { input: 1.0, output: 5.0 },
|
|
349
|
+
"anthropic/claude-3.5-haiku": { input: 0.80, output: 4.0 },
|
|
350
|
+
"anthropic/claude-3-haiku": { input: 0.25, output: 1.25 },
|
|
351
|
+
"haiku": { input: 0.80, output: 4.0 },
|
|
352
|
+
// ── DeepSeek (native — free for chat, paid for pro/flash/r1) ──
|
|
353
|
+
"deepseek-chat": { input: 0, output: 0 }, // native → free
|
|
354
|
+
"deepseek-reasoner": { input: 0.55, output: 2.19 }, // native r1
|
|
355
|
+
// ── DeepSeek (OpenRouter route) ────────────────────────
|
|
356
|
+
"deepseek/deepseek-v4-pro": { input: 0.435, output: 0.870 },
|
|
357
|
+
"deepseek/deepseek-v4-flash": { input: 0.098, output: 0.197 },
|
|
358
|
+
"deepseek/deepseek-chat": { input: 0.229, output: 0.914 },
|
|
359
|
+
"deepseek/deepseek-v3.2": { input: 0.252, output: 0.378 },
|
|
360
|
+
"deepseek/deepseek-v3.2-exp": { input: 0.270, output: 0.410 },
|
|
361
|
+
"deepseek/deepseek-chat-v3.1": { input: 0.210, output: 0.790 },
|
|
362
|
+
"deepseek/deepseek-chat-v3-0324": { input: 0.200, output: 0.770 },
|
|
363
|
+
"deepseek/deepseek-v3.1-terminus": { input: 0.270, output: 0.950 },
|
|
364
|
+
"deepseek/deepseek-r1-0528": { input: 0.500, output: 2.150 },
|
|
365
|
+
"deepseek/deepseek-r1": { input: 0.700, output: 2.500 },
|
|
366
|
+
"deepseek/deepseek-r1-distill-qwen-32b": { input: 0.290, output: 0.290 },
|
|
367
|
+
"deepseek/deepseek-r1-distill-llama-70b": { input: 0.70, output: 0.80 },
|
|
368
|
+
"deepseek/deepseek-v3": { input: 0.252, output: 0.378 },
|
|
369
|
+
"deepseek/haiku": { input: 0.80, output: 4.0 },
|
|
370
|
+
// ── Google Gemini (OpenRouter route) ──────────────────
|
|
371
|
+
"google/gemini-2.5-pro": { input: 1.25, output: 10.0 },
|
|
372
|
+
"google/gemini-2.5-flash": { input: 0.30, output: 2.50 },
|
|
373
|
+
"google/gemini-2.5-flash-lite": { input: 0.10, output: 0.40 },
|
|
374
|
+
"google/gemini-2.0-flash-001": { input: 0.10, output: 0.40 },
|
|
375
|
+
"google/gemini-2.0-flash-lite-001": { input: 0.075, output: 0.30 },
|
|
376
|
+
"google/gemma-4-31b-it": { input: 0.12, output: 0.37 },
|
|
377
|
+
"google/gemma-4-26b-a4b-it": { input: 0.06, output: 0.33 },
|
|
378
|
+
// ── OpenAI (OpenRouter route) ─────────────────────────
|
|
379
|
+
"openai/gpt-5.5-pro": { input: 30.0, output: 180.0 },
|
|
380
|
+
"openai/gpt-5.5": { input: 5.0, output: 30.0 },
|
|
381
|
+
"openai/gpt-5.4-pro": { input: 30.0, output: 180.0 },
|
|
382
|
+
"openai/gpt-5.4": { input: 2.50, output: 15.0 },
|
|
383
|
+
"openai/gpt-5.4-mini": { input: 0.75, output: 4.50 },
|
|
384
|
+
"openai/gpt-5.4-nano": { input: 0.20, output: 1.25 },
|
|
385
|
+
"openai/gpt-5.3-chat": { input: 1.75, output: 14.0 },
|
|
386
|
+
"openai/gpt-5.3-codex": { input: 1.75, output: 14.0 },
|
|
387
|
+
"openai/gpt-5.2": { input: 1.75, output: 14.0 },
|
|
388
|
+
"openai/gpt-5.2-pro": { input: 21.0, output: 168.0 },
|
|
389
|
+
"openai/gpt-5.1": { input: 1.25, output: 10.0 },
|
|
390
|
+
"openai/gpt-5": { input: 1.25, output: 10.0 },
|
|
391
|
+
"openai/gpt-5-mini": { input: 0.25, output: 2.00 },
|
|
392
|
+
"openai/gpt-5-nano": { input: 0.05, output: 0.40 },
|
|
393
|
+
"openai/gpt-4o": { input: 2.50, output: 10.0 },
|
|
394
|
+
"openai/gpt-4o-mini": { input: 0.15, output: 0.60 },
|
|
395
|
+
"openai/gpt-4.1": { input: 2.00, output: 8.00 },
|
|
396
|
+
"openai/gpt-4.1-mini": { input: 0.40, output: 1.60 },
|
|
397
|
+
"openai/gpt-4.1-nano": { input: 0.10, output: 0.40 },
|
|
398
|
+
"openai/o4-mini": { input: 1.10, output: 4.40 },
|
|
399
|
+
"openai/o4-mini-high": { input: 1.10, output: 4.40 },
|
|
400
|
+
"openai/o3-pro": { input: 20.0, output: 80.0 },
|
|
401
|
+
"openai/o3": { input: 2.00, output: 8.00 },
|
|
402
|
+
"openai/o3-mini": { input: 1.10, output: 4.40 },
|
|
403
|
+
"openai/o1-pro": { input: 150.0, output: 600.0 },
|
|
404
|
+
"openai/o1": { input: 15.0, output: 60.0 },
|
|
405
|
+
"openai/gpt-4-turbo": { input: 10.0, output: 30.0 },
|
|
406
|
+
"openai/gpt-4": { input: 30.0, output: 60.0 },
|
|
407
|
+
"openai/gpt-3.5-turbo": { input: 0.50, output: 1.50 },
|
|
408
|
+
// ── Mistral (OpenRouter route) ────────────────────────
|
|
409
|
+
"mistralai/mistral-medium-3-5": { input: 1.50, output: 7.50 },
|
|
410
|
+
"mistralai/mistral-large-2512": { input: 0.50, output: 1.50 },
|
|
411
|
+
"mistralai/mistral-small-2603": { input: 0.15, output: 0.60 },
|
|
412
|
+
"mistralai/mistral-nemo": { input: 0.02, output: 0.03 },
|
|
413
|
+
};
|
|
287
414
|
// Approximate USD per typical ~1 K-token turn (blended input+output).
|
|
288
415
|
// Blend: 700 input + 300 output tokens per turn (line 272-273).
|
|
289
416
|
// Sources: provider API pricing pages, OpenRouter /api/v1/models.
|
|
@@ -518,7 +645,14 @@ export function modelCostPerTurn(model) {
|
|
|
518
645
|
if (key.startsWith(k) && /-\d+$/.test(k) && key.charAt(k.length) === "-")
|
|
519
646
|
return v;
|
|
520
647
|
}
|
|
521
|
-
//
|
|
648
|
+
// Fallback: derive blended turn cost from MODEL_PRICING_PER_1M input/output rates
|
|
649
|
+
for (const candidate of [model, key, bare]) {
|
|
650
|
+
const pricing = MODEL_PRICING_PER_1M[candidate];
|
|
651
|
+
if (pricing && Number.isFinite(pricing.input) && Number.isFinite(pricing.output)) {
|
|
652
|
+
const blended = (pricing.input * 700 + pricing.output * 300) / 1_000_000;
|
|
653
|
+
return Number.isFinite(blended) ? blended : FREE_MODEL_TURN_USD;
|
|
654
|
+
}
|
|
655
|
+
}
|
|
522
656
|
console.error(`[vibeOS] modelCostPerTurn: unknown model '${model}' (normalized: '${key}') — add to MODEL_USD_PER_TURN`);
|
|
523
657
|
return FREE_MODEL_TURN_USD;
|
|
524
658
|
}
|
package/src/lib/state.js
CHANGED
|
@@ -1474,7 +1474,6 @@ function recordCacheSaving(tool, saveEst, meta = {}) {
|
|
|
1474
1474
|
const now = new Date().toISOString();
|
|
1475
1475
|
const delta = Number(saveEst || 0);
|
|
1476
1476
|
s.lifetime ??= { warn_count: 0, total_savings_usd: 0, last_updated: "" };
|
|
1477
|
-
s.lifetime.cache_savings_usd = roundUsd(Number(s.lifetime.cache_savings_usd || 0) + delta);
|
|
1478
1477
|
s.lifetime.last_updated = now;
|
|
1479
1478
|
s.sessions ??= {};
|
|
1480
1479
|
const sid = _OC_SID;
|
|
@@ -1485,20 +1484,27 @@ function recordCacheSaving(tool, saveEst, meta = {}) {
|
|
|
1485
1484
|
s.sessions[sid].project_name = currentProjectName;
|
|
1486
1485
|
s.sessions[sid].session_cache_dir = getSessionScratchpadDir();
|
|
1487
1486
|
s.sessions[sid].tool_counts[tool] = (s.sessions[sid].tool_counts[tool] || 0) + 1;
|
|
1488
|
-
s.sessions[sid].cache_savings_usd = roundUsd(Number(s.sessions[sid].cache_savings_usd || 0) + delta);
|
|
1489
1487
|
if (meta?.hash) {
|
|
1490
1488
|
s.sessions[sid].cache_hits ??= [];
|
|
1491
|
-
s.sessions[sid].cache_hits.
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
s.
|
|
1489
|
+
if (!s.sessions[sid].cache_hits.some((h) => h.hash === meta.hash)) {
|
|
1490
|
+
s.sessions[sid].cache_hits.push({
|
|
1491
|
+
at: now,
|
|
1492
|
+
tool,
|
|
1493
|
+
hash: meta.hash,
|
|
1494
|
+
est_savings_usd: roundUsd(delta),
|
|
1495
|
+
});
|
|
1496
|
+
s.sessions[sid].cache_savings_usd = roundUsd(Number(s.sessions[sid].cache_savings_usd || 0) + delta);
|
|
1497
|
+
s.lifetime.cache_savings_usd = roundUsd(Number(s.lifetime.cache_savings_usd || 0) + delta);
|
|
1498
|
+
if (s.sessions[sid].cache_hits.length > 200) {
|
|
1499
|
+
console.error(`[vibeOS] session cache_hits truncated from ${s.sessions[sid].cache_hits.length} to 200 for ${sid}`);
|
|
1500
|
+
s.sessions[sid].cache_hits = s.sessions[sid].cache_hits.slice(-200);
|
|
1501
|
+
}
|
|
1500
1502
|
}
|
|
1501
1503
|
}
|
|
1504
|
+
else {
|
|
1505
|
+
s.sessions[sid].cache_savings_usd = roundUsd(Number(s.sessions[sid].cache_savings_usd || 0) + delta);
|
|
1506
|
+
s.lifetime.cache_savings_usd = roundUsd(Number(s.lifetime.cache_savings_usd || 0) + delta);
|
|
1507
|
+
}
|
|
1502
1508
|
_pruneOldSessions(s);
|
|
1503
1509
|
return s;
|
|
1504
1510
|
});
|
package/src/lib/trinity-tool.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
// @ts-nocheck
|
|
2
2
|
import { join } from "node:path";
|
|
3
3
|
import { LABEL_MODES, buildDeterministicTrinity, resolveExecutionIdentity } from "./pricing.js";
|
|
4
|
+
import { BRANDED_MODES, RUNTIME_MODES } from "./mode-router.js";
|
|
4
5
|
import { invalidateApiToken } from "./api-client.js";
|
|
5
6
|
export function createTrinityTool(deps) {
|
|
6
7
|
return {
|
|
@@ -24,7 +25,7 @@ export function createTrinityTool(deps) {
|
|
|
24
25
|
"Call this when the user says things like 'switch to medium', 'use cheap model', 'disable plugin', 'trinity status'.",
|
|
25
26
|
args: {
|
|
26
27
|
action: deps.tool.schema.enum(["status", "enable", "disable", "set", "mode", "thinking", "flow", "tdd", "setup", "project", "patterns", "rebuild", "diagnose", "help", "enforce", "repair-state", "blackbox", "report", "target", "guard", "api-token", "api-bootstrap-token", "todo", "todo-done", "todo-sync"]).optional(),
|
|
27
|
-
slot: deps.tool.schema.enum(["brain", "medium", "cheap", "budget", "quality", "speed", "longrun", "auto", "on", "off", "enforce", "strict", "preview", "apply", "clear", "savings"]).optional(),
|
|
28
|
+
slot: deps.tool.schema.enum(["brain", "medium", "cheap", "budget", "quality", "speed", "longrun", "auto", "vibeultrax", "on", "off", "enforce", "strict", "preview", "apply", "clear", "savings"]).optional(),
|
|
28
29
|
level: deps.tool.schema.enum(["full", "brief", "off", "on"]).optional(),
|
|
29
30
|
token: deps.tool.schema.string().optional(),
|
|
30
31
|
},
|
|
@@ -178,42 +179,33 @@ export function createTrinityTool(deps) {
|
|
|
178
179
|
return `\u2705 Switched to ${slot} slot (${result.ocModel}). Active now (no restart needed).`;
|
|
179
180
|
}
|
|
180
181
|
if (action === "mode") {
|
|
182
|
+
const builtInIds = ["budget", "quality", "speed", "longrun"];
|
|
183
|
+
const brandedIds = BRANDED_MODES.map(m => m.id);
|
|
184
|
+
const allModeIds = [...builtInIds, "auto", ...brandedIds];
|
|
181
185
|
if (!slot)
|
|
182
|
-
return `Provide mode:
|
|
186
|
+
return `Provide mode: ${builtInIds.join(" | ")} | auto | ${brandedIds.join(" | ")}`;
|
|
183
187
|
const modeAlias = { vibemax: "vibemax", vibeqmax: "quality" };
|
|
184
188
|
const resolvedSlot = modeAlias[slot] || slot;
|
|
185
|
-
if (!
|
|
186
|
-
return `Provide mode:
|
|
189
|
+
if (!allModeIds.includes(resolvedSlot)) {
|
|
190
|
+
return `Provide mode: ${builtInIds.join(" | ")} | auto | ${brandedIds.join(" | ")}`;
|
|
187
191
|
}
|
|
188
192
|
const ok = deps.saveOptimizationMode(resolvedSlot);
|
|
189
193
|
if (!ok)
|
|
190
194
|
return `Failed to write mode`;
|
|
191
|
-
const
|
|
192
|
-
const
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
deps.writeSelection("
|
|
197
|
-
deps.writeSelection("
|
|
198
|
-
deps.writeSelection("
|
|
199
|
-
deps.writeSelection("
|
|
200
|
-
deps.writeSelection("
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
deps.writeSelection("flow_enforce", true);
|
|
206
|
-
deps.writeSelection("tdd_enforce", true);
|
|
207
|
-
deps.writeSelection("thinking_level", "full");
|
|
208
|
-
}
|
|
209
|
-
else if (slot === "speed") {
|
|
210
|
-
deps.writeSelection("delegation_enforce", false);
|
|
211
|
-
deps.writeSelection("flow_enabled", false);
|
|
212
|
-
deps.writeSelection("flow_enforce", false);
|
|
213
|
-
deps.writeSelection("tdd_enforce", false);
|
|
214
|
-
deps.writeSelection("thinking_level", "off");
|
|
215
|
-
}
|
|
216
|
-
return `Mode set to ${slot.toUpperCase()}. Tier: ${tierSlot}.`;
|
|
195
|
+
const allEntries = [...BRANDED_MODES, ...RUNTIME_MODES];
|
|
196
|
+
const modeEntry = allEntries.find(e => e.id === slot);
|
|
197
|
+
if (modeEntry) {
|
|
198
|
+
const tierSlot = modeEntry.pipeline[0] || "cheap";
|
|
199
|
+
deps.writeSelection("active_slot", tierSlot);
|
|
200
|
+
deps.writeSelection("onboarding_mode", modeEntry.tdd === "quality" || modeEntry.enforcement === "strict" ? "strict" : "assist");
|
|
201
|
+
deps.writeSelection("delegation_enforce", modeEntry.enforcement === "strict" || modeEntry.enforcement === "on");
|
|
202
|
+
deps.writeSelection("flow_enabled", modeEntry.flow === "strict" || modeEntry.flow === "on" || modeEntry.flow === "audit");
|
|
203
|
+
deps.writeSelection("flow_enforce", modeEntry.flow === "strict" || modeEntry.flow === "on");
|
|
204
|
+
deps.writeSelection("tdd_enforce", modeEntry.tdd === "quality" || modeEntry.tdd === "on" || modeEntry.tdd === "strict");
|
|
205
|
+
deps.writeSelection("thinking_level", modeEntry.thinking);
|
|
206
|
+
return `Mode set to ${slot.toUpperCase()}. Tier: ${tierSlot}.`;
|
|
207
|
+
}
|
|
208
|
+
return `Mode set to ${slot.toUpperCase()}.`;
|
|
217
209
|
}
|
|
218
210
|
if (action === "thinking") {
|
|
219
211
|
if (!level || !["full", "brief", "off"].includes(level)) {
|
|
@@ -1105,6 +1097,7 @@ export function createTrinityTool(deps) {
|
|
|
1105
1097
|
" trinity enable/disable Toggle vibeOS plugin on/off",
|
|
1106
1098
|
" trinity enforce on Block brain-tier writes/edits (save $$)",
|
|
1107
1099
|
" trinity lock on/off Lock model at session start (skip auto-reconcile)",
|
|
1100
|
+
" trinity mode <profile> Set optimization profile (built-in + branded modes)",
|
|
1108
1101
|
" trinity thinking full|brief|off Set reasoning depth",
|
|
1109
1102
|
"",
|
|
1110
1103
|
"GUARDRAILS:",
|