@plexor-dev/claude-code-plugin-staging 0.1.0-beta.27 → 0.1.0-beta.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,7 +17,7 @@
17
17
  const path = require('path');
18
18
 
19
19
  // Use lib modules
20
- let ConfigManager, SessionManager, LocalCache, Logger, ServerSync;
20
+ let ConfigManager, SessionManager, LocalCache, Logger, ServerSync, SupervisorEmitter;
21
21
  try {
22
22
  ConfigManager = require('../lib/config');
23
23
  SessionManager = require('../lib/session');
@@ -26,6 +26,8 @@ try {
26
26
  // Issue #701: Phase 2 - Server sync for persistent session state
27
27
  const serverSyncModule = require('../lib/server-sync');
28
28
  ServerSync = serverSyncModule.getServerSync;
29
+ // Phase 1 supervisor UX
30
+ SupervisorEmitter = require('../lib/supervisor').SupervisorEmitter;
29
31
  } catch {
30
32
  // Fallback inline implementations if lib not found
31
33
  const fs = require('fs');
@@ -171,12 +173,18 @@ try {
171
173
  scheduleSync: () => {},
172
174
  needsSync: () => false
173
175
  });
176
+
177
+ // Fallback SupervisorEmitter (no-op)
178
+ SupervisorEmitter = class {
179
+ emit() {}
180
+ };
174
181
  }
175
182
 
176
183
  const logger = new Logger('track-response');
177
184
  const config = new ConfigManager();
178
185
  const cache = new LocalCache();
179
186
  const session = new SessionManager();
187
+ const supervisor = new SupervisorEmitter();
180
188
 
181
189
  // Issue #701: Phase 2 - Initialize server sync (lazy, initialized on first use)
182
190
  let serverSync = null;
@@ -189,7 +197,7 @@ async function getServerSync() {
189
197
  if (settings.apiKey && settings.enabled) {
190
198
  serverSync = ServerSync({
191
199
  apiKey: settings.apiKey,
192
- baseUrl: settings.apiUrl || 'https://api.plexor.dev',
200
+ baseUrl: settings.apiUrl || 'http://127.0.0.1:8000',
193
201
  enabled: settings.serverSyncEnabled !== false
194
202
  });
195
203
  } else {
@@ -221,6 +229,11 @@ async function main() {
221
229
  const plexorMeta = response._plexor;
222
230
  emitPlexorOutcomeSummary(response, plexorMeta, outputTokens);
223
231
 
232
+ // Phase 1 supervisor UX: concise single-line routing summary
233
+ supervisor.emit(response, plexorMeta);
234
+
235
+ // Context warnings now handled by supervisor.emit() (Phase 5) to avoid duplicates
236
+
224
237
  // Issue #701: Track ALL responses, not just when enabled
225
238
  // This ensures session stats are always accurate
226
239
  if (plexorMeta) {
@@ -674,3 +687,29 @@ function emitPlexorOutcomeSummary(response, plexorMeta, outputTokens) {
674
687
  logger.ux(msg);
675
688
  }
676
689
  }
690
+
691
+ /**
692
+ * Proactive compact warning: emit context-size alerts at 70K and 80K prompt tokens.
693
+ * Uses the per-request prompt token count (first available of plexor_prompt_tokens, usage.input_tokens, usage.prompt_tokens, plexorMeta.optimized_tokens)
694
+ * which represents the current context window size for that call.
695
+ */
696
+ function emitCompactWarning(response, plexorMeta) {
697
+ if (!logger || typeof logger.ux !== 'function') return;
698
+
699
+ const promptTokens =
700
+ toNumber(response?.plexor_prompt_tokens) ??
701
+ toNumber(response?.usage?.input_tokens) ??
702
+ toNumber(response?.usage?.prompt_tokens) ??
703
+ toNumber(plexorMeta?.optimized_tokens) ??
704
+ null;
705
+
706
+ if (promptTokens === null || promptTokens < 70000) return;
707
+
708
+ const tokensK = Math.round(promptTokens / 1000);
709
+
710
+ if (promptTokens >= 80000) {
711
+ logger.ux(`\u26a0 Context at ${tokensK}K tokens \u2014 recommend /compact to prevent errors`);
712
+ } else {
713
+ logger.ux(`Context at ${tokensK}K tokens \u2014 approaching provider limits`);
714
+ }
715
+ }
package/lib/supervisor.js CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Plexor Supervisor Emitter — Phases 1-4
2
+ * Plexor Supervisor Emitter — Phases 1-5
3
3
  *
4
4
  * Phase 1: Basic routing summary
5
5
  * [PLEXOR: Routed to {provider}/{model}, {latency}ms, {routing_source}]
@@ -13,15 +13,31 @@
13
13
  * Phase 4: Scaffolding gate blocked detection
14
14
  * [PLEXOR: Scaffolding gate: {model} blocked, using {alternative}]
15
15
  *
16
+ * Phase 5: Session narration — cumulative tokens, cost, provider reliability,
17
+ * context warnings, auto-compact suggestions
18
+ * [PLEXOR: Session: 12 turns, $0.42 cost, 45K context]
19
+ * [PLEXOR: Provider reliability (last 5): DeepSeek: 3/4 tool_calls, Gemini: 1/1 tool_calls]
20
+ * [PLEXOR: Context at 72K tokens — approaching provider limits]
21
+ * [PLEXOR: Context at 82K — recommend /compact to prevent errors]
22
+ *
16
23
  * This module is consumed by track-response.js to surface routing
17
24
  * decisions to the developer without requiring them to parse verbose logs.
18
25
  */
19
26
 
27
+ const fs = require('fs');
28
+ const path = require('path');
29
+
20
30
  const CYAN = '\x1b[36m';
21
31
  const YELLOW = '\x1b[33m';
22
32
  const RED = '\x1b[31m';
33
+ const MAGENTA = '\x1b[35m';
23
34
  const RESET = '\x1b[0m';
24
35
 
36
+ const CONTEXT_WARNING_THRESHOLD = 70000;
37
+ const CONTEXT_COMPACT_THRESHOLD = 80000;
38
+ const SESSION_STATE_PATH = path.join(process.env.HOME || '', '.plexor', 'supervisor-session.json');
39
+ const SESSION_TIMEOUT_MS = 30 * 60 * 1000; // 30 min — start fresh if idle
40
+
25
41
  class SupervisorEmitter {
26
42
  /**
27
43
  * @param {object} [opts]
@@ -34,6 +50,55 @@ class SupervisorEmitter {
34
50
  } else {
35
51
  this.enabled = opts.enabled !== false;
36
52
  }
53
+
54
+ // Phase 5: Session-level state — restored from disk across hook invocations
55
+ this._loadSessionState();
56
+ }
57
+
58
+ /**
59
+ * Load persisted session state from disk. Resets if stale (>30 min idle) or if the file is missing or cannot be parsed.
60
+ */
61
+ _loadSessionState() {
62
+ try {
63
+ const raw = fs.readFileSync(SESSION_STATE_PATH, 'utf8');
64
+ const state = JSON.parse(raw);
65
+ const lastTs = state.lastTimestamp || 0;
66
+ if (Date.now() - lastTs > SESSION_TIMEOUT_MS) {
67
+ // Session expired — start fresh
68
+ this._resetSessionState();
69
+ return;
70
+ }
71
+ this._turnCount = state.turnCount || 0;
72
+ this._contextTokens = state.contextTokens || 0; // current context window size
73
+ this._cumulativeCost = state.cumulativeCost || 0;
74
+ this._providerHistory = state.providerHistory || [];
75
+ } catch {
76
+ this._resetSessionState();
77
+ }
78
+ }
79
+
80
+ _resetSessionState() {
81
+ this._turnCount = 0;
82
+ this._contextTokens = 0;
83
+ this._cumulativeCost = 0;
84
+ this._providerHistory = [];
85
+ }
86
+
87
+ /**
88
+ * Persist session state to disk for cross-process continuity.
89
+ */
90
+ _saveSessionState() {
91
+ try {
92
+ const dir = path.dirname(SESSION_STATE_PATH);
93
+ if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
94
+ fs.writeFileSync(SESSION_STATE_PATH, JSON.stringify({
95
+ turnCount: this._turnCount,
96
+ contextTokens: this._contextTokens,
97
+ cumulativeCost: this._cumulativeCost,
98
+ providerHistory: this._providerHistory.slice(-20), // keep last 20
99
+ lastTimestamp: Date.now(),
100
+ }));
101
+ } catch { /* best-effort persistence */ }
37
102
  }
38
103
 
39
104
  /**
@@ -201,6 +266,9 @@ class SupervisorEmitter {
201
266
  return;
202
267
  }
203
268
 
269
+ // Phase 5: Accumulate session state before emitting
270
+ this._accumulateSessionState(response, plexorMeta);
271
+
204
272
  // Phase 4: Scaffolding gate (highest priority — emit first if present)
205
273
  const scaffoldingNotice = this.buildScaffoldingGateNotice(response, plexorMeta);
206
274
  if (scaffoldingNotice) {
@@ -218,8 +286,134 @@ class SupervisorEmitter {
218
286
  if (summary) {
219
287
  process.stderr.write(`${CYAN}${summary}${RESET}\n`);
220
288
  }
289
+
290
+ // Phase 5: Session narration (after per-turn messages)
291
+ this._emitSessionNarration();
292
+ }
293
+
294
+ // ---- Phase 5: session narration ----
295
+
296
+ /**
297
+ * Accumulate per-turn data into session state.
298
+ * Called at the start of emit() so all session fields are current
299
+ * before any Phase 5 messages are built.
300
+ */
301
+ _accumulateSessionState(response, plexorMeta) {
302
+ this._turnCount++;
303
+
304
+ // Context window size — prompt_tokens IS the current context size (not cumulative).
305
+ // Each turn's prompt_tokens includes the full conversation context.
306
+ const usage = response?.usage || response?.plexor?.usage || {};
307
+ const promptTokens = Number(usage.prompt_tokens) || 0;
308
+ if (promptTokens > 0) {
309
+ this._contextTokens = promptTokens; // replace, not accumulate
310
+ }
311
+
312
+ // Cumulative cost
313
+ const costUsd = Number(
314
+ response?.plexor_cost_usd ??
315
+ response?.plexor?.cost_usd ??
316
+ plexorMeta?.cost_usd ??
317
+ 0
318
+ );
319
+ if (Number.isFinite(costUsd)) {
320
+ this._cumulativeCost += costUsd;
321
+ }
322
+
323
+ // Provider reliability tracking
324
+ const provider = this._resolveProvider(response, plexorMeta);
325
+ if (provider) {
326
+ const stopReason =
327
+ response?.stop_reason ||
328
+ response?.choices?.[0]?.finish_reason ||
329
+ response?.plexor?.stop_reason ||
330
+ null;
331
+ this._providerHistory.push({
332
+ provider,
333
+ hadToolCalls: stopReason === 'tool_use' || stopReason === 'tool_calls',
334
+ });
335
+ }
336
+ }
337
+
338
+ /**
339
+ * Emit session-level narration lines based on accumulated state.
340
+ */
341
+ _emitSessionNarration() {
342
+ // Session summary line — every turn
343
+ const tokenStr = this._formatTokenCount(this._contextTokens);
344
+ const costStr = this._cumulativeCost < 0.01
345
+ ? `$${this._cumulativeCost.toFixed(4)}`
346
+ : `$${this._cumulativeCost.toFixed(2)}`;
347
+ process.stderr.write(
348
+ `${MAGENTA}[PLEXOR: Session: ${this._turnCount} turns, ${costStr} cost, ${tokenStr} context]${RESET}\n`
349
+ );
350
+
351
+ // Provider reliability digest — every 5th turn
352
+ if (this._turnCount % 5 === 0 && this._providerHistory.length > 0) {
353
+ const digest = this._buildProviderReliabilityDigest();
354
+ if (digest) {
355
+ process.stderr.write(`${MAGENTA}${digest}${RESET}\n`);
356
+ }
357
+ }
358
+
359
+ // Context warnings: recommend /compact at 80K tokens, softer notice at 70K (uses real context window size, not cumulative)
360
+ if (this._contextTokens >= CONTEXT_COMPACT_THRESHOLD) {
361
+ const kTokens = Math.round(this._contextTokens / 1000);
362
+ process.stderr.write(
363
+ `${YELLOW}[PLEXOR: Context at ${kTokens}K \u2014 recommend /compact to prevent errors]${RESET}\n`
364
+ );
365
+ } else if (this._contextTokens >= CONTEXT_WARNING_THRESHOLD) {
366
+ const kTokens = Math.round(this._contextTokens / 1000);
367
+ process.stderr.write(
368
+ `${YELLOW}[PLEXOR: Context at ${kTokens}K tokens \u2014 approaching provider limits]${RESET}\n`
369
+ );
370
+ }
371
+
372
+ // Persist state for next hook invocation
373
+ this._saveSessionState();
221
374
  }
222
375
 
376
+ /**
377
+ * Build provider reliability digest from the last 5 entries in history.
378
+ * Format: [PLEXOR: Provider reliability (last 5): DeepSeek: 3/4 tool_calls, Gemini: 1/1 tool_calls]
379
+ */
380
+ _buildProviderReliabilityDigest() {
381
+ const recent = this._providerHistory.slice(-5);
382
+ const totals = {};
383
+ const toolHits = {};
384
+
385
+ for (const entry of recent) {
386
+ const p = entry.provider;
387
+ totals[p] = (totals[p] || 0) + 1;
388
+ if (entry.hadToolCalls) {
389
+ toolHits[p] = (toolHits[p] || 0) + 1;
390
+ }
391
+ }
392
+
393
+ const parts = Object.keys(totals).map(p => {
394
+ const hits = toolHits[p] || 0;
395
+ return `${p}: ${hits}/${totals[p]} tool_calls`;
396
+ });
397
+
398
+ if (parts.length === 0) return null;
399
+ return `[PLEXOR: Provider reliability (last 5): ${parts.join(', ')}]`;
400
+ }
401
+
402
+ /**
403
+ * Format token count: 1234 -> "1.2K", 123456 -> "123K"
404
+ */
405
+ _formatTokenCount(tokens) {
406
+ if (tokens < 1000) return String(tokens);
407
+ if (tokens < 10000) return `${(tokens / 1000).toFixed(1)}K`;
408
+ return `${Math.round(tokens / 1000)}K`;
409
+ }
410
+
411
+ // ---- Phase 5 accessors (for testing) ----
412
+
413
+ get turnCount() { return this._turnCount; }
414
+ get cumulativeTokens() { return this._contextTokens; }
415
+ get cumulativeCost() { return this._cumulativeCost; }
416
+
223
417
  // ---- private helpers ----
224
418
 
225
419
  _resolveProvider(response, meta) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@plexor-dev/claude-code-plugin-staging",
3
- "version": "0.1.0-beta.27",
3
+ "version": "0.1.0-beta.29",
4
4
  "description": "STAGING - LLM cost optimization plugin for Claude Code (internal testing)",
5
5
  "main": "lib/constants.js",
6
6
  "bin": {