pi-antigravity-rotator 1.12.2 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,28 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.13.0] - 2026-05-19
4
+
5
+ ### Removed
6
+ - **Pro Family Features**: Completely removed the legacy Pro Family sharing infrastructure (Advisor recommendations, dual-window tracking, and associated UI elements) to simplify the architecture for unified quota pools.
7
+
8
+ ### Added
9
+ - **Quota Reset Countdown**: Added a new column to the Quota Forecast dashboard component that displays the exact time remaining until the next quota reset.
10
+ - **Token Usage Metrics Output**: The proxy now correctly captures and forwards precise input/output token counts from the upstream API back to the client, fully enabling usage statistics reporting in compatible adapters.
11
+
12
+ ## [1.12.4] - 2026-05-18
13
+
14
+ ### Added
15
+ - **Claude `cache_control` stripping**: Anthropic requests often include `cache_control` objects, which the Google Cloud Code Assist API rejects with "Extra inputs are not permitted". The proxy now safely strips `cache_control` from all system and message content blocks before forwarding them to Gemini.
16
+ - **Claude `VALIDATED` Function Calling**: Automatically enforces `toolConfig: { functionCallingConfig: { mode: "VALIDATED" } }` for Claude models when tools are present, ensuring stricter schema adherence.
17
+ - **Adaptive Thinking Budgets**: Replaced static thinking budget values with a dynamic `MODEL_SPECS` mapping. `gemini-3-flash` now correctly uses adaptive thinking budgets (`-1`) which allows the model to decide its own optimal reasoning length, while Pro models use strict budgets (e.g. `10001` for high).
18
+ - **Max Output Tokens Enforcement**: The proxy now enforces hard `maxOutputTokens` caps based on the specific model's upper limits (e.g. `65535` vs `64000`), dynamically adjusting them to ensure there is enough room for both the thinking budget and the final output response without triggering upstream validation errors.
19
+
20
+ ## [1.12.3] - 2026-05-18
21
+
22
+ ### Fixed
23
+ - **Gemini 3.1 Pro High Deprecation (`400 Invalid Argument`)**: Google Cloud Code Assist deprecated the internal string `"gemini-3.1-pro-high"` and replaced it with `"gemini-pro-agent"`. The proxy now automatically maps `"gemini-3.1-pro-high"` to `"gemini-pro-agent"` under the hood when constructing the upstream payload, preventing `400` validation errors while allowing clients to continue using the `-high` alias.
24
+ - **Missing `thought_signature` on Tool Calls (`400 Invalid Argument`)**: Gemini thinking models strictly require a cryptographic Base64 `thought_signature` for all `functionCall` history parts, which the proxy normally caches in RAM. To prevent API rejection on cache misses (e.g. after a proxy restart or when using synthetic tool IDs), the proxy now gracefully collapses the orphaned tool exchange into a neutral user summary (`[Context: The assistant used tools...]`). This preserves the conversation context without triggering the `400` error or teaching the model bad tool-calling formats.
25
+
3
26
  ## [1.12.2] - 2026-05-18
4
27
 
5
28
  ### Fixed
package/README.md CHANGED
@@ -15,7 +15,6 @@ Multi-account rotation proxy for Google Antigravity. Distributes API usage acros
15
15
  - **Protective pause** -- Pauses all routing for several hours after serious ToS/abuse-style flags so the rest of the pool is not burned
16
16
  - **Token auto-refresh** -- Tokens are refreshed automatically before expiry; no manual management
17
17
  - **Endpoint cascade** -- Tries daily, autopush, and prod API endpoints for resilience
18
- - **Pro Family Advisor** -- Scans your account pool and alerts you if there are major imbalances (like some accounts never getting used because of routing bias), giving you actionable steps to optimize token distribution
19
18
  - **Advanced Telemetry & Statistics** -- Track exactly how much USD you save compared to a paid API plan, predict quota depletion with the Forecast grid, view Latency tracking (p50/p95), and explore 60-day historical usage heatmaps
20
19
  - **Web dashboard** -- Real-time view of model routing table, per-account quota bars with per-model timers, and flagged account alerts
21
20
  - **Auto-update notifications** -- The dashboard checks npm for new releases every 30 minutes and shows a banner with one-click update when a newer version is available
@@ -93,13 +92,12 @@ After starting the proxy, open `http://localhost:51200/dashboard` or `http://<yo
93
92
  The dashboard shows:
94
93
 
95
94
  - **Top Status & Controls** -- Real-time routing state, uptime, requests, and PII masking toggle.
96
- - **Pro Family Advisor & Dual-Window Tracking** -- Advanced logic that tracks and compares both Pro and Free quota windows simultaneously. The Advisor analyzes cumulative quota to suggest mathematical upgrades/downgrades.
97
95
  - **Token Usage & Savings** -- Interactive chart (`1h`, `2h`, `4h`, `8h`, `12h`, `1d`, `7d`, `1m`) showing token consumption by model, with estimated USD savings and `CSV`/`JSON` export options.
98
96
  - **Activity Heatmap** -- 60-day responsive GitHub-style contribution grid showing request intensity hour by hour.
99
97
  - **Latency (p50/p95)** -- Real-time median and 95th percentile tracking for Time-to-First-Byte (TTFB) and Total Duration per model.
100
98
  - **Quota Forecast** -- Predictive modeling showing when each model's quota will run out based on the current requests/hour burn rate.
101
99
  - **Searchable Request Log** -- Live feed of the last 200 requests with exact timestamps, models, masked accounts, status codes, and latency.
102
- - **Account Cards** -- Sorted by total quota. Shows status (`active`, `ready`, `cooldown`, `flagged`, `disabled`), dual-window trackers, quota bars with timers, and precise error messages.
100
+ - **Account Cards** -- Sorted by total quota. Shows status (`active`, `ready`, `cooldown`, `flagged`, `disabled`), quota bars with timers, and precise error messages.
103
101
  - **Operator Panels** -- "Attention Needed" summaries for quarantined accounts and a real-time event feed of rotator actions.
104
102
 
105
103
  ![Dashboard](dashboard.png)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-antigravity-rotator",
3
- "version": "1.12.2",
3
+ "version": "1.13.0",
4
4
  "description": "Multi-account rotation proxy for Google Antigravity with per-model routing, real-time quota tracking, and infringement detection",
5
5
  "license": "MIT",
6
6
  "type": "module",
package/src/compat.ts CHANGED
@@ -86,6 +86,64 @@ export interface CompatCompletion {
86
86
  toolCalls?: OpenAIToolCall[];
87
87
  }
88
88
 
89
+ // ---------------------------------------------------------------------------
90
+ // Model-specific specs — mirrors Antigravity-Manager model_specs.json
91
+ // ---------------------------------------------------------------------------
92
+ interface ModelSpec {
93
+ maxOutputTokens: number;
94
+ thinkingBudget: number; // -1 = adaptive (model decides), >=0 = fixed
95
+ isThinking: boolean;
96
+ }
97
+ const MODEL_SPECS: Record<string, ModelSpec> = {
98
+ "gemini-pro-agent": { maxOutputTokens: 65535, thinkingBudget: 10001, isThinking: true },
99
+ "gemini-3-flash-agent": { maxOutputTokens: 65536, thinkingBudget: -1, isThinking: true },
100
+ "gemini-3-pro-high": { maxOutputTokens: 65535, thinkingBudget: 10001, isThinking: true },
101
+ "gemini-3-pro-low": { maxOutputTokens: 65535, thinkingBudget: 1001, isThinking: true },
102
+ "gemini-3.1-pro-high": { maxOutputTokens: 65535, thinkingBudget: 10001, isThinking: true },
103
+ "gemini-3.1-pro-low": { maxOutputTokens: 65535, thinkingBudget: 1001, isThinking: true },
104
+ "gemini-3.1-pro-preview": { maxOutputTokens: 65535, thinkingBudget: 10001, isThinking: true },
105
+ "gemini-3-flash": { maxOutputTokens: 65536, thinkingBudget: 32768, isThinking: true },
106
+ "gemini-2.5-flash": { maxOutputTokens: 65535, thinkingBudget: 24576, isThinking: true },
107
+ "gemini-2.5-pro": { maxOutputTokens: 65535, thinkingBudget: 1024, isThinking: true },
108
+ "claude-sonnet-4-6": { maxOutputTokens: 64000, thinkingBudget: 32768, isThinking: true },
109
+ "claude-sonnet-4-6-thinking":{ maxOutputTokens: 64000, thinkingBudget: 32768, isThinking: true },
110
+ "claude-opus-4-6-thinking": { maxOutputTokens: 64000, thinkingBudget: 32768, isThinking: true },
111
+ };
112
+ const GEMINI_MAX_OUTPUT_TOKENS = 65536;
113
+ const CLAUDE_MAX_OUTPUT_TOKENS = 64000;
114
+ const FALLBACK_THINKING_BUDGET = 24576;
115
+ const CLAUDE_DEFAULT_THINKING_BUDGET = 32768;
116
+
117
+ function getModelFamily(model: string): "claude" | "gemini" | "unknown" {
118
+ const l = model.toLowerCase();
119
+ if (l.includes("claude")) return "claude";
120
+ if (l.includes("gemini")) return "gemini";
121
+ return "unknown";
122
+ }
123
+
124
+ function getModelSpec(model: string): ModelSpec {
125
+ const lower = model.toLowerCase();
126
+ if (MODEL_SPECS[lower]) return MODEL_SPECS[lower];
127
+ for (const [key, spec] of Object.entries(MODEL_SPECS)) {
128
+ if (lower.includes(key)) return spec;
129
+ }
130
+ const family = getModelFamily(model);
131
+ if (family === "claude") return { maxOutputTokens: CLAUDE_MAX_OUTPUT_TOKENS, thinkingBudget: CLAUDE_DEFAULT_THINKING_BUDGET, isThinking: true };
132
+ if (family === "gemini") return { maxOutputTokens: GEMINI_MAX_OUTPUT_TOKENS, thinkingBudget: FALLBACK_THINKING_BUDGET, isThinking: true };
133
+ return { maxOutputTokens: 65536, thinkingBudget: FALLBACK_THINKING_BUDGET, isThinking: false };
134
+ }
135
+
136
+ function isThinkingModel(model: string): boolean {
137
+ const spec = getModelSpec(model);
138
+ if (spec.isThinking) return true;
139
+ const l = model.toLowerCase();
140
+ if (l.includes("gemini")) {
141
+ const m = l.match(/gemini-(\d+)/);
142
+ if (m && parseInt(m[1], 10) >= 3) return true;
143
+ }
144
+ return false;
145
+ }
146
+
89
147
  type AntigravityPart = { text: string } | { inlineData: { mimeType: string; data: string } };
90
148
 
91
149
  function isRecord(value: unknown): value is Record<string, unknown> {
@@ -120,12 +178,29 @@ function cacheThoughtSignature(callId: string, signature: string): void {
120
178
  thoughtSignatureCache.set(callId, signature);
121
179
  }
122
180
 
181
+ /**
182
+ * Strip cache_control fields from content blocks.
183
+ * Cloud Code API rejects cache_control with "Extra inputs are not permitted".
184
+ */
185
+ function cleanCacheControl<T>(content: T): T {
186
+ if (!Array.isArray(content)) return content;
187
+ return content.map((block: Record<string, unknown>) => {
188
+ if (!block || typeof block !== "object") return block;
189
+ if ("cache_control" in block) {
190
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
191
+ const { cache_control: _cc, ...rest } = block;
192
+ return rest;
193
+ }
194
+ return block;
195
+ }) as T;
196
+ }
197
+
123
198
  function extractText(content: ChatMessage["content"]): string {
124
199
  if (typeof content === "string") return content;
125
200
  if (!Array.isArray(content)) return "";
126
- return content
127
- .filter((p) => (p.type === "text" && typeof p.text === "string") || (p.type === "thinking" && typeof p.thinking === "string"))
128
- .map((p) => p.type === "thinking" ? `[Thinking]\n${p.thinking}\n[/Thinking]` : p.text)
201
+ return cleanCacheControl(content)
202
+ .filter((p: { type?: string; text?: string; thinking?: string }) => (p.type === "text" && typeof p.text === "string") || (p.type === "thinking" && typeof p.thinking === "string"))
203
+ .map((p: { type?: string; text?: string; thinking?: string }) => p.type === "thinking" ? `[Thinking]\n${p.thinking}\n[/Thinking]` : (p.text as string))
129
204
  .join("\n");
130
205
  }
131
206
 
@@ -246,12 +321,15 @@ function sanitizeClaudeViaGeminiSchema(schema: unknown): unknown {
246
321
  if (!isRecord(schema)) return schema;
247
322
 
248
323
  // Only remove fields that Gemini's API layer truly rejects at the network level.
249
- // We keep Draft 2020-12 keywords like minimum/maximum/pattern/title/etc.
324
+ // We keep standard Draft 2020-12 keywords but must strip exclusiveMinimum/exclusiveMaximum
325
+ // as boolean values (Draft 4) — the API layer rejects them even for Claude-bound requests.
250
326
  const UNSUPPORTED = new Set([
251
327
  "$schema", "$id", "$ref", "$defs", "definitions",
252
328
  "if", "then", "else", "not",
253
329
  "patternProperties", "unevaluatedProperties", "unevaluatedItems",
254
330
  "contentEncoding", "contentMediaType",
331
+ // Gemini's protobuf layer rejects these regardless of target model
332
+ "exclusiveMinimum", "exclusiveMaximum",
255
333
  ]);
256
334
 
257
335
  const out: Record<string, unknown> = {};
@@ -411,10 +489,41 @@ export function openAIToAntigravityBody(input: OpenAIChatCompletionRequest): Req
411
489
  // Determine if model is Claude — affects schema sanitization and tool call ID handling
412
490
  const isClaude = /^claude-/i.test(input.model);
413
491
 
492
+ // Use model specs to determine thinking support
493
+ const isThinking = isThinkingModel(input.model);
494
+ const isGeminiThinking = !isClaude && isThinking;
495
+
414
496
  const contents: GeminiContent[] = [];
415
497
  for (let i = 0; i < conversationMessages.length; i++) {
416
498
  const msg = conversationMessages[i];
417
499
  if (msg.role === "assistant") {
500
+ // Check if this is a thinking model turn with tool calls that have no cached signatures.
501
+ // If so, we collapse the tool exchange into a neutral user summary instead of
502
+ // injecting [Tool call: ...] text that the model will learn to mimic.
503
+ const hasMissingSig =
504
+ isGeminiThinking &&
505
+ Array.isArray(msg.tool_calls) &&
506
+ msg.tool_calls.length > 0 &&
507
+ !thoughtSignatureCache.has(msg.tool_calls[0].id);
508
+
509
+ if (hasMissingSig) {
510
+ // Build a summary of what the model did and what results came back.
511
+ // We collect the paired tool result(s) from the immediately following messages.
512
+ const toolNames = msg.tool_calls!.map((tc) => tc.function.name).join(", ");
513
+ const resultParts: string[] = [];
514
+ while (i + 1 < conversationMessages.length && conversationMessages[i + 1].role === "tool") {
515
+ i++;
516
+ const toolMsg = conversationMessages[i];
517
+ const toolText = typeof toolMsg.content === "string" ? toolMsg.content : extractText(toolMsg.content);
518
+ resultParts.push(`${toolMsg.name || "tool"}: ${toolText.slice(0, 500)}`);
519
+ }
520
+ const summaryText = `[Context: The assistant used tools (${toolNames}) and received results:\n${resultParts.join("\n")}]`;
521
+ contents.push({ role: "user", parts: [{ text: summaryText }] });
522
+ // Add a minimal model acknowledgement to avoid consecutive user turns
523
+ contents.push({ role: "model", parts: [{ text: "Understood, I have the tool results." }] });
524
+ continue;
525
+ }
526
+
418
527
  const parts: unknown[] = [];
419
528
  if (msg.content) {
420
529
  const textContent = typeof msg.content === "string" ? msg.content : extractText(msg.content);
@@ -427,28 +536,24 @@ export function openAIToAntigravityBody(input: OpenAIChatCompletionRequest): Req
427
536
  // signatures on older historical turns are silently ignored.
428
537
  let isFirstInMessage = true;
429
538
  for (const tc of msg.tool_calls) {
539
+ let args: unknown;
430
540
  try {
431
- const args = typeof tc.function.arguments === "string" ? JSON.parse(tc.function.arguments) : tc.function.arguments;
432
- // Only the first functionCall part in a model turn needs the signature
433
- const cachedSig = isFirstInMessage ? thoughtSignatureCache.get(tc.id) : undefined;
434
- parts.push({
435
- ...(cachedSig ? { thoughtSignature: cachedSig } : {}),
436
- // Include id only for Claude — Gemini native models reject the id field
437
- functionCall: { ...(isClaude ? { id: tc.id } : {}), name: tc.function.name, args },
438
- });
541
+ args = typeof tc.function.arguments === "string" ? JSON.parse(tc.function.arguments) : tc.function.arguments;
439
542
  } catch {
440
- const cachedSig = isFirstInMessage ? thoughtSignatureCache.get(tc.id) : undefined;
441
- parts.push({
442
- ...(cachedSig ? { thoughtSignature: cachedSig } : {}),
443
- functionCall: { ...(isClaude ? { id: tc.id } : {}), name: tc.function.name, args: {} },
444
- });
543
+ args = {};
445
544
  }
545
+ // Only the first functionCall part in a model turn needs the signature
546
+ const cachedSig = isFirstInMessage ? thoughtSignatureCache.get(tc.id) : undefined;
547
+ parts.push({
548
+ ...(cachedSig ? { thoughtSignature: cachedSig } : {}),
549
+ // Include id only for Claude — Gemini native models reject the id field
550
+ functionCall: { ...(isClaude ? { id: tc.id } : {}), name: tc.function.name, args },
551
+ });
446
552
  isFirstInMessage = false;
447
553
  }
448
554
  }
449
555
  if (parts.length > 0) contents.push({ role: "model", parts });
450
556
  } else if (msg.role === "tool") {
451
- const prevMsg = conversationMessages[i - 1];
452
557
  const responseText = typeof msg.content === "string" ? msg.content : extractText(msg.content);
453
558
  const fnName = msg.name || "unknown";
454
559
  // Include tool_call_id so Gemini can pass it as tool_use_id to Claude
@@ -460,6 +565,7 @@ export function openAIToAntigravityBody(input: OpenAIChatCompletionRequest): Req
460
565
  } else {
461
566
  // user message
462
567
  const msgParts = extractParts(msg.content);
568
+
463
569
  if (msgParts.length > 0) contents.push({ role: "user", parts: msgParts });
464
570
  }
465
571
  }
@@ -472,35 +578,84 @@ export function openAIToAntigravityBody(input: OpenAIChatCompletionRequest): Req
472
578
  const geminiTools = convertOpenAIToolsToGemini(inputTools, isClaude);
473
579
  const geminiToolConfig = input.tool_choice !== undefined ? convertToolChoiceToGemini(input.tool_choice) : undefined;
474
580
 
475
- // Map OpenAI reasoning_effort Gemini thinkingLevel
476
- const thinkingLevel = mapReasoningEffortToThinkingLevel(input.reasoning_effort, input.model);
581
+ // Cap maxOutputTokens to model limits and build thinkingConfig
582
+ const modelSpec = getModelSpec(input.model);
583
+ const modelFamily = getModelFamily(input.model);
584
+ let maxOutputTokens = typeof input.max_tokens === "number" ? input.max_tokens : undefined;
585
+ if (maxOutputTokens && maxOutputTokens > modelSpec.maxOutputTokens) {
586
+ compatLogger.debug(`Capping ${input.model} maxOutputTokens ${maxOutputTokens} → ${modelSpec.maxOutputTokens}`);
587
+ maxOutputTokens = modelSpec.maxOutputTokens;
588
+ }
589
+
590
+ let thinkingConfigObj: Record<string, unknown> | undefined;
591
+ if (modelFamily === "claude" && isThinking) {
592
+ // Claude: snake_case keys required by v1internal
593
+ const tb = modelSpec.thinkingBudget;
594
+ thinkingConfigObj = { include_thoughts: true, thinking_budget: tb };
595
+ if (!maxOutputTokens || maxOutputTokens <= tb) {
596
+ maxOutputTokens = Math.min(tb + 8192, modelSpec.maxOutputTokens);
597
+ compatLogger.debug(`Adjusted Claude maxOutputTokens → ${maxOutputTokens}`);
598
+ }
599
+ } else if (isThinking) {
600
+ // Gemini: camelCase keys; thinkingBudget=-1 means adaptive (omit the field)
601
+ const tb = modelSpec.thinkingBudget;
602
+ thinkingConfigObj = tb === -1
603
+ ? { includeThoughts: true }
604
+ : { includeThoughts: true, thinkingBudget: tb };
605
+ if (tb !== -1 && (!maxOutputTokens || maxOutputTokens <= tb)) {
606
+ maxOutputTokens = Math.min(tb + 8192, modelSpec.maxOutputTokens);
607
+ compatLogger.debug(`Adjusted Gemini maxOutputTokens → ${maxOutputTokens}`);
608
+ }
609
+ } else if (input.reasoning_effort) {
610
+ // Non-thinking models with explicit reasoning_effort hint
611
+ const budgets: Record<string, number> = { low: Math.round(modelSpec.thinkingBudget / 4), medium: Math.round(modelSpec.thinkingBudget / 2), high: modelSpec.thinkingBudget };
612
+ const b = budgets[input.reasoning_effort.toLowerCase()];
613
+ if (b) thinkingConfigObj = { includeThoughts: true, thinkingBudget: b };
614
+ }
615
+
616
+ const generationConfig: Record<string, unknown> = {
617
+ ...(typeof input.temperature === "number" ? { temperature: input.temperature } : {}),
618
+ ...(maxOutputTokens ? { maxOutputTokens } : {}),
619
+ ...(thinkingConfigObj ? { thinkingConfig: thinkingConfigObj } : {}),
620
+ };
477
621
 
478
622
  const request: Record<string, unknown> = {
479
623
  contents,
480
- generationConfig: {
481
- ...(typeof input.temperature === "number" ? { temperature: input.temperature } : {}),
482
- ...(typeof input.max_tokens === "number" ? { maxOutputTokens: input.max_tokens } : {}),
483
- // Always request thought blocks. Models that don't support thinking ignore this.
484
- thinkingConfig: {
485
- includeThoughts: true,
486
- ...(thinkingLevel ? { thinkingLevel } : {}),
487
- },
488
- },
624
+ generationConfig,
489
625
  };
490
626
 
491
627
  if (systemParts.length > 0) {
492
- request.systemInstruction = {
493
- role: "user",
494
- parts: [{ text: systemParts.join("\n\n") }],
495
- };
628
+ if (!isClaude && isThinking) {
629
+ // Gemini thinking models (gemini-3.1-pro-high/low) reject the systemInstruction
630
+ // field entirely — prepend system prompt to the first user content turn instead.
631
+ const firstTurn = contents[0];
632
+ if (firstTurn && firstTurn.role === "user" && (firstTurn.parts[0] as any)?.text !== undefined) {
633
+ (firstTurn.parts[0] as any).text = systemParts.join("\n\n") + "\n\n" + (firstTurn.parts[0] as any).text;
634
+ } else if (firstTurn && firstTurn.role === "user") {
635
+ firstTurn.parts.unshift({ text: systemParts.join("\n\n") + "\n\n" });
636
+ } else {
637
+ contents.unshift({
638
+ role: "user",
639
+ parts: [{ text: systemParts.join("\n\n") }],
640
+ });
641
+ }
642
+ } else {
643
+ request.systemInstruction = {
644
+ role: "system",
645
+ parts: [{ text: systemParts.join("\n\n") }],
646
+ };
647
+ }
496
648
  }
497
649
 
498
650
  if (geminiTools.length > 0) request.tools = geminiTools;
499
651
  if (geminiToolConfig) request.toolConfig = geminiToolConfig;
500
652
 
653
+ let mappedModel = input.model;
654
+ if (mappedModel === "gemini-3.1-pro-high") mappedModel = "gemini-pro-agent";
655
+
501
656
  return {
502
657
  project: "compat-placeholder",
503
- model: input.model,
658
+ model: mappedModel,
504
659
  userAgent: "antigravity",
505
660
  requestType: "agent",
506
661
  request,
@@ -522,28 +677,47 @@ export function anthropicToAntigravityBody(input: AnthropicMessagesRequest): Req
522
677
  }
523
678
 
524
679
  /**
525
- * Maps an OpenAI reasoning_effort string to a Gemini thinkingLevel.
526
- * Gemini 3 Pro only supports LOW and HIGH; Flash supports MINIMAL/LOW/MEDIUM/HIGH.
680
+ * Maps an OpenAI reasoning_effort / model name suffix to a Gemini thinkingBudget integer.
681
+ * Cloud Code Assist uses thinkingBudget (integer token count), not thinkingLevel (string).
682
+ * Values match models.json: -high=10001, -low=1001, flash=-1 (dynamic; the model decides its own budget).
683
+ * Returns undefined for models that don't need an explicit budget (e.g. Claude, plain flash).
527
684
  */
528
- function mapReasoningEffortToThinkingLevel(effort: string | undefined, modelId: string): string | undefined {
529
- const isGemini3Pro = /gemini-3(?:\.1)?-pro/i.test(modelId);
530
-
685
+ function mapReasoningEffortToThinkingLevel(effort: string | undefined, modelId: string): number | undefined {
686
+ const lowerModel = modelId.toLowerCase();
687
+ const isGemini31Pro = /gemini-3\.1-pro/i.test(modelId);
688
+ const isGemini3Flash = lowerModel.includes("gemini-3-flash");
689
+
531
690
  let effectiveEffort = effort;
532
691
  if (!effectiveEffort) {
533
- const lowerModel = modelId.toLowerCase();
534
- if (lowerModel.endsWith("-high") || lowerModel.includes("claude-")) effectiveEffort = "high";
692
+ if (lowerModel.endsWith("-high") || lowerModel.includes("gemini-pro-agent")) effectiveEffort = "high";
535
693
  else if (lowerModel.endsWith("-low")) effectiveEffort = "low";
536
- else if (lowerModel.includes("gemini-3-flash")) effectiveEffort = "high";
694
+ else if (isGemini3Flash) effectiveEffort = "high";
695
+ // Claude models: skip — thinking is handled by the anthropic-beta header
537
696
  }
538
697
 
539
698
  if (!effectiveEffort) return undefined;
540
699
 
541
- switch (effectiveEffort.toLowerCase()) {
542
- case "low": return isGemini3Pro ? "LOW" : "LOW";
543
- case "medium": return isGemini3Pro ? "HIGH" : "MEDIUM";
544
- case "high": return "HIGH";
545
- default: return undefined;
700
+ // Gemini 3.1 Pro uses fixed budgets matching models.json
701
+ if (isGemini31Pro) {
702
+ switch (effectiveEffort.toLowerCase()) {
703
+ case "high": return 10001;
704
+ case "medium": return 5000;
705
+ case "low": return 1001;
706
+ default: return undefined;
707
+ }
708
+ }
709
+
710
+ // Flash uses dynamic budget (-1 means let the model decide)
711
+ if (isGemini3Flash) {
712
+ switch (effectiveEffort.toLowerCase()) {
713
+ case "high": return -1;
714
+ case "medium": return 4096;
715
+ case "low": return 1024;
716
+ default: return undefined;
717
+ }
546
718
  }
719
+
720
+ return undefined;
547
721
  }
548
722
 
549
723
  export function parseAntigravitySse(raw: string): CompatCompletion {
@@ -665,6 +839,10 @@ function writeOpenAIStream(res: ServerResponse, model: string, completion: Compa
665
839
  }
666
840
  res.write(`data: ${JSON.stringify({ id, object: "chat.completion.chunk", created, model, choices: [{ index: 0, delta: {}, finish_reason: "stop" }] })}\n\n`);
667
841
  }
842
+ // Emit usage chunk so agents (hermes, openwebui) can display token statistics
843
+ if (completion.inputTokens > 0 || completion.outputTokens > 0) {
844
+ res.write(`data: ${JSON.stringify({ id, object: "chat.completion.chunk", created, model, choices: [], usage: { prompt_tokens: completion.inputTokens, completion_tokens: completion.outputTokens, total_tokens: completion.inputTokens + completion.outputTokens } })}\n\n`);
845
+ }
668
846
  res.write("data: [DONE]\n\n");
669
847
  res.end();
670
848
  }
@@ -684,7 +862,8 @@ function writeAnthropicStream(res: ServerResponse, model: string, completion: Co
684
862
  res.write(`event: content_block_start\ndata: ${JSON.stringify({ type: "content_block_start", index: contentIndex, content_block: { type: "text", text: "" } })}\n\n`);
685
863
  if (completion.text) res.write(`event: content_block_delta\ndata: ${JSON.stringify({ type: "content_block_delta", index: contentIndex, delta: { type: "text_delta", text: completion.text } })}\n\n`);
686
864
  res.write(`event: content_block_stop\ndata: ${JSON.stringify({ type: "content_block_stop", index: contentIndex })}\n\n`);
687
- res.write(`event: message_delta\ndata: ${JSON.stringify({ type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: completion.outputTokens } })}\n\n`);
865
+ // message_delta: include both input_tokens and output_tokens so hermes shows full context count
866
+ res.write(`event: message_delta\ndata: ${JSON.stringify({ type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { input_tokens: completion.inputTokens, output_tokens: completion.outputTokens } })}\n\n`);
688
867
  res.write(`event: message_stop\ndata: ${JSON.stringify({ type: "message_stop" })}\n\n`);
689
868
  res.end();
690
869
  }
@@ -812,10 +991,15 @@ async function streamCompatSse(
812
991
  if (!reqClosed && !res.writableEnded) {
813
992
  if (format === "openai") {
814
993
  res.write(`data: ${JSON.stringify({ id, object: "chat.completion.chunk", created, model, choices: [{ index: 0, delta: {}, finish_reason: "stop" }] })}\n\n`);
994
+ // Emit a usage chunk so agents (hermes, openwebui, etc.) can display token statistics
995
+ if (inputTokens > 0 || outputTokens > 0) {
996
+ res.write(`data: ${JSON.stringify({ id, object: "chat.completion.chunk", created, model, choices: [], usage: { prompt_tokens: inputTokens, completion_tokens: outputTokens, total_tokens: inputTokens + outputTokens } })}\n\n`);
997
+ }
815
998
  res.write("data: [DONE]\n\n");
816
999
  } else {
817
1000
  res.write(`event: content_block_stop\ndata: ${JSON.stringify({ type: "content_block_stop", index: 0 })}\n\n`);
818
- res.write(`event: message_delta\ndata: ${JSON.stringify({ type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: outputTokens } })}\n\n`);
1001
+ // message_delta carries output_tokens; also include input_tokens so Hermes shows full context count
1002
+ res.write(`event: message_delta\ndata: ${JSON.stringify({ type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { input_tokens: inputTokens, output_tokens: outputTokens } })}\n\n`);
819
1003
  res.write(`event: message_stop\ndata: ${JSON.stringify({ type: "message_stop" })}\n\n`);
820
1004
  }
821
1005
  res.end();
package/src/dashboard.ts CHANGED
@@ -519,108 +519,7 @@ const DASHBOARD_HTML = `<!DOCTYPE html>
519
519
  font-size: 10px;
520
520
  line-height: 1.6;
521
521
  }
522
- .dw-badge {
523
- display: inline-block;
524
- width: 32px;
525
- text-align: center;
526
- font-weight: 700;
527
- font-size: 9px;
528
- border-radius: 3px;
529
- padding: 1px 4px;
530
- flex-shrink: 0;
531
- }
532
- .dw-badge-pro {
533
- background: rgba(52, 211, 153, 0.15);
534
- color: var(--green);
535
- }
536
- .dw-badge-free {
537
- background: rgba(250, 204, 21, 0.12);
538
- color: var(--yellow);
539
- }
540
- .dw-quota {
541
- font-weight: 700;
542
- min-width: 28px;
543
- }
544
- .dw-reset {
545
- color: var(--text-dim);
546
- }
547
- .dw-empty {
548
- color: var(--text-dim);
549
- font-style: italic;
550
- opacity: 0.5;
551
- }
552
-
553
- .advisor-panel {
554
- background: var(--surface);
555
- border: 1px solid var(--border);
556
- border-radius: var(--radius);
557
- padding: 16px 18px;
558
- margin-bottom: 24px;
559
- }
560
-
561
- .advisor-title {
562
- font-size: 11px;
563
- text-transform: uppercase;
564
- letter-spacing: 0.8px;
565
- color: var(--text-dim);
566
- margin-bottom: 10px;
567
- display: flex;
568
- align-items: center;
569
- gap: 8px;
570
- }
571
-
572
- .advisor-slots {
573
- font-size: 12px;
574
- font-family: 'JetBrains Mono', monospace;
575
- color: var(--text);
576
- margin-left: auto;
577
- text-transform: none;
578
- letter-spacing: 0;
579
- }
580
-
581
- .advisor-action {
582
- display: flex;
583
- align-items: center;
584
- gap: 10px;
585
- padding: 8px 10px;
586
- margin-bottom: 6px;
587
- border-radius: 8px;
588
- font-size: 12px;
589
- }
590
-
591
- .advisor-action.add-pro {
592
- background: rgba(52, 211, 153, 0.06);
593
- border-left: 3px solid var(--green);
594
- }
595
-
596
- .advisor-action.remove-pro {
597
- background: rgba(251, 191, 36, 0.06);
598
- border-left: 3px solid var(--yellow);
599
- }
600
-
601
- .advisor-action-type {
602
- font-weight: 600;
603
- font-size: 10px;
604
- text-transform: uppercase;
605
- letter-spacing: 0.5px;
606
- padding: 2px 6px;
607
- border-radius: 4px;
608
- flex-shrink: 0;
609
- }
610
-
611
- .advisor-action.add-pro .advisor-action-type {
612
- background: rgba(52, 211, 153, 0.15);
613
- color: var(--green);
614
- }
615
-
616
- .advisor-action.remove-pro .advisor-action-type {
617
- background: rgba(251, 191, 36, 0.15);
618
- color: var(--yellow);
619
- }
620
522
 
621
- .advisor-action-label { font-weight: 500; }
622
- .advisor-action-reason { color: var(--text-dim); font-size: 11px; margin-left: auto; }
623
- .advisor-empty { color: var(--text-dim); font-size: 12px; font-style: italic; }
624
523
  .routing-panel {
625
524
  border-radius: var(--radius);
626
525
  padding: 12px 14px;
@@ -1386,10 +1285,7 @@ const DASHBOARD_HTML = `<!DOCTYPE html>
1386
1285
  <svg viewBox="0 0 24 24"><path d="M12 8v5"/><path d="M12 17.5h.01"/><path d="M10.3 3.8 2.9 17a2 2 0 0 0 1.75 3h14.7A2 2 0 0 0 21.1 17L13.7 3.8a2 2 0 0 0-3.4 0Z"/></svg>
1387
1286
  <span class="header-icon-badge attention" id="attentionBadge" style="display:none">0</span>
1388
1287
  </button>
1389
- <button class="header-icon-btn advisor" id="advisorBtn" onclick="openModal('advisorModal')" title="Pro Family Advisor" aria-label="Open Pro Family Advisor">
1390
- <svg viewBox="0 0 24 24"><path d="m5 15 2-9 5 5 5-5 2 9"/><path d="M4 19h16"/></svg>
1391
- <span class="header-icon-badge advisor" id="advisorBadge" style="display:none">0</span>
1392
- </button>
1288
+
1393
1289
  <button class="header-icon-btn heart-beat" id="kofiBtn" onclick="openModal('donationModal')" title="Support the Creator" aria-label="Buy me a coffee">
1394
1290
  <svg viewBox="0 0 24 24"><path d="M20.84 4.61a5.5 5.5 0 0 0-7.78 0L12 5.67l-1.06-1.06a5.5 5.5 0 0 0-7.78 7.78l1.06 1.06L12 21.23l7.78-7.78 1.06-1.06a5.5 5.5 0 0 0 0-7.78z"></path></svg>
1395
1291
  </button>
@@ -1481,15 +1377,7 @@ const DASHBOARD_HTML = `<!DOCTYPE html>
1481
1377
  </div>
1482
1378
  </div>
1483
1379
 
1484
- <div class="modal" id="advisorModal" onclick="closeModal(event, 'advisorModal')">
1485
- <div class="modal-card" onclick="event.stopPropagation()">
1486
- <div class="modal-header">
1487
- <strong>Pro Family Advisor</strong>
1488
- <button class="modal-close" onclick="closeModal(null, 'advisorModal')" aria-label="Close advisor modal">×</button>
1489
- </div>
1490
- <div id="proAdvisor"></div>
1491
- </div>
1492
- </div>
1380
+
1493
1381
 
1494
1382
  <div class="modal" id="donationModal" onclick="closeModal(event, 'donationModal')">
1495
1383
  <div class="modal-card" onclick="event.stopPropagation()" style="max-width: 500px;">
@@ -1582,102 +1470,7 @@ function renderQuotaBars(account) {
1582
1470
  return '<div class="quota-section"><div class="quota-section-title">Quota (per model)</div>' + rows + '</div>';
1583
1471
  }
1584
1472
 
1585
- function renderDualWindows(account) {
1586
- var qw = account.quotaWindows;
1587
- if (!qw) return '';
1588
- var models = Object.keys(qw);
1589
- if (models.length === 0) return '';
1590
- var now = Date.now();
1591
- var rows = models.map(function(modelKey) {
1592
- var t = qw[modelKey];
1593
- var shortName = modelKey.split('-').slice(0, 2).join('-');
1594
- if (shortName === 'claude-opus') shortName = 'claude'; // Clean up Claude display name
1595
-
1596
- var proLine = '';
1597
- var freeLine = '';
1598
-
1599
- // PRO line
1600
- if (t.pro && t.pro.lastSeen > 0) {
1601
- var pQuota = t.pro.lastQuota;
1602
- var pReset = '';
1603
- if (t.pro.resetTimeMs > 0) {
1604
- var pRemain = t.pro.resetTimeMs - now;
1605
- if (pRemain > 0) {
1606
- var isRolling5h = pQuota === 100 && Math.abs(pRemain - (5 * 3600000)) < 600000;
1607
- var isRolling7d = pQuota === 100 && Math.abs(pRemain - (7 * 86400000)) < 600000;
1608
- if (isRolling5h || isRolling7d) {
1609
- pReset = '<span style="color:var(--green)">idle</span>';
1610
- } else {
1611
- pReset = 'resets in ' + formatDuration(pRemain);
1612
- }
1613
- } else {
1614
- // Reset has passed
1615
- var was5h = (t.pro.resetTimeMs - t.pro.lastSeen) < (24 * 3600 * 1000);
1616
- if (was5h) {
1617
- pQuota = Math.min(100, pQuota + 40);
1618
- pReset = '<span style="color:var(--green)">+40% idle</span>';
1619
- } else {
1620
- pQuota = 100;
1621
- pReset = '<span style="color:var(--text-dim)">idle</span>';
1622
- }
1623
- }
1624
- }
1625
- var pqColor = pQuota > 50 ? 'var(--green)' : pQuota > 20 ? 'var(--yellow)' : 'var(--red)';
1626
- proLine = '<div class="dw-row">' +
1627
- '<span class="dw-badge dw-badge-pro">PRO</span>' +
1628
- '<span class="dw-quota" style="color:' + pqColor + '">' + pQuota + '%</span>' +
1629
- '<span class="dw-reset">' + (pReset || '--') + '</span>' +
1630
- '</div>';
1631
- } else {
1632
- proLine = '<div class="dw-row"><span class="dw-badge dw-badge-pro">PRO</span><span class="dw-empty">no data</span></div>';
1633
- }
1634
-
1635
- // FREE line
1636
- if (t.free && t.free.lastSeen > 0) {
1637
- var fQuota = t.free.lastQuota;
1638
- var fReset = '';
1639
- if (t.free.resetTimeMs > 0) {
1640
- var fRemain = t.free.resetTimeMs - now;
1641
- if (fRemain > 0) {
1642
- var isRolling5h = fQuota === 100 && Math.abs(fRemain - (5 * 3600000)) < 600000;
1643
- var isRolling7d = fQuota === 100 && Math.abs(fRemain - (7 * 86400000)) < 600000;
1644
- if (isRolling5h || isRolling7d) {
1645
- fReset = '<span style="color:var(--green)">idle</span>';
1646
- } else {
1647
- fReset = 'resets in ' + formatDuration(fRemain);
1648
- }
1649
- } else {
1650
- // Reset has passed
1651
- var fWas5h = (t.free.resetTimeMs - t.free.lastSeen) < (24 * 3600 * 1000);
1652
- if (fWas5h) {
1653
- fQuota = Math.min(100, fQuota + 40);
1654
- fReset = '<span style="color:var(--green)">+40% idle</span>';
1655
- } else {
1656
- fQuota = 100;
1657
- fReset = '<span style="color:var(--text-dim)">idle</span>';
1658
- }
1659
- }
1660
- }
1661
- var fqColor = fQuota > 50 ? 'var(--green)' : fQuota > 20 ? 'var(--yellow)' : 'var(--red)';
1662
- freeLine = '<div class="dw-row">' +
1663
- '<span class="dw-badge dw-badge-free">FREE</span>' +
1664
- '<span class="dw-quota" style="color:' + fqColor + '">' + fQuota + '%</span>' +
1665
- '<span class="dw-reset">' + (fReset || '--') + '</span>' +
1666
- '</div>';
1667
- } else {
1668
- freeLine = '<div class="dw-row"><span class="dw-badge dw-badge-free">FREE</span><span class="dw-empty">no data</span></div>';
1669
- }
1670
1473
 
1671
- return '<div class="dw-model">' +
1672
- '<div class="dw-model-name">' + shortName + '</div>' +
1673
- proLine + freeLine +
1674
- '</div>';
1675
- }).join('');
1676
-
1677
- var swapAllBtn = '<button class="btn-clear-flight" style="margin-left:auto;font-size:8px;padding:1px 4px" title="Manually swap Pro/Free classification for this entire account" onclick="swapWindows(\\'' + jsString(account.email) + '\\')">Swap All</button>';
1678
-
1679
- return '<div class="dw-section"><div class="dw-title" style="display:flex;align-items:center">Quota Windows (Pro / Free)' + swapAllBtn + '</div>' + rows + '</div>';
1680
- }
1681
1474
 
1682
1475
  function renderAccounts(data) {
1683
1476
  window.__lastData = data;
@@ -1810,14 +1603,12 @@ function renderAccounts(data) {
1810
1603
  '<div class="card-label">' + escapeHtml(maskText(a.label)) + '</div>' +
1811
1604
  '<div class="card-badges">' +
1812
1605
  (a.proDetected ? '<span class="badge badge-pro">PRO</span>' : '<span class="badge badge-free">FREE</span>') +
1813
- (a.familyManager ? '<span class="badge badge-fmgr">FAMILY MGR</span>' : '') +
1814
1606
  '<span class="badge badge-' + escapeHtml(a.status) + (isActive ? ' pulse' : '') + '">' + escapeHtml(a.status) + '</span>' +
1815
1607
  modelBadges +
1816
1608
  '</div>' +
1817
1609
  '</div>' +
1818
1610
  '<div class="card-email">' + escapeHtml(maskEmail(a.email)) + '</div>' +
1819
1611
  (a.quota && a.quota.length > 0 ? renderQuotaBars(a) : '') +
1820
- renderDualWindows(a) +
1821
1612
  '<div class="card-stats">' +
1822
1613
  '<div class="card-stat"><div class="stat-label">Requests</div><div class="stat-value">' +
1823
1614
  a.requestsSinceRotation + ' / ' + a.totalRequests + ' total</div></div>' +
@@ -1851,7 +1642,7 @@ function renderAccounts(data) {
1851
1642
  '</div>';
1852
1643
  }).join('');
1853
1644
 
1854
- renderProAdvisor(data.proAdvisor);
1645
+
1855
1646
  }
1856
1647
 
1857
1648
 
@@ -1992,7 +1783,6 @@ function renderListView() {
1992
1783
  var tierBadge = a.proDetected
1993
1784
  ? '<span class="badge badge-pro" style="font-size:9px">PRO</span>'
1994
1785
  : '<span class="badge badge-free" style="font-size:9px">FREE</span>';
1995
- if (a.familyManager) tierBadge += '<span class="badge badge-fmgr" style="font-size:9px">FMGR</span>';
1996
1786
 
1997
1787
  var quotaCell = avgQuota === null
1998
1788
  ? '<span style="color:var(--text-dim)">--</span>'
@@ -2620,6 +2410,7 @@ function renderForecastPanel(data) {
2620
2410
  '<th style="padding:4px 8px">Accounts</th>' +
2621
2411
  '<th style="padding:4px 8px">Burn Rate</th>' +
2622
2412
  '<th style="padding:4px 8px">Estimate</th>' +
2413
+ '<th style="padding:4px 8px">Next Reset</th>' +
2623
2414
  '</tr>';
2624
2415
 
2625
2416
  models.forEach(function(m) {
@@ -2632,6 +2423,23 @@ function renderForecastPanel(data) {
2632
2423
  if (m === 'claude-opus-4-6-thinking') displayName = 'claude';
2633
2424
  if (m === 'gemini-3.1-pro') displayName = 'gemini-3.1-pro';
2634
2425
 
2426
+ var minResetRemaining = null;
2427
+ q.entries.forEach(function(entry) {
2428
+ if (entry.resetTime && entry.timerType !== 'fresh') {
2429
+ var remaining = new Date(entry.resetTime).getTime() - now;
2430
+ if (remaining > 0) {
2431
+ var isRolling5h = entry.percentRemaining === 100 && Math.abs(remaining - (5 * 3600000)) < 600000;
2432
+ var isRolling7d = entry.percentRemaining === 100 && Math.abs(remaining - (7 * 86400000)) < 600000;
2433
+ if (!isRolling5h && !isRolling7d) {
2434
+ if (minResetRemaining === null || remaining < minResetRemaining) {
2435
+ minResetRemaining = remaining;
2436
+ }
2437
+ }
2438
+ }
2439
+ }
2440
+ });
2441
+ var nextResetLabel = minResetRemaining !== null ? formatDuration(minResetRemaining) : '--';
2442
+
2635
2443
  // Estimate: assume ~100 requests per full 100% quota window (empirical)
2636
2444
  // Total remaining "request capacity" ≈ sum of (percent/100 * 100) per account
2637
2445
  var totalCapacity = q.totalPercent; // each 1% ≈ 1 request remaining
@@ -2669,6 +2477,7 @@ function renderForecastPanel(data) {
2669
2477
  '<td style="padding:4px 8px;text-align:center">' + q.accountCount + '</td>' +
2670
2478
  '<td style="padding:4px 8px">' + rateLabel + '</td>' +
2671
2479
  '<td style="padding:4px 8px;color:' + estimateColor + ';font-weight:700">' + estimateLabel + '</td>' +
2480
+ '<td style="padding:4px 8px;color:var(--text-dim)">' + nextResetLabel + '</td>' +
2672
2481
  '</tr>';
2673
2482
  });
2674
2483
 
@@ -2931,44 +2740,7 @@ async function clearCircuitBreaker(modelKey) {
2931
2740
  refresh();
2932
2741
  }
2933
2742
 
2934
- async function swapWindows(email) {
2935
- if (!confirm('Manually swap Pro and Free data for ALL models on this account? Use this only if the algorithm classified the account tier backward.')) return;
2936
- await authFetch('/api/account/swap-windows/' + encodeURIComponent(email), { method: 'POST' });
2937
- refresh();
2938
- }
2939
2743
 
2940
- function renderProAdvisor(advisor) {
2941
- var panel = document.getElementById('proAdvisor');
2942
- var button = document.getElementById('advisorBtn');
2943
- var badge = document.getElementById('advisorBadge');
2944
- if (!advisor) {
2945
- panel.innerHTML = '<div class="modal-empty">No advisor data available.</div>';
2946
- badge.style.display = 'none';
2947
- button.classList.remove('has-items');
2948
- return;
2949
- }
2950
- var title = '<div class="advisor-title">Pro Family Advisor' +
2951
- '<span class="advisor-slots">Slots: ' + advisor.currentProCount + '/' + advisor.maxProSlots + '</span></div>';
2952
- if (advisor.actions.length === 0) {
2953
- panel.innerHTML = title + '<div class="advisor-empty">No actions recommended</div>';
2954
- badge.style.display = 'none';
2955
- button.classList.remove('has-items');
2956
- return;
2957
- }
2958
- var rows = advisor.actions.map(function(a) {
2959
- var cls = a.type === 'add-pro' ? 'add-pro' : 'remove-pro';
2960
- var typeLabel = a.type === 'add-pro' ? 'Add Pro' : 'Remove Pro';
2961
- return '<div class="advisor-action ' + cls + '">' +
2962
- '<span class="advisor-action-type">' + typeLabel + '</span>' +
2963
- '<span class="advisor-action-label">' + escapeHtml(maskText(a.label)) + '</span>' +
2964
- '<span class="advisor-action-reason">' + escapeHtml(a.reason) + '</span>' +
2965
- '</div>';
2966
- }).join('');
2967
- panel.innerHTML = title + rows;
2968
- badge.style.display = 'inline-flex';
2969
- badge.textContent = String(advisor.actions.length);
2970
- button.classList.add('has-items');
2971
- }
2972
2744
 
2973
2745
  function openModal(id) {
2974
2746
  var modal = document.getElementById(id);
package/src/proxy.ts CHANGED
@@ -1040,26 +1040,7 @@ export function startProxy(rotator: AccountRotator, port: number): void {
1040
1040
  return;
1041
1041
  }
1042
1042
 
1043
- if (method === "POST" && url?.startsWith("/api/account/swap-windows/")) {
1044
- if (!requireAdmin(req, res)) return;
1045
- const rest = url.slice("/api/account/swap-windows/".length);
1046
- const email = decodeURIComponent(rest);
1047
- const account = rotator.getAccountByEmail(email);
1048
- if (account && account.quotaWindows) {
1049
- for (const m of Object.keys(account.quotaWindows)) {
1050
- const temp = account.quotaWindows[m].pro;
1051
- account.quotaWindows[m].pro = account.quotaWindows[m].free;
1052
- account.quotaWindows[m].free = temp;
1053
- }
1054
- rotator.saveState();
1055
- res.writeHead(200);
1056
- res.end(JSON.stringify({ success: true }));
1057
- } else {
1058
- res.writeHead(404);
1059
- res.end("Account not found");
1060
- }
1061
- return;
1062
- }
1043
+
1063
1044
 
1064
1045
  if (method === "POST" && (url === "/api/settings/fresh-window-starts/on" || url === "/api/settings/fresh-window-starts/off")) {
1065
1046
  if (!requireAdmin(req, res)) return;
package/src/rotator.ts CHANGED
@@ -10,9 +10,6 @@ import {
10
10
  type ModelQuota,
11
11
  type ModelRotationState,
12
12
  type PersistedState,
13
- type QuotaWindowHistory,
14
- type DualWindowTracker,
15
- type ProAdvisorAction,
16
13
  type StatusResponse,
17
14
  type TokenBucket,
18
15
  type TokenUsageData,
@@ -97,7 +94,6 @@ export class AccountRotator {
97
94
  inFlightRequests: 0,
98
95
  inFlightByModel: {},
99
96
  allowFreshWindowStartsOverride: false,
100
- quotaWindows: {},
101
97
  dailyRequestCount: 0,
102
98
  dailyRequestDay: currentUtcDay(),
103
99
  }));
@@ -148,7 +144,6 @@ export class AccountRotator {
148
144
  account.disabled = saved.disabled;
149
145
  account.flagged = saved.flagged ?? false;
150
146
  account.allowFreshWindowStartsOverride = saved.allowFreshWindowStartsOverride ?? false;
151
- account.quotaWindows = saved.quotaWindows ?? {};
152
147
  }
153
148
  }
154
149
  // Cap any stale cooldowns to 30 min max from now
@@ -230,7 +225,6 @@ export class AccountRotator {
230
225
  disabled: account.disabled,
231
226
  flagged: account.flagged,
232
227
  allowFreshWindowStartsOverride: account.allowFreshWindowStartsOverride,
233
- quotaWindows: account.quotaWindows,
234
228
  };
235
229
  }
236
230
  try {
@@ -376,77 +370,6 @@ export class AccountRotator {
376
370
  this.log(`RAW POLL ${account.config.email} -> ${rawLog}`);
377
371
  // ---------------------------------------
378
372
 
379
- // Record dual-window quota tracking per model (Immutable Anchors Architecture)
380
- const now = Date.now();
381
- const FIVE_HOURS_10MIN = (5 * 60 + 10) * 60 * 1000;
382
- const FIVE_MIN = 5 * 60 * 1000;
383
-
384
- // Step 1: Initialize tracking and check for the definitive PRO signal (genuine 5h timer)
385
- let accountIsDefinitivelyPro = false;
386
- for (const q of account.quota) {
387
- if (!account.quotaWindows[q.modelKey]) {
388
- account.quotaWindows[q.modelKey] = {
389
- pro: { lastSeen: 0, resetTimeMs: 0, resetTime: null, lastQuota: -1 },
390
- free: { lastSeen: 0, resetTimeMs: 0, resetTime: null, lastQuota: -1 },
391
- };
392
- }
393
- if (q.timerType === "5h") {
394
- const currentResetMs = q.resetTime ? new Date(q.resetTime).getTime() : 0;
395
- if (currentResetMs === 0 || (currentResetMs - now) <= FIVE_HOURS_10MIN) {
396
- accountIsDefinitivelyPro = true;
397
- }
398
- }
399
- }
400
-
401
- // Step 2: Update permanent anchors based on the definitive signal
402
- for (const q of account.quota) {
403
- if (q.timerType === "fresh") continue; // Fresh gives us no reset time to anchor
404
- const tracker = account.quotaWindows[q.modelKey];
405
- const currentResetMs = q.resetTime ? new Date(q.resetTime).getTime() : 0;
406
- if (currentResetMs === 0) continue;
407
-
408
- // Has the real-world time passed the existing Pro anchor?
409
- if (tracker.pro.resetTimeMs > 0 && now > tracker.pro.resetTimeMs) {
410
- // The old Pro anchor expired naturally. We clear it to make room for a new cycle.
411
- tracker.pro.resetTimeMs = 0;
412
- tracker.pro.resetTime = null;
413
- }
414
- // Has the real-world time passed the existing Free anchor?
415
- if (tracker.free.resetTimeMs > 0 && now > tracker.free.resetTimeMs) {
416
- // The old Free anchor expired naturally. We clear it to make room for a new cycle.
417
- tracker.free.resetTimeMs = 0;
418
- tracker.free.resetTime = null;
419
- }
420
-
421
- const matchesPro = tracker.pro.resetTimeMs > 0 && Math.abs(currentResetMs - tracker.pro.resetTimeMs) < FIVE_MIN;
422
- const matchesFree = tracker.free.resetTimeMs > 0 && Math.abs(currentResetMs - tracker.free.resetTimeMs) < FIVE_MIN;
423
-
424
- if (matchesPro) {
425
- // It's the Pro window. Update quota.
426
- tracker.pro.lastSeen = now;
427
- tracker.pro.lastQuota = q.percentRemaining;
428
- } else if (matchesFree) {
429
- // It's the Free window. Update quota.
430
- tracker.free.lastSeen = now;
431
- tracker.free.lastQuota = q.percentRemaining;
432
- } else {
433
- // This is a BRAND NEW reset time (doesn't match either anchor).
434
- // We must assign it to either the Pro bucket or the Free bucket.
435
- if (accountIsDefinitivelyPro) {
436
- // We have absolute proof the account is Pro right now.
437
- tracker.pro.lastSeen = now;
438
- tracker.pro.resetTimeMs = currentResetMs;
439
- tracker.pro.resetTime = q.resetTime;
440
- tracker.pro.lastQuota = q.percentRemaining;
441
- } else {
442
- // We have NO proof the account is Pro. Assume Free.
443
- tracker.free.lastSeen = now;
444
- tracker.free.resetTimeMs = currentResetMs;
445
- tracker.free.resetTime = q.resetTime;
446
- tracker.free.lastQuota = q.percentRemaining;
447
- }
448
- }
449
- }
450
373
  } catch {
451
374
  // Network error, skip
452
375
  }
@@ -1468,9 +1391,7 @@ export class AccountRotator {
1468
1391
  quota: a.quota,
1469
1392
  inFlightRequests: a.inFlightRequests,
1470
1393
  inFlightByModel: a.inFlightByModel,
1471
- proDetected: this.isProAccount(a),
1472
- quotaWindows: a.quotaWindows,
1473
- familyManager: !!a.config.familyManager,
1394
+ proDetected: a.config.type === "pro",
1474
1395
  allowFreshWindowStartsOverride: a.allowFreshWindowStartsOverride,
1475
1396
  effectiveFreshWindowStartsAllowed: this.isEffectiveFreshWindowAllowed(a),
1476
1397
  };
@@ -1513,7 +1434,6 @@ export class AccountRotator {
1513
1434
  },
1514
1435
  routingHealth,
1515
1436
  accounts,
1516
- proAdvisor: this.getProAdvisor(),
1517
1437
  recentEvents: [...this.recentEvents],
1518
1438
  requestLog: this.requestLog.slice(0, 100),
1519
1439
  tokenUsage: this.getTokenUsage(),
@@ -1556,7 +1476,7 @@ export class AccountRotator {
1556
1476
  ).length;
1557
1477
 
1558
1478
  return {
1559
- wasProAccount: this.isProAccount(account),
1479
+ wasProAccount: account.config.type === "pro",
1560
1480
  accountQuotaPercent: quota,
1561
1481
  timerType,
1562
1482
  poolSize: this.accounts.length,
@@ -1599,7 +1519,6 @@ export class AccountRotator {
1599
1519
  inFlightRequests: 0,
1600
1520
  inFlightByModel: {},
1601
1521
  allowFreshWindowStartsOverride: false,
1602
- quotaWindows: {},
1603
1522
  dailyRequestCount: 0,
1604
1523
  dailyRequestDay: currentUtcDay(),
1605
1524
  };
@@ -1642,153 +1561,6 @@ export class AccountRotator {
1642
1561
  return this.accounts.find((a) => a.config.email === email);
1643
1562
  }
1644
1563
 
1645
- // =========================================================================
1646
- // Pro Family Sharing Advisor
1647
- // =========================================================================
1648
-
1649
- // Model keys relevant for Pro advisor decisions (ignore Flash)
1650
- private static PRO_ADVISOR_MODELS = ["gemini-3.1-pro", "claude-opus-4-6-thinking"];
1651
-
1652
- /**
1653
- * Check if a model's current 7d timer is the Pro cooldown (not Free).
1654
- * Uses the dual-window tracker: compares current resetTime against recorded Pro resetTime.
1655
- */
1656
- private isProOriginatedTimer(account: AccountRuntime, modelKey: string): boolean {
1657
- const tracker = account.quotaWindows[modelKey];
1658
- if (!tracker || tracker.pro.lastSeen === 0) return false;
1659
-
1660
- const currentQuota = account.quota.find(
1661
- (q) => q.modelKey.includes(modelKey) || modelKey.includes(q.modelKey),
1662
- );
1663
- if (!currentQuota || currentQuota.timerType !== "7d") return false;
1664
-
1665
- const currentResetMs = currentQuota.resetTime ? new Date(currentQuota.resetTime).getTime() : 0;
1666
- if (tracker.pro.resetTimeMs === 0 || currentResetMs === 0) return false;
1667
-
1668
- // Tight 5-min tolerance against permanent anchor
1669
- return Math.abs(currentResetMs - tracker.pro.resetTimeMs) < 300000;
1670
- }
1671
-
1672
- /**
1673
- * An account is currently considered "Pro" if, during the very last quota poll,
1674
- * its advisor models were tracked in the PRO bucket of the dual-window tracker.
1675
- */
1676
- private isProAccount(account: AccountRuntime): boolean {
1677
- if (account.lastQuotaPoll === 0) return false;
1678
-
1679
- for (const m of AccountRotator.PRO_ADVISOR_MODELS) {
1680
- const tracker = account.quotaWindows[m];
1681
- if (!tracker) continue;
1682
- // If the Pro window was updated exactly during the last poll, it's Pro.
1683
- // Give a tiny 1s margin for JS execution timing.
1684
- if (tracker.pro.lastSeen > 0 && Math.abs(tracker.pro.lastSeen - account.lastQuotaPoll) < 1000) {
1685
- return true;
1686
- }
1687
- }
1688
- return false;
1689
- }
1690
-
1691
- /**
1692
- * Get the "other" window's quota info for an account/model.
1693
- * If currently showing Pro timer → returns Free window data (and vice versa).
1694
- */
1695
- private getAlternateWindow(account: AccountRuntime, modelKey: string): { type: "pro" | "free"; quota: number; resetTimeMs: number; resetTime: string | null } | null {
1696
- const tracker = account.quotaWindows[modelKey];
1697
- if (!tracker) return null;
1698
- const currentQuota = account.quota.find(
1699
- (q) => q.modelKey.includes(modelKey) || modelKey.includes(q.modelKey),
1700
- );
1701
- if (!currentQuota) return null;
1702
-
1703
- if (this.isProOriginatedTimer(account, modelKey) || currentQuota.timerType === "5h") {
1704
- // Currently on Pro — return Free window
1705
- if (tracker.free.lastSeen === 0) return null;
1706
- return { type: "free", quota: tracker.free.lastQuota, resetTimeMs: tracker.free.resetTimeMs, resetTime: tracker.free.resetTime };
1707
- } else {
1708
- // Currently on Free — return Pro window
1709
- if (tracker.pro.lastSeen === 0) return null;
1710
- return { type: "pro", quota: tracker.pro.lastQuota, resetTimeMs: tracker.pro.resetTimeMs, resetTime: tracker.pro.resetTime };
1711
- }
1712
- }
1713
-
1714
- private getProAdvisor(): StatusResponse["proAdvisor"] {
1715
- const maxSlots = this.config.proSlots ?? 6;
1716
- const proAccounts = this.accounts.filter((a) => !a.disabled && !a.flagged && this.isProAccount(a));
1717
- const currentProCount = proAccounts.length;
1718
- const actions: ProAdvisorAction[] = [];
1719
-
1720
- // Comparative Quota Analysis Logic (Cumulative Score)
1721
- for (const account of this.accounts) {
1722
- if (account.disabled || account.flagged) continue;
1723
-
1724
- let totalProScore = 0;
1725
- let totalFreeScore = 0;
1726
- let hasAnyProData = false;
1727
- let hasAnyFreeData = false;
1728
-
1729
- for (const modelKey of AccountRotator.PRO_ADVISOR_MODELS) {
1730
- const tracker = account.quotaWindows[modelKey];
1731
- if (!tracker) continue;
1732
- if (tracker.pro.lastSeen > 0) {
1733
- totalProScore += Math.max(0, tracker.pro.lastQuota);
1734
- hasAnyProData = true;
1735
- }
1736
- if (tracker.free.lastSeen > 0) {
1737
- totalFreeScore += Math.max(0, tracker.free.lastQuota);
1738
- hasAnyFreeData = true;
1739
- }
1740
- }
1741
-
1742
- // If a tier has no data at all, its score is effectively 0
1743
- const effectivePro = hasAnyProData ? totalProScore : 0;
1744
- const effectiveFree = hasAnyFreeData ? totalFreeScore : 0;
1745
-
1746
- const isCurrentlyPro = this.isProAccount(account);
1747
-
1748
- if (isCurrentlyPro) {
1749
- // Account is currently in PRO tier
1750
- if (account.config.familyManager) continue; // Never remove FM
1751
-
1752
- if (effectiveFree > effectivePro) {
1753
- actions.push({
1754
- type: "remove-pro",
1755
- email: account.config.email,
1756
- label: account.config.label || account.config.email,
1757
- reason: `Free tier has significantly more combined quota (${effectiveFree}%) than Pro tier (${effectivePro}%). Downgrade to use Free tokens.`,
1758
- });
1759
- } else if (effectivePro === 0 && effectiveFree === 0) {
1760
- actions.push({
1761
- type: "remove-pro",
1762
- email: account.config.email,
1763
- label: account.config.label || account.config.email,
1764
- reason: `All quota exhausted (0%). Safe to remove from Pro family to free up a slot.`,
1765
- });
1766
- }
1767
- } else {
1768
- // Account is currently in FREE tier
1769
- if (effectivePro > effectiveFree) {
1770
- actions.push({
1771
- type: "add-pro",
1772
- email: account.config.email,
1773
- label: account.config.label || account.config.email,
1774
- reason: `Pro tier has significantly more combined quota (${effectivePro}%) than Free tier (${effectiveFree}%). Upgrade to use Pro tokens.`,
1775
- _diff: effectivePro - effectiveFree, // temporary property for sorting
1776
- } as ProAdvisorAction & { _diff: number });
1777
- }
1778
- }
1779
- }
1780
-
1781
- // Sort add-pro actions by highest Pro quota difference
1782
- actions.sort((a, b) => {
1783
- if (a.type === "add-pro" && b.type === "add-pro") {
1784
- return ((b as any)._diff || 0) - ((a as any)._diff || 0);
1785
- }
1786
- return 0;
1787
- });
1788
-
1789
- return { currentProCount, maxProSlots: maxSlots, actions };
1790
- }
1791
-
1792
1564
  private shouldUseRequestCountRotation(account: AccountRuntime, model?: string): boolean {
1793
1565
  if (!this.config.useRequestCountRotationWhenQuotaUnknownOnly) return true;
1794
1566
  const modelKey = model ? resolveQuotaModelKey(model) : null;
package/src/telemetry.ts CHANGED
@@ -67,7 +67,6 @@ export function trackFeature(feature: string): void {
67
67
  export function getFeaturesSnapshot(): Record<string, boolean> {
68
68
  return {
69
69
  dashboard: _featuresUsed.has("dashboard"),
70
- proAdvisor: _featuresUsed.has("proAdvisor"),
71
70
  freshWindowToggle: _featuresUsed.has("freshWindowToggle"),
72
71
  hostedLogin: _featuresUsed.has("hostedLogin"),
73
72
  };
package/src/types.ts CHANGED
@@ -11,8 +11,6 @@ export interface AccountConfig {
11
11
  label?: string;
12
12
  // Optional - pro/free is detected dynamically from quota API reset times
13
13
  type?: AccountType;
14
- // This account owns the family plan and can never be removed from Pro
15
- familyManager?: boolean;
16
14
  }
17
15
 
18
16
  export interface Config {
@@ -23,8 +21,6 @@ export interface Config {
23
21
  rotateOnQuotaDrop: number;
24
22
  // How often to poll quota (ms). Default: 5min
25
23
  quotaPollIntervalMs: number;
26
- // Max simultaneous Pro accounts (owner + members). Default: 6
27
- proSlots?: number;
28
24
  // Hard cap on parallel requests per account. Conservative default is 1.
29
25
  maxConcurrentRequestsPerAccount?: number;
30
26
  // Hard cap on parallel requests per projectId/model. Conservative default is 1.
@@ -159,7 +155,6 @@ export interface AccountRuntime {
159
155
  inFlightRequests: number;
160
156
  inFlightByModel: Record<string, number>;
161
157
  allowFreshWindowStartsOverride: boolean;
162
- quotaWindows: QuotaWindowHistory;
163
158
  dailyRequestCount: number;
164
159
  dailyRequestDay: string;
165
160
  }
@@ -171,21 +166,6 @@ export interface ModelRotationState {
171
166
  requestsOnActiveAccount: number;
172
167
  }
173
168
 
174
- // Persisted state across restarts
175
- export interface QuotaWindowInfo {
176
- lastSeen: number; // timestamp of last observation
177
- resetTimeMs: number; // epoch ms of the resetTime
178
- resetTime: string | null; // ISO string for display
179
- lastQuota: number; // percentRemaining when last seen
180
- }
181
-
182
- export interface DualWindowTracker {
183
- pro: QuotaWindowInfo;
184
- free: QuotaWindowInfo;
185
- }
186
-
187
- // Per-account quota window tracking: keyed by model key
188
- export type QuotaWindowHistory = Record<string, DualWindowTracker>;
189
169
 
190
170
  export interface PersistedSafetyState {
191
171
  day: string;
@@ -218,7 +198,6 @@ export interface PersistedState {
218
198
  disabled: boolean;
219
199
  flagged: boolean;
220
200
  allowFreshWindowStartsOverride?: boolean;
221
- quotaWindows?: QuotaWindowHistory;
222
201
  }
223
202
  >;
224
203
  }
@@ -275,12 +254,6 @@ export interface StatusResponse {
275
254
  disabledCount: number;
276
255
  errorCount: number;
277
256
  };
278
- // Pro family sharing advisor
279
- proAdvisor: {
280
- currentProCount: number;
281
- maxProSlots: number;
282
- actions: ProAdvisorAction[];
283
- };
284
257
  recentEvents: RecentEvent[];
285
258
  requestLog: RequestLogEntry[];
286
259
  tokenUsage: TokenUsageData;
@@ -307,19 +280,10 @@ export interface AccountStatus {
307
280
  inFlightByModel: Record<string, number>;
308
281
  // Pro family sharing
309
282
  proDetected: boolean;
310
- quotaWindows: QuotaWindowHistory;
311
- familyManager: boolean;
312
283
  allowFreshWindowStartsOverride: boolean;
313
284
  effectiveFreshWindowStartsAllowed: boolean;
314
285
  }
315
286
 
316
- // Pro advisor suggestion
317
- export interface ProAdvisorAction {
318
- type: "add-pro" | "remove-pro";
319
- email: string;
320
- label: string;
321
- reason: string;
322
- }
323
287
 
324
288
  export interface RecentEvent {
325
289
  timestamp: number;