npm - @monotykamary/pi-tps - Versions diffs - 1.0.0 → 1.1.1 - Mend

@monotykamary/pi-tps 1.0.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/extensions/pi-tps/__tests__/dynamic-tps-cap.test.ts +390 -0
package/extensions/pi-tps/index.ts +49 -1
package/npm-shrinkwrap.json +2 -2
package/package.json +1 -1

package/extensions/pi-tps/__tests__/dynamic-tps-cap.test.ts ADDED Viewed

@@ -0,0 +1,390 @@
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import type { AssistantMessage } from '@earendil-works/pi-ai';
+import { createTestFixture, activateExtension } from './helpers';
+describe('pi-tps extension — dynamic TPS cap', () => {
+  let fixture: ReturnType<typeof createTestFixture>;
+  beforeEach(async () => {
+    fixture = createTestFixture();
+    await activateExtension(fixture);
+  });
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+  /**
+   * Drive a turn with mocked performance.now() timestamps.
+   * Set `isToolCall: true` to simulate a tool_execution_start during the turn.
+   */
+  function driveTurn(clocks: {
+    turnStart: number;
+    messageStart: number;
+    firstUpdate: number;
+    streamUpdates: number[];
+    messageEnd: number;
+    turnEnd?: number;
+    isToolCall?: boolean;
+  }) {
+    const { handlers, notifySpy, appendEntrySpy } = fixture;
+    const timestamps = [
+      clocks.turnStart,
+      clocks.turnStart,
+      clocks.messageStart,
+      clocks.firstUpdate,
+      ...clocks.streamUpdates,
+      clocks.messageEnd,
+      clocks.turnEnd ?? clocks.messageEnd,
+    ];
+    let callIdx = 0;
+    const spy = vi.spyOn(performance, 'now').mockImplementation(() => {
+      return timestamps[Math.min(callIdx++, timestamps.length - 1)];
+    });
+    const assistantMessage: AssistantMessage = {
+      role: 'assistant',
+      content: [{ type: 'text', text: 'Response' }],
+      api: 'openai-completions',
+      provider: 'openai',
+      model: 'gpt-4',
+      usage: {
+        input: 50,
+        output: 20,
+        cacheRead: 0,
+        cacheWrite: 0,
+        totalTokens: 70,
+        cost: { input: 0.001, output: 0.002, cacheRead: 0, cacheWrite: 0, total: 0.003 },
+      },
+      stopReason: clocks.isToolCall ? 'toolUse' : 'stop',
+      timestamp: Date.now(),
+    };
+    handlers['turn_start']?.({ type: 'turn_start', turnIndex: 0, timestamp: Date.now() });
+    handlers['message_start']?.({ type: 'message_start', message: assistantMessage });
+    handlers['message_update']?.({
+      type: 'message_update',
+      message: assistantMessage,
+      assistantMessageEvent: { type: 'text_delta', delta: 't' },
+    });
+    for (const _ts of clocks.streamUpdates) {
+      handlers['message_update']?.({
+        type: 'message_update',
+        message: assistantMessage,
+        assistantMessageEvent: { type: 'text_delta', delta: 't' },
+      });
+    }
+    // Simulate tool_execution_start if this is a tool call turn
+    if (clocks.isToolCall) {
+      handlers['tool_execution_start']?.({
+        type: 'tool_execution_start',
+        toolCallId: 'call_123',
+        toolName: 'bash',
+        args: { command: 'ls' },
+      });
+    }
+    handlers['message_end']?.({ type: 'message_end', message: assistantMessage });
+    handlers['turn_end']?.(
+      { type: 'turn_end', turnIndex: 0, message: assistantMessage, toolResults: [] },
+      fixture.mockCtx
+    );
+    spy.mockRestore();
+    return { notifySpy, appendEntrySpy };
+  }
+  // ── Cap is set by reliable streaming turns ────────────────────────────────
+  it('should set the TPS cap from a reliable streaming turn (primary branch, no tool call)', () => {
+    // 20 tokens / 0.4s = 50 TPS from primary branch
+    const { appendEntrySpy } = driveTurn({
+      turnStart: 0,
+      messageStart: 200,
+      firstUpdate: 200.123,
+      streamUpdates: [400, 500, 600, 700, 800],
+      messageEnd: 900,
+    });
+    const [, data] = appendEntrySpy.mock.calls[0];
+    // TPS should be ~50, and isPrimaryBranch should be true
+    expect(data.tps).toBeGreaterThanOrEqual(40);
+    expect(data.tps).toBeLessThanOrEqual(60);
+    expect(data.isPrimaryBranch).toBe(true);
+  });
+  // ── Cap is applied to tool-call turns ─────────────────────────────────────
+  it('should clamp tool-call TPS to the cap set by a prior streaming turn', () => {
+    // Turn 1: reliable streaming response → sets cap at ~50 TPS
+    driveTurn({
+      turnStart: 0,
+      messageStart: 200,
+      firstUpdate: 200.123,
+      streamUpdates: [400, 500, 600, 700, 800],
+      messageEnd: 900,
+    });
+    // Turn 2: tool call with fallback TPS (2 updates, 250ms generationMs)
+    // Without cap: 20 tokens / 0.25s ≈ 80 TPS (feasible but from short window)
+    // With cap: min(80, 50) = 50 TPS
+    const { appendEntrySpy, notifySpy } = driveTurn({
+      turnStart: 0,
+      messageStart: 100,
+      firstUpdate: 100.1,
+      streamUpdates: [100.15, 100.3],
+      messageEnd: 350,
+      isToolCall: true,
+    });
+    const [, data] = appendEntrySpy.mock.calls[1];
+    expect(data.tps).not.toBeNull();
+    // Must be clamped to the ~50 TPS cap, not the inflated fallback value
+    expect(data.tps).toBeLessThanOrEqual(55);
+    expect(data.tps).toBeGreaterThan(0);
+  });
+  // ── Tool calls do not set the cap from fallback ─────────────────────────────
+  it('should not let fallback-branch tool-call turns set the cap', () => {
+    // Turn 1: tool call with fallback TPS — should NOT set the cap
+    const { appendEntrySpy: spy1 } = driveTurn({
+      turnStart: 0,
+      messageStart: 100,
+      firstUpdate: 100.1,
+      streamUpdates: [100.15, 100.3],
+      messageEnd: 350,
+      isToolCall: true,
+    });
+    const [, data1] = spy1.mock.calls[0];
+    // No cap → fallback tool call TPS is null
+    expect(data1.tps).toBeNull();
+    // Turn 2: reliable streaming response at ~50 TPS → sets the cap
+    const { appendEntrySpy: spy2 } = driveTurn({
+      turnStart: 0,
+      messageStart: 200,
+      firstUpdate: 200.123,
+      streamUpdates: [400, 500, 600, 700, 800],
+      messageEnd: 900,
+    });
+    const [, data2] = spy2.mock.calls[1];
+    expect(data2.tps).toBeGreaterThanOrEqual(40);
+    expect(data2.tps).toBeLessThanOrEqual(60);
+    // Turn 3: another fallback tool call — should now be clamped to 50
+    const { appendEntrySpy: spy3 } = driveTurn({
+      turnStart: 0,
+      messageStart: 100,
+      firstUpdate: 100.1,
+      streamUpdates: [100.15, 100.3],
+      messageEnd: 350,
+      isToolCall: true,
+    });
+    const [, data3] = spy3.mock.calls[2];
+    expect(data3.tps).not.toBeNull();
+    expect(data3.tps).toBeLessThanOrEqual(55);
+  });
+  // ── Primary-branch tool calls (reasoning) set the cap ──────────────────────
+  it('should let primary-branch tool-call turns set the cap (e.g. reasoning before tool call)', () => {
+    // Turn 1: tool call with PRIMARY-branch TPS (reasoning + tool call, enough updates/time)
+    // 20 tokens / 0.4s = 50 TPS from primary branch, isToolCall = true
+    const { appendEntrySpy: spy1 } = driveTurn({
+      turnStart: 0,
+      messageStart: 200,
+      firstUpdate: 200.123,
+      streamUpdates: [400, 500, 600, 700, 800],
+      messageEnd: 900,
+      isToolCall: true,
+    });
+    const [, data1] = spy1.mock.calls[0];
+    // Primary branch + isToolCall → TPS is still computed (not null/capped)
+    expect(data1.tps).toBeGreaterThanOrEqual(40);
+    expect(data1.tps).toBeLessThanOrEqual(60);
+    expect(data1.isPrimaryBranch).toBe(true);
+    // Turn 2: fallback tool call — should be clamped to the cap from turn 1
+    const { appendEntrySpy: spy2 } = driveTurn({
+      turnStart: 0,
+      messageStart: 100,
+      firstUpdate: 100.1,
+      streamUpdates: [100.15, 100.3],
+      messageEnd: 350,
+      isToolCall: true,
+    });
+    const [, data2] = spy2.mock.calls[1];
+    expect(data2.tps).not.toBeNull();
+    // Clamped to ~50 cap set by the primary-branch tool call in turn 1
+    expect(data2.tps).toBeLessThanOrEqual(55);
+  });
+  // ── Cold start: no cap yet ────────────────────────────────────────────────
+  it('should show null TPS for tool calls when no cap exists yet', () => {
+    const { notifySpy, appendEntrySpy } = driveTurn({
+      turnStart: 0,
+      messageStart: 100,
+      firstUpdate: 100.1,
+      streamUpdates: [100.15, 100.3],
+      messageEnd: 350,
+      isToolCall: true,
+    });
+    const notification = notifySpy.mock.calls[0][0] as string;
+    // No streaming turn has set the cap yet → tool call TPS is null
+    expect(notification).toContain('TPS —');
+    const [, data] = appendEntrySpy.mock.calls[0];
+    expect(data.tps).toBeNull();
+  });
+  // ── Non-tool-call fallback turns are not clamped ──────────────────────────
+  it('should not clamp non-tool-call fallback TPS', () => {
+    // Turn 1: set cap at ~50 TPS from a reliable streaming turn
+    driveTurn({
+      turnStart: 0,
+      messageStart: 200,
+      firstUpdate: 200.123,
+      streamUpdates: [400, 500, 600, 700, 800],
+      messageEnd: 900,
+    });
+    // Turn 2: non-tool-call fallback (e.g. short burst response)
+    // This should NOT be clamped — only tool calls get capped
+    const { appendEntrySpy } = driveTurn({
+      turnStart: 0,
+      messageStart: 100,
+      firstUpdate: 100.1,
+      streamUpdates: [100.15, 100.3],
+      messageEnd: 350,
+      isToolCall: false,
+    });
+    const [, data] = appendEntrySpy.mock.calls[1];
+    expect(data.tps).not.toBeNull();
+    // Non-tool-call fallback TPS is uncapped — may be high
+    expect(data.tps).toBeGreaterThan(50);
+  });
+  // ── Cap is per-model ──────────────────────────────────────────────────────
+  it('should maintain separate caps per model', () => {
+    // Turn 1: openai/gpt-4 streaming → sets cap at ~50 TPS
+    driveTurn({
+      turnStart: 0,
+      messageStart: 200,
+      firstUpdate: 200.123,
+      streamUpdates: [400, 500, 600, 700, 800],
+      messageEnd: 900,
+    });
+    // Turn 2: deepseek/deepseek-v3 tool call → no cap for deepseek yet, so TPS is null
+    // Drive the handlers directly (driveTurn hard-codes openai/gpt-4) so the gpt-4 cap cannot apply
+    const { handlers, appendEntrySpy } = fixture;
+    const deepseek: AssistantMessage = {
+      role: 'assistant',
+      content: [{ type: 'text', text: 'Hi' }],
+      api: 'openai-completions',
+      provider: 'deepseek',
+      model: 'deepseek-v3',
+      usage: {
+        input: 50,
+        output: 20,
+        cacheRead: 0,
+        cacheWrite: 0,
+        totalTokens: 70,
+        cost: { input: 0.001, output: 0.002, cacheRead: 0, cacheWrite: 0, total: 0.003 },
+      },
+      stopReason: 'toolUse',
+      timestamp: Date.now(),
+    };
+    let callIdx = 0;
+    const timestamps = [0, 0, 100, 100.1, 100.15, 100.3, 300, 300];
+    const spy = vi.spyOn(performance, 'now').mockImplementation(() => {
+      return timestamps[Math.min(callIdx++, timestamps.length - 1)];
+    });
+    handlers['turn_start']?.({ type: 'turn_start', turnIndex: 1, timestamp: Date.now() });
+    handlers['message_start']?.({ type: 'message_start', message: deepseek });
+    handlers['message_update']?.({
+      type: 'message_update',
+      message: deepseek,
+      assistantMessageEvent: { type: 'text_delta', delta: 't' },
+    });
+    handlers['message_update']?.({
+      type: 'message_update',
+      message: deepseek,
+      assistantMessageEvent: { type: 'text_delta', delta: 't' },
+    });
+    handlers['message_update']?.({
+      type: 'message_update',
+      message: deepseek,
+      assistantMessageEvent: { type: 'text_delta', delta: 't' },
+    });
+    handlers['tool_execution_start']?.({
+      type: 'tool_execution_start',
+      toolCallId: 'call_1',
+      toolName: 'bash',
+      args: {},
+    });
+    handlers['message_end']?.({ type: 'message_end', message: deepseek });
+    handlers['turn_end']?.(
+      { type: 'turn_end', turnIndex: 1, message: deepseek, toolResults: [] },
+      fixture.mockCtx
+    );
+    spy.mockRestore();
+    const [, data2] = appendEntrySpy.mock.calls[1];
+    // DeepSeek has no cap yet → tool call TPS is null
+    expect(data2.tps).toBeNull();
+  });
+  // ── Cap only goes up ──────────────────────────────────────────────────────
+  it('should only raise the cap, never lower it', () => {
+    // Turn 1: sets cap at ~50 TPS
+    driveTurn({
+      turnStart: 0,
+      messageStart: 200,
+      firstUpdate: 200.123,
+      streamUpdates: [400, 500, 600, 700, 800],
+      messageEnd: 900,
+    });
+    // Turn 2: slower streaming response at ~25 TPS → cap stays at 50
+    const { appendEntrySpy } = driveTurn({
+      turnStart: 0,
+      messageStart: 200,
+      firstUpdate: 200.123,
+      streamUpdates: [600, 800, 1000, 1200, 1400],
+      messageEnd: 1500,
+    });
+    const [, data2] = appendEntrySpy.mock.calls[1];
+    // This turn's TPS is 25, but the cap should still be 50
+    expect(data2.tps).toBeGreaterThanOrEqual(15);
+    expect(data2.tps).toBeLessThanOrEqual(35);
+    // Turn 3: tool call → should be capped at 50, not 25
+    const { appendEntrySpy: spy3 } = driveTurn({
+      turnStart: 0,
+      messageStart: 100,
+      firstUpdate: 100.1,
+      streamUpdates: [100.15, 100.3],
+      messageEnd: 350,
+      isToolCall: true,
+    });
+    const [, data3] = spy3.mock.calls[2];
+    expect(data3.tps).not.toBeNull();
+    // Capped at 50 (the higher of the two streaming measurements)
+    expect(data3.tps).toBeLessThanOrEqual(55);
+  });
+});

package/extensions/pi-tps/index.ts CHANGED Viewed

@@ -60,6 +60,13 @@ interface SessionTreeEvent {
   oldLeafId: string | null;
 }
+interface ToolExecutionStartEvent {
+  type: 'tool_execution_start';
+  toolCallId: string;
+  toolName: string;
+  args: unknown;
+}
 // ─── Constants ──────────────────────────────────────────────────────────────
 /** Minimum gap between token updates to count as a stall (ms) */
@@ -81,6 +88,7 @@ interface TurnTelemetry {
     messageCount: number; // assistant messages in this turn
   };
   tps: number | null; // output / (streamMs / 1000), null when burst/degenerate
+  isPrimaryBranch: boolean; // TPS came from primary-branch (reliable) measurement
   cost: {
     input: number;
     output: number;
@@ -108,6 +116,8 @@ interface TurnTiming {
   stallCount: number;
   inStall: boolean;
   messageCount: number;
+  isToolCall: boolean; // tool_execution_start fired during this turn
+  isPrimaryBranch: boolean; // TPS came from primary-branch (reliable) measurement
 }
 // ─── Helpers ────────────────────────────────────────────────────────────────
@@ -293,7 +303,7 @@ function buildTelemetry(timing: TurnTiming, turnEndMs: number): TurnTelemetry |
   const MIN_STREAM_MS = 1;
   const MIN_STREAM_UPDATES = 5;
   const MIN_INTER_CHUNK_MS = 1;
-  const MIN_GENERATION_MS = 50;
+  const MIN_GENERATION_MS = 200;
   const ACTIVE_TIME_THRESHOLD_MS = 200;
   const STALL_REDUCTION_DENOM = 2;
   const STALL_DOMINANCE_RATIO = 0.85;
@@ -336,6 +346,7 @@ function buildTelemetry(timing: TurnTiming, turnEndMs: number): TurnTelemetry |
   //              Includes TTFT, underestimates, but never overshoots.
   //   Else:      null — structurally unidentifiable.
   let tps: number | null = null;
+  let isPrimaryBranch = false;
   if (
     streamMs !== null &&
     streamMs >= MIN_STREAM_MS &&
@@ -351,6 +362,7 @@ function buildTelemetry(timing: TurnTiming, turnEndMs: number): TurnTelemetry |
     const effectiveStreamMs = streamMs - timing.stallMs;
     const raw = output / (effectiveStreamMs / 1000);
     tps = Math.round(raw * 10) / 10;
+    isPrimaryBranch = true;
   } else if (timing.updateCount >= 2 && timing.totalGenerationMs >= MIN_GENERATION_MS) {
     // Fallback: use generationMs (message_start → message_end) minus
     // stalls. This includes TTFT, so it underestimates generation speed,
@@ -386,6 +398,7 @@ function buildTelemetry(timing: TurnTiming, turnEndMs: number): TurnTelemetry |
       messageCount: timing.messageCount,
     },
     tps,
+    isPrimaryBranch,
     cost: hasCost
       ? {
           input: costInput,
@@ -405,6 +418,10 @@ export default function tpsExtension(pi: ExtensionAPI) {
   // Current turn timing state
   let currentTiming: TurnTiming | null = null;
+  // Per-model TPS cap: highest reliable (primary-branch) TPS observed, including turns
+  // that made a tool call. Tool-call turns get clamped to this value.
+  const tpsCaps = new Map<string, number>(); // "provider:modelId" → cap
   // Cached session entries for argument completion (captured on session_start / session_tree)
   let cachedEntries: Array<{ type?: string; customType?: string; data?: unknown }> = [];
@@ -473,6 +490,8 @@ export default function tpsExtension(pi: ExtensionAPI) {
       stallCount: 0,
       inStall: false,
       messageCount: 0,
+      isToolCall: false,
+      isPrimaryBranch: false,
     };
   });
@@ -537,6 +556,13 @@ export default function tpsExtension(pi: ExtensionAPI) {
     currentTiming.lastUpdateMs = now;
   });
+  // Track when a tool starts executing — marks this turn as a tool call
+  // for the dynamic TPS cap (tool-call turns are clamped to the cap at turn end).
+  pi.on('tool_execution_start', (_event: ToolExecutionStartEvent) => {
+    if (!currentTiming) return;
+    currentTiming.isToolCall = true;
+  });
   // Track when a message ends
   pi.on('message_end', (event: MessageEndEvent) => {
     if (!currentTiming) return;
@@ -569,6 +595,28 @@ export default function tpsExtension(pi: ExtensionAPI) {
     const telemetry = buildTelemetry(timing, turnEndMs);
     if (!telemetry) return;
+    // ── Dynamic TPS cap ────────────────────────────────────────────────
+    // Any primary-branch (reliable) measurement may raise the cap, tool call or not.
+    // Tool-call turns get clamped to the cap to prevent inflation from short outputs
+    // over tiny time windows; with no cap recorded yet, their TPS becomes null.
+    const modelKey = `${telemetry.model.provider}:${telemetry.model.modelId}`;
+    if (telemetry.isPrimaryBranch && telemetry.tps !== null) {
+      const currentCap = tpsCaps.get(modelKey);
+      if (currentCap === undefined || telemetry.tps > currentCap) {
+        tpsCaps.set(modelKey, telemetry.tps);
+      }
+    }
+    if (timing.isToolCall && telemetry.tps !== null) {
+      const cap = tpsCaps.get(modelKey);
+      if (cap !== undefined) {
+        telemetry.tps = Math.min(telemetry.tps, cap);
+      } else {
+        telemetry.tps = null;
+      }
+    }
     // Persist structured telemetry to session for export and rehydration
     pi.appendEntry('tps', telemetry);

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "pi-tps",
-  "version": "1.0.0",
+  "version": "1.1.1",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "pi-tps",
-      "version": "1.0.0",
+      "version": "1.1.1",
       "hasInstallScript": true,
       "license": "MIT",
       "devDependencies": {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@monotykamary/pi-tps",
-  "version": "1.0.0",
+  "version": "1.1.1",
   "description": "Tokens-per-second tracker for pi — see your LLM generation speed after every agent turn",
   "keywords": [
     "pi-package"