npm - stagent - Versions diffs - 0.1.10 → 0.1.12 - Mend

stagent 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112) hide show

package/README.md +58 -27
package/package.json +3 -3
package/src/__tests__/e2e/blueprint.test.ts +63 -0
package/src/__tests__/e2e/cross-runtime.test.ts +77 -0
package/src/__tests__/e2e/helpers.ts +286 -0
package/src/__tests__/e2e/parallel-workflow.test.ts +120 -0
package/src/__tests__/e2e/sequence-workflow.test.ts +109 -0
package/src/__tests__/e2e/setup.ts +156 -0
package/src/__tests__/e2e/single-task.test.ts +170 -0
package/src/app/api/command-palette/recent/route.ts +41 -18
package/src/app/api/context/batch/route.ts +44 -0
package/src/app/api/permissions/presets/route.ts +80 -0
package/src/app/api/playbook/status/route.ts +15 -0
package/src/app/api/profiles/route.ts +23 -21
package/src/app/api/settings/pricing/route.ts +15 -0
package/src/app/costs/page.tsx +53 -43
package/src/app/globals.css +0 -5
package/src/app/playbook/[slug]/page.tsx +76 -0
package/src/app/playbook/page.tsx +54 -0
package/src/app/profiles/page.tsx +7 -4
package/src/app/settings/page.tsx +2 -2
package/src/app/tasks/page.tsx +5 -0
package/src/components/costs/cost-dashboard.tsx +226 -320
package/src/components/dashboard/activity-feed.tsx +6 -2
package/src/components/notifications/batch-proposal-review.tsx +150 -0
package/src/components/notifications/notification-item.tsx +6 -3
package/src/components/notifications/pending-approval-host.tsx +57 -11
package/src/components/playbook/adoption-heatmap.tsx +69 -0
package/src/components/playbook/journey-card.tsx +110 -0
package/src/components/playbook/playbook-action-button.tsx +22 -0
package/src/components/playbook/playbook-browser.tsx +143 -0
package/src/components/playbook/playbook-card.tsx +102 -0
package/src/components/playbook/playbook-detail-view.tsx +223 -0
package/src/components/playbook/playbook-homepage.tsx +142 -0
package/src/components/playbook/playbook-toc.tsx +90 -0
package/src/components/playbook/playbook-updated-badge.tsx +23 -0
package/src/components/playbook/related-docs.tsx +30 -0
package/src/components/profiles/__tests__/learned-context-panel.test.tsx +175 -0
package/src/components/profiles/context-proposal-review.tsx +7 -3
package/src/components/profiles/learned-context-panel.tsx +116 -8
package/src/components/profiles/profile-detail-view.tsx +7 -19
package/src/components/profiles/profile-form-view.tsx +0 -22
package/src/components/settings/__tests__/auth-config-section.test.tsx +147 -0
package/src/components/settings/api-key-form.tsx +5 -43
package/src/components/settings/auth-config-section.tsx +10 -6
package/src/components/settings/auth-status-badge.tsx +8 -0
package/src/components/settings/budget-guardrails-section.tsx +403 -620
package/src/components/settings/connection-test-control.tsx +63 -0
package/src/components/settings/permissions-section.tsx +85 -75
package/src/components/settings/permissions-sections.tsx +24 -0
package/src/components/settings/presets-section.tsx +159 -0
package/src/components/settings/pricing-registry-panel.tsx +164 -0
package/src/components/shared/app-sidebar.tsx +2 -0
package/src/components/shared/command-palette.tsx +30 -0
package/src/components/shared/light-markdown.tsx +134 -0
package/src/components/workflows/loop-status-view.tsx +8 -4
package/src/components/workflows/workflow-status-view.tsx +16 -9
package/src/lib/agents/__tests__/claude-agent.test.ts +7 -2
package/src/lib/agents/__tests__/learned-context.test.ts +500 -0
package/src/lib/agents/__tests__/pattern-extractor.test.ts +243 -0
package/src/lib/agents/__tests__/sweep.test.ts +202 -0
package/src/lib/agents/claude-agent.ts +104 -78
package/src/lib/agents/learned-context.ts +32 -28
package/src/lib/agents/learning-session.ts +234 -0
package/src/lib/agents/pattern-extractor.ts +34 -64
package/src/lib/agents/profiles/__tests__/sort.test.ts +42 -0
package/src/lib/agents/profiles/builtins/code-reviewer/profile.yaml +0 -1
package/src/lib/agents/profiles/builtins/data-analyst/profile.yaml +0 -1
package/src/lib/agents/profiles/builtins/devops-engineer/profile.yaml +0 -1
package/src/lib/agents/profiles/builtins/document-writer/profile.yaml +0 -1
package/src/lib/agents/profiles/builtins/general/profile.yaml +0 -1
package/src/lib/agents/profiles/builtins/health-fitness-coach/profile.yaml +0 -1
package/src/lib/agents/profiles/builtins/learning-coach/profile.yaml +0 -1
package/src/lib/agents/profiles/builtins/project-manager/profile.yaml +0 -1
package/src/lib/agents/profiles/builtins/researcher/profile.yaml +0 -1
package/src/lib/agents/profiles/builtins/shopping-assistant/profile.yaml +0 -1
package/src/lib/agents/profiles/builtins/sweep/profile.yaml +0 -1
package/src/lib/agents/profiles/builtins/technical-writer/profile.yaml +0 -1
package/src/lib/agents/profiles/builtins/travel-planner/profile.yaml +0 -1
package/src/lib/agents/profiles/builtins/wealth-manager/profile.yaml +0 -1
package/src/lib/agents/profiles/registry.ts +0 -1
package/src/lib/agents/profiles/sort.ts +7 -0
package/src/lib/agents/profiles/types.ts +0 -1
package/src/lib/agents/runtime/catalog.ts +1 -1
package/src/lib/agents/runtime/claude.ts +66 -0
package/src/lib/constants/settings.ts +1 -0
package/src/lib/constants/task-status.ts +6 -0
package/src/lib/data/seed-data/profiles.ts +0 -3
package/src/lib/db/schema.ts +3 -0
package/src/lib/docs/adoption.ts +105 -0
package/src/lib/docs/journey-tracker.ts +21 -0
package/src/lib/docs/reader.ts +102 -0
package/src/lib/docs/types.ts +54 -0
package/src/lib/docs/usage-stage.ts +60 -0
package/src/lib/notifications/actionable.ts +18 -10
package/src/lib/settings/__tests__/budget-guardrails.test.ts +86 -24
package/src/lib/settings/budget-guardrails.ts +213 -85
package/src/lib/settings/permission-presets.ts +150 -0
package/src/lib/settings/runtime-setup.ts +71 -0
package/src/lib/usage/__tests__/ledger.test.ts +29 -5
package/src/lib/usage/__tests__/pricing-registry.test.ts +78 -0
package/src/lib/usage/ledger.ts +4 -2
package/src/lib/usage/pricing-registry.ts +570 -0
package/src/lib/usage/pricing.ts +15 -41
package/src/lib/utils/__tests__/learned-context-history.test.ts +171 -0
package/src/lib/utils/learned-context-history.ts +150 -0
package/src/lib/validators/__tests__/profile.test.ts +0 -15
package/src/lib/validators/__tests__/settings.test.ts +23 -16
package/src/lib/validators/profile.ts +0 -1
package/src/lib/validators/settings.ts +3 -9
package/src/lib/workflows/__tests__/engine.test.ts +2 -0
package/src/lib/workflows/engine.ts +20 -1

package/src/lib/settings/permission-presets.ts ADDED Viewed

@@ -0,0 +1,150 @@
+import { getAllowedPermissions, addAllowedPermission, removeAllowedPermission } from "./permissions";
+// ---------------------------------------------------------------------------
+// Preset definitions
+// ---------------------------------------------------------------------------
+export interface PermissionPreset { // shape of each entry in the PRESETS array
+  id: string; // identifier that getPreset() compares with ===
+  name: string; // short human-readable label (e.g. the "Read Only" entry)
+  description: string; // one-sentence summary of what the preset allows
+  risk: "low" | "medium" | "high"; // coarse risk tier assigned to the preset
+  patterns: string[]; // permission pattern strings; applyPreset()/removePreset() pass each one to the permission store
+}
+/**
+ * Built-in permission presets, listed from lowest to highest risk.
+ *
+ * Presets are layered by repeating pattern strings: "git-safe" and
+ * "full-auto" both list every "read-only" pattern, and "full-auto" also
+ * lists "Write" and "Edit" from "git-safe".
+ *
+ * The layering is not a strict superset chain: "full-auto" lists the bare
+ * "Bash" pattern rather than git-safe's "Bash(command:git *)", so code that
+ * compares pattern strings literally will not find every "git-safe"
+ * pattern inside "full-auto".
+ */
+export const PRESETS: PermissionPreset[] = [
+  {
+    id: "read-only",
+    name: "Read Only",
+    description: "Safe read operations — no file mutations or shell commands",
+    risk: "low",
+    patterns: ["Read", "Glob", "Grep", "LS", "NotebookRead"],
+  },
+  {
+    id: "git-safe",
+    name: "Git Safe",
+    description: "Read operations plus file editing and git commands",
+    risk: "medium",
+    patterns: [
+      // Includes all read-only patterns
+      "Read",
+      "Glob",
+      "Grep",
+      "LS",
+      "NotebookRead",
+      // Plus write + git
+      "Write",
+      "Edit",
+      "Bash(command:git *)",
+    ],
+  },
+  {
+    id: "full-auto",
+    name: "Full Auto",
+    description: "All tools auto-approved — maximum agent autonomy",
+    risk: "high",
+    patterns: [
+      // All safe tools
+      "Read",
+      "Glob",
+      "Grep",
+      "LS",
+      "NotebookRead",
+      "Write",
+      "Edit",
+      // All bash and other tools
+      "Bash",
+      "NotebookEdit",
+      "WebFetch",
+      "WebSearch",
+    ],
+  },
+];
+// ---------------------------------------------------------------------------
+// Preset operations
+// ---------------------------------------------------------------------------
+/**
+ * Get a preset by ID, or undefined if not found.
+ *
+ * The lookup is a linear scan of PRESETS using strict equality on `id`, so
+ * the match is case-sensitive and the first matching entry wins.
+ *
+ * @param presetId - ID to look up (e.g. "read-only").
+ * @returns The matching preset object from PRESETS (not a copy), or undefined.
+ */
+export function getPreset(presetId: string): PermissionPreset | undefined {
+  return PRESETS.find((p) => p.id === presetId);
+}
+/**
+ * Check which presets are currently fully active (all patterns present).
+ *
+ * A preset counts as active only when every one of its pattern strings is
+ * found verbatim in the stored permission list; membership is tested with
+ * Set#has, so a broader stored pattern (e.g. "Bash") does not satisfy a
+ * narrower preset pattern (e.g. "Bash(command:git *)").
+ *
+ * Because the read-only patterns are repeated in the larger presets,
+ * "read-only" is reported as active whenever "git-safe" or "full-auto" is.
+ *
+ * @returns IDs of the fully active presets, in PRESETS order.
+ */
+export async function getActivePresets(): Promise<string[]> {
+  const current = await getAllowedPermissions(); // presumably resolves to the stored pattern strings — confirm against ./permissions
+  const currentSet = new Set(current); // Set gives constant-time membership checks below
+  return PRESETS.filter((preset) =>
+    preset.patterns.every((p) => currentSet.has(p))
+  ).map((p) => p.id);
+}
+/**
+ * Check if a specific preset is fully active.
+ *
+ * Uses the same rule as getActivePresets(): every pattern of the preset
+ * must appear verbatim in the stored permission list.
+ *
+ * @param presetId - ID of the preset to check.
+ * @returns true when all of the preset's patterns are stored; false when
+ *   any is missing or when `presetId` matches no preset (no error is thrown).
+ */
+export async function isPresetActive(presetId: string): Promise<boolean> {
+  const preset = getPreset(presetId);
+  if (!preset) return false; // unknown ID: answer without reading the permission store
+  const current = await getAllowedPermissions();
+  const currentSet = new Set(current);
+  return preset.patterns.every((p) => currentSet.has(p));
+}
+/**
+ * Enable a preset — adds all its patterns to the permission store.
+ *
+ * The operation is additive: nothing is removed here. Patterns are added
+ * one at a time, each call awaited before the next, so a failure part-way
+ * through leaves the earlier patterns already stored.
+ *
+ * NOTE(review): this function does not de-duplicate; avoiding duplicate
+ * entries is presumably handled by addAllowedPermission — confirm.
+ *
+ * @param presetId - ID of the preset to enable.
+ * @throws Error (as a rejected promise) when `presetId` matches no preset.
+ */
+export async function applyPreset(presetId: string): Promise<void> {
+  const preset = getPreset(presetId);
+  if (!preset) {
+    throw new Error(`Unknown preset: ${presetId}`);
+  }
+  for (const pattern of preset.patterns) {
+    await addAllowedPermission(pattern); // sequential on purpose or not — the store's concurrency behaviour is not visible here
+  }
+}
+/**
+ * Disable a preset — removes the preset's patterns from the permission
+ * store, except those that also belong to another preset that is currently
+ * fully active.
+ *
+ * Only other presets are consulted: this function has no record of which
+ * patterns were approved individually, so a pattern that was also approved
+ * by hand is removed unless another active preset lists it.
+ *
+ * Consequences of the layered PRESETS definitions:
+ * - "read-only" is reported active whenever all of its patterns are stored,
+ *   so removing "git-safe" or "full-auto" leaves the read-only patterns in place.
+ * - Removing "read-only" while "git-safe" or "full-auto" is fully active
+ *   removes nothing, because every read-only pattern is shared with them.
+ *
+ * @param presetId - ID of the preset to disable.
+ * @throws Error (as a rejected promise) when `presetId` matches no preset.
+ */
+export async function removePreset(presetId: string): Promise<void> {
+  const preset = getPreset(presetId);
+  if (!preset) {
+    throw new Error(`Unknown preset: ${presetId}`);
+  }
+  // Gather patterns that belong to OTHER presets (excluding the one being removed)
+  const otherPresetPatterns = new Set<string>();
+  const activePresets = await getActivePresets(); // snapshot taken once, before any pattern is removed
+  for (const otherId of activePresets) {
+    if (otherId === presetId) continue;
+    const other = getPreset(otherId);
+    if (other) {
+      for (const p of other.patterns) {
+        otherPresetPatterns.add(p);
+      }
+    }
+  }
+  // Remove only patterns unique to this preset
+  for (const pattern of preset.patterns) {
+    if (!otherPresetPatterns.has(pattern)) {
+      await removeAllowedPermission(pattern); // removals are awaited one by one
+    }
+  }
+}

package/src/lib/settings/runtime-setup.ts ADDED Viewed

@@ -0,0 +1,71 @@
+import {
+  getRuntimeCatalogEntry,
+  SUPPORTED_AGENT_RUNTIMES,
+  type AgentRuntimeId,
+} from "@/lib/agents/runtime/catalog";
+import { getAuthSettings } from "./auth";
+import { getOpenAIAuthSettings } from "./openai-auth";
+import type { ApiKeySource, AuthMethod } from "@/lib/constants/settings";
+export type RuntimeBillingMode = "usage" | "subscription"; // getRuntimeSetupStates() picks "subscription" only for OAuth-authenticated Claude
+export type RuntimeSetupMethod = AuthMethod | "none"; // "none" marks a runtime with no credentials set up
+export interface RuntimeSetupState { // per-runtime summary produced by getRuntimeSetupStates()
+  runtimeId: AgentRuntimeId; // same value as the key this state is stored under
+  label: string; // copied from the runtime catalog entry
+  providerId: "anthropic" | "openai"; // copied from the runtime catalog entry
+  configured: boolean; // read by listConfiguredRuntimeIds() to filter runtimes
+  authMethod: RuntimeSetupMethod;
+  apiKeySource: ApiKeySource; // passed through unchanged from the provider's auth settings
+  billingMode: RuntimeBillingMode;
+}
+export async function getRuntimeSetupStates(): Promise<
+  Record<AgentRuntimeId, RuntimeSetupState>
+> { // Builds one RuntimeSetupState per runtime ID from the stored Claude and OpenAI auth settings.
+  const [claudeAuth, openAIAuth] = await Promise.all([ // the two settings reads are started together and awaited jointly
+    getAuthSettings(),
+    getOpenAIAuthSettings(),
+  ]);
+  const claudeRuntime = getRuntimeCatalogEntry("claude-code");
+  const openAIRuntime = getRuntimeCatalogEntry("openai-codex-app-server");
+  const claudeAuthMethod: RuntimeSetupMethod = // precedence: oauth, then api_key (when a key exists), then none
+    claudeAuth.method === "oauth" || claudeAuth.apiKeySource === "oauth"
+      ? "oauth"
+      : claudeAuth.hasKey
+        ? "api_key"
+        : "none";
+  const claudeConfigured = // NOTE(review): method === "oauth" alone does not count here, so authMethod can be "oauth" while configured is false — confirm this is intended
+    claudeAuth.hasKey || claudeAuth.apiKeySource === "oauth";
+  const states = {
+    "claude-code": {
+      runtimeId: "claude-code",
+      label: claudeRuntime.label,
+      providerId: claudeRuntime.providerId,
+      configured: claudeConfigured,
+      authMethod: claudeAuthMethod,
+      apiKeySource: claudeAuth.apiKeySource,
+      billingMode: claudeAuthMethod === "oauth" ? "subscription" : "usage", // OAuth is treated as subscription billing, anything else as metered usage
+    },
+    "openai-codex-app-server": {
+      runtimeId: "openai-codex-app-server",
+      label: openAIRuntime.label,
+      providerId: openAIRuntime.providerId,
+      configured: openAIAuth.hasKey, // the OpenAI runtime is configured exactly when an API key is present
+      authMethod: openAIAuth.hasKey ? "api_key" : "none",
+      apiKeySource: openAIAuth.apiKeySource,
+      billingMode: "usage", // always metered usage for this runtime
+    },
+  } satisfies Record<AgentRuntimeId, RuntimeSetupState>; // type-checks the literal against the record type without an assertion
+  return states;
+}
+export function listConfiguredRuntimeIds( // Returns the runtime IDs whose state has `configured` set, in SUPPORTED_AGENT_RUNTIMES order.
+  states: Record<AgentRuntimeId, RuntimeSetupState> // typically the result of getRuntimeSetupStates()
+) {
+  return SUPPORTED_AGENT_RUNTIMES.filter((runtimeId) => states[runtimeId].configured); // assumes every supported runtime ID has an entry in `states` — a missing key would throw here
+}

package/src/lib/usage/__tests__/ledger.test.ts CHANGED Viewed

@@ -33,9 +33,10 @@ function formatLocalDay(date: Date) {
 }
 describe("usage ledger", () => {
-  it("records normalized ledger rows with derived and unknown pricing states", async () => {
+  it("records normalized ledger rows with derived, fallback, and unknown pricing states", async () => {
     const { db, usageLedger, recordUsageLedgerEntry } = await loadUsageModules();
+    // Known model — gets specific pricing rule
     await recordUsageLedgerEntry({
       activityType: "task_assist",
       runtimeId: "claude-code",
@@ -49,6 +50,7 @@ describe("usage ledger", () => {
       finishedAt: new Date("2026-03-10T08:01:00.000Z"),
     });
+    // Unknown model — hits catch-all fallback pricing (conservative estimate)
     await recordUsageLedgerEntry({
       activityType: "task_assist",
       runtimeId: "openai-codex-app-server",
@@ -62,15 +64,37 @@ describe("usage ledger", () => {
       finishedAt: new Date("2026-03-10T09:01:00.000Z"),
     });
+    // Null modelId — gets unknown_pricing (no model to match)
+    await recordUsageLedgerEntry({
+      activityType: "task_run",
+      runtimeId: "claude-code",
+      providerId: "anthropic",
+      modelId: null,
+      inputTokens: 100,
+      outputTokens: 50,
+      totalTokens: 150,
+      status: "completed",
+      startedAt: new Date("2026-03-10T10:00:00.000Z"),
+      finishedAt: new Date("2026-03-10T10:01:00.000Z"),
+    });
     const rows = await db.select().from(usageLedger);
-    expect(rows).toHaveLength(2);
+    expect(rows).toHaveLength(3);
-    const priced = rows.find((row) => row.providerId === "anthropic");
+    // Known: specific pricing
+    const priced = rows.find((row) => row.modelId === "claude-sonnet-4-20250514");
     expect(priced?.costMicros).toBe(10_500);
     expect(priced?.status).toBe("completed");
-    expect(priced?.pricingVersion).toBe("registry-2026-03-12");
+    expect(priced?.pricingVersion).toBe("anthropic-claude-sonnet");
+    // Unknown model: fallback pricing (conservative Opus-tier for OpenAI: $10/$30)
+    const fallback = rows.find((row) => row.modelId === "codex-unknown");
+    expect(fallback?.costMicros).toBeGreaterThan(0);
+    expect(fallback?.status).toBe("completed");
+    expect(fallback?.pricingVersion).toBe("openai-fallback");
-    const unknown = rows.find((row) => row.providerId === "openai");
+    // Null modelId: truly unknown
+    const unknown = rows.find((row) => row.modelId === null);
     expect(unknown?.costMicros).toBeNull();
     expect(unknown?.status).toBe("unknown_pricing");
     expect(unknown?.pricingVersion).toBeNull();

package/src/lib/usage/__tests__/pricing-registry.test.ts ADDED Viewed

@@ -0,0 +1,78 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { mkdtempSync, rmSync } from "fs";
+import { join } from "path";
+import { tmpdir } from "os";
+let tempDir: string; // per-test scratch directory, created in beforeEach and deleted in afterEach
+beforeEach(() => {
+  tempDir = mkdtempSync(join(tmpdir(), "stagent-pricing-registry-")); // unique directory under the OS temp dir
+  vi.resetModules(); // presumably so each test's dynamic import of ../pricing-registry is evaluated afresh — confirm against Vitest docs
+  vi.stubEnv("STAGENT_DATA_DIR", tempDir); // presumably the data directory the registry reads and writes — confirm against the module
+});
+afterEach(() => {
+  vi.unstubAllEnvs(); // undo the STAGENT_DATA_DIR stub from beforeEach
+  vi.unstubAllGlobals(); // undo the global fetch stub installed by each test
+  rmSync(tempDir, { recursive: true, force: true }); // force: do not fail if the directory is already gone
+});
+describe("pricing registry", () => {
+  it("refreshes pricing from official provider pages and updates visible rows", async () => {
+    const fetch = vi.fn(async (input: string) => { // fake fetch: canned HTML chosen by URL substring
+      if (input.includes("anthropic.com")) {
+        return new Response(`
+          <html>
+            <body>
+              Claude Sonnet 4 $3 / 1M input tokens $15 / 1M output tokens
+              Claude Opus 4 $15 / 1M input tokens $75 / 1M output tokens
+              Claude Haiku 3.5 $0.80 / 1M input tokens $4 / 1M output tokens
+              Claude Pro $20
+              Max 5x $100
+              Max 20x $200
+            </body>
+          </html>
+        `);
+      }
+      return new Response(` 
+        <html>
+          <body>
+            GPT-5 $10 / 1M input tokens $30 / 1M output tokens
+            GPT-4o $2.50 / 1M input tokens $10 / 1M output tokens
+          </body>
+        </html>
+      `);
+    });
+    vi.stubGlobal("fetch", fetch);
+    const { refreshPricingRegistry } = await import("../pricing-registry"); // imported after the stubs are in place
+    const snapshot = await refreshPricingRegistry();
+    expect(snapshot.providers.anthropic.rows.find((row) => row.key === "anthropic-plan-pro")?.monthlyPriceUsd).toBe(20);
+    expect(snapshot.providers.anthropic.rows.find((row) => row.key === "anthropic-claude-sonnet")?.inputCostPerMillionMicros).toBe(3_000_000);
+    expect(snapshot.providers.openai.rows.find((row) => row.key === "openai-gpt-5")?.outputCostPerMillionMicros).toBe(30_000_000);
+  });
+  it("keeps last-known-good pricing when a refresh fails", async () => {
+    const fetch = vi.fn(async (input: string) => { // Anthropic request succeeds, every other request rejects
+      if (input.includes("anthropic.com")) {
+        return new Response(`<html><body>Claude Pro $20</body></html>`);
+      }
+      throw new Error("network down");
+    });
+    vi.stubGlobal("fetch", fetch);
+    const { refreshPricingRegistry, getPricingRegistrySnapshot } = await import(
+      "../pricing-registry"
+    );
+    await refreshPricingRegistry(); // expected to resolve even though the non-Anthropic fetch throws
+    const snapshot = await getPricingRegistrySnapshot();
+    expect(snapshot.providers.anthropic.refreshError).toBeNull();
+    expect(snapshot.providers.openai.refreshError).toContain("network down");
+    expect(snapshot.providers.openai.rows.find((row) => row.key === "openai-gpt-5")?.inputCostPerMillionMicros).toBe(10_000_000); // no successful OpenAI fetch in this test, so this value presumably comes from the registry's built-in data — confirm
+  });
+});

package/src/lib/usage/ledger.ts CHANGED Viewed

@@ -15,7 +15,9 @@ export type UsageActivityType =
   | "workflow_step"
   | "scheduled_firing"
   | "task_assist"
-  | "profile_test";
+  | "profile_test"
+  | "pattern_extraction"
+  | "context_summarization";
 export type UsageLedgerStatus =
   | "completed"
@@ -205,7 +207,7 @@ export async function recordUsageLedgerEntry(input: UsageLedgerWriteInput) {
     (normalizedInputTokens != null && normalizedOutputTokens != null
       ? normalizedInputTokens + normalizedOutputTokens
       : null);
-  const { costMicros, pricingVersion } = deriveUsageCostMicros({
+  const { costMicros, pricingVersion } = await deriveUsageCostMicros({
     providerId: input.providerId,
     modelId: input.modelId,
     inputTokens: normalizedInputTokens,