stagent 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/README.md +58 -27
  2. package/package.json +3 -3
  3. package/src/__tests__/e2e/blueprint.test.ts +63 -0
  4. package/src/__tests__/e2e/cross-runtime.test.ts +77 -0
  5. package/src/__tests__/e2e/helpers.ts +286 -0
  6. package/src/__tests__/e2e/parallel-workflow.test.ts +120 -0
  7. package/src/__tests__/e2e/sequence-workflow.test.ts +109 -0
  8. package/src/__tests__/e2e/setup.ts +156 -0
  9. package/src/__tests__/e2e/single-task.test.ts +170 -0
  10. package/src/app/api/command-palette/recent/route.ts +41 -18
  11. package/src/app/api/context/batch/route.ts +44 -0
  12. package/src/app/api/permissions/presets/route.ts +80 -0
  13. package/src/app/api/playbook/status/route.ts +15 -0
  14. package/src/app/api/profiles/route.ts +23 -21
  15. package/src/app/api/settings/pricing/route.ts +15 -0
  16. package/src/app/costs/page.tsx +53 -43
  17. package/src/app/globals.css +0 -5
  18. package/src/app/playbook/[slug]/page.tsx +76 -0
  19. package/src/app/playbook/page.tsx +54 -0
  20. package/src/app/profiles/page.tsx +7 -4
  21. package/src/app/settings/page.tsx +2 -2
  22. package/src/app/tasks/page.tsx +5 -0
  23. package/src/components/costs/cost-dashboard.tsx +226 -320
  24. package/src/components/dashboard/activity-feed.tsx +6 -2
  25. package/src/components/notifications/batch-proposal-review.tsx +150 -0
  26. package/src/components/notifications/notification-item.tsx +6 -3
  27. package/src/components/notifications/pending-approval-host.tsx +57 -11
  28. package/src/components/playbook/adoption-heatmap.tsx +69 -0
  29. package/src/components/playbook/journey-card.tsx +110 -0
  30. package/src/components/playbook/playbook-action-button.tsx +22 -0
  31. package/src/components/playbook/playbook-browser.tsx +143 -0
  32. package/src/components/playbook/playbook-card.tsx +102 -0
  33. package/src/components/playbook/playbook-detail-view.tsx +223 -0
  34. package/src/components/playbook/playbook-homepage.tsx +142 -0
  35. package/src/components/playbook/playbook-toc.tsx +90 -0
  36. package/src/components/playbook/playbook-updated-badge.tsx +23 -0
  37. package/src/components/playbook/related-docs.tsx +30 -0
  38. package/src/components/profiles/__tests__/learned-context-panel.test.tsx +175 -0
  39. package/src/components/profiles/context-proposal-review.tsx +7 -3
  40. package/src/components/profiles/learned-context-panel.tsx +116 -8
  41. package/src/components/profiles/profile-detail-view.tsx +7 -19
  42. package/src/components/profiles/profile-form-view.tsx +0 -22
  43. package/src/components/settings/__tests__/auth-config-section.test.tsx +147 -0
  44. package/src/components/settings/api-key-form.tsx +5 -43
  45. package/src/components/settings/auth-config-section.tsx +10 -6
  46. package/src/components/settings/auth-status-badge.tsx +8 -0
  47. package/src/components/settings/budget-guardrails-section.tsx +403 -620
  48. package/src/components/settings/connection-test-control.tsx +63 -0
  49. package/src/components/settings/permissions-section.tsx +85 -75
  50. package/src/components/settings/permissions-sections.tsx +24 -0
  51. package/src/components/settings/presets-section.tsx +159 -0
  52. package/src/components/settings/pricing-registry-panel.tsx +164 -0
  53. package/src/components/shared/app-sidebar.tsx +2 -0
  54. package/src/components/shared/command-palette.tsx +30 -0
  55. package/src/components/shared/light-markdown.tsx +134 -0
  56. package/src/components/workflows/loop-status-view.tsx +8 -4
  57. package/src/components/workflows/workflow-status-view.tsx +16 -9
  58. package/src/lib/agents/__tests__/claude-agent.test.ts +7 -2
  59. package/src/lib/agents/__tests__/learned-context.test.ts +500 -0
  60. package/src/lib/agents/__tests__/pattern-extractor.test.ts +243 -0
  61. package/src/lib/agents/__tests__/sweep.test.ts +202 -0
  62. package/src/lib/agents/claude-agent.ts +104 -78
  63. package/src/lib/agents/learned-context.ts +32 -28
  64. package/src/lib/agents/learning-session.ts +234 -0
  65. package/src/lib/agents/pattern-extractor.ts +34 -64
  66. package/src/lib/agents/profiles/__tests__/sort.test.ts +42 -0
  67. package/src/lib/agents/profiles/builtins/code-reviewer/profile.yaml +0 -1
  68. package/src/lib/agents/profiles/builtins/data-analyst/profile.yaml +0 -1
  69. package/src/lib/agents/profiles/builtins/devops-engineer/profile.yaml +0 -1
  70. package/src/lib/agents/profiles/builtins/document-writer/profile.yaml +0 -1
  71. package/src/lib/agents/profiles/builtins/general/profile.yaml +0 -1
  72. package/src/lib/agents/profiles/builtins/health-fitness-coach/profile.yaml +0 -1
  73. package/src/lib/agents/profiles/builtins/learning-coach/profile.yaml +0 -1
  74. package/src/lib/agents/profiles/builtins/project-manager/profile.yaml +0 -1
  75. package/src/lib/agents/profiles/builtins/researcher/profile.yaml +0 -1
  76. package/src/lib/agents/profiles/builtins/shopping-assistant/profile.yaml +0 -1
  77. package/src/lib/agents/profiles/builtins/sweep/profile.yaml +0 -1
  78. package/src/lib/agents/profiles/builtins/technical-writer/profile.yaml +0 -1
  79. package/src/lib/agents/profiles/builtins/travel-planner/profile.yaml +0 -1
  80. package/src/lib/agents/profiles/builtins/wealth-manager/profile.yaml +0 -1
  81. package/src/lib/agents/profiles/registry.ts +0 -1
  82. package/src/lib/agents/profiles/sort.ts +7 -0
  83. package/src/lib/agents/profiles/types.ts +0 -1
  84. package/src/lib/agents/runtime/catalog.ts +1 -1
  85. package/src/lib/agents/runtime/claude.ts +66 -0
  86. package/src/lib/constants/settings.ts +1 -0
  87. package/src/lib/constants/task-status.ts +6 -0
  88. package/src/lib/data/seed-data/profiles.ts +0 -3
  89. package/src/lib/db/schema.ts +3 -0
  90. package/src/lib/docs/adoption.ts +105 -0
  91. package/src/lib/docs/journey-tracker.ts +21 -0
  92. package/src/lib/docs/reader.ts +102 -0
  93. package/src/lib/docs/types.ts +54 -0
  94. package/src/lib/docs/usage-stage.ts +60 -0
  95. package/src/lib/notifications/actionable.ts +18 -10
  96. package/src/lib/settings/__tests__/budget-guardrails.test.ts +86 -24
  97. package/src/lib/settings/budget-guardrails.ts +213 -85
  98. package/src/lib/settings/permission-presets.ts +150 -0
  99. package/src/lib/settings/runtime-setup.ts +71 -0
  100. package/src/lib/usage/__tests__/ledger.test.ts +29 -5
  101. package/src/lib/usage/__tests__/pricing-registry.test.ts +78 -0
  102. package/src/lib/usage/ledger.ts +4 -2
  103. package/src/lib/usage/pricing-registry.ts +570 -0
  104. package/src/lib/usage/pricing.ts +15 -41
  105. package/src/lib/utils/__tests__/learned-context-history.test.ts +171 -0
  106. package/src/lib/utils/learned-context-history.ts +150 -0
  107. package/src/lib/validators/__tests__/profile.test.ts +0 -15
  108. package/src/lib/validators/__tests__/settings.test.ts +23 -16
  109. package/src/lib/validators/profile.ts +0 -1
  110. package/src/lib/validators/settings.ts +3 -9
  111. package/src/lib/workflows/__tests__/engine.test.ts +2 -0
  112. package/src/lib/workflows/engine.ts +20 -1
@@ -0,0 +1,150 @@
1
+ import { getAllowedPermissions, addAllowedPermission, removeAllowedPermission } from "./permissions";
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Preset definitions
5
+ // ---------------------------------------------------------------------------
6
+
7
+ export interface PermissionPreset {
8
+ id: string;
9
+ name: string;
10
+ description: string;
11
+ risk: "low" | "medium" | "high";
12
+ patterns: string[];
13
+ }
14
+
15
+ /**
16
+ * Built-in permission presets. Presets are roughly layered — "git-safe" repeats every
17
+ * "read-only" pattern; "full-auto" repeats those but lists bare "Bash" instead of "Bash(command:git *)".
18
+ */
19
+ export const PRESETS: PermissionPreset[] = [
20
+ {
21
+ id: "read-only",
22
+ name: "Read Only",
23
+ description: "Safe read operations — no file mutations or shell commands",
24
+ risk: "low",
25
+ patterns: ["Read", "Glob", "Grep", "LS", "NotebookRead"],
26
+ },
27
+ {
28
+ id: "git-safe",
29
+ name: "Git Safe",
30
+ description: "Read operations plus file editing and git commands",
31
+ risk: "medium",
32
+ patterns: [
33
+ // Includes all read-only patterns
34
+ "Read",
35
+ "Glob",
36
+ "Grep",
37
+ "LS",
38
+ "NotebookRead",
39
+ // Plus write + git
40
+ "Write",
41
+ "Edit",
42
+ "Bash(command:git *)",
43
+ ],
44
+ },
45
+ {
46
+ id: "full-auto",
47
+ name: "Full Auto",
48
+ description: "All tools auto-approved — maximum agent autonomy",
49
+ risk: "high",
50
+ patterns: [
51
+ // All safe tools
52
+ "Read",
53
+ "Glob",
54
+ "Grep",
55
+ "LS",
56
+ "NotebookRead",
57
+ "Write",
58
+ "Edit",
59
+ // All bash and other tools
60
+ "Bash",
61
+ "NotebookEdit",
62
+ "WebFetch",
63
+ "WebSearch",
64
+ ],
65
+ },
66
+ ];
67
+
68
+ // ---------------------------------------------------------------------------
69
+ // Preset operations
70
+ // ---------------------------------------------------------------------------
71
+
72
+ /**
73
+ * Get a preset by ID, or undefined if not found.
74
+ */
75
+ export function getPreset(presetId: string): PermissionPreset | undefined {
76
+ return PRESETS.find((p) => p.id === presetId);
77
+ }
78
+
79
+ /**
80
+ * Check which presets are currently fully active (all patterns present).
81
+ */
82
+ export async function getActivePresets(): Promise<string[]> {
83
+ const current = await getAllowedPermissions();
84
+ const currentSet = new Set(current);
85
+
86
+ return PRESETS.filter((preset) =>
87
+ preset.patterns.every((p) => currentSet.has(p))
88
+ ).map((p) => p.id);
89
+ }
90
+
91
+ /**
92
+ * Check if a specific preset is fully active.
93
+ */
94
+ export async function isPresetActive(presetId: string): Promise<boolean> {
95
+ const preset = getPreset(presetId);
96
+ if (!preset) return false;
97
+
98
+ const current = await getAllowedPermissions();
99
+ const currentSet = new Set(current);
100
+ return preset.patterns.every((p) => currentSet.has(p));
101
+ }
102
+
103
+ /**
104
+ * Enable a preset — adds all its patterns to the permission store.
105
+ * Existing patterns are preserved (additive, no duplicates).
106
+ */
107
+ export async function applyPreset(presetId: string): Promise<void> {
108
+ const preset = getPreset(presetId);
109
+ if (!preset) {
110
+ throw new Error(`Unknown preset: ${presetId}`);
111
+ }
112
+
113
+ for (const pattern of preset.patterns) {
114
+ await addAllowedPermission(pattern);
115
+ }
116
+ }
117
+
118
+ /**
119
+ * Disable a preset — removes only patterns that are unique to this preset
120
+ * (not present in any other fully active preset). Individual approvals are not checked here.
121
+ *
122
+ * Patterns shared with other active presets are kept.
123
+ */
124
+ export async function removePreset(presetId: string): Promise<void> {
125
+ const preset = getPreset(presetId);
126
+ if (!preset) {
127
+ throw new Error(`Unknown preset: ${presetId}`);
128
+ }
129
+
130
+ // Gather patterns that belong to other fully active presets (excluding the one being removed)
131
+ const otherPresetPatterns = new Set<string>();
132
+ const activePresets = await getActivePresets();
133
+
134
+ for (const otherId of activePresets) {
135
+ if (otherId === presetId) continue;
136
+ const other = getPreset(otherId);
137
+ if (other) {
138
+ for (const p of other.patterns) {
139
+ otherPresetPatterns.add(p);
140
+ }
141
+ }
142
+ }
143
+
144
+ // Remove only patterns unique to this preset
145
+ for (const pattern of preset.patterns) {
146
+ if (!otherPresetPatterns.has(pattern)) {
147
+ await removeAllowedPermission(pattern);
148
+ }
149
+ }
150
+ }
@@ -0,0 +1,71 @@
1
+ import {
2
+ getRuntimeCatalogEntry,
3
+ SUPPORTED_AGENT_RUNTIMES,
4
+ type AgentRuntimeId,
5
+ } from "@/lib/agents/runtime/catalog";
6
+ import { getAuthSettings } from "./auth";
7
+ import { getOpenAIAuthSettings } from "./openai-auth";
8
+ import type { ApiKeySource, AuthMethod } from "@/lib/constants/settings";
9
+
10
+ export type RuntimeBillingMode = "usage" | "subscription";
11
+ export type RuntimeSetupMethod = AuthMethod | "none";
12
+
13
+ export interface RuntimeSetupState {
14
+ runtimeId: AgentRuntimeId;
15
+ label: string;
16
+ providerId: "anthropic" | "openai";
17
+ configured: boolean;
18
+ authMethod: RuntimeSetupMethod;
19
+ apiKeySource: ApiKeySource;
20
+ billingMode: RuntimeBillingMode;
21
+ }
22
+
23
+ export async function getRuntimeSetupStates(): Promise<
24
+ Record<AgentRuntimeId, RuntimeSetupState>
25
+ > {
26
+ const [claudeAuth, openAIAuth] = await Promise.all([
27
+ getAuthSettings(),
28
+ getOpenAIAuthSettings(),
29
+ ]);
30
+
31
+ const claudeRuntime = getRuntimeCatalogEntry("claude-code");
32
+ const openAIRuntime = getRuntimeCatalogEntry("openai-codex-app-server");
33
+
34
+ const claudeAuthMethod: RuntimeSetupMethod =
35
+ claudeAuth.method === "oauth" || claudeAuth.apiKeySource === "oauth"
36
+ ? "oauth"
37
+ : claudeAuth.hasKey
38
+ ? "api_key"
39
+ : "none";
40
+ const claudeConfigured =
41
+ claudeAuth.hasKey || claudeAuth.apiKeySource === "oauth";
42
+
43
+ const states = {
44
+ "claude-code": {
45
+ runtimeId: "claude-code",
46
+ label: claudeRuntime.label,
47
+ providerId: claudeRuntime.providerId,
48
+ configured: claudeConfigured,
49
+ authMethod: claudeAuthMethod,
50
+ apiKeySource: claudeAuth.apiKeySource,
51
+ billingMode: claudeAuthMethod === "oauth" ? "subscription" : "usage",
52
+ },
53
+ "openai-codex-app-server": {
54
+ runtimeId: "openai-codex-app-server",
55
+ label: openAIRuntime.label,
56
+ providerId: openAIRuntime.providerId,
57
+ configured: openAIAuth.hasKey,
58
+ authMethod: openAIAuth.hasKey ? "api_key" : "none",
59
+ apiKeySource: openAIAuth.apiKeySource,
60
+ billingMode: "usage",
61
+ },
62
+ } satisfies Record<AgentRuntimeId, RuntimeSetupState>;
63
+
64
+ return states;
65
+ }
66
+
67
+ export function listConfiguredRuntimeIds(
68
+ states: Record<AgentRuntimeId, RuntimeSetupState>
69
+ ) {
70
+ return SUPPORTED_AGENT_RUNTIMES.filter((runtimeId) => states[runtimeId].configured);
71
+ }
@@ -33,9 +33,10 @@ function formatLocalDay(date: Date) {
33
33
  }
34
34
 
35
35
  describe("usage ledger", () => {
36
- it("records normalized ledger rows with derived and unknown pricing states", async () => {
36
+ it("records normalized ledger rows with derived, fallback, and unknown pricing states", async () => {
37
37
  const { db, usageLedger, recordUsageLedgerEntry } = await loadUsageModules();
38
38
 
39
+ // Known model — gets specific pricing rule
39
40
  await recordUsageLedgerEntry({
40
41
  activityType: "task_assist",
41
42
  runtimeId: "claude-code",
@@ -49,6 +50,7 @@ describe("usage ledger", () => {
49
50
  finishedAt: new Date("2026-03-10T08:01:00.000Z"),
50
51
  });
51
52
 
53
+ // Unknown model — hits catch-all fallback pricing (conservative estimate)
52
54
  await recordUsageLedgerEntry({
53
55
  activityType: "task_assist",
54
56
  runtimeId: "openai-codex-app-server",
@@ -62,15 +64,37 @@ describe("usage ledger", () => {
62
64
  finishedAt: new Date("2026-03-10T09:01:00.000Z"),
63
65
  });
64
66
 
67
+ // Null modelId — gets unknown_pricing (no model to match)
68
+ await recordUsageLedgerEntry({
69
+ activityType: "task_run",
70
+ runtimeId: "claude-code",
71
+ providerId: "anthropic",
72
+ modelId: null,
73
+ inputTokens: 100,
74
+ outputTokens: 50,
75
+ totalTokens: 150,
76
+ status: "completed",
77
+ startedAt: new Date("2026-03-10T10:00:00.000Z"),
78
+ finishedAt: new Date("2026-03-10T10:01:00.000Z"),
79
+ });
80
+
65
81
  const rows = await db.select().from(usageLedger);
66
- expect(rows).toHaveLength(2);
82
+ expect(rows).toHaveLength(3);
67
83
 
68
- const priced = rows.find((row) => row.providerId === "anthropic");
84
+ // Known: specific pricing
85
+ const priced = rows.find((row) => row.modelId === "claude-sonnet-4-20250514");
69
86
  expect(priced?.costMicros).toBe(10_500);
70
87
  expect(priced?.status).toBe("completed");
71
- expect(priced?.pricingVersion).toBe("registry-2026-03-12");
88
+ expect(priced?.pricingVersion).toBe("anthropic-claude-sonnet");
89
+
90
+ // Unknown model: fallback pricing (conservative top-tier estimate for OpenAI: $10/$30)
91
+ const fallback = rows.find((row) => row.modelId === "codex-unknown");
92
+ expect(fallback?.costMicros).toBeGreaterThan(0);
93
+ expect(fallback?.status).toBe("completed");
94
+ expect(fallback?.pricingVersion).toBe("openai-fallback");
72
95
 
73
- const unknown = rows.find((row) => row.providerId === "openai");
96
+ // Null modelId: truly unknown
97
+ const unknown = rows.find((row) => row.modelId === null);
74
98
  expect(unknown?.costMicros).toBeNull();
75
99
  expect(unknown?.status).toBe("unknown_pricing");
76
100
  expect(unknown?.pricingVersion).toBeNull();
@@ -0,0 +1,78 @@
1
+ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
2
+ import { mkdtempSync, rmSync } from "fs";
3
+ import { join } from "path";
4
+ import { tmpdir } from "os";
5
+
6
+ let tempDir: string;
7
+
8
+ beforeEach(() => {
9
+ tempDir = mkdtempSync(join(tmpdir(), "stagent-pricing-registry-"));
10
+ vi.resetModules();
11
+ vi.stubEnv("STAGENT_DATA_DIR", tempDir);
12
+ });
13
+
14
+ afterEach(() => {
15
+ vi.unstubAllEnvs();
16
+ vi.unstubAllGlobals();
17
+ rmSync(tempDir, { recursive: true, force: true });
18
+ });
19
+
20
+ describe("pricing registry", () => {
21
+ it("refreshes pricing from official provider pages and updates visible rows", async () => {
22
+ const fetch = vi.fn(async (input: string) => {
23
+ if (input.includes("anthropic.com")) {
24
+ return new Response(`
25
+ <html>
26
+ <body>
27
+ Claude Sonnet 4 $3 / 1M input tokens $15 / 1M output tokens
28
+ Claude Opus 4 $15 / 1M input tokens $75 / 1M output tokens
29
+ Claude Haiku 3.5 $0.80 / 1M input tokens $4 / 1M output tokens
30
+ Claude Pro $20
31
+ Max 5x $100
32
+ Max 20x $200
33
+ </body>
34
+ </html>
35
+ `);
36
+ }
37
+
38
+ return new Response(`
39
+ <html>
40
+ <body>
41
+ GPT-5 $10 / 1M input tokens $30 / 1M output tokens
42
+ GPT-4o $2.50 / 1M input tokens $10 / 1M output tokens
43
+ </body>
44
+ </html>
45
+ `);
46
+ });
47
+ vi.stubGlobal("fetch", fetch);
48
+
49
+ const { refreshPricingRegistry } = await import("../pricing-registry");
50
+ const snapshot = await refreshPricingRegistry();
51
+
52
+ expect(snapshot.providers.anthropic.rows.find((row) => row.key === "anthropic-plan-pro")?.monthlyPriceUsd).toBe(20);
53
+ expect(snapshot.providers.anthropic.rows.find((row) => row.key === "anthropic-claude-sonnet")?.inputCostPerMillionMicros).toBe(3_000_000);
54
+ expect(snapshot.providers.openai.rows.find((row) => row.key === "openai-gpt-5")?.outputCostPerMillionMicros).toBe(30_000_000);
55
+ });
56
+
57
+ it("keeps last-known-good pricing when a refresh fails", async () => {
58
+ const fetch = vi.fn(async (input: string) => {
59
+ if (input.includes("anthropic.com")) {
60
+ return new Response(`<html><body>Claude Pro $20</body></html>`);
61
+ }
62
+
63
+ throw new Error("network down");
64
+ });
65
+ vi.stubGlobal("fetch", fetch);
66
+
67
+ const { refreshPricingRegistry, getPricingRegistrySnapshot } = await import(
68
+ "../pricing-registry"
69
+ );
70
+
71
+ await refreshPricingRegistry();
72
+ const snapshot = await getPricingRegistrySnapshot();
73
+
74
+ expect(snapshot.providers.anthropic.refreshError).toBeNull();
75
+ expect(snapshot.providers.openai.refreshError).toContain("network down");
76
+ expect(snapshot.providers.openai.rows.find((row) => row.key === "openai-gpt-5")?.inputCostPerMillionMicros).toBe(10_000_000);
77
+ });
78
+ });
@@ -15,7 +15,9 @@ export type UsageActivityType =
15
15
  | "workflow_step"
16
16
  | "scheduled_firing"
17
17
  | "task_assist"
18
- | "profile_test";
18
+ | "profile_test"
19
+ | "pattern_extraction"
20
+ | "context_summarization";
19
21
 
20
22
  export type UsageLedgerStatus =
21
23
  | "completed"
@@ -205,7 +207,7 @@ export async function recordUsageLedgerEntry(input: UsageLedgerWriteInput) {
205
207
  (normalizedInputTokens != null && normalizedOutputTokens != null
206
208
  ? normalizedInputTokens + normalizedOutputTokens
207
209
  : null);
208
- const { costMicros, pricingVersion } = deriveUsageCostMicros({
210
+ const { costMicros, pricingVersion } = await deriveUsageCostMicros({
209
211
  providerId: input.providerId,
210
212
  modelId: input.modelId,
211
213
  inputTokens: normalizedInputTokens,