stagent 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +24 -24
  2. package/package.json +1 -2
  3. package/src/app/api/profiles/route.ts +0 -1
  4. package/src/app/globals.css +0 -5
  5. package/src/app/tasks/page.tsx +5 -0
  6. package/src/components/profiles/profile-detail-view.tsx +1 -16
  7. package/src/components/profiles/profile-form-view.tsx +0 -22
  8. package/src/lib/agents/__tests__/claude-agent.test.ts +7 -2
  9. package/src/lib/agents/__tests__/learned-context.test.ts +500 -0
  10. package/src/lib/agents/__tests__/pattern-extractor.test.ts +243 -0
  11. package/src/lib/agents/__tests__/sweep.test.ts +202 -0
  12. package/src/lib/agents/claude-agent.ts +104 -78
  13. package/src/lib/agents/learned-context.ts +5 -13
  14. package/src/lib/agents/pattern-extractor.ts +15 -64
  15. package/src/lib/agents/profiles/builtins/code-reviewer/profile.yaml +0 -1
  16. package/src/lib/agents/profiles/builtins/data-analyst/profile.yaml +0 -1
  17. package/src/lib/agents/profiles/builtins/devops-engineer/profile.yaml +0 -1
  18. package/src/lib/agents/profiles/builtins/document-writer/profile.yaml +0 -1
  19. package/src/lib/agents/profiles/builtins/general/profile.yaml +0 -1
  20. package/src/lib/agents/profiles/builtins/health-fitness-coach/profile.yaml +0 -1
  21. package/src/lib/agents/profiles/builtins/learning-coach/profile.yaml +0 -1
  22. package/src/lib/agents/profiles/builtins/project-manager/profile.yaml +0 -1
  23. package/src/lib/agents/profiles/builtins/researcher/profile.yaml +0 -1
  24. package/src/lib/agents/profiles/builtins/shopping-assistant/profile.yaml +0 -1
  25. package/src/lib/agents/profiles/builtins/sweep/profile.yaml +0 -1
  26. package/src/lib/agents/profiles/builtins/technical-writer/profile.yaml +0 -1
  27. package/src/lib/agents/profiles/builtins/travel-planner/profile.yaml +0 -1
  28. package/src/lib/agents/profiles/builtins/wealth-manager/profile.yaml +0 -1
  29. package/src/lib/agents/profiles/registry.ts +0 -1
  30. package/src/lib/agents/profiles/types.ts +0 -1
  31. package/src/lib/agents/runtime/catalog.ts +1 -1
  32. package/src/lib/agents/runtime/claude.ts +66 -0
  33. package/src/lib/constants/task-status.ts +6 -0
  34. package/src/lib/data/seed-data/profiles.ts +0 -3
  35. package/src/lib/usage/__tests__/ledger.test.ts +29 -5
  36. package/src/lib/usage/ledger.ts +3 -1
  37. package/src/lib/usage/pricing.ts +61 -7
  38. package/src/lib/validators/__tests__/profile.test.ts +0 -15
  39. package/src/lib/validators/profile.ts +0 -1
  40. package/src/lib/workflows/__tests__/engine.test.ts +2 -0
  41. package/src/lib/workflows/engine.ts +2 -1
@@ -2,7 +2,7 @@ import { db } from "@/lib/db";
2
2
  import { learnedContext, notifications } from "@/lib/db/schema";
3
3
  import { and, desc, eq } from "drizzle-orm";
4
4
  import type { LearnedContextRow } from "@/lib/db/schema";
5
- import Anthropic from "@anthropic-ai/sdk";
5
+ import { runMetaCompletion } from "./runtime/claude";
6
6
 
7
7
  const CONTEXT_CHAR_LIMIT = 8_000;
8
8
  const SUMMARIZATION_THRESHOLD = 6_000;
@@ -243,14 +243,8 @@ export async function summarizeContext(profileId: string): Promise<void> {
243
243
  const content = getActiveLearnedContext(profileId);
244
244
  if (!content || content.length <= SUMMARIZATION_THRESHOLD) return;
245
245
 
246
- const client = new Anthropic();
247
- const response = await client.messages.create({
248
- model: "claude-sonnet-4-20250514",
249
- max_tokens: 2048,
250
- messages: [
251
- {
252
- role: "user",
253
- content: `You are condensing learned context for an AI agent profile "${profileId}".
246
+ const { text } = await runMetaCompletion({
247
+ prompt: `You are condensing learned context for an AI agent profile "${profileId}".
254
248
  The current context has grown to ${content.length} characters and needs to be summarized to under ${SUMMARIZATION_THRESHOLD} characters while preserving all key patterns, best practices, and important insights.
255
249
 
256
250
  Current learned context:
@@ -266,12 +260,10 @@ Produce a condensed version that:
266
260
  5. Stays under ${SUMMARIZATION_THRESHOLD} characters
267
261
 
268
262
  Output ONLY the condensed context, no preamble.`,
269
- },
270
- ],
263
+ activityType: "context_summarization",
271
264
  });
272
265
 
273
- const summarized =
274
- response.content[0].type === "text" ? response.content[0].text : "";
266
+ const summarized = text.trim();
275
267
 
276
268
  if (!summarized || summarized.length >= content.length) return;
277
269
 
@@ -1,4 +1,3 @@
1
- import Anthropic from "@anthropic-ai/sdk";
2
1
  import { db } from "@/lib/db";
3
2
  import { tasks, agentLogs } from "@/lib/db/schema";
4
3
  import { eq, desc } from "drizzle-orm";
@@ -6,6 +5,7 @@ import {
6
5
  getActiveLearnedContext,
7
6
  proposeContextAddition,
8
7
  } from "./learned-context";
8
+ import { runMetaCompletion } from "./runtime/claude";
9
9
 
10
10
  export interface PatternEntry {
11
11
  title: string;
@@ -17,50 +17,9 @@ export interface PatternProposal {
17
17
  patterns: PatternEntry[];
18
18
  }
19
19
 
20
- const PATTERN_TOOL: Anthropic.Messages.Tool = {
21
- name: "propose_learned_patterns",
22
- description:
23
- "Propose patterns learned from this task execution that should be remembered for future tasks with this profile.",
24
- input_schema: {
25
- type: "object" as const,
26
- properties: {
27
- patterns: {
28
- type: "array",
29
- items: {
30
- type: "object",
31
- properties: {
32
- title: {
33
- type: "string",
34
- description: "Short pattern name (2-6 words)",
35
- },
36
- description: {
37
- type: "string",
38
- description:
39
- "Concise description of the pattern or lesson (1-2 sentences)",
40
- },
41
- category: {
42
- type: "string",
43
- enum: [
44
- "error_resolution",
45
- "best_practice",
46
- "shortcut",
47
- "preference",
48
- ],
49
- },
50
- },
51
- required: ["title", "description", "category"],
52
- },
53
- description:
54
- "Patterns worth remembering. Return empty array if nothing notable.",
55
- },
56
- },
57
- required: ["patterns"],
58
- },
59
- };
60
-
61
20
  /**
62
21
  * Analyze a completed task for patterns worth learning.
63
- * Makes a focused Claude API call, then proposes additions if patterns found.
22
+ * Routes through the Claude Agent SDK runtime (no direct Anthropic SDK usage).
64
23
  * Returns the notification ID if a proposal was created, null otherwise.
65
24
  */
66
25
  export async function analyzeForLearnedPatterns(
@@ -99,16 +58,13 @@ export async function analyzeForLearnedPatterns(
99
58
  })
100
59
  .join("\n");
101
60
 
102
- const client = new Anthropic();
103
- const response = await client.messages.create({
104
- model: "claude-sonnet-4-20250514",
105
- max_tokens: 1024,
106
- tools: [PATTERN_TOOL],
107
- tool_choice: { type: "tool", name: "propose_learned_patterns" },
108
- messages: [
109
- {
110
- role: "user",
111
- content: `Analyze this completed task for patterns worth learning for the "${profileId}" agent profile.
61
+ const { text } = await runMetaCompletion({
62
+ prompt: `Analyze this completed task for patterns worth learning for the "${profileId}" agent profile.
63
+
64
+ Return ONLY a JSON array (no markdown, no code fences):
65
+ [{"title": "...", "description": "...", "category": "error_resolution|best_practice|shortcut|preference"}]
66
+
67
+ Return an empty array [] if no noteworthy patterns.
112
68
 
113
69
  ## Task
114
70
  Title: ${task.title}
@@ -124,22 +80,17 @@ ${logSummary.slice(0, 2000)}
124
80
  ${currentContext ?? "(none yet)"}
125
81
 
126
82
  Extract ONLY genuinely useful patterns — things that would help this profile avoid mistakes or work more efficiently on similar future tasks. If this task was routine with nothing notable, return an empty patterns array. Do NOT repeat patterns already in the learned context.`,
127
- },
128
- ],
83
+ activityType: "pattern_extraction",
129
84
  });
130
85
 
131
- // Extract the tool use result
132
- const toolBlock = response.content.find(
133
- (block) => block.type === "tool_use" && block.name === "propose_learned_patterns"
134
- );
135
-
136
- if (!toolBlock || toolBlock.type !== "tool_use") return null;
86
+ // Parse JSON array from response text
87
+ const jsonMatch = text.match(/\[[\s\S]*\]/);
88
+ const patterns: PatternEntry[] = jsonMatch ? JSON.parse(jsonMatch[0]) : [];
137
89
 
138
- const proposal = toolBlock.input as PatternProposal;
139
- if (!proposal.patterns || proposal.patterns.length === 0) return null;
90
+ if (patterns.length === 0) return null;
140
91
 
141
92
  // Format patterns as text for the proposal
142
- const formattedAdditions = proposal.patterns
93
+ const formattedAdditions = patterns
143
94
  .map(
144
95
  (p) =>
145
96
  `### ${p.title} [${p.category}]\n${p.description}`
@@ -15,7 +15,6 @@ canUseToolPolicy:
15
15
  autoApprove: [Read, Grep, Glob]
16
16
  autoDeny: []
17
17
 
18
- temperature: 0.3
19
18
  maxTurns: 20
20
19
  outputFormat: structured-findings
21
20
 
@@ -15,7 +15,6 @@ canUseToolPolicy:
15
15
  autoApprove: [Read, Grep, Glob]
16
16
  autoDeny: []
17
17
 
18
- temperature: 0.3
19
18
  maxTurns: 30
20
19
 
21
20
  author: stagent
@@ -15,7 +15,6 @@ canUseToolPolicy:
15
15
  autoApprove: [Read, Grep, Glob]
16
16
  autoDeny: []
17
17
 
18
- temperature: 0.3
19
18
  maxTurns: 30
20
19
 
21
20
  author: stagent
@@ -14,7 +14,6 @@ canUseToolPolicy:
14
14
  autoApprove: [Read]
15
15
  autoDeny: []
16
16
 
17
- temperature: 0.5
18
17
  maxTurns: 20
19
18
  outputFormat: markdown-document
20
19
 
@@ -12,7 +12,6 @@ runtimeOverrides:
12
12
  Stay pragmatic, execute the requested work directly, and prefer concise operational updates.
13
13
  Keep outputs grounded in the current workspace and call out blocked actions explicitly.
14
14
 
15
- temperature: 0.5
16
15
  maxTurns: 30
17
16
 
18
17
  author: stagent
@@ -14,7 +14,6 @@ canUseToolPolicy:
14
14
  autoApprove: [WebSearch, WebFetch, Read]
15
15
  autoDeny: [Bash, Write, Edit]
16
16
 
17
- temperature: 0.6
18
17
  maxTurns: 20
19
18
 
20
19
  author: stagent
@@ -14,7 +14,6 @@ canUseToolPolicy:
14
14
  autoApprove: [WebSearch, WebFetch, Read]
15
15
  autoDeny: [Bash, Write, Edit]
16
16
 
17
- temperature: 0.5
18
17
  maxTurns: 25
19
18
 
20
19
  author: stagent
@@ -14,7 +14,6 @@ canUseToolPolicy:
14
14
  autoApprove: [Read, Grep, Glob]
15
15
  autoDeny: []
16
16
 
17
- temperature: 0.4
18
17
  maxTurns: 25
19
18
 
20
19
  author: stagent
@@ -15,7 +15,6 @@ canUseToolPolicy:
15
15
  autoApprove: [WebSearch, WebFetch, Read]
16
16
  autoDeny: []
17
17
 
18
- temperature: 0.4
19
18
  maxTurns: 25
20
19
 
21
20
  author: stagent
@@ -14,7 +14,6 @@ canUseToolPolicy:
14
14
  autoApprove: [WebSearch, WebFetch, Read]
15
15
  autoDeny: [Bash, Write, Edit]
16
16
 
17
- temperature: 0.5
18
17
  maxTurns: 20
19
18
 
20
19
  author: stagent
@@ -5,7 +5,6 @@ domain: work
5
5
  tags: [sweep, audit, improvement, maintenance]
6
6
  supportedRuntimes: [claude-code]
7
7
 
8
- temperature: 0.3
9
8
  maxTurns: 50
10
9
  outputFormat: json
11
10
 
@@ -16,7 +16,6 @@ canUseToolPolicy:
16
16
  autoApprove: [Read, Grep, Glob]
17
17
  autoDeny: []
18
18
 
19
- temperature: 0.4
20
19
  maxTurns: 20
21
20
  outputFormat: markdown
22
21
 
@@ -14,7 +14,6 @@ canUseToolPolicy:
14
14
  autoApprove: [WebSearch, WebFetch, Read]
15
15
  autoDeny: [Bash, Write, Edit]
16
16
 
17
- temperature: 0.6
18
17
  maxTurns: 25
19
18
 
20
19
  author: stagent
@@ -14,7 +14,6 @@ canUseToolPolicy:
14
14
  autoApprove: [Read]
15
15
  autoDeny: [Bash, Write, Edit]
16
16
 
17
- temperature: 0.3
18
17
  maxTurns: 20
19
18
 
20
19
  author: stagent
@@ -174,7 +174,6 @@ function scanProfiles(): Map<string, AgentProfile> {
174
174
  allowedTools: config.allowedTools,
175
175
  mcpServers: config.mcpServers as Record<string, unknown>,
176
176
  canUseToolPolicy: config.canUseToolPolicy,
177
- temperature: config.temperature,
178
177
  maxTurns: config.maxTurns,
179
178
  outputFormat: config.outputFormat,
180
179
  version: config.version,
@@ -31,7 +31,6 @@ export interface AgentProfile {
31
31
  allowedTools?: string[];
32
32
  mcpServers?: Record<string, unknown>;
33
33
  canUseToolPolicy?: CanUseToolPolicy;
34
- temperature?: number;
35
34
  maxTurns?: number;
36
35
  outputFormat?: string;
37
36
  version?: string;
@@ -50,7 +50,7 @@ const RUNTIME_CATALOG: Record<AgentRuntimeId, RuntimeCatalogEntry> = {
50
50
  resume: true,
51
51
  cancel: true,
52
52
  approvals: true,
53
- mcpServers: true,
53
+ mcpServers: false, // Not yet wired — configs not passed to codex subprocess
54
54
  profileTests: false,
55
55
  taskAssist: true,
56
56
  authHealthCheck: true,
@@ -252,6 +252,71 @@ async function runClaudeProfileTests(profileId: string): Promise<ProfileTestRepo
252
252
  };
253
253
  }
254
254
 
255
+ // ---------------------------------------------------------------------------
256
+ // Lightweight meta-completion (pattern extraction, context summarization, etc.)
257
+ // ---------------------------------------------------------------------------
258
+
259
+ export async function runMetaCompletion(input: {
260
+ prompt: string;
261
+ activityType: string;
262
+ }): Promise<{ text: string; usage: UsageSnapshot }> {
263
+ const authEnv = await getAuthEnv();
264
+ const startedAt = new Date();
265
+ let usage: UsageSnapshot = {};
266
+ const abortController = new AbortController();
267
+ const timeout = setTimeout(() => abortController.abort(), 60_000);
268
+
269
+ try {
270
+ const response = query({
271
+ prompt: input.prompt,
272
+ options: {
273
+ abortController,
274
+ includePartialMessages: true,
275
+ cwd: process.cwd(),
276
+ env: buildClaudeSdkEnv(authEnv),
277
+ allowedTools: [],
278
+ maxTurns: 1,
279
+ },
280
+ });
281
+
282
+ const collected = await collectResultText(
283
+ response as AsyncIterable<Record<string, unknown>>
284
+ );
285
+ usage = collected.usage;
286
+
287
+ await recordUsageLedgerEntry({
288
+ activityType: input.activityType as import("@/lib/usage/ledger").UsageActivityType,
289
+ runtimeId: "claude-code",
290
+ providerId: "anthropic",
291
+ modelId: usage.modelId ?? null,
292
+ inputTokens: usage.inputTokens ?? null,
293
+ outputTokens: usage.outputTokens ?? null,
294
+ totalTokens: usage.totalTokens ?? null,
295
+ status: "completed",
296
+ startedAt,
297
+ finishedAt: new Date(),
298
+ });
299
+
300
+ return { text: collected.resultText, usage };
301
+ } catch (error) {
302
+ await recordUsageLedgerEntry({
303
+ activityType: input.activityType as import("@/lib/usage/ledger").UsageActivityType,
304
+ runtimeId: "claude-code",
305
+ providerId: "anthropic",
306
+ modelId: usage.modelId ?? null,
307
+ inputTokens: usage.inputTokens ?? null,
308
+ outputTokens: usage.outputTokens ?? null,
309
+ totalTokens: usage.totalTokens ?? null,
310
+ status: "failed",
311
+ startedAt,
312
+ finishedAt: new Date(),
313
+ });
314
+ throw error;
315
+ } finally {
316
+ clearTimeout(timeout);
317
+ }
318
+ }
319
+
255
320
  async function runClaudeTaskAssist(
256
321
  input: TaskAssistInput
257
322
  ): Promise<TaskAssistResponse> {
@@ -345,6 +410,7 @@ async function testClaudeConnection(): Promise<RuntimeConnectionResult> {
345
410
  options: {
346
411
  abortController,
347
412
  maxTurns: 1,
413
+ includePartialMessages: false,
348
414
  cwd: process.cwd(),
349
415
  env: buildClaudeSdkEnv(authEnv),
350
416
  },
@@ -47,3 +47,9 @@ export function isValidDragTransition(from: TaskStatus, to: TaskStatus): boolean
47
47
 
48
48
  /** Maximum number of times a task can be resumed before requiring a fresh start */
49
49
  export const MAX_RESUME_COUNT = 3;
50
+
51
+ /** Default max turns for agent task execution (safety net) */
52
+ export const DEFAULT_MAX_TURNS = 50;
53
+
54
+ /** Default per-execution budget cap in USD */
55
+ export const DEFAULT_MAX_BUDGET_USD = 2.0;
@@ -34,7 +34,6 @@ export function getSampleProfiles(): SampleProfileSeed[] {
34
34
  canUseToolPolicy: {
35
35
  autoApprove: ["Read", "Grep"],
36
36
  },
37
- temperature: 0.3,
38
37
  maxTurns: 18,
39
38
  outputFormat: "Weekly operating note with metrics, risks, and next actions.",
40
39
  author: SAMPLE_PROFILE_AUTHOR,
@@ -73,7 +72,6 @@ You review pipeline movement, funnel risk, and rep follow-ups with a bias toward
73
72
  canUseToolPolicy: {
74
73
  autoApprove: ["Read"],
75
74
  },
76
- temperature: 0.6,
77
75
  maxTurns: 16,
78
76
  outputFormat: "Experiment summary with winning message angles and next tests.",
79
77
  author: SAMPLE_PROFILE_AUTHOR,
@@ -109,7 +107,6 @@ You turn campaign performance and research inputs into sharper launch messaging.
109
107
  domain: "personal",
110
108
  tags: ["investing", "portfolio", "risk", "habits"],
111
109
  allowedTools: ["Read", "Write"],
112
- temperature: 0.25,
113
110
  maxTurns: 14,
114
111
  outputFormat: "Short investor brief with posture, risk notes, and watchlist changes.",
115
112
  author: SAMPLE_PROFILE_AUTHOR,
@@ -33,9 +33,10 @@ function formatLocalDay(date: Date) {
33
33
  }
34
34
 
35
35
  describe("usage ledger", () => {
36
- it("records normalized ledger rows with derived and unknown pricing states", async () => {
36
+ it("records normalized ledger rows with derived, fallback, and unknown pricing states", async () => {
37
37
  const { db, usageLedger, recordUsageLedgerEntry } = await loadUsageModules();
38
38
 
39
+ // Known model — gets specific pricing rule
39
40
  await recordUsageLedgerEntry({
40
41
  activityType: "task_assist",
41
42
  runtimeId: "claude-code",
@@ -49,6 +50,7 @@ describe("usage ledger", () => {
49
50
  finishedAt: new Date("2026-03-10T08:01:00.000Z"),
50
51
  });
51
52
 
53
+ // Unknown model — hits catch-all fallback pricing (conservative estimate)
52
54
  await recordUsageLedgerEntry({
53
55
  activityType: "task_assist",
54
56
  runtimeId: "openai-codex-app-server",
@@ -62,15 +64,37 @@ describe("usage ledger", () => {
62
64
  finishedAt: new Date("2026-03-10T09:01:00.000Z"),
63
65
  });
64
66
 
67
+ // Null modelId — gets unknown_pricing (no model to match)
68
+ await recordUsageLedgerEntry({
69
+ activityType: "task_run",
70
+ runtimeId: "claude-code",
71
+ providerId: "anthropic",
72
+ modelId: null,
73
+ inputTokens: 100,
74
+ outputTokens: 50,
75
+ totalTokens: 150,
76
+ status: "completed",
77
+ startedAt: new Date("2026-03-10T10:00:00.000Z"),
78
+ finishedAt: new Date("2026-03-10T10:01:00.000Z"),
79
+ });
80
+
65
81
  const rows = await db.select().from(usageLedger);
66
- expect(rows).toHaveLength(2);
82
+ expect(rows).toHaveLength(3);
67
83
 
68
- const priced = rows.find((row) => row.providerId === "anthropic");
84
+ // Known: specific pricing
85
+ const priced = rows.find((row) => row.modelId === "claude-sonnet-4-20250514");
69
86
  expect(priced?.costMicros).toBe(10_500);
70
87
  expect(priced?.status).toBe("completed");
71
- expect(priced?.pricingVersion).toBe("registry-2026-03-12");
88
+ expect(priced?.pricingVersion).toBe("registry-2026-03-15");
89
+
90
+ // Unknown model: fallback pricing (conservative Opus-tier for OpenAI: $10/$30)
91
+ const fallback = rows.find((row) => row.modelId === "codex-unknown");
92
+ expect(fallback?.costMicros).toBeGreaterThan(0);
93
+ expect(fallback?.status).toBe("completed");
94
+ expect(fallback?.pricingVersion).toBe("registry-2026-03-15-fallback");
72
95
 
73
- const unknown = rows.find((row) => row.providerId === "openai");
96
+ // Null modelId: truly unknown
97
+ const unknown = rows.find((row) => row.modelId === null);
74
98
  expect(unknown?.costMicros).toBeNull();
75
99
  expect(unknown?.status).toBe("unknown_pricing");
76
100
  expect(unknown?.pricingVersion).toBeNull();
@@ -15,7 +15,9 @@ export type UsageActivityType =
15
15
  | "workflow_step"
16
16
  | "scheduled_firing"
17
17
  | "task_assist"
18
- | "profile_test";
18
+ | "profile_test"
19
+ | "pattern_extraction"
20
+ | "context_summarization";
19
21
 
20
22
  export type UsageLedgerStatus =
21
23
  | "completed"
@@ -7,25 +7,79 @@ export interface PricingRule {
7
7
  }
8
8
 
9
9
  const PRICING_RULES: PricingRule[] = [
10
+ // ── Anthropic ──────────────────────────────────────────────────────
10
11
  {
11
12
  providerId: "anthropic",
12
- pricingVersion: "registry-2026-03-12",
13
+ pricingVersion: "registry-2026-03-15",
14
+ inputCostPerMillionMicros: 15_000_000,
15
+ outputCostPerMillionMicros: 75_000_000,
16
+ matchesModel(modelId) {
17
+ return modelId.startsWith("claude-opus");
18
+ },
19
+ },
20
+ {
21
+ providerId: "anthropic",
22
+ pricingVersion: "registry-2026-03-15",
13
23
  inputCostPerMillionMicros: 3_000_000,
14
24
  outputCostPerMillionMicros: 15_000_000,
15
25
  matchesModel(modelId) {
16
- return (
17
- modelId === "claude-sonnet-4-20250514" ||
18
- modelId.startsWith("claude-sonnet-4")
19
- );
26
+ return modelId.startsWith("claude-sonnet");
27
+ },
28
+ },
29
+ {
30
+ providerId: "anthropic",
31
+ pricingVersion: "registry-2026-03-15",
32
+ inputCostPerMillionMicros: 800_000,
33
+ outputCostPerMillionMicros: 4_000_000,
34
+ matchesModel(modelId) {
35
+ return modelId.startsWith("claude-haiku");
20
36
  },
21
37
  },
38
+ // ── OpenAI ─────────────────────────────────────────────────────────
22
39
  {
23
40
  providerId: "openai",
24
- pricingVersion: "registry-2026-03-12",
41
+ pricingVersion: "registry-2026-03-15",
25
42
  inputCostPerMillionMicros: 1_500_000,
26
43
  outputCostPerMillionMicros: 6_000_000,
27
44
  matchesModel(modelId) {
28
- return modelId === "codex-mini-latest" || modelId.startsWith("codex-mini");
45
+ return modelId.startsWith("codex-mini") || modelId === "codex-mini-latest";
46
+ },
47
+ },
48
+ {
49
+ providerId: "openai",
50
+ pricingVersion: "registry-2026-03-15",
51
+ inputCostPerMillionMicros: 2_500_000,
52
+ outputCostPerMillionMicros: 10_000_000,
53
+ matchesModel(modelId) {
54
+ return modelId.startsWith("gpt-4o");
55
+ },
56
+ },
57
+ {
58
+ providerId: "openai",
59
+ pricingVersion: "registry-2026-03-15",
60
+ inputCostPerMillionMicros: 10_000_000,
61
+ outputCostPerMillionMicros: 30_000_000,
62
+ matchesModel(modelId) {
63
+ return modelId.startsWith("gpt-5") || modelId.startsWith("o3") || modelId.startsWith("o4");
64
+ },
65
+ },
66
+ // ── Catch-all (conservative estimate to prevent null costs) ────────
67
+ {
68
+ providerId: "anthropic",
69
+ pricingVersion: "registry-2026-03-15-fallback",
70
+ inputCostPerMillionMicros: 15_000_000,
71
+ outputCostPerMillionMicros: 75_000_000,
72
+ matchesModel() {
73
+ return true;
74
+ },
75
+ },
76
+ {
77
+ providerId: "openai",
78
+ pricingVersion: "registry-2026-03-15-fallback",
79
+ inputCostPerMillionMicros: 10_000_000,
80
+ outputCostPerMillionMicros: 30_000_000,
81
+ matchesModel() {
82
+ return true;
29
83
  },
30
84
  },
31
85
  ];
@@ -28,7 +28,6 @@ describe("ProfileConfigSchema", () => {
28
28
  preToolCall: ["echo pre"],
29
29
  postToolCall: ["echo post"],
30
30
  },
31
- temperature: 0.5,
32
31
  maxTurns: 20,
33
32
  outputFormat: "markdown",
34
33
  author: "stagent",
@@ -77,20 +76,6 @@ describe("ProfileConfigSchema", () => {
77
76
  expect(result.success).toBe(false);
78
77
  });
79
78
 
80
- it("rejects temperature out of range", () => {
81
- const tooHigh = ProfileConfigSchema.safeParse({
82
- ...validProfile,
83
- temperature: 1.5,
84
- });
85
- expect(tooHigh.success).toBe(false);
86
-
87
- const tooLow = ProfileConfigSchema.safeParse({
88
- ...validProfile,
89
- temperature: -0.1,
90
- });
91
- expect(tooLow.success).toBe(false);
92
- });
93
-
94
79
  it("rejects invalid source URL", () => {
95
80
  const result = ProfileConfigSchema.safeParse({
96
81
  ...validProfile,
@@ -38,7 +38,6 @@ export const ProfileConfigSchema = z.object({
38
38
  postToolCall: z.array(z.string()).optional(),
39
39
  })
40
40
  .optional(),
41
- temperature: z.number().min(0).max(1).optional(),
42
41
  maxTurns: z.number().positive().optional(),
43
42
  outputFormat: z.string().optional(),
44
43
  author: z.string().optional(),
@@ -103,6 +103,8 @@ describe("executeWorkflow", () => {
103
103
  .mockResolvedValueOnce([workflow])
104
104
  .mockResolvedValueOnce([failedTask])
105
105
  .mockResolvedValueOnce([workflow])
106
+ .mockResolvedValueOnce([workflow])
107
+ // syncSourceTaskStatus reads the workflow to find sourceTaskId
106
108
  .mockResolvedValueOnce([workflow]);
107
109
 
108
110
  const { executeWorkflow } = await import("../engine");
@@ -837,11 +837,12 @@ async function syncSourceTaskStatus(
837
837
  status: "completed" | "failed"
838
838
  ): Promise<void> {
839
839
  try {
840
- const [workflow] = await db
840
+ const result = await db
841
841
  .select()
842
842
  .from(workflows)
843
843
  .where(eq(workflows.id, workflowId));
844
844
 
845
+ const workflow = Array.isArray(result) ? result[0] : undefined;
845
846
  if (!workflow) return;
846
847
 
847
848
  const def = JSON.parse(workflow.definition);