stagent 0.1.10 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -24
- package/package.json +1 -2
- package/src/app/api/profiles/route.ts +0 -1
- package/src/app/globals.css +0 -5
- package/src/app/tasks/page.tsx +5 -0
- package/src/components/profiles/profile-detail-view.tsx +1 -16
- package/src/components/profiles/profile-form-view.tsx +0 -22
- package/src/lib/agents/__tests__/claude-agent.test.ts +7 -2
- package/src/lib/agents/__tests__/learned-context.test.ts +500 -0
- package/src/lib/agents/__tests__/pattern-extractor.test.ts +243 -0
- package/src/lib/agents/__tests__/sweep.test.ts +202 -0
- package/src/lib/agents/claude-agent.ts +104 -78
- package/src/lib/agents/learned-context.ts +5 -13
- package/src/lib/agents/pattern-extractor.ts +15 -64
- package/src/lib/agents/profiles/builtins/code-reviewer/profile.yaml +0 -1
- package/src/lib/agents/profiles/builtins/data-analyst/profile.yaml +0 -1
- package/src/lib/agents/profiles/builtins/devops-engineer/profile.yaml +0 -1
- package/src/lib/agents/profiles/builtins/document-writer/profile.yaml +0 -1
- package/src/lib/agents/profiles/builtins/general/profile.yaml +0 -1
- package/src/lib/agents/profiles/builtins/health-fitness-coach/profile.yaml +0 -1
- package/src/lib/agents/profiles/builtins/learning-coach/profile.yaml +0 -1
- package/src/lib/agents/profiles/builtins/project-manager/profile.yaml +0 -1
- package/src/lib/agents/profiles/builtins/researcher/profile.yaml +0 -1
- package/src/lib/agents/profiles/builtins/shopping-assistant/profile.yaml +0 -1
- package/src/lib/agents/profiles/builtins/sweep/profile.yaml +0 -1
- package/src/lib/agents/profiles/builtins/technical-writer/profile.yaml +0 -1
- package/src/lib/agents/profiles/builtins/travel-planner/profile.yaml +0 -1
- package/src/lib/agents/profiles/builtins/wealth-manager/profile.yaml +0 -1
- package/src/lib/agents/profiles/registry.ts +0 -1
- package/src/lib/agents/profiles/types.ts +0 -1
- package/src/lib/agents/runtime/catalog.ts +1 -1
- package/src/lib/agents/runtime/claude.ts +66 -0
- package/src/lib/constants/task-status.ts +6 -0
- package/src/lib/data/seed-data/profiles.ts +0 -3
- package/src/lib/usage/__tests__/ledger.test.ts +29 -5
- package/src/lib/usage/ledger.ts +3 -1
- package/src/lib/usage/pricing.ts +61 -7
- package/src/lib/validators/__tests__/profile.test.ts +0 -15
- package/src/lib/validators/profile.ts +0 -1
- package/src/lib/workflows/__tests__/engine.test.ts +2 -0
- package/src/lib/workflows/engine.ts +2 -1
|
@@ -2,7 +2,7 @@ import { db } from "@/lib/db";
|
|
|
2
2
|
import { learnedContext, notifications } from "@/lib/db/schema";
|
|
3
3
|
import { and, desc, eq } from "drizzle-orm";
|
|
4
4
|
import type { LearnedContextRow } from "@/lib/db/schema";
|
|
5
|
-
import
|
|
5
|
+
import { runMetaCompletion } from "./runtime/claude";
|
|
6
6
|
|
|
7
7
|
const CONTEXT_CHAR_LIMIT = 8_000;
|
|
8
8
|
const SUMMARIZATION_THRESHOLD = 6_000;
|
|
@@ -243,14 +243,8 @@ export async function summarizeContext(profileId: string): Promise<void> {
|
|
|
243
243
|
const content = getActiveLearnedContext(profileId);
|
|
244
244
|
if (!content || content.length <= SUMMARIZATION_THRESHOLD) return;
|
|
245
245
|
|
|
246
|
-
const
|
|
247
|
-
|
|
248
|
-
model: "claude-sonnet-4-20250514",
|
|
249
|
-
max_tokens: 2048,
|
|
250
|
-
messages: [
|
|
251
|
-
{
|
|
252
|
-
role: "user",
|
|
253
|
-
content: `You are condensing learned context for an AI agent profile "${profileId}".
|
|
246
|
+
const { text } = await runMetaCompletion({
|
|
247
|
+
prompt: `You are condensing learned context for an AI agent profile "${profileId}".
|
|
254
248
|
The current context has grown to ${content.length} characters and needs to be summarized to under ${SUMMARIZATION_THRESHOLD} characters while preserving all key patterns, best practices, and important insights.
|
|
255
249
|
|
|
256
250
|
Current learned context:
|
|
@@ -266,12 +260,10 @@ Produce a condensed version that:
|
|
|
266
260
|
5. Stays under ${SUMMARIZATION_THRESHOLD} characters
|
|
267
261
|
|
|
268
262
|
Output ONLY the condensed context, no preamble.`,
|
|
269
|
-
|
|
270
|
-
],
|
|
263
|
+
activityType: "context_summarization",
|
|
271
264
|
});
|
|
272
265
|
|
|
273
|
-
const summarized =
|
|
274
|
-
response.content[0].type === "text" ? response.content[0].text : "";
|
|
266
|
+
const summarized = text.trim();
|
|
275
267
|
|
|
276
268
|
if (!summarized || summarized.length >= content.length) return;
|
|
277
269
|
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import Anthropic from "@anthropic-ai/sdk";
|
|
2
1
|
import { db } from "@/lib/db";
|
|
3
2
|
import { tasks, agentLogs } from "@/lib/db/schema";
|
|
4
3
|
import { eq, desc } from "drizzle-orm";
|
|
@@ -6,6 +5,7 @@ import {
|
|
|
6
5
|
getActiveLearnedContext,
|
|
7
6
|
proposeContextAddition,
|
|
8
7
|
} from "./learned-context";
|
|
8
|
+
import { runMetaCompletion } from "./runtime/claude";
|
|
9
9
|
|
|
10
10
|
export interface PatternEntry {
|
|
11
11
|
title: string;
|
|
@@ -17,50 +17,9 @@ export interface PatternProposal {
|
|
|
17
17
|
patterns: PatternEntry[];
|
|
18
18
|
}
|
|
19
19
|
|
|
20
|
-
const PATTERN_TOOL: Anthropic.Messages.Tool = {
|
|
21
|
-
name: "propose_learned_patterns",
|
|
22
|
-
description:
|
|
23
|
-
"Propose patterns learned from this task execution that should be remembered for future tasks with this profile.",
|
|
24
|
-
input_schema: {
|
|
25
|
-
type: "object" as const,
|
|
26
|
-
properties: {
|
|
27
|
-
patterns: {
|
|
28
|
-
type: "array",
|
|
29
|
-
items: {
|
|
30
|
-
type: "object",
|
|
31
|
-
properties: {
|
|
32
|
-
title: {
|
|
33
|
-
type: "string",
|
|
34
|
-
description: "Short pattern name (2-6 words)",
|
|
35
|
-
},
|
|
36
|
-
description: {
|
|
37
|
-
type: "string",
|
|
38
|
-
description:
|
|
39
|
-
"Concise description of the pattern or lesson (1-2 sentences)",
|
|
40
|
-
},
|
|
41
|
-
category: {
|
|
42
|
-
type: "string",
|
|
43
|
-
enum: [
|
|
44
|
-
"error_resolution",
|
|
45
|
-
"best_practice",
|
|
46
|
-
"shortcut",
|
|
47
|
-
"preference",
|
|
48
|
-
],
|
|
49
|
-
},
|
|
50
|
-
},
|
|
51
|
-
required: ["title", "description", "category"],
|
|
52
|
-
},
|
|
53
|
-
description:
|
|
54
|
-
"Patterns worth remembering. Return empty array if nothing notable.",
|
|
55
|
-
},
|
|
56
|
-
},
|
|
57
|
-
required: ["patterns"],
|
|
58
|
-
},
|
|
59
|
-
};
|
|
60
|
-
|
|
61
20
|
/**
|
|
62
21
|
* Analyze a completed task for patterns worth learning.
|
|
63
|
-
*
|
|
22
|
+
* Routes through the Claude Agent SDK runtime (no direct Anthropic SDK usage).
|
|
64
23
|
* Returns the notification ID if a proposal was created, null otherwise.
|
|
65
24
|
*/
|
|
66
25
|
export async function analyzeForLearnedPatterns(
|
|
@@ -99,16 +58,13 @@ export async function analyzeForLearnedPatterns(
|
|
|
99
58
|
})
|
|
100
59
|
.join("\n");
|
|
101
60
|
|
|
102
|
-
const
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
{
|
|
110
|
-
role: "user",
|
|
111
|
-
content: `Analyze this completed task for patterns worth learning for the "${profileId}" agent profile.
|
|
61
|
+
const { text } = await runMetaCompletion({
|
|
62
|
+
prompt: `Analyze this completed task for patterns worth learning for the "${profileId}" agent profile.
|
|
63
|
+
|
|
64
|
+
Return ONLY a JSON array (no markdown, no code fences):
|
|
65
|
+
[{"title": "...", "description": "...", "category": "error_resolution|best_practice|shortcut|preference"}]
|
|
66
|
+
|
|
67
|
+
Return an empty array [] if no noteworthy patterns.
|
|
112
68
|
|
|
113
69
|
## Task
|
|
114
70
|
Title: ${task.title}
|
|
@@ -124,22 +80,17 @@ ${logSummary.slice(0, 2000)}
|
|
|
124
80
|
${currentContext ?? "(none yet)"}
|
|
125
81
|
|
|
126
82
|
Extract ONLY genuinely useful patterns — things that would help this profile avoid mistakes or work more efficiently on similar future tasks. If this task was routine with nothing notable, return an empty patterns array. Do NOT repeat patterns already in the learned context.`,
|
|
127
|
-
|
|
128
|
-
],
|
|
83
|
+
activityType: "pattern_extraction",
|
|
129
84
|
});
|
|
130
85
|
|
|
131
|
-
//
|
|
132
|
-
const
|
|
133
|
-
|
|
134
|
-
);
|
|
135
|
-
|
|
136
|
-
if (!toolBlock || toolBlock.type !== "tool_use") return null;
|
|
86
|
+
// Parse JSON array from response text
|
|
87
|
+
const jsonMatch = text.match(/\[[\s\S]*\]/);
|
|
88
|
+
const patterns: PatternEntry[] = jsonMatch ? JSON.parse(jsonMatch[0]) : [];
|
|
137
89
|
|
|
138
|
-
|
|
139
|
-
if (!proposal.patterns || proposal.patterns.length === 0) return null;
|
|
90
|
+
if (patterns.length === 0) return null;
|
|
140
91
|
|
|
141
92
|
// Format patterns as text for the proposal
|
|
142
|
-
const formattedAdditions =
|
|
93
|
+
const formattedAdditions = patterns
|
|
143
94
|
.map(
|
|
144
95
|
(p) =>
|
|
145
96
|
`### ${p.title} [${p.category}]\n${p.description}`
|
|
@@ -12,7 +12,6 @@ runtimeOverrides:
|
|
|
12
12
|
Stay pragmatic, execute the requested work directly, and prefer concise operational updates.
|
|
13
13
|
Keep outputs grounded in the current workspace and call out blocked actions explicitly.
|
|
14
14
|
|
|
15
|
-
temperature: 0.5
|
|
16
15
|
maxTurns: 30
|
|
17
16
|
|
|
18
17
|
author: stagent
|
|
@@ -174,7 +174,6 @@ function scanProfiles(): Map<string, AgentProfile> {
|
|
|
174
174
|
allowedTools: config.allowedTools,
|
|
175
175
|
mcpServers: config.mcpServers as Record<string, unknown>,
|
|
176
176
|
canUseToolPolicy: config.canUseToolPolicy,
|
|
177
|
-
temperature: config.temperature,
|
|
178
177
|
maxTurns: config.maxTurns,
|
|
179
178
|
outputFormat: config.outputFormat,
|
|
180
179
|
version: config.version,
|
|
@@ -50,7 +50,7 @@ const RUNTIME_CATALOG: Record<AgentRuntimeId, RuntimeCatalogEntry> = {
|
|
|
50
50
|
resume: true,
|
|
51
51
|
cancel: true,
|
|
52
52
|
approvals: true,
|
|
53
|
-
mcpServers:
|
|
53
|
+
mcpServers: false, // Not yet wired — configs not passed to codex subprocess
|
|
54
54
|
profileTests: false,
|
|
55
55
|
taskAssist: true,
|
|
56
56
|
authHealthCheck: true,
|
|
@@ -252,6 +252,71 @@ async function runClaudeProfileTests(profileId: string): Promise<ProfileTestRepo
|
|
|
252
252
|
};
|
|
253
253
|
}
|
|
254
254
|
|
|
255
|
+
// ---------------------------------------------------------------------------
|
|
256
|
+
// Lightweight meta-completion (pattern extraction, context summarization, etc.)
|
|
257
|
+
// ---------------------------------------------------------------------------
|
|
258
|
+
|
|
259
|
+
export async function runMetaCompletion(input: {
|
|
260
|
+
prompt: string;
|
|
261
|
+
activityType: string;
|
|
262
|
+
}): Promise<{ text: string; usage: UsageSnapshot }> {
|
|
263
|
+
const authEnv = await getAuthEnv();
|
|
264
|
+
const startedAt = new Date();
|
|
265
|
+
let usage: UsageSnapshot = {};
|
|
266
|
+
const abortController = new AbortController();
|
|
267
|
+
const timeout = setTimeout(() => abortController.abort(), 60_000);
|
|
268
|
+
|
|
269
|
+
try {
|
|
270
|
+
const response = query({
|
|
271
|
+
prompt: input.prompt,
|
|
272
|
+
options: {
|
|
273
|
+
abortController,
|
|
274
|
+
includePartialMessages: true,
|
|
275
|
+
cwd: process.cwd(),
|
|
276
|
+
env: buildClaudeSdkEnv(authEnv),
|
|
277
|
+
allowedTools: [],
|
|
278
|
+
maxTurns: 1,
|
|
279
|
+
},
|
|
280
|
+
});
|
|
281
|
+
|
|
282
|
+
const collected = await collectResultText(
|
|
283
|
+
response as AsyncIterable<Record<string, unknown>>
|
|
284
|
+
);
|
|
285
|
+
usage = collected.usage;
|
|
286
|
+
|
|
287
|
+
await recordUsageLedgerEntry({
|
|
288
|
+
activityType: input.activityType as import("@/lib/usage/ledger").UsageActivityType,
|
|
289
|
+
runtimeId: "claude-code",
|
|
290
|
+
providerId: "anthropic",
|
|
291
|
+
modelId: usage.modelId ?? null,
|
|
292
|
+
inputTokens: usage.inputTokens ?? null,
|
|
293
|
+
outputTokens: usage.outputTokens ?? null,
|
|
294
|
+
totalTokens: usage.totalTokens ?? null,
|
|
295
|
+
status: "completed",
|
|
296
|
+
startedAt,
|
|
297
|
+
finishedAt: new Date(),
|
|
298
|
+
});
|
|
299
|
+
|
|
300
|
+
return { text: collected.resultText, usage };
|
|
301
|
+
} catch (error) {
|
|
302
|
+
await recordUsageLedgerEntry({
|
|
303
|
+
activityType: input.activityType as import("@/lib/usage/ledger").UsageActivityType,
|
|
304
|
+
runtimeId: "claude-code",
|
|
305
|
+
providerId: "anthropic",
|
|
306
|
+
modelId: usage.modelId ?? null,
|
|
307
|
+
inputTokens: usage.inputTokens ?? null,
|
|
308
|
+
outputTokens: usage.outputTokens ?? null,
|
|
309
|
+
totalTokens: usage.totalTokens ?? null,
|
|
310
|
+
status: "failed",
|
|
311
|
+
startedAt,
|
|
312
|
+
finishedAt: new Date(),
|
|
313
|
+
});
|
|
314
|
+
throw error;
|
|
315
|
+
} finally {
|
|
316
|
+
clearTimeout(timeout);
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
|
|
255
320
|
async function runClaudeTaskAssist(
|
|
256
321
|
input: TaskAssistInput
|
|
257
322
|
): Promise<TaskAssistResponse> {
|
|
@@ -345,6 +410,7 @@ async function testClaudeConnection(): Promise<RuntimeConnectionResult> {
|
|
|
345
410
|
options: {
|
|
346
411
|
abortController,
|
|
347
412
|
maxTurns: 1,
|
|
413
|
+
includePartialMessages: false,
|
|
348
414
|
cwd: process.cwd(),
|
|
349
415
|
env: buildClaudeSdkEnv(authEnv),
|
|
350
416
|
},
|
|
@@ -47,3 +47,9 @@ export function isValidDragTransition(from: TaskStatus, to: TaskStatus): boolean
|
|
|
47
47
|
|
|
48
48
|
/** Maximum number of times a task can be resumed before requiring a fresh start */
|
|
49
49
|
export const MAX_RESUME_COUNT = 3;
|
|
50
|
+
|
|
51
|
+
/** Default max turns for agent task execution (safety net) */
|
|
52
|
+
export const DEFAULT_MAX_TURNS = 50;
|
|
53
|
+
|
|
54
|
+
/** Default per-execution budget cap in USD */
|
|
55
|
+
export const DEFAULT_MAX_BUDGET_USD = 2.0;
|
|
@@ -34,7 +34,6 @@ export function getSampleProfiles(): SampleProfileSeed[] {
|
|
|
34
34
|
canUseToolPolicy: {
|
|
35
35
|
autoApprove: ["Read", "Grep"],
|
|
36
36
|
},
|
|
37
|
-
temperature: 0.3,
|
|
38
37
|
maxTurns: 18,
|
|
39
38
|
outputFormat: "Weekly operating note with metrics, risks, and next actions.",
|
|
40
39
|
author: SAMPLE_PROFILE_AUTHOR,
|
|
@@ -73,7 +72,6 @@ You review pipeline movement, funnel risk, and rep follow-ups with a bias toward
|
|
|
73
72
|
canUseToolPolicy: {
|
|
74
73
|
autoApprove: ["Read"],
|
|
75
74
|
},
|
|
76
|
-
temperature: 0.6,
|
|
77
75
|
maxTurns: 16,
|
|
78
76
|
outputFormat: "Experiment summary with winning message angles and next tests.",
|
|
79
77
|
author: SAMPLE_PROFILE_AUTHOR,
|
|
@@ -109,7 +107,6 @@ You turn campaign performance and research inputs into sharper launch messaging.
|
|
|
109
107
|
domain: "personal",
|
|
110
108
|
tags: ["investing", "portfolio", "risk", "habits"],
|
|
111
109
|
allowedTools: ["Read", "Write"],
|
|
112
|
-
temperature: 0.25,
|
|
113
110
|
maxTurns: 14,
|
|
114
111
|
outputFormat: "Short investor brief with posture, risk notes, and watchlist changes.",
|
|
115
112
|
author: SAMPLE_PROFILE_AUTHOR,
|
|
@@ -33,9 +33,10 @@ function formatLocalDay(date: Date) {
|
|
|
33
33
|
}
|
|
34
34
|
|
|
35
35
|
describe("usage ledger", () => {
|
|
36
|
-
it("records normalized ledger rows with derived and unknown pricing states", async () => {
|
|
36
|
+
it("records normalized ledger rows with derived, fallback, and unknown pricing states", async () => {
|
|
37
37
|
const { db, usageLedger, recordUsageLedgerEntry } = await loadUsageModules();
|
|
38
38
|
|
|
39
|
+
// Known model — gets specific pricing rule
|
|
39
40
|
await recordUsageLedgerEntry({
|
|
40
41
|
activityType: "task_assist",
|
|
41
42
|
runtimeId: "claude-code",
|
|
@@ -49,6 +50,7 @@ describe("usage ledger", () => {
|
|
|
49
50
|
finishedAt: new Date("2026-03-10T08:01:00.000Z"),
|
|
50
51
|
});
|
|
51
52
|
|
|
53
|
+
// Unknown model — hits catch-all fallback pricing (conservative estimate)
|
|
52
54
|
await recordUsageLedgerEntry({
|
|
53
55
|
activityType: "task_assist",
|
|
54
56
|
runtimeId: "openai-codex-app-server",
|
|
@@ -62,15 +64,37 @@ describe("usage ledger", () => {
|
|
|
62
64
|
finishedAt: new Date("2026-03-10T09:01:00.000Z"),
|
|
63
65
|
});
|
|
64
66
|
|
|
67
|
+
// Null modelId — gets unknown_pricing (no model to match)
|
|
68
|
+
await recordUsageLedgerEntry({
|
|
69
|
+
activityType: "task_run",
|
|
70
|
+
runtimeId: "claude-code",
|
|
71
|
+
providerId: "anthropic",
|
|
72
|
+
modelId: null,
|
|
73
|
+
inputTokens: 100,
|
|
74
|
+
outputTokens: 50,
|
|
75
|
+
totalTokens: 150,
|
|
76
|
+
status: "completed",
|
|
77
|
+
startedAt: new Date("2026-03-10T10:00:00.000Z"),
|
|
78
|
+
finishedAt: new Date("2026-03-10T10:01:00.000Z"),
|
|
79
|
+
});
|
|
80
|
+
|
|
65
81
|
const rows = await db.select().from(usageLedger);
|
|
66
|
-
expect(rows).toHaveLength(
|
|
82
|
+
expect(rows).toHaveLength(3);
|
|
67
83
|
|
|
68
|
-
|
|
84
|
+
// Known: specific pricing
|
|
85
|
+
const priced = rows.find((row) => row.modelId === "claude-sonnet-4-20250514");
|
|
69
86
|
expect(priced?.costMicros).toBe(10_500);
|
|
70
87
|
expect(priced?.status).toBe("completed");
|
|
71
|
-
expect(priced?.pricingVersion).toBe("registry-2026-03-
|
|
88
|
+
expect(priced?.pricingVersion).toBe("registry-2026-03-15");
|
|
89
|
+
|
|
90
|
+
// Unknown model: fallback pricing (conservative Opus-tier for OpenAI: $10/$30)
|
|
91
|
+
const fallback = rows.find((row) => row.modelId === "codex-unknown");
|
|
92
|
+
expect(fallback?.costMicros).toBeGreaterThan(0);
|
|
93
|
+
expect(fallback?.status).toBe("completed");
|
|
94
|
+
expect(fallback?.pricingVersion).toBe("registry-2026-03-15-fallback");
|
|
72
95
|
|
|
73
|
-
|
|
96
|
+
// Null modelId: truly unknown
|
|
97
|
+
const unknown = rows.find((row) => row.modelId === null);
|
|
74
98
|
expect(unknown?.costMicros).toBeNull();
|
|
75
99
|
expect(unknown?.status).toBe("unknown_pricing");
|
|
76
100
|
expect(unknown?.pricingVersion).toBeNull();
|
package/src/lib/usage/ledger.ts
CHANGED
package/src/lib/usage/pricing.ts
CHANGED
|
@@ -7,25 +7,79 @@ export interface PricingRule {
|
|
|
7
7
|
}
|
|
8
8
|
|
|
9
9
|
const PRICING_RULES: PricingRule[] = [
|
|
10
|
+
// ── Anthropic ──────────────────────────────────────────────────────
|
|
10
11
|
{
|
|
11
12
|
providerId: "anthropic",
|
|
12
|
-
pricingVersion: "registry-2026-03-
|
|
13
|
+
pricingVersion: "registry-2026-03-15",
|
|
14
|
+
inputCostPerMillionMicros: 15_000_000,
|
|
15
|
+
outputCostPerMillionMicros: 75_000_000,
|
|
16
|
+
matchesModel(modelId) {
|
|
17
|
+
return modelId.startsWith("claude-opus");
|
|
18
|
+
},
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
providerId: "anthropic",
|
|
22
|
+
pricingVersion: "registry-2026-03-15",
|
|
13
23
|
inputCostPerMillionMicros: 3_000_000,
|
|
14
24
|
outputCostPerMillionMicros: 15_000_000,
|
|
15
25
|
matchesModel(modelId) {
|
|
16
|
-
return (
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
26
|
+
return modelId.startsWith("claude-sonnet");
|
|
27
|
+
},
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
providerId: "anthropic",
|
|
31
|
+
pricingVersion: "registry-2026-03-15",
|
|
32
|
+
inputCostPerMillionMicros: 800_000,
|
|
33
|
+
outputCostPerMillionMicros: 4_000_000,
|
|
34
|
+
matchesModel(modelId) {
|
|
35
|
+
return modelId.startsWith("claude-haiku");
|
|
20
36
|
},
|
|
21
37
|
},
|
|
38
|
+
// ── OpenAI ─────────────────────────────────────────────────────────
|
|
22
39
|
{
|
|
23
40
|
providerId: "openai",
|
|
24
|
-
pricingVersion: "registry-2026-03-
|
|
41
|
+
pricingVersion: "registry-2026-03-15",
|
|
25
42
|
inputCostPerMillionMicros: 1_500_000,
|
|
26
43
|
outputCostPerMillionMicros: 6_000_000,
|
|
27
44
|
matchesModel(modelId) {
|
|
28
|
-
return modelId
|
|
45
|
+
return modelId.startsWith("codex-mini") || modelId === "codex-mini-latest";
|
|
46
|
+
},
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
providerId: "openai",
|
|
50
|
+
pricingVersion: "registry-2026-03-15",
|
|
51
|
+
inputCostPerMillionMicros: 2_500_000,
|
|
52
|
+
outputCostPerMillionMicros: 10_000_000,
|
|
53
|
+
matchesModel(modelId) {
|
|
54
|
+
return modelId.startsWith("gpt-4o");
|
|
55
|
+
},
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
providerId: "openai",
|
|
59
|
+
pricingVersion: "registry-2026-03-15",
|
|
60
|
+
inputCostPerMillionMicros: 10_000_000,
|
|
61
|
+
outputCostPerMillionMicros: 30_000_000,
|
|
62
|
+
matchesModel(modelId) {
|
|
63
|
+
return modelId.startsWith("gpt-5") || modelId.startsWith("o3") || modelId.startsWith("o4");
|
|
64
|
+
},
|
|
65
|
+
},
|
|
66
|
+
// ── Catch-all (conservative estimate to prevent null costs) ────────
|
|
67
|
+
{
|
|
68
|
+
providerId: "anthropic",
|
|
69
|
+
pricingVersion: "registry-2026-03-15-fallback",
|
|
70
|
+
inputCostPerMillionMicros: 15_000_000,
|
|
71
|
+
outputCostPerMillionMicros: 75_000_000,
|
|
72
|
+
matchesModel() {
|
|
73
|
+
return true;
|
|
74
|
+
},
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
providerId: "openai",
|
|
78
|
+
pricingVersion: "registry-2026-03-15-fallback",
|
|
79
|
+
inputCostPerMillionMicros: 10_000_000,
|
|
80
|
+
outputCostPerMillionMicros: 30_000_000,
|
|
81
|
+
matchesModel() {
|
|
82
|
+
return true;
|
|
29
83
|
},
|
|
30
84
|
},
|
|
31
85
|
];
|
|
@@ -28,7 +28,6 @@ describe("ProfileConfigSchema", () => {
|
|
|
28
28
|
preToolCall: ["echo pre"],
|
|
29
29
|
postToolCall: ["echo post"],
|
|
30
30
|
},
|
|
31
|
-
temperature: 0.5,
|
|
32
31
|
maxTurns: 20,
|
|
33
32
|
outputFormat: "markdown",
|
|
34
33
|
author: "stagent",
|
|
@@ -77,20 +76,6 @@ describe("ProfileConfigSchema", () => {
|
|
|
77
76
|
expect(result.success).toBe(false);
|
|
78
77
|
});
|
|
79
78
|
|
|
80
|
-
it("rejects temperature out of range", () => {
|
|
81
|
-
const tooHigh = ProfileConfigSchema.safeParse({
|
|
82
|
-
...validProfile,
|
|
83
|
-
temperature: 1.5,
|
|
84
|
-
});
|
|
85
|
-
expect(tooHigh.success).toBe(false);
|
|
86
|
-
|
|
87
|
-
const tooLow = ProfileConfigSchema.safeParse({
|
|
88
|
-
...validProfile,
|
|
89
|
-
temperature: -0.1,
|
|
90
|
-
});
|
|
91
|
-
expect(tooLow.success).toBe(false);
|
|
92
|
-
});
|
|
93
|
-
|
|
94
79
|
it("rejects invalid source URL", () => {
|
|
95
80
|
const result = ProfileConfigSchema.safeParse({
|
|
96
81
|
...validProfile,
|
|
@@ -38,7 +38,6 @@ export const ProfileConfigSchema = z.object({
|
|
|
38
38
|
postToolCall: z.array(z.string()).optional(),
|
|
39
39
|
})
|
|
40
40
|
.optional(),
|
|
41
|
-
temperature: z.number().min(0).max(1).optional(),
|
|
42
41
|
maxTurns: z.number().positive().optional(),
|
|
43
42
|
outputFormat: z.string().optional(),
|
|
44
43
|
author: z.string().optional(),
|
|
@@ -103,6 +103,8 @@ describe("executeWorkflow", () => {
|
|
|
103
103
|
.mockResolvedValueOnce([workflow])
|
|
104
104
|
.mockResolvedValueOnce([failedTask])
|
|
105
105
|
.mockResolvedValueOnce([workflow])
|
|
106
|
+
.mockResolvedValueOnce([workflow])
|
|
107
|
+
// syncSourceTaskStatus reads the workflow to find sourceTaskId
|
|
106
108
|
.mockResolvedValueOnce([workflow]);
|
|
107
109
|
|
|
108
110
|
const { executeWorkflow } = await import("../engine");
|
|
@@ -837,11 +837,12 @@ async function syncSourceTaskStatus(
|
|
|
837
837
|
status: "completed" | "failed"
|
|
838
838
|
): Promise<void> {
|
|
839
839
|
try {
|
|
840
|
-
const
|
|
840
|
+
const result = await db
|
|
841
841
|
.select()
|
|
842
842
|
.from(workflows)
|
|
843
843
|
.where(eq(workflows.id, workflowId));
|
|
844
844
|
|
|
845
|
+
const workflow = Array.isArray(result) ? result[0] : undefined;
|
|
845
846
|
if (!workflow) return;
|
|
846
847
|
|
|
847
848
|
const def = JSON.parse(workflow.definition);
|