@bluecopa/harness 0.1.0-snapshot.76 → 0.1.0-snapshot.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
package/src/arc/arc-loop.ts
CHANGED
|
@@ -632,12 +632,15 @@ export class ArcLoop {
|
|
|
632
632
|
// Resolve skill — pass content + pre-read sub-guides
|
|
633
633
|
const skillRefPromise = this.skillResolver
|
|
634
634
|
? this.skillResolver.resolve(request.action, profileSkills).then(r => {
|
|
635
|
-
if (r)
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
635
|
+
if (!r) return null;
|
|
636
|
+
resolvedSkillName = r.name;
|
|
637
|
+
resolvedSkillPath = r.path;
|
|
638
|
+
return {
|
|
639
|
+
name: r.name,
|
|
640
|
+
path: r.path,
|
|
641
|
+
...(r.systemPrompt ? { content: r.systemPrompt } : {}),
|
|
642
|
+
...(r.subGuides ? { subGuides: r.subGuides } : {}),
|
|
643
|
+
};
|
|
641
644
|
})
|
|
642
645
|
: undefined;
|
|
643
646
|
// Keep legacy promise for backward compat (returns null — progressive loading handles it)
|
|
@@ -661,20 +664,22 @@ export class ArcLoop {
|
|
|
661
664
|
processTimeout: this.config.processTimeout ?? 120_000,
|
|
662
665
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
663
666
|
processTools: (profile?.tools ?? globalTools) as any,
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
667
|
+
...(profile?.systemPrompt ?? this.config.processSystemPrompt
|
|
668
|
+
? { processSystemPrompt: profile?.systemPrompt ?? this.config.processSystemPrompt }
|
|
669
|
+
: {}),
|
|
670
|
+
...(profile?.allowedToolNames ? { allowedToolNames: profile.allowedToolNames } : {}),
|
|
671
|
+
...(profile?.outputSchema ? { outputSchema: profile.outputSchema } : {}),
|
|
672
|
+
...(profile?.demoMessages ? { demoMessages: profile.demoMessages } : {}),
|
|
673
|
+
...(this.config.processToolChoice ? { toolChoice: this.config.processToolChoice } : {}),
|
|
674
|
+
...(this.config.resultPager ? { resultPager: this.config.resultPager } : {}),
|
|
675
|
+
...(this.config.resultPageThreshold != null ? { resultPageThreshold: this.config.resultPageThreshold } : {}),
|
|
676
|
+
...(this.config.pagingExclude ? { pagingExclude: this.config.pagingExclude } : {}),
|
|
677
|
+
...(this.config.maxToolResultLength != null ? { maxToolResultLength: this.config.maxToolResultLength } : {}),
|
|
673
678
|
contextFacts: threadContextFacts,
|
|
674
|
-
maxContextTokens: this.config.maxContextTokens,
|
|
675
|
-
processSeedContext: this.config.processSeedContext,
|
|
676
|
-
skillPromptPromise,
|
|
677
|
-
skillRefPromise,
|
|
679
|
+
...(this.config.maxContextTokens != null ? { maxContextTokens: this.config.maxContextTokens } : {}),
|
|
680
|
+
...(this.config.processSeedContext ? { processSeedContext: this.config.processSeedContext } : {}),
|
|
681
|
+
...(skillPromptPromise ? { skillPromptPromise } : {}),
|
|
682
|
+
...(skillRefPromise ? { skillRefPromise } : {}),
|
|
678
683
|
parentSignal,
|
|
679
684
|
...pickDefined(this.config, [
|
|
680
685
|
'hookRunner',
|
package/src/arc/org-arc-loop.ts
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
|
+
import { generateObject } from 'ai';
|
|
2
|
+
import { anthropic as defaultAnthropicProvider } from '@ai-sdk/anthropic';
|
|
3
|
+
import { z } from 'zod';
|
|
1
4
|
import { getTextContent, type AgentMessage } from '../agent/types';
|
|
5
|
+
import { DEFAULT_MODEL_MAP, resolveModel, type ModelTier } from './arc-types';
|
|
6
|
+
import type { ModelFactory } from './types';
|
|
7
|
+
import { builtinTools } from './tools';
|
|
2
8
|
import { type OrgAcceptedEpisode, type OrgArcEvent, type OrgArcLoopConfig, type OrgArcRunResult, type OrgEpisodeDecision, type OrgRejectedEpisode, type OrgWorkerRunResult } from './org-types';
|
|
3
9
|
import { OrgArcRunner } from './org-arc-runner';
|
|
10
|
+
import { buildOrchestratorPrompt } from './profile-builder';
|
|
4
11
|
|
|
5
12
|
function normalizeSeedContext(ctx: string | AgentMessage[] | undefined): AgentMessage[] {
|
|
6
13
|
if (!ctx) return [];
|
|
@@ -15,14 +22,39 @@ function renderUserMission(messages: AgentMessage[]): string {
|
|
|
15
22
|
}).join('\n\n').trim();
|
|
16
23
|
}
|
|
17
24
|
|
|
25
|
+
/**
 * Structured output requested from the coordinator model when it decides
 * whether a run is answered immediately or handed over to worker rounds.
 */
const orgDirectReplySchema = z.object({
  // Routing verdict: 'direct' answers now, 'workers' continues into worker rounds.
  route: z
    .enum(['direct', 'workers'])
    .describe('Whether to answer now with no workers, or continue into worker rounds.'),
  // Reply text for the direct route; defaults to '' when the model omits it.
  response: z
    .string()
    .default('')
    .describe('The exact user-facing response when route is direct. Empty when route is workers.'),
  // Brief justification; defaults to '' when the model omits it.
  reason: z
    .string()
    .default('')
    .describe('Short rationale for the routing decision.'),
});
|
|
30
|
+
|
|
31
|
+
/**
 * Flattens a message list into a plain-text transcript.
 *
 * Every message is rendered as `<Speaker>: <text>`, where the speaker label is
 * chosen from the message role (`user`, `assistant`, `system`; any other role
 * is labelled `Context`) and the text comes from `getTextContent`. Entries are
 * separated by a blank line and the joined result is trimmed.
 *
 * @param messages - Messages to render, in order.
 * @returns The transcript; an empty string when `messages` is empty.
 */
function renderCoordinatorContext(messages: AgentMessage[]): string {
  const speakerFor = (role: AgentMessage['role']): string => {
    switch (role) {
      case 'user':
        return 'User';
      case 'assistant':
        return 'Assistant';
      case 'system':
        return 'System';
      default:
        return 'Context';
    }
  };

  const entries: string[] = [];
  for (const message of messages) {
    entries.push(`${speakerFor(message.role)}: ${getTextContent(message.content)}`);
  }
  return entries.join('\n\n').trim();
}
|
|
43
|
+
|
|
18
44
|
export class OrgArcLoop {
|
|
19
45
|
private readonly runner: OrgArcRunner;
|
|
20
46
|
private readonly acceptedEpisodes: OrgAcceptedEpisode[] = [];
|
|
21
47
|
private readonly rejectedEpisodes: OrgRejectedEpisode[] = [];
|
|
22
48
|
private readonly config: OrgArcLoopConfig;
|
|
49
|
+
private readonly createModel: ModelFactory;
|
|
50
|
+
private readonly modelMap: Record<ModelTier, string>;
|
|
51
|
+
private readonly coordinatorModel: string;
|
|
23
52
|
|
|
24
53
|
constructor(config: OrgArcLoopConfig) {
|
|
25
54
|
this.config = config;
|
|
55
|
+
this.createModel = config.createModel ?? defaultAnthropicProvider;
|
|
56
|
+
this.modelMap = { ...DEFAULT_MODEL_MAP, ...(config.modelMap ?? {}) };
|
|
57
|
+
this.coordinatorModel = resolveModel(config.model ?? 'strong', this.modelMap, this.modelMap.strong);
|
|
26
58
|
this.runner = new OrgArcRunner({
|
|
27
59
|
toolProvider: config.toolProvider,
|
|
28
60
|
episodeStore: config.episodeStore,
|
|
@@ -32,7 +64,7 @@ export class OrgArcLoop {
|
|
|
32
64
|
...(config.modelMap ? { modelMap: config.modelMap } : {}),
|
|
33
65
|
...((config.workerMaxSteps ?? config.processMaxSteps) != null ? { workerMaxSteps: config.workerMaxSteps ?? config.processMaxSteps } : {}),
|
|
34
66
|
...(config.processTimeout != null ? { processTimeout: config.processTimeout } : {}),
|
|
35
|
-
|
|
67
|
+
processTools: (config.processTools ?? builtinTools) as Record<string, unknown>,
|
|
36
68
|
...(config.workerSystemPrompt ? { workerSystemPrompt: config.workerSystemPrompt } : {}),
|
|
37
69
|
...(config.synthesisSystemPrompt ? { synthesisSystemPrompt: config.synthesisSystemPrompt } : {}),
|
|
38
70
|
...(config.workerAllowedTools ? { workerAllowedTools: config.workerAllowedTools } : {}),
|
|
@@ -41,6 +73,9 @@ export class OrgArcLoop {
|
|
|
41
73
|
...(config.permissionManager ? { permissionManager: config.permissionManager } : {}),
|
|
42
74
|
...(config.telemetry ? { telemetry: config.telemetry } : {}),
|
|
43
75
|
...(config.executeToolAction ? { executeToolAction: config.executeToolAction } : {}),
|
|
76
|
+
...(config.askUser ? { askUser: config.askUser } : {}),
|
|
77
|
+
...(config.tellUser ? { tellUser: config.tellUser } : {}),
|
|
78
|
+
...(config.downloadRawFile ? { downloadRawFile: config.downloadRawFile } : {}),
|
|
44
79
|
});
|
|
45
80
|
}
|
|
46
81
|
|
|
@@ -59,7 +94,23 @@ export class OrgArcLoop {
|
|
|
59
94
|
}
|
|
60
95
|
|
|
61
96
|
async *stream(messages: AgentMessage[], signal: AbortSignal): AsyncGenerator<OrgArcEvent> {
|
|
97
|
+
const startTime = Date.now();
|
|
62
98
|
const mission = renderUserMission(messages);
|
|
99
|
+
const directResponse = await this.maybeReplyDirectly(messages, mission, signal);
|
|
100
|
+
if (directResponse) {
|
|
101
|
+
yield { type: 'text_delta', text: directResponse };
|
|
102
|
+
yield {
|
|
103
|
+
type: 'done',
|
|
104
|
+
output: directResponse,
|
|
105
|
+
stats: {
|
|
106
|
+
turns: 1,
|
|
107
|
+
processes: 0,
|
|
108
|
+
durationMs: Date.now() - startTime,
|
|
109
|
+
},
|
|
110
|
+
};
|
|
111
|
+
return;
|
|
112
|
+
}
|
|
113
|
+
|
|
63
114
|
const maxWorkers = this.config.maxWorkers ?? 4;
|
|
64
115
|
const maxAbstentions = this.config.maxAbstentions ?? 2;
|
|
65
116
|
const baseSeed = [
|
|
@@ -73,7 +124,6 @@ export class OrgArcLoop {
|
|
|
73
124
|
const sessionFacts = await this.loadSessionFacts();
|
|
74
125
|
let consecutiveAbstentions = 0;
|
|
75
126
|
let stopReason: string | null = null;
|
|
76
|
-
const startTime = Date.now();
|
|
77
127
|
let round = 0;
|
|
78
128
|
|
|
79
129
|
while (round < maxWorkers) {
|
|
@@ -493,4 +543,65 @@ export class OrgArcLoop {
|
|
|
493
543
|
|
|
494
544
|
return { worker: workerResult, decision: await this.evaluateWorker(workerResult) };
|
|
495
545
|
}
|
|
546
|
+
|
|
547
|
+
/**
 * Builds the system prompt used for the pre-worker routing decision.
 *
 * Precedence:
 *  1. A non-blank `config.systemPrompt` (trimmed), followed by fixed routing guidance.
 *  2. Otherwise, when `config.processProfiles` has at least one entry, the
 *     prompt from `buildOrchestratorPrompt` followed by a short routing note.
 *
 * @returns The prompt text, or `null` when neither source is configured.
 */
private buildCoordinatorPrompt(): string | null {
  const customPrompt = this.config.systemPrompt?.trim();
  if (customPrompt) {
    const routingGuidance = [
      'You are deciding whether this OrgArc run should answer directly with no worker rounds, or continue into worker rounds.',
      'Prefer a direct answer for greetings, thanks, clarifications, simple factual questions, or summaries already supported by context.',
      'Choose worker rounds only when tools, repo inspection, command execution, file changes, or materially different parallel contributions are likely to improve the answer.',
      'If you choose a direct answer, write the exact reply in simple English and keep it brief.',
    ];
    // The empty entry yields a blank line between the custom prompt and the guidance.
    return [customPrompt, '', ...routingGuidance].join('\n');
  }

  const profiles = this.config.processProfiles;
  if (!profiles || Object.keys(profiles).length === 0) {
    return null;
  }

  return [
    buildOrchestratorPrompt(profiles),
    '',
    'This decision happens before any worker rounds start.',
    'If the task can be answered well without workers, choose a direct answer and provide the exact user-facing reply.',
  ].join('\n');
}
|
|
570
|
+
|
|
571
|
+
/**
 * Asks the coordinator model whether this run can be answered immediately,
 * without starting any worker rounds.
 *
 * This is a best-effort shortcut: every failure path resolves to `null`.
 *
 * @param messages - Conversation rendered into the prompt.
 * @param mission - Mission text appended to the prompt.
 * @param signal - Forwarded to the model call as its abort signal. When it is
 *   already aborted, no model call is made.
 * @returns The trimmed direct reply, or `null` when no coordinator prompt is
 *   configured, the signal is already aborted, the model routes to workers,
 *   the reply is blank, or the model call throws.
 */
private async maybeReplyDirectly(
  messages: AgentMessage[],
  mission: string,
  signal: AbortSignal,
): Promise<string | null> {
  const systemPrompt = this.buildCoordinatorPrompt();
  if (!systemPrompt) return null;

  const seedContext = normalizeSeedContext(this.config.processSeedContext);
  const seedText = seedContext.length > 0 ? renderCoordinatorContext(seedContext) : '';

  // Skip the request entirely for an already-cancelled run; the aborted call
  // is expected to reject and be mapped to `null` below anyway.
  if (signal.aborted) return null;

  // Only the seed section is optional. Sections are joined with a blank line;
  // filtering the whole array with Boolean would also drop the spacers.
  const promptSections = [
    `Conversation:\n${renderCoordinatorContext(messages)}`,
    ...(seedText ? [`Process seed context:\n${seedText}`] : []),
    `Mission:\n${mission}`,
    [
      'Return route="direct" only if the user should receive an immediate answer with no worker rounds.',
      'Return route="workers" when worker rounds are needed.',
    ].join('\n'),
  ];

  try {
    const { object } = await generateObject({
      model: this.createModel(this.coordinatorModel),
      schema: orgDirectReplySchema,
      system: systemPrompt,
      prompt: promptSections.join('\n\n'),
      abortSignal: signal,
    });

    if (object.route !== 'direct') return null;

    // A "direct" verdict with a blank reply is treated as no direct answer.
    const response = object.response.trim();
    return response ? response : null;
  } catch {
    // Deliberate best-effort: any failure of the routing call resolves to
    // `null` (no direct reply) instead of propagating.
    return null;
  }
}
|
|
496
607
|
}
|
|
@@ -21,8 +21,11 @@ const DEFAULT_ORG_WORKER_SYSTEM_PROMPT = [
|
|
|
21
21
|
'Review the mission and the accepted prior contributions.',
|
|
22
22
|
'Choose your own role based on what would add the most value now.',
|
|
23
23
|
'Contribute only if the final answer would be materially better because of your addition.',
|
|
24
|
+
'If the accepted contributions already support a strong final answer, abstain.',
|
|
24
25
|
'If you would mainly reframe, relabel, or slightly extend prior accepted work, abstain.',
|
|
25
26
|
'Do not contribute just because you can name another angle; abstain unless the contribution is meaningfully additive.',
|
|
27
|
+
'Do not add a fourth angle unless it would change the final synthesis in a meaningful way.',
|
|
28
|
+
'If your addition would not deserve its own paragraph, section, or bullet in the final answer, abstain.',
|
|
26
29
|
'Use tools when needed, but keep your scope focused.',
|
|
27
30
|
'At the end, give a concise final response so the system can extract your structured contribution.',
|
|
28
31
|
].join(' ');
|
|
@@ -95,7 +98,7 @@ function buildAcceptedEpisodeSeed(acceptedEpisodes: OrgAcceptedEpisode[]): Agent
|
|
|
95
98
|
|
|
96
99
|
return [{
|
|
97
100
|
role: 'system',
|
|
98
|
-
content: `Accepted prior contributions:\n\n${lines.join('\n\n')}\n\nUse these to avoid repetition. If you need more detail, refer to the episode IDs in your context.`,
|
|
101
|
+
content: `Accepted prior contributions:\n\n${lines.join('\n\n')}\n\nUse these to avoid repetition. If these contributions already support a strong final answer, abstain. If you need more detail, refer to the episode IDs in your context.`,
|
|
99
102
|
}];
|
|
100
103
|
}
|
|
101
104
|
|
|
@@ -109,6 +112,8 @@ function buildOrgWorkerPrompt(request: OrgWorkerRequest): string {
|
|
|
109
112
|
'Contribute only if your addition would materially improve the final answer.',
|
|
110
113
|
'If prior accepted contributions already make the answer structurally sufficient, abstain.',
|
|
111
114
|
'If you are only adding a nearby variation, abstain.',
|
|
115
|
+
'If your addition would not clearly earn its own paragraph, section, or bullet in the final answer, abstain.',
|
|
116
|
+
'Do not add another angle just because one exists.',
|
|
112
117
|
].join('\n');
|
|
113
118
|
}
|
|
114
119
|
|
|
@@ -69,6 +69,43 @@ describe('OrgArcLoop', () => {
|
|
|
69
69
|
streamTextQueue = [];
|
|
70
70
|
});
|
|
71
71
|
|
|
72
|
+
it('replies directly without worker rounds when the coordinator prompt can answer simply', async () => {
  // Queue the coordinator's routing verdict: answer immediately.
  generateObjectQueue.push({
    route: 'direct',
    response: 'Hello! How can I help?',
    reason: 'Simple greeting.',
  });

  const loop = new OrgArcLoop({
    taskId: 'task-direct',
    sessionId: 'session-direct',
    toolProvider: createToolProvider(),
    processTools: {},
    episodeStore: new InMemoryEpisodeStore(),
    sessionMemoStore: new InMemorySessionMemoStore(),
    longTermStore: new InMemoryLongTermStore(),
    systemPrompt: 'You are an orchestrator. Respond directly to greetings and simple questions when no tools are needed.',
  });

  // Drain the stream into a list so the whole event sequence can be inspected.
  const events = [];
  const { signal } = new AbortController();
  for await (const event of loop.stream([{ role: 'user', content: 'hello' }], signal)) {
    events.push(event);
  }

  // No worker round and no synthesis phase may have been started.
  expect(events.every((event) => event.type !== 'org_round_start')).toBe(true);
  expect(events.every((event) => event.type !== 'org_synthesis_started')).toBe(true);

  // The streamed text must be exactly the queued direct reply.
  let streamedText = '';
  for (const event of events) {
    if (event.type === 'text_delta') {
      streamedText += event.text;
    }
  }
  expect(streamedText).toBe('Hello! How can I help?');

  // The last `done` event carries the same reply and reports zero processes.
  const doneEvent = events.slice().reverse().find((event) => event.type === 'done');
  expect(doneEvent?.type).toBe('done');
  if (doneEvent?.type === 'done') {
    expect(doneEvent.output).toBe('Hello! How can I help?');
    expect(doneEvent.stats.processes).toBe(0);
  }
});
|
|
108
|
+
|
|
72
109
|
it('accepts novel worker output, stops after abstention, and synthesizes final output', async () => {
|
|
73
110
|
generateTextQueue.push(
|
|
74
111
|
{ text: 'I analyzed the architecture and found the main leverage point.', toolCalls: [] },
|
|
@@ -25,7 +25,7 @@ let orchestratorScript: Array<() => unknown> = [];
|
|
|
25
25
|
function mockStreamText() {
|
|
26
26
|
const callNum = orchestratorCallCount++;
|
|
27
27
|
if (callNum < orchestratorScript.length) {
|
|
28
|
-
return orchestratorScript[callNum]();
|
|
28
|
+
return orchestratorScript[callNum]!();
|
|
29
29
|
}
|
|
30
30
|
// Default: final text
|
|
31
31
|
return {
|
|
@@ -341,7 +341,9 @@ describe('Context Paging', () => {
|
|
|
341
341
|
const toolMessage = result.messages.find(m => m.role === 'tool' && m.toolResults?.[0]?.toolName === 'ReadFullResult');
|
|
342
342
|
expect(toolMessage).toBeDefined();
|
|
343
343
|
expect(toolMessage!.content).toContain('expired or not found');
|
|
344
|
-
|
|
344
|
+
const firstToolResult = toolMessage!.toolResults?.[0];
|
|
345
|
+
expect(firstToolResult).toBeDefined();
|
|
346
|
+
expect(firstToolResult!.isError).toBe(true);
|
|
345
347
|
});
|
|
346
348
|
|
|
347
349
|
it('falls back to truncation when pager.page() throws', async () => {
|