@bluecopa/harness 0.1.0-snapshot.74 → 0.1.0-snapshot.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/arc/org-arc-loop.ts +180 -71
- package/src/arc/org-arc-runner.ts +6 -3
- package/src/arc/org-types.ts +0 -2
- package/tests/arc/org-arc-loop.test.ts +98 -3
package/package.json
CHANGED
package/src/arc/org-arc-loop.ts
CHANGED
|
@@ -15,29 +15,6 @@ function renderUserMission(messages: AgentMessage[]): string {
|
|
|
15
15
|
}).join('\n\n').trim();
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
function normalizeFocus(value: string): string {
|
|
19
|
-
return value.trim().toLowerCase().replace(/\s+/g, ' ');
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
function normalizeSummary(value: string): string {
|
|
23
|
-
return value
|
|
24
|
-
.trim()
|
|
25
|
-
.toLowerCase()
|
|
26
|
-
.replace(/[^a-z0-9\s]/g, ' ')
|
|
27
|
-
.replace(/\s+/g, ' ');
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
function summarySimilarity(a: string, b: string): number {
|
|
31
|
-
const aWords = new Set(normalizeSummary(a).split(' ').filter((word) => word.length >= 4));
|
|
32
|
-
const bWords = new Set(normalizeSummary(b).split(' ').filter((word) => word.length >= 4));
|
|
33
|
-
if (aWords.size === 0 || bWords.size === 0) return 0;
|
|
34
|
-
let intersection = 0;
|
|
35
|
-
for (const word of aWords) {
|
|
36
|
-
if (bWords.has(word)) intersection++;
|
|
37
|
-
}
|
|
38
|
-
return intersection / Math.min(aWords.size, bWords.size);
|
|
39
|
-
}
|
|
40
|
-
|
|
41
18
|
export class OrgArcLoop {
|
|
42
19
|
private readonly runner: OrgArcRunner;
|
|
43
20
|
private readonly acceptedEpisodes: OrgAcceptedEpisode[] = [];
|
|
@@ -85,8 +62,6 @@ export class OrgArcLoop {
|
|
|
85
62
|
const mission = renderUserMission(messages);
|
|
86
63
|
const maxWorkers = this.config.maxWorkers ?? 4;
|
|
87
64
|
const maxAbstentions = this.config.maxAbstentions ?? 2;
|
|
88
|
-
const maxConsecutiveLowValueRounds = this.config.maxConsecutiveLowValueRounds ?? maxAbstentions;
|
|
89
|
-
const minConfidence = this.config.minConfidence ?? 0.45;
|
|
90
65
|
const baseSeed = [
|
|
91
66
|
...normalizeSeedContext(this.config.processSeedContext),
|
|
92
67
|
{
|
|
@@ -96,12 +71,19 @@ export class OrgArcLoop {
|
|
|
96
71
|
];
|
|
97
72
|
|
|
98
73
|
const sessionFacts = await this.loadSessionFacts();
|
|
99
|
-
let
|
|
74
|
+
let consecutiveAbstentions = 0;
|
|
100
75
|
let stopReason: string | null = null;
|
|
101
76
|
const startTime = Date.now();
|
|
77
|
+
let round = 0;
|
|
102
78
|
|
|
103
|
-
|
|
79
|
+
while (round < maxWorkers) {
|
|
104
80
|
signal.throwIfAborted();
|
|
81
|
+
const remainingWorkers = maxWorkers - round;
|
|
82
|
+
const shouldRunFinishStage = this.acceptedEpisodes.length >= 2 && remainingWorkers >= 2;
|
|
83
|
+
if (shouldRunFinishStage) {
|
|
84
|
+
break;
|
|
85
|
+
}
|
|
86
|
+
|
|
105
87
|
yield {
|
|
106
88
|
type: 'org_round_start',
|
|
107
89
|
round,
|
|
@@ -151,7 +133,7 @@ export class OrgArcLoop {
|
|
|
151
133
|
|
|
152
134
|
if (!workerResult) {
|
|
153
135
|
yield { type: 'process_failed', id: workerId, error: 'Worker produced no result.' };
|
|
154
|
-
|
|
136
|
+
round++;
|
|
155
137
|
continue;
|
|
156
138
|
}
|
|
157
139
|
|
|
@@ -170,7 +152,7 @@ export class OrgArcLoop {
|
|
|
170
152
|
});
|
|
171
153
|
}
|
|
172
154
|
|
|
173
|
-
const decision = await this.evaluateWorker(workerResult, minConfidence);
|
|
155
|
+
const decision = await this.evaluateWorker(workerResult);
|
|
174
156
|
if (!decision.accepted) {
|
|
175
157
|
const rejected: OrgRejectedEpisode = {
|
|
176
158
|
workerId: workerResult.workerId,
|
|
@@ -183,7 +165,7 @@ export class OrgArcLoop {
|
|
|
183
165
|
this.rejectedEpisodes.push(rejected);
|
|
184
166
|
|
|
185
167
|
if (workerResult.orgOutput.abstained) {
|
|
186
|
-
|
|
168
|
+
consecutiveAbstentions++;
|
|
187
169
|
yield {
|
|
188
170
|
type: 'org_worker_abstained',
|
|
189
171
|
id: workerResult.workerId,
|
|
@@ -192,7 +174,6 @@ export class OrgArcLoop {
|
|
|
192
174
|
reason: decision.reason,
|
|
193
175
|
};
|
|
194
176
|
} else {
|
|
195
|
-
consecutiveLowValueRounds++;
|
|
196
177
|
yield {
|
|
197
178
|
type: 'org_worker_rejected',
|
|
198
179
|
id: workerResult.workerId,
|
|
@@ -210,7 +191,7 @@ export class OrgArcLoop {
|
|
|
210
191
|
trace: workerResult.trace,
|
|
211
192
|
orgOutput: workerResult.orgOutput,
|
|
212
193
|
});
|
|
213
|
-
|
|
194
|
+
consecutiveAbstentions = 0;
|
|
214
195
|
yield {
|
|
215
196
|
type: 'org_worker_accepted',
|
|
216
197
|
id: workerResult.workerId,
|
|
@@ -221,10 +202,8 @@ export class OrgArcLoop {
|
|
|
221
202
|
};
|
|
222
203
|
}
|
|
223
204
|
|
|
224
|
-
if (workerResult.orgOutput.abstained &&
|
|
225
|
-
stopReason = `too_many_abstentions:${
|
|
226
|
-
} else if (consecutiveLowValueRounds >= maxConsecutiveLowValueRounds) {
|
|
227
|
-
stopReason = `novelty_exhausted:${consecutiveLowValueRounds}`;
|
|
205
|
+
if (workerResult.orgOutput.abstained && consecutiveAbstentions >= maxAbstentions) {
|
|
206
|
+
stopReason = `too_many_abstentions:${consecutiveAbstentions}`;
|
|
228
207
|
}
|
|
229
208
|
|
|
230
209
|
if (stopReason && this.config.hookRunner) {
|
|
@@ -243,9 +222,102 @@ export class OrgArcLoop {
|
|
|
243
222
|
}
|
|
244
223
|
}
|
|
245
224
|
|
|
225
|
+
round++;
|
|
246
226
|
if (stopReason) break;
|
|
247
227
|
}
|
|
248
228
|
|
|
229
|
+
if (!stopReason) {
|
|
230
|
+
const finishRemainingWorkers = maxWorkers - round;
|
|
231
|
+
const canRunFinishStage = this.acceptedEpisodes.length >= 2 && finishRemainingWorkers >= 2;
|
|
232
|
+
if (canRunFinishStage) {
|
|
233
|
+
const firstIterator = this.runCandidateWorker(mission, round, baseSeed, sessionFacts, signal)[Symbol.asyncIterator]();
|
|
234
|
+
let first: { worker: OrgWorkerRunResult; decision: OrgEpisodeDecision } | undefined;
|
|
235
|
+
while (true) {
|
|
236
|
+
const next = await firstIterator.next();
|
|
237
|
+
if (next.done) {
|
|
238
|
+
first = next.value;
|
|
239
|
+
break;
|
|
240
|
+
}
|
|
241
|
+
const event = next.value;
|
|
242
|
+
yield event;
|
|
243
|
+
}
|
|
244
|
+
round++;
|
|
245
|
+
const secondIterator = this.runCandidateWorker(mission, round, baseSeed, sessionFacts, signal)[Symbol.asyncIterator]();
|
|
246
|
+
let second: { worker: OrgWorkerRunResult; decision: OrgEpisodeDecision } | undefined;
|
|
247
|
+
while (true) {
|
|
248
|
+
const next = await secondIterator.next();
|
|
249
|
+
if (next.done) {
|
|
250
|
+
second = next.value;
|
|
251
|
+
break;
|
|
252
|
+
}
|
|
253
|
+
const event = next.value;
|
|
254
|
+
yield event;
|
|
255
|
+
}
|
|
256
|
+
round++;
|
|
257
|
+
if (!first || !second) throw new Error('Finish stage worker did not produce a result.');
|
|
258
|
+
|
|
259
|
+
const firstAccepted = first.decision.accepted && !first.worker.orgOutput.abstained;
|
|
260
|
+
const secondAccepted = second.decision.accepted && !second.worker.orgOutput.abstained;
|
|
261
|
+
|
|
262
|
+
if (firstAccepted && secondAccepted) {
|
|
263
|
+
const winner = first.worker.orgOutput.confidence >= second.worker.orgOutput.confidence ? first : second;
|
|
264
|
+
const loser = winner === first ? second : first;
|
|
265
|
+
this.acceptedEpisodes.push({
|
|
266
|
+
workerId: winner.worker.workerId,
|
|
267
|
+
round: winner.worker.round,
|
|
268
|
+
episode: winner.worker.episode,
|
|
269
|
+
trace: winner.worker.trace,
|
|
270
|
+
orgOutput: winner.worker.orgOutput,
|
|
271
|
+
});
|
|
272
|
+
yield {
|
|
273
|
+
type: 'org_worker_accepted',
|
|
274
|
+
id: winner.worker.workerId,
|
|
275
|
+
round: winner.worker.round,
|
|
276
|
+
episodeId: winner.worker.episode.id,
|
|
277
|
+
focus: winner.worker.orgOutput.focus,
|
|
278
|
+
roleLabel: winner.worker.orgOutput.roleLabel,
|
|
279
|
+
};
|
|
280
|
+
this.rejectedEpisodes.push({
|
|
281
|
+
workerId: loser.worker.workerId,
|
|
282
|
+
round: loser.worker.round,
|
|
283
|
+
episode: loser.worker.episode,
|
|
284
|
+
trace: loser.worker.trace,
|
|
285
|
+
orgOutput: loser.worker.orgOutput,
|
|
286
|
+
reason: 'Lost final two-choice selection.',
|
|
287
|
+
});
|
|
288
|
+
yield {
|
|
289
|
+
type: 'org_worker_rejected',
|
|
290
|
+
id: loser.worker.workerId,
|
|
291
|
+
round: loser.worker.round,
|
|
292
|
+
episodeId: loser.worker.episode.id,
|
|
293
|
+
reason: 'Lost final two-choice selection.',
|
|
294
|
+
focus: loser.worker.orgOutput.focus,
|
|
295
|
+
};
|
|
296
|
+
stopReason = 'finish_stage_complete';
|
|
297
|
+
} else if (firstAccepted || secondAccepted) {
|
|
298
|
+
const winner = firstAccepted ? first : second;
|
|
299
|
+
this.acceptedEpisodes.push({
|
|
300
|
+
workerId: winner.worker.workerId,
|
|
301
|
+
round: winner.worker.round,
|
|
302
|
+
episode: winner.worker.episode,
|
|
303
|
+
trace: winner.worker.trace,
|
|
304
|
+
orgOutput: winner.worker.orgOutput,
|
|
305
|
+
});
|
|
306
|
+
yield {
|
|
307
|
+
type: 'org_worker_accepted',
|
|
308
|
+
id: winner.worker.workerId,
|
|
309
|
+
round: winner.worker.round,
|
|
310
|
+
episodeId: winner.worker.episode.id,
|
|
311
|
+
focus: winner.worker.orgOutput.focus,
|
|
312
|
+
roleLabel: winner.worker.orgOutput.roleLabel,
|
|
313
|
+
};
|
|
314
|
+
stopReason = 'finish_stage_complete';
|
|
315
|
+
} else {
|
|
316
|
+
stopReason = 'finish_stage_abstained';
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
|
|
249
321
|
if (stopReason) {
|
|
250
322
|
yield {
|
|
251
323
|
type: 'org_stop_condition_met',
|
|
@@ -299,15 +371,14 @@ export class OrgArcLoop {
|
|
|
299
371
|
};
|
|
300
372
|
}
|
|
301
373
|
|
|
302
|
-
private async evaluateWorker(worker: OrgWorkerRunResult, minConfidence: number): Promise<OrgEpisodeDecision> {
|
|
303
|
-
const normalizedFocus = normalizeFocus(worker.orgOutput.focus);
|
|
374
|
+
private async evaluateWorker(worker: OrgWorkerRunResult): Promise<OrgEpisodeDecision> {
|
|
304
375
|
const forbiddenTools = (this.config.workerDisallowedTools ?? []).filter((tool) => worker.usedTools.includes(tool));
|
|
305
376
|
|
|
306
377
|
if (this.config.rejectOnForbiddenToolUse !== false && forbiddenTools.length > 0) {
|
|
307
378
|
return {
|
|
308
379
|
accepted: false,
|
|
309
380
|
reason: `Worker used forbidden exploration tools: ${forbiddenTools.join(', ')}.`,
|
|
310
|
-
normalizedFocus,
|
|
381
|
+
normalizedFocus: '',
|
|
311
382
|
};
|
|
312
383
|
}
|
|
313
384
|
|
|
@@ -315,7 +386,7 @@ export class OrgArcLoop {
|
|
|
315
386
|
return {
|
|
316
387
|
accepted: false,
|
|
317
388
|
reason: worker.orgOutput.noveltyReason || 'Worker abstained.',
|
|
318
|
-
normalizedFocus,
|
|
389
|
+
normalizedFocus: '',
|
|
319
390
|
};
|
|
320
391
|
}
|
|
321
392
|
|
|
@@ -323,35 +394,7 @@ export class OrgArcLoop {
|
|
|
323
394
|
return {
|
|
324
395
|
accepted: false,
|
|
325
396
|
reason: 'Worker contribution summary was empty.',
|
|
326
|
-
normalizedFocus,
|
|
327
|
-
};
|
|
328
|
-
}
|
|
329
|
-
|
|
330
|
-
if (worker.orgOutput.confidence < minConfidence) {
|
|
331
|
-
return {
|
|
332
|
-
accepted: false,
|
|
333
|
-
reason: `Worker confidence ${worker.orgOutput.confidence.toFixed(2)} was below threshold ${minConfidence.toFixed(2)}.`,
|
|
334
|
-
normalizedFocus,
|
|
335
|
-
};
|
|
336
|
-
}
|
|
337
|
-
|
|
338
|
-
const duplicate = this.acceptedEpisodes.find((item) => normalizeFocus(item.orgOutput.focus) === normalizedFocus);
|
|
339
|
-
if (duplicate) {
|
|
340
|
-
return {
|
|
341
|
-
accepted: false,
|
|
342
|
-
reason: `Focus "${worker.orgOutput.focus}" duplicates accepted episode ${duplicate.episode.id}.`,
|
|
343
|
-
normalizedFocus,
|
|
344
|
-
};
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
const similar = this.acceptedEpisodes.find((item) => (
|
|
348
|
-
summarySimilarity(item.orgOutput.contributionSummary, worker.orgOutput.contributionSummary) >= 0.72
|
|
349
|
-
));
|
|
350
|
-
if (similar) {
|
|
351
|
-
return {
|
|
352
|
-
accepted: false,
|
|
353
|
-
reason: `Contribution summary substantially overlaps accepted episode ${similar.episode.id}.`,
|
|
354
|
-
normalizedFocus,
|
|
397
|
+
normalizedFocus: '',
|
|
355
398
|
};
|
|
356
399
|
}
|
|
357
400
|
|
|
@@ -364,7 +407,7 @@ export class OrgArcLoop {
|
|
|
364
407
|
return {
|
|
365
408
|
accepted: false,
|
|
366
409
|
reason: hookDecision.reason ?? 'AcceptEpisode hook rejected the worker contribution.',
|
|
367
|
-
normalizedFocus,
|
|
410
|
+
normalizedFocus: '',
|
|
368
411
|
};
|
|
369
412
|
}
|
|
370
413
|
}
|
|
@@ -372,7 +415,7 @@ export class OrgArcLoop {
|
|
|
372
415
|
return {
|
|
373
416
|
accepted: true,
|
|
374
417
|
reason: 'Accepted.',
|
|
375
|
-
normalizedFocus,
|
|
418
|
+
normalizedFocus: '',
|
|
376
419
|
};
|
|
377
420
|
}
|
|
378
421
|
|
|
@@ -384,4 +427,70 @@ export class OrgArcLoop {
|
|
|
384
427
|
return [];
|
|
385
428
|
}
|
|
386
429
|
}
|
|
430
|
+
|
|
431
|
+
private async *runCandidateWorker(
|
|
432
|
+
mission: string,
|
|
433
|
+
round: number,
|
|
434
|
+
baseSeed: AgentMessage[],
|
|
435
|
+
sessionFacts: string[],
|
|
436
|
+
signal: AbortSignal,
|
|
437
|
+
): AsyncGenerator<OrgArcEvent, { worker: OrgWorkerRunResult; decision: OrgEpisodeDecision }> {
|
|
438
|
+
yield {
|
|
439
|
+
type: 'org_round_start',
|
|
440
|
+
round,
|
|
441
|
+
acceptedCount: this.acceptedEpisodes.length,
|
|
442
|
+
rejectedCount: this.rejectedEpisodes.length,
|
|
443
|
+
};
|
|
444
|
+
|
|
445
|
+
const workerId = `org-worker-${round + 1}`;
|
|
446
|
+
const workerStream = this.runner.streamWorker({
|
|
447
|
+
mission,
|
|
448
|
+
round,
|
|
449
|
+
acceptedEpisodes: this.acceptedEpisodes,
|
|
450
|
+
workerId,
|
|
451
|
+
model: this.config.workerModel ?? 'medium',
|
|
452
|
+
seedContext: baseSeed,
|
|
453
|
+
contextFacts: [...(this.config.contextFacts ?? []), ...sessionFacts],
|
|
454
|
+
...(this.config.workerMaxSteps != null ? { maxSteps: this.config.workerMaxSteps } : {}),
|
|
455
|
+
}, signal)[Symbol.asyncIterator]();
|
|
456
|
+
|
|
457
|
+
yield {
|
|
458
|
+
type: 'process_dispatched',
|
|
459
|
+
id: workerId,
|
|
460
|
+
action: `Autonomous worker round ${round + 1}`,
|
|
461
|
+
model: this.config.workerModel ?? 'medium',
|
|
462
|
+
};
|
|
463
|
+
|
|
464
|
+
let workerResult: OrgWorkerRunResult | undefined;
|
|
465
|
+
while (true) {
|
|
466
|
+
const next = await workerStream.next();
|
|
467
|
+
if (next.done) {
|
|
468
|
+
workerResult = next.value;
|
|
469
|
+
break;
|
|
470
|
+
}
|
|
471
|
+
const event = next.value;
|
|
472
|
+
yield { type: 'process_activity', id: workerId, activity: event.activity };
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
if (!workerResult) {
|
|
476
|
+
throw new Error(`Candidate worker ${workerId} produced no result.`);
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
yield {
|
|
480
|
+
type: 'process_completed',
|
|
481
|
+
id: workerResult.workerId,
|
|
482
|
+
episodeId: workerResult.episode.id,
|
|
483
|
+
summary: workerResult.episode.summary,
|
|
484
|
+
durationMs: workerResult.durationMs,
|
|
485
|
+
};
|
|
486
|
+
|
|
487
|
+
if (this.config.hookRunner) {
|
|
488
|
+
await this.config.hookRunner.run({
|
|
489
|
+
event: 'AfterWorker',
|
|
490
|
+
metadata: { round, mission, workerResult, acceptedEpisodes: this.acceptedEpisodes },
|
|
491
|
+
});
|
|
492
|
+
}
|
|
493
|
+
|
|
494
|
+
return { worker: workerResult, decision: await this.evaluateWorker(workerResult) };
|
|
495
|
+
}
|
|
387
496
|
}
|
|
@@ -20,7 +20,9 @@ const DEFAULT_ORG_WORKER_SYSTEM_PROMPT = [
|
|
|
20
20
|
'You are one autonomous worker in a self-organizing agent organization.',
|
|
21
21
|
'Review the mission and the accepted prior contributions.',
|
|
22
22
|
'Choose your own role based on what would add the most value now.',
|
|
23
|
-
'
|
|
23
|
+
'Contribute only if the final answer would be materially better because of your addition.',
|
|
24
|
+
'If you would mainly reframe, relabel, or slightly extend prior accepted work, abstain.',
|
|
25
|
+
'Do not contribute just because you can name another angle; abstain unless the contribution is meaningfully additive.',
|
|
24
26
|
'Use tools when needed, but keep your scope focused.',
|
|
25
27
|
'At the end, give a concise final response so the system can extract your structured contribution.',
|
|
26
28
|
].join(' ');
|
|
@@ -104,8 +106,9 @@ function buildOrgWorkerPrompt(request: OrgWorkerRequest): string {
|
|
|
104
106
|
`Round: ${request.round + 1}`,
|
|
105
107
|
'',
|
|
106
108
|
'Decide for yourself what role is most useful right now.',
|
|
107
|
-
'
|
|
108
|
-
'If prior accepted contributions already
|
|
109
|
+
'Contribute only if your addition would materially improve the final answer.',
|
|
110
|
+
'If prior accepted contributions already make the answer structurally sufficient, abstain.',
|
|
111
|
+
'If you are only adding a nearby variation, abstain.',
|
|
109
112
|
].join('\n');
|
|
110
113
|
}
|
|
111
114
|
|
package/src/arc/org-types.ts
CHANGED
|
@@ -75,8 +75,6 @@ export interface OrgWorkerRequest {
|
|
|
75
75
|
export interface OrgArcLoopConfig extends ArcLoopConfig {
|
|
76
76
|
maxWorkers?: number;
|
|
77
77
|
maxAbstentions?: number;
|
|
78
|
-
maxConsecutiveLowValueRounds?: number;
|
|
79
|
-
minConfidence?: number;
|
|
80
78
|
workerModel?: ModelTier;
|
|
81
79
|
synthesisModel?: ModelTier;
|
|
82
80
|
workerMaxSteps?: number;
|
|
@@ -113,7 +113,6 @@ describe('OrgArcLoop', () => {
|
|
|
113
113
|
longTermStore: new InMemoryLongTermStore(),
|
|
114
114
|
maxWorkers: 4,
|
|
115
115
|
maxAbstentions: 1,
|
|
116
|
-
minConfidence: 0.5,
|
|
117
116
|
});
|
|
118
117
|
|
|
119
118
|
const events = [];
|
|
@@ -144,6 +143,7 @@ describe('OrgArcLoop', () => {
|
|
|
144
143
|
toolCalls: [{ toolName: 'Write', input: { path: '/tmp/out.txt', content: 'nope' }, toolCallId: 'call-1' }],
|
|
145
144
|
},
|
|
146
145
|
{ text: 'Finished after writing a file.', toolCalls: [] },
|
|
146
|
+
{ text: 'I have nothing meaningful to add.', toolCalls: [] },
|
|
147
147
|
);
|
|
148
148
|
|
|
149
149
|
generateObjectQueue.push({
|
|
@@ -155,6 +155,15 @@ describe('OrgArcLoop', () => {
|
|
|
155
155
|
openQuestions: [],
|
|
156
156
|
nextSuggestions: [],
|
|
157
157
|
confidence: 0.92,
|
|
158
|
+
}, {
|
|
159
|
+
roleLabel: 'researcher',
|
|
160
|
+
focus: 'summary',
|
|
161
|
+
contributionSummary: '',
|
|
162
|
+
abstained: true,
|
|
163
|
+
noveltyReason: 'Nothing meaningful remains after the rejected artifact attempt.',
|
|
164
|
+
openQuestions: [],
|
|
165
|
+
nextSuggestions: [],
|
|
166
|
+
confidence: 0.55,
|
|
158
167
|
});
|
|
159
168
|
|
|
160
169
|
streamTextQueue.push([{ type: 'text-delta', text: 'Synthesis.' }]);
|
|
@@ -174,7 +183,6 @@ describe('OrgArcLoop', () => {
|
|
|
174
183
|
longTermStore: new InMemoryLongTermStore(),
|
|
175
184
|
maxWorkers: 2,
|
|
176
185
|
maxAbstentions: 1,
|
|
177
|
-
maxConsecutiveLowValueRounds: 1,
|
|
178
186
|
workerDisallowedTools: ['Write'],
|
|
179
187
|
});
|
|
180
188
|
|
|
@@ -192,7 +200,94 @@ describe('OrgArcLoop', () => {
|
|
|
192
200
|
const stopped = events.find((event) => event.type === 'org_stop_condition_met');
|
|
193
201
|
expect(stopped?.type).toBe('org_stop_condition_met');
|
|
194
202
|
if (stopped?.type === 'org_stop_condition_met') {
|
|
195
|
-
expect(stopped.reason).toContain('novelty_exhausted');
|
|
203
|
+
expect(stopped.reason).toContain('too_many_abstentions');
|
|
204
|
+
}
|
|
205
|
+
});
|
|
206
|
+
|
|
207
|
+
it('runs a final two-choice round and keeps only one winner when both candidates contribute', async () => {
|
|
208
|
+
generateTextQueue.push(
|
|
209
|
+
{ text: 'Core literature view.', toolCalls: [] },
|
|
210
|
+
{ text: 'Recent developments.', toolCalls: [] },
|
|
211
|
+
{ text: 'Mission studies angle.', toolCalls: [] },
|
|
212
|
+
{ text: 'Observational constraints angle.', toolCalls: [] },
|
|
213
|
+
);
|
|
214
|
+
|
|
215
|
+
generateObjectQueue.push(
|
|
216
|
+
{
|
|
217
|
+
roleLabel: 'surveyor',
|
|
218
|
+
focus: 'core literature',
|
|
219
|
+
contributionSummary: 'Summarizes the core literature and the main interpretive threads.',
|
|
220
|
+
abstained: false,
|
|
221
|
+
noveltyReason: 'Provides the foundational overview.',
|
|
222
|
+
openQuestions: [],
|
|
223
|
+
nextSuggestions: ['Look at recent developments.'],
|
|
224
|
+
confidence: 0.76,
|
|
225
|
+
},
|
|
226
|
+
{
|
|
227
|
+
roleLabel: 'tracker',
|
|
228
|
+
focus: 'recent developments',
|
|
229
|
+
contributionSummary: 'Adds the most recent developments and updates to the literature.',
|
|
230
|
+
abstained: false,
|
|
231
|
+
noveltyReason: 'Adds recent work that the core survey does not cover.',
|
|
232
|
+
openQuestions: [],
|
|
233
|
+
nextSuggestions: ['Probe one last missing angle if it materially improves the answer.'],
|
|
234
|
+
confidence: 0.73,
|
|
235
|
+
},
|
|
236
|
+
{
|
|
237
|
+
roleLabel: 'planner',
|
|
238
|
+
focus: 'mission studies',
|
|
239
|
+
contributionSummary: 'Adds mission concepts motivated by the object and the literature.',
|
|
240
|
+
abstained: false,
|
|
241
|
+
noveltyReason: 'Adds one last missing implication bucket.',
|
|
242
|
+
openQuestions: [],
|
|
243
|
+
nextSuggestions: [],
|
|
244
|
+
confidence: 0.62,
|
|
245
|
+
},
|
|
246
|
+
{
|
|
247
|
+
roleLabel: 'observer',
|
|
248
|
+
focus: 'observational constraints',
|
|
249
|
+
contributionSummary: 'Adds the strongest observational constraints and how they shape interpretation.',
|
|
250
|
+
abstained: false,
|
|
251
|
+
noveltyReason: 'Adds a competing final angle.',
|
|
252
|
+
openQuestions: [],
|
|
253
|
+
nextSuggestions: [],
|
|
254
|
+
confidence: 0.81,
|
|
255
|
+
},
|
|
256
|
+
);
|
|
257
|
+
|
|
258
|
+
streamTextQueue.push([{ type: 'text-delta', text: 'Synthesis.' }]);
|
|
259
|
+
|
|
260
|
+
const loop = new OrgArcLoop({
|
|
261
|
+
taskId: 'task-3',
|
|
262
|
+
sessionId: 'session-3',
|
|
263
|
+
toolProvider: createToolProvider(),
|
|
264
|
+
processTools: {},
|
|
265
|
+
episodeStore: new InMemoryEpisodeStore(),
|
|
266
|
+
sessionMemoStore: new InMemorySessionMemoStore(),
|
|
267
|
+
longTermStore: new InMemoryLongTermStore(),
|
|
268
|
+
maxWorkers: 4,
|
|
269
|
+
maxAbstentions: 2,
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
const events = [];
|
|
273
|
+
for await (const event of loop.stream([{ role: 'user', content: 'Research the topic from a few angles and synthesize.' }], new AbortController().signal)) {
|
|
274
|
+
events.push(event);
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
const accepted = events.filter((event) => event.type === 'org_worker_accepted');
|
|
278
|
+
const rejected = events.filter((event) => event.type === 'org_worker_rejected');
|
|
279
|
+
|
|
280
|
+
expect(accepted).toHaveLength(3);
|
|
281
|
+
expect(rejected).toHaveLength(1);
|
|
282
|
+
expect(rejected[0]?.type).toBe('org_worker_rejected');
|
|
283
|
+
if (rejected[0]?.type === 'org_worker_rejected') {
|
|
284
|
+
expect(rejected[0].reason).toBe('Lost final two-choice selection.');
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
const stopped = events.find((event) => event.type === 'org_stop_condition_met');
|
|
288
|
+
expect(stopped?.type).toBe('org_stop_condition_met');
|
|
289
|
+
if (stopped?.type === 'org_stop_condition_met') {
|
|
290
|
+
expect(stopped.reason).toBe('finish_stage_complete');
|
|
196
291
|
}
|
|
197
292
|
});
|
|
198
293
|
});
|