@bluecopa/harness 0.1.0-snapshot.74 → 0.1.0-snapshot.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bluecopa/harness",
3
- "version": "0.1.0-snapshot.74",
3
+ "version": "0.1.0-snapshot.76",
4
4
  "description": "Provider-agnostic TypeScript agent framework",
5
5
  "license": "UNLICENSED",
6
6
  "scripts": {
@@ -15,29 +15,6 @@ function renderUserMission(messages: AgentMessage[]): string {
15
15
  }).join('\n\n').trim();
16
16
  }
17
17
 
18
- function normalizeFocus(value: string): string {
19
- return value.trim().toLowerCase().replace(/\s+/g, ' ');
20
- }
21
-
22
- function normalizeSummary(value: string): string {
23
- return value
24
- .trim()
25
- .toLowerCase()
26
- .replace(/[^a-z0-9\s]/g, ' ')
27
- .replace(/\s+/g, ' ');
28
- }
29
-
30
- function summarySimilarity(a: string, b: string): number {
31
- const aWords = new Set(normalizeSummary(a).split(' ').filter((word) => word.length >= 4));
32
- const bWords = new Set(normalizeSummary(b).split(' ').filter((word) => word.length >= 4));
33
- if (aWords.size === 0 || bWords.size === 0) return 0;
34
- let intersection = 0;
35
- for (const word of aWords) {
36
- if (bWords.has(word)) intersection++;
37
- }
38
- return intersection / Math.min(aWords.size, bWords.size);
39
- }
40
-
41
18
  export class OrgArcLoop {
42
19
  private readonly runner: OrgArcRunner;
43
20
  private readonly acceptedEpisodes: OrgAcceptedEpisode[] = [];
@@ -85,8 +62,6 @@ export class OrgArcLoop {
85
62
  const mission = renderUserMission(messages);
86
63
  const maxWorkers = this.config.maxWorkers ?? 4;
87
64
  const maxAbstentions = this.config.maxAbstentions ?? 2;
88
- const maxConsecutiveLowValueRounds = this.config.maxConsecutiveLowValueRounds ?? maxAbstentions;
89
- const minConfidence = this.config.minConfidence ?? 0.45;
90
65
  const baseSeed = [
91
66
  ...normalizeSeedContext(this.config.processSeedContext),
92
67
  {
@@ -96,12 +71,19 @@ export class OrgArcLoop {
96
71
  ];
97
72
 
98
73
  const sessionFacts = await this.loadSessionFacts();
99
- let consecutiveLowValueRounds = 0;
74
+ let consecutiveAbstentions = 0;
100
75
  let stopReason: string | null = null;
101
76
  const startTime = Date.now();
77
+ let round = 0;
102
78
 
103
- for (let round = 0; round < maxWorkers; round++) {
79
+ while (round < maxWorkers) {
104
80
  signal.throwIfAborted();
81
+ const remainingWorkers = maxWorkers - round;
82
+ const shouldRunFinishStage = this.acceptedEpisodes.length >= 2 && remainingWorkers >= 2;
83
+ if (shouldRunFinishStage) {
84
+ break;
85
+ }
86
+
105
87
  yield {
106
88
  type: 'org_round_start',
107
89
  round,
@@ -151,7 +133,7 @@ export class OrgArcLoop {
151
133
 
152
134
  if (!workerResult) {
153
135
  yield { type: 'process_failed', id: workerId, error: 'Worker produced no result.' };
154
- consecutiveLowValueRounds++;
136
+ round++;
155
137
  continue;
156
138
  }
157
139
 
@@ -170,7 +152,7 @@ export class OrgArcLoop {
170
152
  });
171
153
  }
172
154
 
173
- const decision = await this.evaluateWorker(workerResult, minConfidence);
155
+ const decision = await this.evaluateWorker(workerResult);
174
156
  if (!decision.accepted) {
175
157
  const rejected: OrgRejectedEpisode = {
176
158
  workerId: workerResult.workerId,
@@ -183,7 +165,7 @@ export class OrgArcLoop {
183
165
  this.rejectedEpisodes.push(rejected);
184
166
 
185
167
  if (workerResult.orgOutput.abstained) {
186
- consecutiveLowValueRounds++;
168
+ consecutiveAbstentions++;
187
169
  yield {
188
170
  type: 'org_worker_abstained',
189
171
  id: workerResult.workerId,
@@ -192,7 +174,6 @@ export class OrgArcLoop {
192
174
  reason: decision.reason,
193
175
  };
194
176
  } else {
195
- consecutiveLowValueRounds++;
196
177
  yield {
197
178
  type: 'org_worker_rejected',
198
179
  id: workerResult.workerId,
@@ -210,7 +191,7 @@ export class OrgArcLoop {
210
191
  trace: workerResult.trace,
211
192
  orgOutput: workerResult.orgOutput,
212
193
  });
213
- consecutiveLowValueRounds = 0;
194
+ consecutiveAbstentions = 0;
214
195
  yield {
215
196
  type: 'org_worker_accepted',
216
197
  id: workerResult.workerId,
@@ -221,10 +202,8 @@ export class OrgArcLoop {
221
202
  };
222
203
  }
223
204
 
224
- if (workerResult.orgOutput.abstained && consecutiveLowValueRounds >= maxAbstentions) {
225
- stopReason = `too_many_abstentions:${consecutiveLowValueRounds}`;
226
- } else if (consecutiveLowValueRounds >= maxConsecutiveLowValueRounds) {
227
- stopReason = `novelty_exhausted:${consecutiveLowValueRounds}`;
205
+ if (workerResult.orgOutput.abstained && consecutiveAbstentions >= maxAbstentions) {
206
+ stopReason = `too_many_abstentions:${consecutiveAbstentions}`;
228
207
  }
229
208
 
230
209
  if (stopReason && this.config.hookRunner) {
@@ -243,9 +222,102 @@ export class OrgArcLoop {
243
222
  }
244
223
  }
245
224
 
225
+ round++;
246
226
  if (stopReason) break;
247
227
  }
248
228
 
229
+ if (!stopReason) {
230
+ const finishRemainingWorkers = maxWorkers - round;
231
+ const canRunFinishStage = this.acceptedEpisodes.length >= 2 && finishRemainingWorkers >= 2;
232
+ if (canRunFinishStage) {
233
+ const firstIterator = this.runCandidateWorker(mission, round, baseSeed, sessionFacts, signal)[Symbol.asyncIterator]();
234
+ let first: { worker: OrgWorkerRunResult; decision: OrgEpisodeDecision } | undefined;
235
+ while (true) {
236
+ const next = await firstIterator.next();
237
+ if (next.done) {
238
+ first = next.value;
239
+ break;
240
+ }
241
+ const event = next.value;
242
+ yield event;
243
+ }
244
+ round++;
245
+ const secondIterator = this.runCandidateWorker(mission, round, baseSeed, sessionFacts, signal)[Symbol.asyncIterator]();
246
+ let second: { worker: OrgWorkerRunResult; decision: OrgEpisodeDecision } | undefined;
247
+ while (true) {
248
+ const next = await secondIterator.next();
249
+ if (next.done) {
250
+ second = next.value;
251
+ break;
252
+ }
253
+ const event = next.value;
254
+ yield event;
255
+ }
256
+ round++;
257
+ if (!first || !second) throw new Error('Finish stage worker did not produce a result.');
258
+
259
+ const firstAccepted = first.decision.accepted && !first.worker.orgOutput.abstained;
260
+ const secondAccepted = second.decision.accepted && !second.worker.orgOutput.abstained;
261
+
262
+ if (firstAccepted && secondAccepted) {
263
+ const winner = first.worker.orgOutput.confidence >= second.worker.orgOutput.confidence ? first : second;
264
+ const loser = winner === first ? second : first;
265
+ this.acceptedEpisodes.push({
266
+ workerId: winner.worker.workerId,
267
+ round: winner.worker.round,
268
+ episode: winner.worker.episode,
269
+ trace: winner.worker.trace,
270
+ orgOutput: winner.worker.orgOutput,
271
+ });
272
+ yield {
273
+ type: 'org_worker_accepted',
274
+ id: winner.worker.workerId,
275
+ round: winner.worker.round,
276
+ episodeId: winner.worker.episode.id,
277
+ focus: winner.worker.orgOutput.focus,
278
+ roleLabel: winner.worker.orgOutput.roleLabel,
279
+ };
280
+ this.rejectedEpisodes.push({
281
+ workerId: loser.worker.workerId,
282
+ round: loser.worker.round,
283
+ episode: loser.worker.episode,
284
+ trace: loser.worker.trace,
285
+ orgOutput: loser.worker.orgOutput,
286
+ reason: 'Lost final two-choice selection.',
287
+ });
288
+ yield {
289
+ type: 'org_worker_rejected',
290
+ id: loser.worker.workerId,
291
+ round: loser.worker.round,
292
+ episodeId: loser.worker.episode.id,
293
+ reason: 'Lost final two-choice selection.',
294
+ focus: loser.worker.orgOutput.focus,
295
+ };
296
+ stopReason = 'finish_stage_complete';
297
+ } else if (firstAccepted || secondAccepted) {
298
+ const winner = firstAccepted ? first : second;
299
+ this.acceptedEpisodes.push({
300
+ workerId: winner.worker.workerId,
301
+ round: winner.worker.round,
302
+ episode: winner.worker.episode,
303
+ trace: winner.worker.trace,
304
+ orgOutput: winner.worker.orgOutput,
305
+ });
306
+ yield {
307
+ type: 'org_worker_accepted',
308
+ id: winner.worker.workerId,
309
+ round: winner.worker.round,
310
+ episodeId: winner.worker.episode.id,
311
+ focus: winner.worker.orgOutput.focus,
312
+ roleLabel: winner.worker.orgOutput.roleLabel,
313
+ };
314
+ stopReason = 'finish_stage_complete';
315
+ } else {
316
+ stopReason = 'finish_stage_abstained';
317
+ }
318
+ }
319
+ }
320
+
249
321
  if (stopReason) {
250
322
  yield {
251
323
  type: 'org_stop_condition_met',
@@ -299,15 +371,14 @@ export class OrgArcLoop {
299
371
  };
300
372
  }
301
373
 
302
- private async evaluateWorker(worker: OrgWorkerRunResult, minConfidence: number): Promise<OrgEpisodeDecision> {
303
- const normalizedFocus = normalizeFocus(worker.orgOutput.focus);
374
+ private async evaluateWorker(worker: OrgWorkerRunResult): Promise<OrgEpisodeDecision> {
304
375
  const forbiddenTools = (this.config.workerDisallowedTools ?? []).filter((tool) => worker.usedTools.includes(tool));
305
376
 
306
377
  if (this.config.rejectOnForbiddenToolUse !== false && forbiddenTools.length > 0) {
307
378
  return {
308
379
  accepted: false,
309
380
  reason: `Worker used forbidden exploration tools: ${forbiddenTools.join(', ')}.`,
310
- normalizedFocus,
381
+ normalizedFocus: '',
311
382
  };
312
383
  }
313
384
 
@@ -315,7 +386,7 @@ export class OrgArcLoop {
315
386
  return {
316
387
  accepted: false,
317
388
  reason: worker.orgOutput.noveltyReason || 'Worker abstained.',
318
- normalizedFocus,
389
+ normalizedFocus: '',
319
390
  };
320
391
  }
321
392
 
@@ -323,35 +394,7 @@ export class OrgArcLoop {
323
394
  return {
324
395
  accepted: false,
325
396
  reason: 'Worker contribution summary was empty.',
326
- normalizedFocus,
327
- };
328
- }
329
-
330
- if (worker.orgOutput.confidence < minConfidence) {
331
- return {
332
- accepted: false,
333
- reason: `Worker confidence ${worker.orgOutput.confidence.toFixed(2)} was below threshold ${minConfidence.toFixed(2)}.`,
334
- normalizedFocus,
335
- };
336
- }
337
-
338
- const duplicate = this.acceptedEpisodes.find((item) => normalizeFocus(item.orgOutput.focus) === normalizedFocus);
339
- if (duplicate) {
340
- return {
341
- accepted: false,
342
- reason: `Focus "${worker.orgOutput.focus}" duplicates accepted episode ${duplicate.episode.id}.`,
343
- normalizedFocus,
344
- };
345
- }
346
-
347
- const similar = this.acceptedEpisodes.find((item) => (
348
- summarySimilarity(item.orgOutput.contributionSummary, worker.orgOutput.contributionSummary) >= 0.72
349
- ));
350
- if (similar) {
351
- return {
352
- accepted: false,
353
- reason: `Contribution summary substantially overlaps accepted episode ${similar.episode.id}.`,
354
- normalizedFocus,
397
+ normalizedFocus: '',
355
398
  };
356
399
  }
357
400
 
@@ -364,7 +407,7 @@ export class OrgArcLoop {
364
407
  return {
365
408
  accepted: false,
366
409
  reason: hookDecision.reason ?? 'AcceptEpisode hook rejected the worker contribution.',
367
- normalizedFocus,
410
+ normalizedFocus: '',
368
411
  };
369
412
  }
370
413
  }
@@ -372,7 +415,7 @@ export class OrgArcLoop {
372
415
  return {
373
416
  accepted: true,
374
417
  reason: 'Accepted.',
375
- normalizedFocus,
418
+ normalizedFocus: '',
376
419
  };
377
420
  }
378
421
 
@@ -384,4 +427,70 @@ export class OrgArcLoop {
384
427
  return [];
385
428
  }
386
429
  }
430
+
431
+ private async *runCandidateWorker(
432
+ mission: string,
433
+ round: number,
434
+ baseSeed: AgentMessage[],
435
+ sessionFacts: string[],
436
+ signal: AbortSignal,
437
+ ): AsyncGenerator<OrgArcEvent, { worker: OrgWorkerRunResult; decision: OrgEpisodeDecision }> {
438
+ yield {
439
+ type: 'org_round_start',
440
+ round,
441
+ acceptedCount: this.acceptedEpisodes.length,
442
+ rejectedCount: this.rejectedEpisodes.length,
443
+ };
444
+
445
+ const workerId = `org-worker-${round + 1}`;
446
+ const workerStream = this.runner.streamWorker({
447
+ mission,
448
+ round,
449
+ acceptedEpisodes: this.acceptedEpisodes,
450
+ workerId,
451
+ model: this.config.workerModel ?? 'medium',
452
+ seedContext: baseSeed,
453
+ contextFacts: [...(this.config.contextFacts ?? []), ...sessionFacts],
454
+ ...(this.config.workerMaxSteps != null ? { maxSteps: this.config.workerMaxSteps } : {}),
455
+ }, signal)[Symbol.asyncIterator]();
456
+
457
+ yield {
458
+ type: 'process_dispatched',
459
+ id: workerId,
460
+ action: `Autonomous worker round ${round + 1}`,
461
+ model: this.config.workerModel ?? 'medium',
462
+ };
463
+
464
+ let workerResult: OrgWorkerRunResult | undefined;
465
+ while (true) {
466
+ const next = await workerStream.next();
467
+ if (next.done) {
468
+ workerResult = next.value;
469
+ break;
470
+ }
471
+ const event = next.value;
472
+ yield { type: 'process_activity', id: workerId, activity: event.activity };
473
+ }
474
+
475
+ if (!workerResult) {
476
+ throw new Error(`Candidate worker ${workerId} produced no result.`);
477
+ }
478
+
479
+ yield {
480
+ type: 'process_completed',
481
+ id: workerResult.workerId,
482
+ episodeId: workerResult.episode.id,
483
+ summary: workerResult.episode.summary,
484
+ durationMs: workerResult.durationMs,
485
+ };
486
+
487
+ if (this.config.hookRunner) {
488
+ await this.config.hookRunner.run({
489
+ event: 'AfterWorker',
490
+ metadata: { round, mission, workerResult, acceptedEpisodes: this.acceptedEpisodes },
491
+ });
492
+ }
493
+
494
+ return { worker: workerResult, decision: await this.evaluateWorker(workerResult) };
495
+ }
387
496
  }
@@ -20,7 +20,9 @@ const DEFAULT_ORG_WORKER_SYSTEM_PROMPT = [
20
20
  'You are one autonomous worker in a self-organizing agent organization.',
21
21
  'Review the mission and the accepted prior contributions.',
22
22
  'Choose your own role based on what would add the most value now.',
23
- 'Produce one useful non-duplicative contribution, or abstain if nothing meaningful remains.',
23
+ 'Contribute only if the final answer would be materially better because of your addition.',
24
+ 'If you would mainly reframe, relabel, or slightly extend prior accepted work, abstain.',
25
+ 'Do not contribute just because you can name another angle; abstain unless the contribution is meaningfully additive.',
24
26
  'Use tools when needed, but keep your scope focused.',
25
27
  'At the end, give a concise final response so the system can extract your structured contribution.',
26
28
  ].join(' ');
@@ -104,8 +106,9 @@ function buildOrgWorkerPrompt(request: OrgWorkerRequest): string {
104
106
  `Round: ${request.round + 1}`,
105
107
  '',
106
108
  'Decide for yourself what role is most useful right now.',
107
- 'Add one distinct contribution that moves the mission forward.',
108
- 'If prior accepted contributions already cover the useful angles, abstain.',
109
+ 'Contribute only if your addition would materially improve the final answer.',
110
+ 'If prior accepted contributions already make the answer structurally sufficient, abstain.',
111
+ 'If you are only adding a nearby variation, abstain.',
109
112
  ].join('\n');
110
113
  }
111
114
 
@@ -75,8 +75,6 @@ export interface OrgWorkerRequest {
75
75
  export interface OrgArcLoopConfig extends ArcLoopConfig {
76
76
  maxWorkers?: number;
77
77
  maxAbstentions?: number;
78
- maxConsecutiveLowValueRounds?: number;
79
- minConfidence?: number;
80
78
  workerModel?: ModelTier;
81
79
  synthesisModel?: ModelTier;
82
80
  workerMaxSteps?: number;
@@ -113,7 +113,6 @@ describe('OrgArcLoop', () => {
113
113
  longTermStore: new InMemoryLongTermStore(),
114
114
  maxWorkers: 4,
115
115
  maxAbstentions: 1,
116
- minConfidence: 0.5,
117
116
  });
118
117
 
119
118
  const events = [];
@@ -144,6 +143,7 @@ describe('OrgArcLoop', () => {
144
143
  toolCalls: [{ toolName: 'Write', input: { path: '/tmp/out.txt', content: 'nope' }, toolCallId: 'call-1' }],
145
144
  },
146
145
  { text: 'Finished after writing a file.', toolCalls: [] },
146
+ { text: 'I have nothing meaningful to add.', toolCalls: [] },
147
147
  );
148
148
 
149
149
  generateObjectQueue.push({
@@ -155,6 +155,15 @@ describe('OrgArcLoop', () => {
155
155
  openQuestions: [],
156
156
  nextSuggestions: [],
157
157
  confidence: 0.92,
158
+ }, {
159
+ roleLabel: 'researcher',
160
+ focus: 'summary',
161
+ contributionSummary: '',
162
+ abstained: true,
163
+ noveltyReason: 'Nothing meaningful remains after the rejected artifact attempt.',
164
+ openQuestions: [],
165
+ nextSuggestions: [],
166
+ confidence: 0.55,
158
167
  });
159
168
 
160
169
  streamTextQueue.push([{ type: 'text-delta', text: 'Synthesis.' }]);
@@ -174,7 +183,6 @@ describe('OrgArcLoop', () => {
174
183
  longTermStore: new InMemoryLongTermStore(),
175
184
  maxWorkers: 2,
176
185
  maxAbstentions: 1,
177
- maxConsecutiveLowValueRounds: 1,
178
186
  workerDisallowedTools: ['Write'],
179
187
  });
180
188
 
@@ -192,7 +200,94 @@ describe('OrgArcLoop', () => {
192
200
  const stopped = events.find((event) => event.type === 'org_stop_condition_met');
193
201
  expect(stopped?.type).toBe('org_stop_condition_met');
194
202
  if (stopped?.type === 'org_stop_condition_met') {
195
- expect(stopped.reason).toContain('novelty_exhausted');
203
+ expect(stopped.reason).toContain('too_many_abstentions');
204
+ }
205
+ });
206
+
207
+ it('runs a final two-choice round and keeps only one winner when both candidates contribute', async () => {
208
+ generateTextQueue.push(
209
+ { text: 'Core literature view.', toolCalls: [] },
210
+ { text: 'Recent developments.', toolCalls: [] },
211
+ { text: 'Mission studies angle.', toolCalls: [] },
212
+ { text: 'Observational constraints angle.', toolCalls: [] },
213
+ );
214
+
215
+ generateObjectQueue.push(
216
+ {
217
+ roleLabel: 'surveyor',
218
+ focus: 'core literature',
219
+ contributionSummary: 'Summarizes the core literature and the main interpretive threads.',
220
+ abstained: false,
221
+ noveltyReason: 'Provides the foundational overview.',
222
+ openQuestions: [],
223
+ nextSuggestions: ['Look at recent developments.'],
224
+ confidence: 0.76,
225
+ },
226
+ {
227
+ roleLabel: 'tracker',
228
+ focus: 'recent developments',
229
+ contributionSummary: 'Adds the most recent developments and updates to the literature.',
230
+ abstained: false,
231
+ noveltyReason: 'Adds recent work that the core survey does not cover.',
232
+ openQuestions: [],
233
+ nextSuggestions: ['Probe one last missing angle if it materially improves the answer.'],
234
+ confidence: 0.73,
235
+ },
236
+ {
237
+ roleLabel: 'planner',
238
+ focus: 'mission studies',
239
+ contributionSummary: 'Adds mission concepts motivated by the object and the literature.',
240
+ abstained: false,
241
+ noveltyReason: 'Adds one last missing implication bucket.',
242
+ openQuestions: [],
243
+ nextSuggestions: [],
244
+ confidence: 0.62,
245
+ },
246
+ {
247
+ roleLabel: 'observer',
248
+ focus: 'observational constraints',
249
+ contributionSummary: 'Adds the strongest observational constraints and how they shape interpretation.',
250
+ abstained: false,
251
+ noveltyReason: 'Adds a competing final angle.',
252
+ openQuestions: [],
253
+ nextSuggestions: [],
254
+ confidence: 0.81,
255
+ },
256
+ );
257
+
258
+ streamTextQueue.push([{ type: 'text-delta', text: 'Synthesis.' }]);
259
+
260
+ const loop = new OrgArcLoop({
261
+ taskId: 'task-3',
262
+ sessionId: 'session-3',
263
+ toolProvider: createToolProvider(),
264
+ processTools: {},
265
+ episodeStore: new InMemoryEpisodeStore(),
266
+ sessionMemoStore: new InMemorySessionMemoStore(),
267
+ longTermStore: new InMemoryLongTermStore(),
268
+ maxWorkers: 4,
269
+ maxAbstentions: 2,
270
+ });
271
+
272
+ const events = [];
273
+ for await (const event of loop.stream([{ role: 'user', content: 'Research the topic from a few angles and synthesize.' }], new AbortController().signal)) {
274
+ events.push(event);
275
+ }
276
+
277
+ const accepted = events.filter((event) => event.type === 'org_worker_accepted');
278
+ const rejected = events.filter((event) => event.type === 'org_worker_rejected');
279
+
280
+ expect(accepted).toHaveLength(3);
281
+ expect(rejected).toHaveLength(1);
282
+ expect(rejected[0]?.type).toBe('org_worker_rejected');
283
+ if (rejected[0]?.type === 'org_worker_rejected') {
284
+ expect(rejected[0].reason).toBe('Lost final two-choice selection.');
285
+ }
286
+
287
+ const stopped = events.find((event) => event.type === 'org_stop_condition_met');
288
+ expect(stopped?.type).toBe('org_stop_condition_met');
289
+ if (stopped?.type === 'org_stop_condition_met') {
290
+ expect(stopped.reason).toBe('finish_stage_complete');
196
291
  }
197
292
  });
198
293
  });