baro-ai 0.29.0 → 0.30.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.mjs +598 -24
- package/dist/cli.mjs.map +1 -1
- package/package.json +1 -1
- package/scripts/postinstall.js +1 -1
package/dist/cli.mjs
CHANGED
|
@@ -7160,6 +7160,46 @@ OpenAI.Skills = Skills;
|
|
|
7160
7160
|
OpenAI.Videos = Videos;
|
|
7161
7161
|
|
|
7162
7162
|
// ../../node_modules/@mozaik-ai/core/dist/index.mjs
|
|
7163
|
+
/**
 * Ordered list of context items belonging to one project.
 *
 * Instances are built through `ModelContext.create` (fresh id, no items)
 * or `ModelContext.rehydrate` (fields taken from a plain data object).
 */
var ModelContext = class _ModelContext {
  /**
   * @param {string} id - Identifier of this context.
   * @param {string} projectId - Identifier of the owning project.
   * @param {Array} items - Backing array; stored by reference, not copied.
   */
  constructor(id, projectId, items) {
    this.projectId = projectId;
    this.id = id;
    this.items = items;
  }

  /**
   * Starts an empty context with a random UUID as its id.
   * @param {string} projectId
   * @returns {_ModelContext}
   */
  static create(projectId) {
    return new _ModelContext(crypto.randomUUID(), projectId, []);
  }

  /**
   * Rebuilds a context from an object carrying `id`, `projectId` and `items`.
   * @param {{id: string, projectId: string, items: Array}} data
   * @returns {_ModelContext}
   */
  static rehydrate(data) {
    const { id, projectId, items } = data;
    return new _ModelContext(id, projectId, items);
  }

  /**
   * Appends one item.
   * @returns {this} The context, for chaining.
   */
  addContextItem(item) {
    this.items.push(item);
    return this;
  }

  /**
   * Appends model-produced items after validating all of them.
   * Nothing is appended when any item reports a type other than
   * "function_call", "message" or "reasoning".
   * @param {Iterable} items - Each item must expose `getType()`.
   * @returns {this} The context, for chaining.
   * @throws {Error} On the first item with an unsupported type.
   */
  applyModelOutput(items) {
    const accepted = new Set(["function_call", "message", "reasoning"]);
    for (const candidate of items) {
      const kind = candidate.getType();
      if (!accepted.has(kind)) {
        throw new Error(`Invalid item type: ${kind}`);
      }
    }
    this.items.push(...items);
    return this;
  }

  /** @returns {Array} The live backing array (not a copy). */
  getItems() {
    return this.items;
  }

  /**
   * @returns {*} The most recently appended item.
   * @throws {Error} When the context holds no items.
   */
  getLastItem() {
    const count = this.items.length;
    if (count === 0) {
      throw new Error("No items in context");
    }
    return this.items[count - 1];
  }

  /** @returns {Array} The `toJSON()` result of every item, in order. */
  toJSON() {
    return this.items.map((entry) => entry.toJSON());
  }
};
|
|
7163
7203
|
var ContextItem = class {
|
|
7164
7204
|
getType() {
|
|
7165
7205
|
return this.type;
|
|
@@ -7188,6 +7228,29 @@ var InputText = class _InputText extends ItemContent {
|
|
|
7188
7228
|
];
|
|
7189
7229
|
}
|
|
7190
7230
|
};
|
|
7231
|
+
/**
 * Context item of type "message" with role "user".
 */
var UserMessageItem = class _UserMessageItem extends ContextItem {
  /**
   * @param {*} content - Message content; must expose `toJSON()`.
   */
  constructor(content) {
    super();
    this.type = "message";
    this.role = "user";
    this.content = content;
  }

  /**
   * Builds a user message whose content is `InputText.create(text)`.
   * @param {string} text
   * @returns {_UserMessageItem}
   */
  static create(text) {
    return new _UserMessageItem(InputText.create(text));
  }

  /**
   * Builds a user message whose content is `InputText.rehydrate(data)`.
   * @param {*} data - Forwarded unchanged to `InputText.rehydrate`.
   * @returns {_UserMessageItem}
   */
  static rehydrate(data) {
    return new _UserMessageItem(InputText.rehydrate(data));
  }

  /** @returns {{type: string, role: string, content: *}} Serialised form. */
  toJSON() {
    const { type, role, content } = this;
    return { type, role, content: content.toJSON() };
  }
};
|
|
7191
7254
|
var OutputText = class _OutputText extends ItemContent {
|
|
7192
7255
|
constructor(text) {
|
|
7193
7256
|
super();
|
|
@@ -7245,6 +7308,27 @@ var FunctionCallItem = class _FunctionCallItem extends ContextItem {
|
|
|
7245
7308
|
};
|
|
7246
7309
|
}
|
|
7247
7310
|
};
|
|
7311
|
+
/**
 * Context item of type "reasoning".
 *
 * Instances can be rehydrated straight from a raw provider response item
 * (see `OpenAIResponses.extractContextItems`), in which case `content` and
 * the `summary` entries are plain data rather than objects exposing
 * `toJSON()`. Serialisation therefore tolerates both shapes.
 */
var ReasoningItem = class _ReasoningItem extends ContextItem {
  /**
   * @param {*} content - Reasoning content; may be absent.
   * @param {*} encryptedContent - Opaque payload; may be absent.
   * @param {Array} [summary=[]] - Summary parts; `null` is treated as empty.
   */
  constructor(content, encryptedContent, summary = []) {
    super();
    this.type = "reasoning";
    this.content = content;
    this.encryptedContent = encryptedContent;
    // A default parameter only covers `undefined`; guard `null` as well so
    // toJSON() can always iterate.
    this.summary = summary ?? [];
  }

  /**
   * Rebuilds an item from a data object.
   * @param {object} data - Reads `content`, `encryptedContent` and `summary`.
   * @returns {_ReasoningItem}
   */
  static rehydrate(data) {
    // NOTE(review): raw OpenAI response items appear to use the snake_case
    // key `encrypted_content`; accept it as a fallback — confirm against the
    // provider reference.
    const encrypted = data.encryptedContent ?? data.encrypted_content;
    return new _ReasoningItem(data.content, encrypted, data.summary);
  }

  /**
   * @returns {{type: string, content: *, encryptedContent: *, summary: Array}}
   *   Serialised form. Values exposing `toJSON()` are serialised through it;
   *   plain values are passed through unchanged instead of throwing.
   */
  toJSON() {
    const serialize = (value) =>
      typeof value?.toJSON === "function" ? value.toJSON() : value;
    return {
      type: this.type,
      content: this.content == null ? void 0 : serialize(this.content),
      encryptedContent: this.encryptedContent,
      summary: this.summary.map((part) => serialize(part))
    };
  }
};
|
|
7248
7332
|
var FunctionCallOutputItem = class _FunctionCallOutputItem extends ContextItem {
|
|
7249
7333
|
constructor(callId, output) {
|
|
7250
7334
|
super();
|
|
@@ -7267,6 +7351,225 @@ var FunctionCallOutputItem = class _FunctionCallOutputItem extends ContextItem {
|
|
|
7267
7351
|
};
|
|
7268
7352
|
}
|
|
7269
7353
|
};
|
|
7354
|
+
/**
 * Result of one inference call: the produced context items plus the
 * token accounting that came with them.
 */
var InferenceResponse = class {
  /**
   * @param {Array} contextItems - Items produced by the model.
   * @param {*} tokenUsage - Usage record; stored as given.
   */
  constructor(contextItems, tokenUsage) {
    Object.assign(this, { contextItems, tokenUsage });
  }
};
|
|
7360
|
+
/**
 * Breakdown of input-token usage.
 */
var InputTokenDetails = class {
  /**
   * @param {*} cached_tokens - Stored as given under the same name.
   */
  constructor(cached_tokens) {
    Object.assign(this, { cached_tokens });
  }
};
|
|
7365
|
+
/**
 * Breakdown of output-token usage.
 */
var OutputTokenDetails = class {
  /**
   * @param {*} reasoning_tokens - Stored as given under the same name.
   */
  constructor(reasoning_tokens) {
    Object.assign(this, { reasoning_tokens });
  }
};
|
|
7370
|
+
/**
 * Token accounting for one inference call.
 */
var TokenUsage = class {
  /**
   * All arguments are stored as given under the same names.
   * @param {*} inputTokens
   * @param {*} outputTokens
   * @param {*} totalTokens
   * @param {*} inputTokenDetails
   * @param {*} outputTokenDetails
   */
  constructor(inputTokens, outputTokens, totalTokens, inputTokenDetails, outputTokenDetails) {
    Object.assign(this, {
      inputTokens,
      outputTokens,
      totalTokens,
      inputTokenDetails,
      outputTokenDetails
    });
  }
};
|
|
7379
|
+
/**
 * Adapter that runs an inference request through `client.responses.create`
 * and converts the reply into context items and a token-usage record.
 */
var OpenAIResponses = class {
  constructor() {
    this.client = new OpenAI();
  }

  /**
   * Executes one inference call.
   *
   * `tools` is sent only when the model specification enables function
   * calling and the model has at least one tool; `reasoning.effort` is sent
   * only when the specification enables reasoning effort.
   *
   * @param {{model: object, context: object}} inferenceRequest
   * @returns {Promise<InferenceResponse>}
   */
  async infer(inferenceRequest) {
    const { model, context } = inferenceRequest;
    const input = this.mapContextToRequest(context);
    const { specification } = model;
    const request = {
      model: specification.name,
      input
    };
    if (specification.supportFunctionCalling) {
      // Read the tool list once so the emptiness check and the mapping see
      // the same array.
      const tools = model.getTools();
      if (tools.length > 0) {
        request.tools = tools.map(({ type, name, description, parameters }) => ({
          type,
          name,
          description,
          parameters
        }));
      }
    }
    if (specification.supportReasoningEffort) {
      request.reasoning = {
        effort: model.getReasoningEffort()
      };
    }
    const response = await this.client.responses.create(request);
    const contextItems = this.extractContextItems(response);
    const tokenUsage = this.extractTokenUsage(response);
    return new InferenceResponse(contextItems, tokenUsage);
  }

  /**
   * @param {object} response - Reply carrying an optional `usage` object.
   * @returns {TokenUsage|undefined} `undefined` when the reply has no usage.
   */
  extractTokenUsage(response) {
    const { usage } = response;
    if (!usage) {
      return void 0;
    }
    // The detail sub-objects are read defensively: a reply that reports
    // totals without the breakdown must not abort an otherwise good call.
    return new TokenUsage(
      usage.input_tokens,
      usage.output_tokens,
      usage.total_tokens,
      new InputTokenDetails(usage.input_tokens_details?.cached_tokens),
      new OutputTokenDetails(usage.output_tokens_details?.reasoning_tokens)
    );
  }

  /**
   * @param {object} context - Must expose `getItems()`.
   * @returns {Array} The `toJSON()` result of every context item, in order.
   */
  mapContextToRequest(context) {
    return context.getItems().map((item) => item.toJSON());
  }

  /**
   * Converts the reply's output entries into context items.
   *
   * Only assistant messages, function calls and reasoning entries are
   * converted. Any other entry is skipped, so the returned array never
   * contains `undefined` placeholders that consumers would dereference.
   *
   * @param {{output: Array<object>}} response
   * @returns {Array} Converted items, in reply order.
   */
  extractContextItems(response) {
    const contextItems = [];
    for (const item of response.output) {
      if (item.type === "message" && item.role === "assistant") {
        // Only the first content part is carried over.
        contextItems.push(ModelMessageItem.rehydrate(item.content[0]));
      } else if (item.type === "function_call") {
        contextItems.push(
          FunctionCallItem.rehydrate({
            callId: item.call_id,
            name: item.name,
            args: item.arguments
          })
        );
      } else if (item.type === "reasoning") {
        contextItems.push(ReasoningItem.rehydrate(item));
      }
    }
    return contextItems;
  }
};
|
|
7443
|
+
/**
 * Pairs a model with the context it should be run against.
 */
var InferenceRequest = class {
  /**
   * @param {*} model - Stored as given.
   * @param {*} context - Stored as given.
   */
  constructor(model, context) {
    Object.assign(this, { model, context });
  }
};
|
|
7449
|
+
/**
 * Mutable holder for a reasoning-effort setting.
 */
var OpenAIReasoningEffort = class {
  /**
   * @param {*} reasoningEffort - Initial setting; stored as given.
   */
  constructor(reasoningEffort) {
    this.reasoningEffort = reasoningEffort;
  }

  /**
   * @returns {*} The current setting.
   * @throws {Error} When the stored setting is falsy.
   */
  getReasoningEffort() {
    const current = this.reasoningEffort;
    if (current) {
      return current;
    }
    throw new Error("Reasoning effort not supported");
  }

  /** Replaces the current setting. */
  setReasoningEffort(effort) {
    this.reasoningEffort = effort;
  }
};
|
|
7463
|
+
/**
 * Model descriptor for "gpt-5.4-nano": a fixed specification plus a mutable
 * tool list and reasoning-effort holder.
 */
var Gpt54Nano = class {
  constructor() {
    const defaultReasoningEffort = "none";
    this.specification = {
      name: "gpt-5.4-nano",
      supportReasoningEffort: true,
      defaultReasoningEffort,
      supportStreaming: true,
      contextWindowSize: 400_000,
      maxOutputTokens: 128_000,
      supportFunctionCalling: true
    };
    this.tools = [];
    // The holder starts at the specification's default effort.
    this.effort = new OpenAIReasoningEffort(defaultReasoningEffort);
  }

  /** @returns {Array} The current tool list (live reference). */
  getTools() {
    return this.tools;
  }

  /** Replaces the tool list. */
  setTools(tools) {
    this.tools = tools;
  }

  /** @returns {*} Whatever the effort holder reports. */
  getReasoningEffort() {
    return this.effort.getReasoningEffort();
  }

  /** Forwards the new effort to the effort holder. */
  setReasoningEffort(effort) {
    this.effort.setReasoningEffort(effort);
  }
};
|
|
7492
|
+
/**
 * Model descriptor for "gpt-5.4": a fixed specification plus a mutable
 * tool list and reasoning-effort holder.
 */
var Gpt54 = class {
  constructor() {
    const defaultReasoningEffort = "none";
    this.specification = {
      name: "gpt-5.4",
      supportReasoningEffort: true,
      defaultReasoningEffort,
      supportStreaming: true,
      contextWindowSize: 1_050_000,
      maxOutputTokens: 128_000,
      supportFunctionCalling: true
    };
    this.tools = [];
    // The holder starts at the specification's default effort.
    this.effort = new OpenAIReasoningEffort(defaultReasoningEffort);
  }

  /** @returns {Array} The current tool list (live reference). */
  getTools() {
    return this.tools;
  }

  /** Replaces the tool list. */
  setTools(tools) {
    this.tools = tools;
  }

  /** @returns {*} Whatever the effort holder reports. */
  getReasoningEffort() {
    return this.effort.getReasoningEffort();
  }

  /** Forwards the new effort to the effort holder. */
  setReasoningEffort(effort) {
    this.effort.setReasoningEffort(effort);
  }
};
|
|
7521
|
+
/**
 * Model descriptor for "gpt-5.4-mini": a fixed specification plus a mutable
 * tool list and reasoning-effort holder.
 */
var Gpt54Mini = class {
  constructor() {
    const defaultReasoningEffort = "none";
    this.specification = {
      name: "gpt-5.4-mini",
      supportReasoningEffort: true,
      defaultReasoningEffort,
      supportStreaming: true,
      contextWindowSize: 400_000,
      maxOutputTokens: 128_000,
      supportFunctionCalling: true
    };
    this.tools = [];
    // The holder starts at the specification's default effort.
    this.effort = new OpenAIReasoningEffort(defaultReasoningEffort);
  }

  /** @returns {*} Whatever the effort holder reports. */
  getReasoningEffort() {
    return this.effort.getReasoningEffort();
  }

  /** Forwards the new effort to the effort holder. */
  setReasoningEffort(effort) {
    this.effort.setReasoningEffort(effort);
  }

  /** @returns {Array} The current tool list (live reference). */
  getTools() {
    return this.tools;
  }

  /** Replaces the tool list. */
  setTools(tools) {
    this.tools = tools;
  }
};
|
|
7550
|
+
/**
 * Context item of type "message" with role "system".
 */
var SystemMessageItem = class _SystemMessageItem extends ContextItem {
  /**
   * @param {*} content - Message content; must expose `toJSON()`.
   */
  constructor(content) {
    super();
    this.type = "message";
    this.role = "system";
    this.content = content;
  }

  /**
   * Builds a system message whose content is `InputText.create(text)`.
   * @param {string} text
   * @returns {_SystemMessageItem}
   */
  static create(text) {
    return new _SystemMessageItem(InputText.create(text));
  }

  /**
   * Builds a system message whose content is `InputText.rehydrate(data)`.
   * @param {*} data - Forwarded unchanged to `InputText.rehydrate`.
   * @returns {_SystemMessageItem}
   */
  static rehydrate(data) {
    return new _SystemMessageItem(InputText.rehydrate(data));
  }

  /** @returns {{type: string, role: string, content: *}} Serialised form. */
  toJSON() {
    const { type, role, content } = this;
    return { type, role, content: content.toJSON() };
  }
};
|
|
7270
7573
|
var Participant = class {
|
|
7271
7574
|
constructor() {
|
|
7272
7575
|
this.environments = [];
|
|
@@ -7369,6 +7672,46 @@ var AgenticEnvironment = class {
|
|
|
7369
7672
|
this.isActive = false;
|
|
7370
7673
|
}
|
|
7371
7674
|
};
|
|
7675
|
+
/**
 * Runs a single inference through `OpenAIResponses` and exposes the
 * resulting context items as an async iterable.
 */
var OpenAIInferenceRunner = class {
  constructor() {
    this.runtime = new OpenAIResponses();
  }

  /**
   * Performs one inference call and yields each returned context item.
   *
   * @param {object} context - Context handed to the inference request.
   * @param {object} model - Model handed to the inference request.
   * @param {AbortSignal} [signal] - Optional cancellation signal. It is
   *   checked before the call and before every yielded item. The in-flight
   *   request itself is not cancelled, because `infer` takes no signal.
   * @yields {*} Context items in the order the runtime returned them.
   * @throws {*} The signal's abort reason (or a generic Error) once the
   *   signal is aborted; otherwise whatever `infer` throws.
   */
  async *run(context, model, signal) {
    const assertNotAborted = () => {
      if (signal?.aborted) {
        throw signal.reason ?? new Error("Inference aborted");
      }
    };
    assertNotAborted();
    const response = await this.runtime.infer(new InferenceRequest(model, context));
    for (const item of response.contextItems) {
      assertNotAborted();
      yield item;
    }
  }
};
|
|
7686
|
+
/**
 * Model descriptor for "gpt-5.5": a fixed specification plus a mutable
 * tool list and reasoning-effort holder.
 */
var Gpt55 = class {
  constructor() {
    const defaultReasoningEffort = "none";
    this.specification = {
      name: "gpt-5.5",
      supportReasoningEffort: true,
      defaultReasoningEffort,
      supportStreaming: true,
      contextWindowSize: 1_050_000,
      maxOutputTokens: 128_000,
      supportFunctionCalling: true
    };
    this.tools = [];
    // The holder starts at the specification's default effort.
    this.effort = new OpenAIReasoningEffort(defaultReasoningEffort);
  }

  /** @returns {Array} The current tool list (live reference). */
  getTools() {
    return this.tools;
  }

  /** Replaces the tool list. */
  setTools(tools) {
    this.tools = tools;
  }

  /** @returns {*} Whatever the effort holder reports. */
  getReasoningEffort() {
    return this.effort.getReasoningEffort();
  }

  /** Forwards the new effort to the effort holder. */
  setReasoningEffort(effort) {
    this.effort.setReasoningEffort(effort);
  }
};
|
|
7372
7715
|
|
|
7373
7716
|
// ../baro-orchestrator/src/bus.ts
|
|
7374
7717
|
var BusEvent = class {
|
|
@@ -9533,6 +9876,124 @@ function extractVerdictJson(text) {
|
|
|
9533
9876
|
throw new Error(`unbalanced JSON object in critic response: ${trimmed.slice(0, 200)}`);
|
|
9534
9877
|
}
|
|
9535
9878
|
|
|
9879
|
+
// ../baro-orchestrator/src/participants/critic-openai.ts
|
|
9880
|
+
/**
 * Maps a model name to a fresh model descriptor for the OpenAI critic.
 *
 * @param {string} name - One of "gpt-5.5", "gpt-5.4", "gpt-5.4-mini",
 *   "gpt-5.4-nano".
 * @returns {object} A new descriptor instance for the named model.
 * @throws {Error} When the name is not one of the supported models.
 */
function pickModel(name) {
  // Factories are lazy so only the requested descriptor is constructed.
  const factories = new Map([
    ["gpt-5.5", () => new Gpt55()],
    ["gpt-5.4", () => new Gpt54()],
    ["gpt-5.4-mini", () => new Gpt54Mini()],
    ["gpt-5.4-nano", () => new Gpt54Nano()]
  ]);
  const build = factories.get(name);
  if (build === undefined) {
    throw new Error(
      `CriticOpenAI: unknown model "${name}" \u2014 Mozaik 3.9 ships gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.4-nano`
    );
  }
  return build();
}
|
|
9896
|
+
/**
 * Critic participant backed by OpenAI inference.
 *
 * For every successful `ClaudeResultItem` from an agent that has acceptance
 * criteria, it asks the model for a pass/fail verdict, publishes a
 * `CritiqueItem`, and on failure sends a corrective
 * `AgentTargetedMessageItem` to the agent (capped per agent).
 */
var CriticOpenAI = class extends BaroParticipant {
  opts;
  model;
  runner = new OpenAIInferenceRunner();
  // agentId -> number of corrective messages already sent.
  emissions = /* @__PURE__ */ new Map();
  // agentId -> number of results evaluated so far.
  turnCount = /* @__PURE__ */ new Map();
  // Evaluations currently in flight; see idle().
  pending = /* @__PURE__ */ new Set();

  /**
   * @param {object} opts
   * @param {Map} opts.targets - agentId -> list of acceptance criteria.
   * @param {number} [opts.maxEmissionsPerAgent=2] - Cap on corrective
   *   messages per agent.
   * @param {string} [opts.model="gpt-5.4-mini"] - Name understood by
   *   `pickModel`.
   * @throws {Error} When the model name is unknown (from `pickModel`).
   */
  constructor(opts) {
    super();
    this.opts = {
      maxEmissionsPerAgent: opts.maxEmissionsPerAgent ?? 2,
      model: opts.model ?? "gpt-5.4-mini",
      targets: opts.targets
    };
    this.model = pickModel(this.opts.model);
  }

  /** Resolves once every in-flight evaluation has emitted its CritiqueItem. */
  async idle() {
    await Promise.allSettled([...this.pending]);
  }

  /**
   * Bus handler. Starts an evaluation in the background and returns
   * without waiting for it; use `idle()` to wait.
   *
   * Ignored events: anything that is not a `ClaudeResultItem`, results
   * flagged as errors or without text, and agents with no criteria.
   *
   * @param {*} _source - Unused.
   * @param {*} event - Bus event.
   */
  async onExternalBusEvent(_source, event) {
    if (!(event instanceof ClaudeResultItem)) return;
    if (event.isError || !event.resultText) return;
    const criteria = this.opts.targets.get(event.agentId);
    if (!criteria || criteria.length === 0) return;
    const turn = (this.turnCount.get(event.agentId) ?? 0) + 1;
    this.turnCount.set(event.agentId, turn);
    const work = (async () => {
      const { verdict, reasoning, violatedCriteria } = await this.evaluate(
        event.resultText,
        criteria
      );
      const critiqueItem = new CritiqueItem(
        event.agentId,
        verdict,
        reasoning,
        violatedCriteria,
        turn,
        this.opts.model
      );
      for (const env of this.getEnvironments()) {
        env.deliverBusEvent(this, critiqueItem);
      }
      if (verdict !== "fail") return;
      const emitted = this.emissions.get(event.agentId) ?? 0;
      if (emitted >= this.opts.maxEmissionsPerAgent) return;
      this.emissions.set(event.agentId, emitted + 1);
      const text = buildCorrectiveMessage(reasoning, violatedCriteria);
      const msg = new AgentTargetedMessageItem(event.agentId, text, {
        criticTurn: turn,
        emissionIndex: emitted + 1
      });
      for (const env of this.getEnvironments()) {
        env.deliverBusEvent(this, msg);
      }
    })().catch((err) => {
      // The evaluation is fire-and-forget, so nothing upstream can observe
      // a failure here. Report it instead of leaving an unhandled rejection
      // (which terminates current Node versions by default).
      process.stderr.write(
        `[critic-openai] evaluation for ${event.agentId} failed: ${err?.message ?? String(err)}\n`
      );
    });
    this.pending.add(work);
    // `work` can no longer reject, so the promise derived by finally() is
    // safe to leave unawaited.
    work.finally(() => this.pending.delete(work));
  }

  /**
   * One-shot OpenAI inference call. Builds a ModelContext with the
   * verdict system prompt + the eval prompt, runs the inference, and
   * parses the JSON verdict the model returned. Same prompt and same
   * JSON shape as the Claude version so behaviour stays comparable
   * for benchmarking.
   *
   * Never rejects: any inference or parsing error is turned into a
   * "fail" verdict whose reasoning carries the error message.
   *
   * @param {string} resultText - Agent output to judge.
   * @param {Array} criteria - Acceptance criteria for the agent.
   * @returns {Promise<{verdict: "pass"|"fail", reasoning: string, violatedCriteria: Array}>}
   */
  async evaluate(resultText, criteria) {
    const userPrompt = buildEvalPrompt(criteria, resultText);
    const context = ModelContext.create("critic")
      .addContextItem(SystemMessageItem.create(VERDICT_SYSTEM_PROMPT))
      .addContextItem(UserMessageItem.create(userPrompt));
    try {
      let assistantText = "";
      for await (const item of this.runner.run(context, this.model)) {
        if (item.type === "message" && item.role === "assistant") {
          const json = item.toJSON();
          // Only the first content part of each assistant message is read.
          assistantText += json.content?.[0]?.text ?? "";
        }
      }
      if (!assistantText.trim()) {
        throw new Error("OpenAI returned empty assistant text");
      }
      const verdictJson = extractVerdictJson(assistantText);
      const parsed = JSON.parse(verdictJson);
      return {
        // Anything other than an explicit "pass" counts as a failure.
        verdict: parsed.verdict === "pass" ? "pass" : "fail",
        reasoning: parsed.reasoning ?? "",
        violatedCriteria: Array.isArray(parsed.violated_criteria) ? parsed.violated_criteria : []
      };
    } catch (err) {
      return {
        verdict: "fail",
        reasoning: `Critic (OpenAI) LLM call failed: ${String(err?.message ?? err)}`,
        violatedCriteria: ["[critic-openai error \u2014 could not evaluate]"]
      };
    }
  }
};
|
|
9996
|
+
|
|
9536
9997
|
// ../baro-orchestrator/src/participants/finalizer.ts
|
|
9537
9998
|
import { execFile as execFile3 } from "child_process";
|
|
9538
9999
|
import { promisify as promisify3 } from "util";
|
|
@@ -10482,13 +10943,7 @@ var Surgeon = class extends BaroParticipant {
|
|
|
10482
10943
|
* skipping (if their only dep is now gone, they become unreachable).
|
|
10483
10944
|
*/
|
|
10484
10945
|
evaluateDeterministic(failure) {
|
|
10485
|
-
return
|
|
10486
|
-
"surgeon",
|
|
10487
|
-
`deterministic skip: ${failure.storyId} exhausted ${failure.attempts} attempts (${failure.error ?? "no reason"})`,
|
|
10488
|
-
[],
|
|
10489
|
-
[failure.storyId],
|
|
10490
|
-
/* @__PURE__ */ new Map()
|
|
10491
|
-
);
|
|
10946
|
+
return surgeonDeterministicReplan(failure);
|
|
10492
10947
|
}
|
|
10493
10948
|
/**
|
|
10494
10949
|
* LLM strategy: ask Claude (via CLI subprocess) to propose a replan
|
|
@@ -10593,6 +11048,118 @@ function extractJsonObject(text) {
|
|
|
10593
11048
|
}
|
|
10594
11049
|
throw new Error("unbalanced JSON object in surgeon response");
|
|
10595
11050
|
}
|
|
11051
|
+
/**
 * Builds the deterministic fallback replan for a failed story: nothing is
 * added, the failed story is removed, and no dependencies are rewritten.
 *
 * @param {{storyId: string, attempts: number, error?: string}} failure
 * @returns {ReplanItem} Replan attributed to "surgeon".
 */
function surgeonDeterministicReplan(failure) {
  const { storyId, attempts } = failure;
  const cause = failure.error ?? "no reason";
  const reason = `deterministic skip: ${storyId} exhausted ${attempts} attempts (${cause})`;
  const addedStories = [];
  const removedStoryIds = [storyId];
  const modifiedDeps = new Map();
  return new ReplanItem("surgeon", reason, addedStories, removedStoryIds, modifiedDeps);
}
|
|
11060
|
+
|
|
11061
|
+
// ../baro-orchestrator/src/participants/surgeon-openai.ts
|
|
11062
|
+
/**
 * Maps a model name to a fresh model descriptor for the OpenAI surgeon.
 *
 * @param {string} name - One of "gpt-5.5", "gpt-5.4", "gpt-5.4-mini",
 *   "gpt-5.4-nano".
 * @returns {object} A new descriptor instance for the named model.
 * @throws {Error} When the name is not one of the supported models.
 */
function pickModel2(name) {
  // Factories are lazy so only the requested descriptor is constructed.
  const factories = new Map([
    ["gpt-5.5", () => new Gpt55()],
    ["gpt-5.4", () => new Gpt54()],
    ["gpt-5.4-mini", () => new Gpt54Mini()],
    ["gpt-5.4-nano", () => new Gpt54Nano()]
  ]);
  const build = factories.get(name);
  if (build === undefined) {
    throw new Error(
      `SurgeonOpenAI: unknown model "${name}" \u2014 Mozaik 3.9 ships gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.4-nano`
    );
  }
  return build();
}
|
|
11078
|
+
/**
 * Surgeon participant backed by OpenAI inference.
 *
 * For every failed `StoryResultItem` it asks the model for a structured
 * replan and publishes the resulting `ReplanItem`, up to `maxReplans`
 * replans per instance.
 */
var SurgeonOpenAI = class extends BaroParticipant {
  opts;
  model;
  runner = new OpenAIInferenceRunner();
  // Number of ReplanItems published so far.
  replansEmitted = 0;
  // Evaluations currently in flight; see idle().
  pending = /* @__PURE__ */ new Set();

  /**
   * @param {object} opts
   * @param {Function} opts.snapshot - Returns the current plan snapshot
   *   passed to `buildSurgeonPrompt`.
   * @param {number} [opts.maxReplans=10] - Cap on published replans.
   * @param {string} [opts.model="gpt-5.4"] - Name understood by
   *   `pickModel2`.
   * @throws {Error} When the model name is unknown (from `pickModel2`).
   */
  constructor(opts) {
    super();
    this.opts = {
      maxReplans: opts.maxReplans ?? 10,
      model: opts.model ?? "gpt-5.4",
      snapshot: opts.snapshot
    };
    this.model = pickModel2(this.opts.model);
  }

  /** Resolves once every evaluation that is currently in flight has settled. */
  async idle() {
    await Promise.allSettled([...this.pending]);
  }

  /**
   * Bus handler. Evaluates a failed story and publishes the replan before
   * resolving. Successful stories, non-`StoryResultItem` events, and
   * failures arriving after the replan budget is spent are ignored.
   *
   * @param {*} _source - Unused.
   * @param {*} event - Bus event.
   * @returns {Promise<void>} Rejects if publishing the replan throws.
   */
  async onExternalBusEvent(_source, event) {
    if (!(event instanceof StoryResultItem)) return;
    if (event.success) return;
    if (this.replansEmitted >= this.opts.maxReplans) return;
    const work = (async () => {
      const replan = await this.evaluate(event);
      if (!replan) return;
      // Several failures can pass the budget check above while their
      // inference calls are still awaiting; re-check before publishing so
      // the cap cannot be exceeded.
      if (this.replansEmitted >= this.opts.maxReplans) return;
      this.replansEmitted += 1;
      for (const env of this.getEnvironments()) {
        env.deliverBusEvent(this, replan);
      }
    })();
    this.pending.add(work);
    // then(f, f) instead of finally(f): the promise derived by finally()
    // would reject alongside `work` and, being unawaited, surface as an
    // unhandled rejection even though `await work` below handles the
    // original.
    const untrack = () => {
      this.pending.delete(work);
    };
    work.then(untrack, untrack);
    await work;
  }

  /**
   * One-shot OpenAI inference call asking the model for a structured
   * replan. Returns `null` on the "abort" action (no ReplanItem
   * emitted, run ends). Returns a deterministic-skip `ReplanItem` on
   * any inference or JSON-parse error so the run still has a chance
   * to recover.
   *
   * @param {object} failure - The failed story result.
   * @returns {Promise<ReplanItem|null>}
   */
  async evaluate(failure) {
    const snap = this.opts.snapshot();
    const userPrompt = buildSurgeonPrompt(snap, failure);
    const context = ModelContext.create("surgeon")
      .addContextItem(SystemMessageItem.create(SURGEON_SYSTEM_PROMPT))
      .addContextItem(UserMessageItem.create(userPrompt));
    try {
      let assistantText = "";
      for await (const item of this.runner.run(context, this.model)) {
        if (item.type === "message" && item.role === "assistant") {
          const json = item.toJSON();
          // Only the first content part of each assistant message is read.
          assistantText += json.content?.[0]?.text ?? "";
        }
      }
      if (!assistantText.trim()) {
        throw new Error("OpenAI returned empty assistant text");
      }
      const verdictJson = extractJsonObject(assistantText);
      const parsed = JSON.parse(verdictJson);
      if (parsed.action === "abort") return null;
      const modifiedDeps = /* @__PURE__ */ new Map();
      for (const m of parsed.modifiedDeps ?? []) {
        // Malformed entries are skipped rather than trusted.
        if (typeof m.id === "string" && Array.isArray(m.newDependsOn)) {
          modifiedDeps.set(m.id, [...m.newDependsOn]);
        }
      }
      return new ReplanItem(
        "surgeon",
        `${parsed.action}: ${parsed.reason ?? ""}`,
        // Model output is untrusted: anything that is not a list is
        // treated as "no changes" instead of being forwarded as-is.
        Array.isArray(parsed.added) ? parsed.added : [],
        Array.isArray(parsed.removed) ? parsed.removed : [],
        modifiedDeps
      );
    } catch (err) {
      const fallback = surgeonDeterministicReplan(failure);
      return new ReplanItem(
        fallback.source,
        `${fallback.reason} (openai-llm fallback after error: ${err?.message ?? String(err)})`,
        fallback.addedStories,
        fallback.removedStoryIds,
        fallback.modifiedDeps
      );
    }
  }
};
|
|
10596
11163
|
|
|
10597
11164
|
// ../baro-orchestrator/src/tui-protocol.ts
|
|
10598
11165
|
function emit(event) {
|
|
@@ -10607,7 +11174,7 @@ async function orchestrate(config) {
|
|
|
10607
11174
|
const llm = config.llm ?? "claude";
|
|
10608
11175
|
if (llm === "openai") {
|
|
10609
11176
|
process.stderr.write(
|
|
10610
|
-
"[orchestrate] llm=openai
|
|
11177
|
+
"[orchestrate] llm=openai: Critic + Surgeon route to Mozaik OpenAI; Architect, Planner, StoryAgent still on Claude CLI (per-phase ports in 0.31+).\n"
|
|
10611
11178
|
);
|
|
10612
11179
|
}
|
|
10613
11180
|
if (config.auditLogPath) {
|
|
@@ -10634,21 +11201,25 @@ async function orchestrate(config) {
|
|
|
10634
11201
|
if (sentry) sentry.join(env);
|
|
10635
11202
|
let surgeon = null;
|
|
10636
11203
|
if (config.withSurgeon) {
|
|
10637
|
-
|
|
10638
|
-
|
|
10639
|
-
|
|
10640
|
-
|
|
10641
|
-
|
|
10642
|
-
|
|
10643
|
-
|
|
10644
|
-
|
|
10645
|
-
|
|
10646
|
-
|
|
10647
|
-
|
|
10648
|
-
|
|
10649
|
-
|
|
10650
|
-
|
|
10651
|
-
|
|
11204
|
+
// Re-reads the PRD from `config.prdPath` on every call and returns a
// trimmed view of it: project, description, and for each user story only
// id, title, description, dependsOn and passes.
const snapshot = () => {
  const { project, description, userStories } = loadPrd(config.prdPath);
  const stories = userStories.map((story) => {
    const { id, title, description: storyDescription, dependsOn, passes } = story;
    return { id, title, description: storyDescription, dependsOn, passes };
  });
  return { project, description, stories };
};
|
|
11218
|
+
surgeon = llm === "openai" ? new SurgeonOpenAI({
|
|
11219
|
+
snapshot,
|
|
11220
|
+
model: config.surgeonModel ?? "gpt-5.4"
|
|
11221
|
+
}) : new Surgeon({
|
|
11222
|
+
snapshot,
|
|
10652
11223
|
useLlm: config.surgeonUseLlm ?? false,
|
|
10653
11224
|
model: config.surgeonModel ?? "opus"
|
|
10654
11225
|
});
|
|
@@ -10660,7 +11231,10 @@ async function orchestrate(config) {
|
|
|
10660
11231
|
const targets = new Map(
|
|
10661
11232
|
prd.userStories.filter((s) => s.acceptance && s.acceptance.length > 0).map((s) => [s.id, s.acceptance])
|
|
10662
11233
|
);
|
|
10663
|
-
critic = new
|
|
11234
|
+
critic = llm === "openai" ? new CriticOpenAI({
|
|
11235
|
+
targets,
|
|
11236
|
+
model: config.criticModel ?? "gpt-5.4-mini"
|
|
11237
|
+
}) : new Critic({
|
|
10664
11238
|
targets,
|
|
10665
11239
|
model: config.criticModel ?? "haiku"
|
|
10666
11240
|
});
|