baro-ai 0.28.0 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.mjs +622 -25
- package/dist/cli.mjs.map +1 -1
- package/package.json +1 -1
package/dist/cli.mjs
CHANGED
|
@@ -7160,6 +7160,46 @@ OpenAI.Skills = Skills;
|
|
|
7160
7160
|
OpenAI.Videos = Videos;
|
|
7161
7161
|
|
|
7162
7162
|
// ../../node_modules/@mozaik-ai/core/dist/index.mjs
|
|
7163
|
+
/**
 * Ordered collection of context items for one conversation, tagged with its
 * own id and the id of the project it belongs to.
 */
var ModelContext = class _ModelContext {
  /**
   * @param id        Identifier of this context.
   * @param projectId Identifier of the owning project.
   * @param items     Backing array of context items (kept by reference).
   */
  constructor(id, projectId, items) {
    Object.assign(this, { projectId, id, items });
  }

  /** Builds an empty context with a freshly generated UUID as its id. */
  static create(projectId) {
    return new _ModelContext(crypto.randomUUID(), projectId, []);
  }

  /** Rebuilds a context from a plain `{ id, projectId, items }` record. */
  static rehydrate(data) {
    const { id, projectId, items } = data;
    return new _ModelContext(id, projectId, items);
  }

  /** Appends one item and returns this context for chaining. */
  addContextItem(item) {
    this.items.push(item);
    return this;
  }

  /**
   * Appends a batch of model-produced items.
   *
   * The whole batch is validated before anything is appended, so an invalid
   * entry leaves the context untouched.
   *
   * @throws {Error} when an item's type is not "function_call", "message" or
   *   "reasoning".
   */
  applyModelOutput(items) {
    const accepted = new Set(["function_call", "message", "reasoning"]);
    for (const candidate of items) {
      const kind = candidate.getType();
      if (!accepted.has(kind)) {
        throw new Error(`Invalid item type: ${kind}`);
      }
    }
    this.items.push(...items);
    return this;
  }

  /** Returns the live backing array (not a copy). */
  getItems() {
    return this.items;
  }

  /**
   * Returns the most recently appended item.
   *
   * @throws {Error} when the context holds no items.
   */
  getLastItem() {
    const count = this.items.length;
    if (count === 0) {
      throw new Error("No items in context");
    }
    return this.items[count - 1];
  }

  /** Serializes every item via its own `toJSON()`; id and projectId are not included. */
  toJSON() {
    return this.items.map((entry) => entry.toJSON());
  }
};
|
|
7163
7203
|
var ContextItem = class {
|
|
7164
7204
|
getType() {
|
|
7165
7205
|
return this.type;
|
|
@@ -7188,6 +7228,29 @@ var InputText = class _InputText extends ItemContent {
|
|
|
7188
7228
|
];
|
|
7189
7229
|
}
|
|
7190
7230
|
};
|
|
7231
|
+
/**
 * Context item representing a message with role "user".
 */
var UserMessageItem = class _UserMessageItem extends ContextItem {
  /**
   * @param content Content object; it must expose `toJSON()` for serialization.
   */
  constructor(content) {
    super();
    Object.assign(this, { type: "message", role: "user", content });
  }

  /** Serializes to `{ type, role, content }`, delegating content to its own `toJSON()`. */
  toJSON() {
    const { type, role, content } = this;
    return { type, role, content: content.toJSON() };
  }

  /** Wraps plain text in an InputText and returns a new user message. */
  static create(text) {
    return new _UserMessageItem(InputText.create(text));
  }

  /** Rebuilds a user message from stored data, which is handed to `InputText.rehydrate`. */
  static rehydrate(data) {
    return new _UserMessageItem(InputText.rehydrate(data));
  }
};
|
|
7191
7254
|
var OutputText = class _OutputText extends ItemContent {
|
|
7192
7255
|
constructor(text) {
|
|
7193
7256
|
super();
|
|
@@ -7245,6 +7308,27 @@ var FunctionCallItem = class _FunctionCallItem extends ContextItem {
|
|
|
7245
7308
|
};
|
|
7246
7309
|
}
|
|
7247
7310
|
};
|
|
7311
|
+
/**
 * Context item of type "reasoning".
 *
 * Instances are created either from already-hydrated parts (objects exposing
 * `toJSON()`) or — as `OpenAIResponses.extractContextItems` does — straight
 * from a raw response output item, whose `summary` entries are plain objects.
 * Serialization therefore accepts both shapes.
 */
var ReasoningItem = class _ReasoningItem extends ContextItem {
  /**
   * @param content          Optional reasoning content.
   * @param encryptedContent Optional opaque encrypted payload.
   * @param summary          Summary parts; defaults to an empty array.
   */
  constructor(content, encryptedContent, summary = []) {
    super();
    this.type = "reasoning";
    this.content = content;
    this.encryptedContent = encryptedContent;
    this.summary = summary;
  }

  /**
   * Rebuilds an item from stored data or from a raw response output item.
   * The raw API item names the field `encrypted_content`, so that spelling is
   * accepted as a fallback when the camelCase key is absent.
   */
  static rehydrate(data) {
    return new _ReasoningItem(
      data.content,
      data.encryptedContent ?? data.encrypted_content,
      data.summary
    );
  }

  /**
   * Serializes to `{ type, content, encryptedContent, summary }`.
   * Parts that expose `toJSON()` are serialized through it; plain values are
   * passed through unchanged instead of raising a TypeError.
   */
  toJSON() {
    const serialize = (part) => typeof part?.toJSON === "function" ? part.toJSON() : part;
    return {
      type: this.type,
      content: this.content == null ? void 0 : serialize(this.content),
      encryptedContent: this.encryptedContent,
      // `summary` may be null when it was passed explicitly; treat it as empty.
      summary: (this.summary ?? []).map(serialize)
    };
  }
};
|
|
7248
7332
|
var FunctionCallOutputItem = class _FunctionCallOutputItem extends ContextItem {
|
|
7249
7333
|
constructor(callId, output) {
|
|
7250
7334
|
super();
|
|
@@ -7267,6 +7351,225 @@ var FunctionCallOutputItem = class _FunctionCallOutputItem extends ContextItem {
|
|
|
7267
7351
|
};
|
|
7268
7352
|
}
|
|
7269
7353
|
};
|
|
7354
|
+
/**
 * Result of one inference call: the context items produced by the model and
 * the token usage reported for the call.
 */
var InferenceResponse = class {
  /**
   * @param contextItems Items produced by the model.
   * @param tokenUsage   Usage record for the call; stored as given.
   */
  constructor(contextItems, tokenUsage) {
    Object.assign(this, { contextItems, tokenUsage });
  }
};
|
|
7360
|
+
/**
 * Breakdown of input tokens; currently carries only the cached-token count.
 */
var InputTokenDetails = class {
  /** @param cached_tokens Value stored verbatim on `cached_tokens`. */
  constructor(cached_tokens) {
    Object.assign(this, { cached_tokens });
  }
};
|
|
7365
|
+
/**
 * Breakdown of output tokens; currently carries only the reasoning-token count.
 */
var OutputTokenDetails = class {
  /** @param reasoning_tokens Value stored verbatim on `reasoning_tokens`. */
  constructor(reasoning_tokens) {
    Object.assign(this, { reasoning_tokens });
  }
};
|
|
7370
|
+
/**
 * Token accounting for a single inference call.
 */
var TokenUsage = class {
  /**
   * @param inputTokens        Input token count.
   * @param outputTokens       Output token count.
   * @param totalTokens        Total token count as reported by the caller.
   * @param inputTokenDetails  Input-side breakdown.
   * @param outputTokenDetails Output-side breakdown.
   */
  constructor(inputTokens, outputTokens, totalTokens, inputTokenDetails, outputTokenDetails) {
    Object.assign(this, {
      inputTokens,
      outputTokens,
      totalTokens,
      inputTokenDetails,
      outputTokenDetails
    });
  }
};
|
|
7379
|
+
/**
 * Adapter that sends a model context through the OpenAI client's
 * `responses.create` call and converts the reply back into context items and
 * a token-usage record.
 */
var OpenAIResponses = class {
  constructor() {
    // The client is constructed without explicit options.
    this.client = new OpenAI();
  }

  /**
   * Runs one inference call.
   *
   * @param inferenceRequest Object with `model` (exposing `specification`,
   *   `getTools()` and `getReasoningEffort()`) and `context` (exposing
   *   `getItems()`).
   * @returns {Promise<InferenceResponse>} Mapped output items and token usage.
   */
  async infer(inferenceRequest) {
    const { model, context } = inferenceRequest;
    const specification = model.specification;
    const request = {
      model: specification.name,
      input: this.mapContextToRequest(context)
    };
    // Tools are only read, and only sent, when the model supports function calling.
    const tools = specification.supportFunctionCalling ? model.getTools() : [];
    if (tools.length > 0) {
      request.tools = tools.map(({ type, name, description, parameters }) => ({
        type,
        name,
        description,
        parameters
      }));
    }
    if (specification.supportReasoningEffort) {
      request.reasoning = { effort: model.getReasoningEffort() };
    }
    const response = await this.client.responses.create(request);
    return new InferenceResponse(
      this.extractContextItems(response),
      this.extractTokenUsage(response)
    );
  }

  /**
   * Converts the response's `usage` object into a TokenUsage.
   *
   * @returns {TokenUsage | undefined} `undefined` when the response carries no
   *   usage. Missing detail sub-objects produce `undefined` counts instead of
   *   throwing.
   */
  extractTokenUsage(response) {
    const usage = response.usage;
    if (!usage) {
      return void 0;
    }
    return new TokenUsage(
      usage.input_tokens,
      usage.output_tokens,
      usage.total_tokens,
      new InputTokenDetails(usage.input_tokens_details?.cached_tokens),
      new OutputTokenDetails(usage.output_tokens_details?.reasoning_tokens)
    );
  }

  /** Serializes every item of the context via its `toJSON()`. */
  mapContextToRequest(context) {
    return context.getItems().map((item) => item.toJSON());
  }

  /**
   * Maps response output items to context items.
   *
   * Assistant messages, function calls and reasoning items are converted.
   * Any other output item is skipped, so the returned array never contains
   * `undefined` holes that would make consumers fail on `item.type`.
   */
  extractContextItems(response) {
    const items = [];
    for (const item of response.output ?? []) {
      if (item.type === "message" && item.role === "assistant") {
        // Only the first content part of the message is carried over.
        items.push(ModelMessageItem.rehydrate(item.content[0]));
      } else if (item.type === "function_call") {
        items.push(
          FunctionCallItem.rehydrate({
            callId: item.call_id,
            name: item.name,
            args: item.arguments
          })
        );
      } else if (item.type === "reasoning") {
        items.push(ReasoningItem.rehydrate(item));
      }
    }
    return items;
  }
};
|
|
7443
|
+
/**
 * Pairs the model to call with the context to send to it.
 */
var InferenceRequest = class {
  /**
   * @param model   Model descriptor used for the call.
   * @param context Context whose items form the request input.
   */
  constructor(model, context) {
    Object.assign(this, { model, context });
  }
};
|
|
7449
|
+
/**
 * Mutable holder for a model's reasoning-effort setting.
 */
var OpenAIReasoningEffort = class {
  /** @param reasoningEffort Initial effort value. */
  constructor(reasoningEffort) {
    this.reasoningEffort = reasoningEffort;
  }

  /**
   * Returns the current effort.
   *
   * @throws {Error} when the stored value is falsy (unset or empty).
   */
  getReasoningEffort() {
    const current = this.reasoningEffort;
    if (current) {
      return current;
    }
    throw new Error("Reasoning effort not supported");
  }

  /** Replaces the stored effort value. */
  setReasoningEffort(effort) {
    this.reasoningEffort = effort;
  }
};
|
|
7463
|
+
/**
 * Model descriptor for "gpt-5.4-nano": a static capability specification plus
 * mutable tool and reasoning-effort settings.
 */
var Gpt54Nano = class {
  constructor() {
    const defaultReasoningEffort = "none";
    this.specification = {
      name: "gpt-5.4-nano",
      supportReasoningEffort: true,
      defaultReasoningEffort,
      supportStreaming: true,
      contextWindowSize: 400000, // presumably measured in tokens
      maxOutputTokens: 128000,
      supportFunctionCalling: true
    };
    this.tools = [];
    // The effort setting starts at the specification's default.
    this.effort = new OpenAIReasoningEffort(defaultReasoningEffort);
  }

  /** Returns the currently configured tool list. */
  getTools() {
    return this.tools;
  }

  /** Replaces the tool list. */
  setTools(tools) {
    this.tools = tools;
  }

  /** Returns the current reasoning effort, delegating to the effort holder. */
  getReasoningEffort() {
    return this.effort.getReasoningEffort();
  }

  /** Updates the reasoning effort on the effort holder. */
  setReasoningEffort(effort) {
    this.effort.setReasoningEffort(effort);
  }
};
|
|
7492
|
+
/**
 * Model descriptor for "gpt-5.4": a static capability specification plus
 * mutable tool and reasoning-effort settings.
 */
var Gpt54 = class {
  constructor() {
    const defaultReasoningEffort = "none";
    this.specification = {
      name: "gpt-5.4",
      supportReasoningEffort: true,
      defaultReasoningEffort,
      supportStreaming: true,
      contextWindowSize: 1050000, // presumably measured in tokens
      maxOutputTokens: 128000,
      supportFunctionCalling: true
    };
    this.tools = [];
    // The effort setting starts at the specification's default.
    this.effort = new OpenAIReasoningEffort(defaultReasoningEffort);
  }

  /** Returns the currently configured tool list. */
  getTools() {
    return this.tools;
  }

  /** Replaces the tool list. */
  setTools(tools) {
    this.tools = tools;
  }

  /** Returns the current reasoning effort, delegating to the effort holder. */
  getReasoningEffort() {
    return this.effort.getReasoningEffort();
  }

  /** Updates the reasoning effort on the effort holder. */
  setReasoningEffort(effort) {
    this.effort.setReasoningEffort(effort);
  }
};
|
|
7521
|
+
/**
 * Model descriptor for "gpt-5.4-mini": a static capability specification plus
 * mutable tool and reasoning-effort settings.
 */
var Gpt54Mini = class {
  constructor() {
    const defaultReasoningEffort = "none";
    this.specification = {
      name: "gpt-5.4-mini",
      supportReasoningEffort: true,
      defaultReasoningEffort,
      supportStreaming: true,
      contextWindowSize: 400000, // presumably measured in tokens
      maxOutputTokens: 128000,
      supportFunctionCalling: true
    };
    this.tools = [];
    // The effort setting starts at the specification's default.
    this.effort = new OpenAIReasoningEffort(defaultReasoningEffort);
  }

  /** Returns the currently configured tool list. */
  getTools() {
    return this.tools;
  }

  /** Replaces the tool list. */
  setTools(tools) {
    this.tools = tools;
  }

  /** Returns the current reasoning effort, delegating to the effort holder. */
  getReasoningEffort() {
    return this.effort.getReasoningEffort();
  }

  /** Updates the reasoning effort on the effort holder. */
  setReasoningEffort(effort) {
    this.effort.setReasoningEffort(effort);
  }
};
|
|
7550
|
+
/**
 * Context item representing a message with role "system".
 */
var SystemMessageItem = class _SystemMessageItem extends ContextItem {
  /**
   * @param content Content object; it must expose `toJSON()` for serialization.
   */
  constructor(content) {
    super();
    Object.assign(this, { type: "message", role: "system", content });
  }

  /** Wraps plain text in an InputText and returns a new system message. */
  static create(text) {
    return new _SystemMessageItem(InputText.create(text));
  }

  /** Rebuilds a system message from stored data, which is handed to `InputText.rehydrate`. */
  static rehydrate(data) {
    return new _SystemMessageItem(InputText.rehydrate(data));
  }

  /** Serializes to `{ type, role, content }`, delegating content to its own `toJSON()`. */
  toJSON() {
    const { type, role, content } = this;
    return { type, role, content: content.toJSON() };
  }
};
|
|
7270
7573
|
var Participant = class {
|
|
7271
7574
|
constructor() {
|
|
7272
7575
|
this.environments = [];
|
|
@@ -7369,6 +7672,46 @@ var AgenticEnvironment = class {
|
|
|
7369
7672
|
this.isActive = false;
|
|
7370
7673
|
}
|
|
7371
7674
|
};
|
|
7675
|
+
/**
 * Runs a single inference against the OpenAI runtime and exposes the produced
 * context items as an async stream.
 */
var OpenAIInferenceRunner = class {
  constructor() {
    this.runtime = new OpenAIResponses();
  }

  /**
   * Performs one inference call and yields each returned context item in order.
   *
   * @param context Context to send.
   * @param model   Model descriptor to call.
   * @param signal  Accepted for interface compatibility; not consulted here.
   */
  async *run(context, model, signal) {
    const request = new InferenceRequest(model, context);
    const { contextItems } = await this.runtime.infer(request);
    for (const produced of contextItems) {
      yield produced;
    }
  }
};
|
|
7686
|
+
/**
 * Model descriptor for "gpt-5.5": a static capability specification plus
 * mutable tool and reasoning-effort settings.
 */
var Gpt55 = class {
  constructor() {
    const defaultReasoningEffort = "none";
    this.specification = {
      name: "gpt-5.5",
      supportReasoningEffort: true,
      defaultReasoningEffort,
      supportStreaming: true,
      contextWindowSize: 1050000, // presumably measured in tokens
      maxOutputTokens: 128000,
      supportFunctionCalling: true
    };
    this.tools = [];
    // The effort setting starts at the specification's default.
    this.effort = new OpenAIReasoningEffort(defaultReasoningEffort);
  }

  /** Returns the currently configured tool list. */
  getTools() {
    return this.tools;
  }

  /** Replaces the tool list. */
  setTools(tools) {
    this.tools = tools;
  }

  /** Returns the current reasoning effort, delegating to the effort holder. */
  getReasoningEffort() {
    return this.effort.getReasoningEffort();
  }

  /** Updates the reasoning effort on the effort holder. */
  setReasoningEffort(effort) {
    this.effort.setReasoningEffort(effort);
  }
};
|
|
7372
7715
|
|
|
7373
7716
|
// ../baro-orchestrator/src/bus.ts
|
|
7374
7717
|
var BusEvent = class {
|
|
@@ -9533,6 +9876,124 @@ function extractVerdictJson(text) {
|
|
|
9533
9876
|
throw new Error(`unbalanced JSON object in critic response: ${trimmed.slice(0, 200)}`);
|
|
9534
9877
|
}
|
|
9535
9878
|
|
|
9879
|
+
// ../baro-orchestrator/src/participants/critic-openai.ts
|
|
9880
|
+
/**
 * Resolves a model name to a fresh model descriptor for the OpenAI critic.
 *
 * @param name One of "gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.4-nano".
 * @throws {Error} when the name is not one of the supported models.
 */
function pickModel(name) {
  // Factories are thunks so a model class is only touched when it is selected.
  const factories = new Map([
    ["gpt-5.5", () => new Gpt55()],
    ["gpt-5.4", () => new Gpt54()],
    ["gpt-5.4-mini", () => new Gpt54Mini()],
    ["gpt-5.4-nano", () => new Gpt54Nano()]
  ]);
  const build = factories.get(name);
  if (build === undefined) {
    throw new Error(
      `CriticOpenAI: unknown model "${name}" \u2014 Mozaik 3.9 ships gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.4-nano`
    );
  }
  return build();
}
|
|
9896
|
+
/**
 * Critic participant backed by an OpenAI model.
 *
 * For every non-error ClaudeResultItem with result text whose agent has
 * acceptance criteria in `opts.targets`, it asks the model for a verdict,
 * publishes a CritiqueItem to every joined environment and, on a "fail"
 * verdict, sends the agent a corrective message — at most
 * `maxEmissionsPerAgent` times per agent.
 */
var CriticOpenAI = class extends BaroParticipant {
  opts;
  model;
  runner = new OpenAIInferenceRunner();
  /** agentId -> number of corrective messages already sent. */
  emissions = /* @__PURE__ */ new Map();
  /** agentId -> number of results accepted for evaluation so far. */
  turnCount = /* @__PURE__ */ new Map();
  /** Evaluations that have been started and have not settled yet. */
  pending = /* @__PURE__ */ new Set();

  /**
   * @param opts.targets              Map of agentId -> acceptance criteria list.
   * @param opts.model                Model name; defaults to "gpt-5.4-mini".
   * @param opts.maxEmissionsPerAgent Corrective-message cap; defaults to 2.
   * @throws {Error} when the model name is not known to `pickModel`.
   */
  constructor(opts) {
    super();
    this.opts = {
      maxEmissionsPerAgent: opts.maxEmissionsPerAgent ?? 2,
      model: opts.model ?? "gpt-5.4-mini",
      targets: opts.targets
    };
    this.model = pickModel(this.opts.model);
  }

  /** Resolves once every in-flight evaluation has emitted its CritiqueItem. */
  async idle() {
    await Promise.allSettled([...this.pending]);
  }

  /**
   * Bus hook. Starts an evaluation in the background and returns without
   * waiting for it; use `idle()` to wait for completion.
   */
  async onExternalBusEvent(_source, event) {
    if (!(event instanceof ClaudeResultItem)) return;
    if (event.isError || !event.resultText) return;
    const criteria = this.opts.targets.get(event.agentId);
    if (!criteria || criteria.length === 0) return;
    // The turn number is assigned synchronously, so it reflects arrival order.
    const turn = (this.turnCount.get(event.agentId) ?? 0) + 1;
    this.turnCount.set(event.agentId, turn);
    const work = (async () => {
      const { verdict, reasoning, violatedCriteria } = await this.evaluate(
        event.resultText,
        criteria
      );
      const critiqueItem = new CritiqueItem(
        event.agentId,
        verdict,
        reasoning,
        violatedCriteria,
        turn,
        this.opts.model
      );
      for (const env of this.getEnvironments()) {
        env.deliverBusEvent(this, critiqueItem);
      }
      if (verdict === "fail") {
        const emitted = this.emissions.get(event.agentId) ?? 0;
        if (emitted < this.opts.maxEmissionsPerAgent) {
          this.emissions.set(event.agentId, emitted + 1);
          const text = buildCorrectiveMessage(reasoning, violatedCriteria);
          const msg = new AgentTargetedMessageItem(event.agentId, text, {
            criticTurn: turn,
            emissionIndex: emitted + 1
          });
          for (const env of this.getEnvironments()) {
            env.deliverBusEvent(this, msg);
          }
        }
      }
    })();
    this.pending.add(work);
    // `work` is deliberately not awaited. Without a rejection handler, the
    // promise returned by `finally` would reject unobserved when delivery or
    // item construction throws, so the failure is reported here instead.
    work.catch((err) => {
      process.stderr.write(
        `[critic-openai] evaluation for ${event.agentId} failed: ${err?.message ?? String(err)}\n`
      );
    }).finally(() => this.pending.delete(work));
  }

  /**
   * One-shot OpenAI inference call. Builds a ModelContext with the
   * verdict system prompt + the eval prompt, runs the inference, and
   * parses the JSON verdict the model returned. Same prompt and same
   * JSON shape as the Claude version so behaviour stays comparable
   * for benchmarking.
   *
   * Never rejects: any inference or parse error is reported as a "fail"
   * verdict whose reasoning carries the error message.
   *
   * @returns {Promise<{verdict: "pass" | "fail", reasoning: string, violatedCriteria: Array}>}
   */
  async evaluate(resultText, criteria) {
    const userPrompt = buildEvalPrompt(criteria, resultText);
    const context = ModelContext.create("critic").addContextItem(SystemMessageItem.create(VERDICT_SYSTEM_PROMPT)).addContextItem(UserMessageItem.create(userPrompt));
    try {
      let assistantText = "";
      for await (const item of this.runner.run(context, this.model)) {
        if (item.type === "message" && item.role === "assistant") {
          const json = item.toJSON();
          assistantText += json.content?.[0]?.text ?? "";
        }
      }
      if (!assistantText.trim()) {
        throw new Error("OpenAI returned empty assistant text");
      }
      const verdictJson = extractVerdictJson(assistantText);
      const parsed = JSON.parse(verdictJson);
      return {
        // Anything other than an explicit "pass" counts as a failure.
        verdict: parsed.verdict === "pass" ? "pass" : "fail",
        reasoning: parsed.reasoning ?? "",
        violatedCriteria: Array.isArray(parsed.violated_criteria) ? parsed.violated_criteria : []
      };
    } catch (err) {
      return {
        verdict: "fail",
        reasoning: `Critic (OpenAI) LLM call failed: ${String(err?.message ?? err)}`,
        violatedCriteria: ["[critic-openai error \u2014 could not evaluate]"]
      };
    }
  }
};
|
|
9996
|
+
|
|
9536
9997
|
// ../baro-orchestrator/src/participants/finalizer.ts
|
|
9537
9998
|
import { execFile as execFile3 } from "child_process";
|
|
9538
9999
|
import { promisify as promisify3 } from "util";
|
|
@@ -10482,13 +10943,7 @@ var Surgeon = class extends BaroParticipant {
|
|
|
10482
10943
|
* skipping (if their only dep is now gone, they become unreachable).
|
|
10483
10944
|
*/
|
|
10484
10945
|
evaluateDeterministic(failure) {
|
|
10485
|
-
return
|
|
10486
|
-
"surgeon",
|
|
10487
|
-
`deterministic skip: ${failure.storyId} exhausted ${failure.attempts} attempts (${failure.error ?? "no reason"})`,
|
|
10488
|
-
[],
|
|
10489
|
-
[failure.storyId],
|
|
10490
|
-
/* @__PURE__ */ new Map()
|
|
10491
|
-
);
|
|
10946
|
+
return surgeonDeterministicReplan(failure);
|
|
10492
10947
|
}
|
|
10493
10948
|
/**
|
|
10494
10949
|
* LLM strategy: ask Claude (via CLI subprocess) to propose a replan
|
|
@@ -10593,6 +11048,118 @@ function extractJsonObject(text) {
|
|
|
10593
11048
|
}
|
|
10594
11049
|
throw new Error("unbalanced JSON object in surgeon response");
|
|
10595
11050
|
}
|
|
11051
|
+
/**
 * Builds the deterministic replan for a failed story: nothing is added, the
 * failed story is removed, and no dependencies are rewritten.
 *
 * @param failure Object with `storyId`, `attempts` and an optional `error`.
 * @returns {ReplanItem} Replan attributed to "surgeon".
 */
function surgeonDeterministicReplan(failure) {
  const { storyId, attempts, error } = failure;
  const why = error ?? "no reason";
  const reason = `deterministic skip: ${storyId} exhausted ${attempts} attempts (${why})`;
  const added = [];
  const removed = [storyId];
  const depChanges = /* @__PURE__ */ new Map();
  return new ReplanItem("surgeon", reason, added, removed, depChanges);
}
|
|
11060
|
+
|
|
11061
|
+
// ../baro-orchestrator/src/participants/surgeon-openai.ts
|
|
11062
|
+
/**
 * Resolves a model name to a fresh model descriptor for the OpenAI surgeon.
 *
 * @param name One of "gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.4-nano".
 * @throws {Error} when the name is not one of the supported models.
 */
function pickModel2(name) {
  // Factories are thunks so a model class is only touched when it is selected.
  const factories = new Map([
    ["gpt-5.5", () => new Gpt55()],
    ["gpt-5.4", () => new Gpt54()],
    ["gpt-5.4-mini", () => new Gpt54Mini()],
    ["gpt-5.4-nano", () => new Gpt54Nano()]
  ]);
  const build = factories.get(name);
  if (build === undefined) {
    throw new Error(
      `SurgeonOpenAI: unknown model "${name}" \u2014 Mozaik 3.9 ships gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.4-nano`
    );
  }
  return build();
}
|
|
11078
|
+
/**
 * Surgeon participant backed by an OpenAI model.
 *
 * On every failed StoryResultItem it asks the model for a structured replan
 * and delivers the resulting ReplanItem to every joined environment, up to
 * `maxReplans` times over the lifetime of the instance.
 */
var SurgeonOpenAI = class extends BaroParticipant {
  opts;
  model;
  runner = new OpenAIInferenceRunner();
  /** Number of ReplanItems delivered so far. */
  replansEmitted = 0;
  /** Evaluations that have been started and have not settled yet. */
  pending = /* @__PURE__ */ new Set();

  /**
   * @param opts.snapshot   Function returning the current plan snapshot.
   * @param opts.model      Model name; defaults to "gpt-5.4".
   * @param opts.maxReplans Cap on emitted replans; defaults to 10.
   * @throws {Error} when the model name is not known to `pickModel2`.
   */
  constructor(opts) {
    super();
    this.opts = {
      maxReplans: opts.maxReplans ?? 10,
      model: opts.model ?? "gpt-5.4",
      snapshot: opts.snapshot
    };
    this.model = pickModel2(this.opts.model);
  }

  /** Resolves once every in-flight evaluation has settled. */
  async idle() {
    await Promise.allSettled([...this.pending]);
  }

  /**
   * Bus hook. Evaluates a failed story and waits for the replan to be
   * delivered. Errors thrown while delivering propagate to the caller.
   */
  async onExternalBusEvent(_source, event) {
    if (!(event instanceof StoryResultItem)) return;
    if (event.success) return;
    if (this.replansEmitted >= this.opts.maxReplans) return;
    const work = (async () => {
      const replan = await this.evaluate(event);
      if (!replan) return;
      // Re-check after the await: another failure handled concurrently may
      // have consumed the last slot while this one waited on the model.
      if (this.replansEmitted >= this.opts.maxReplans) return;
      this.replansEmitted += 1;
      for (const env of this.getEnvironments()) {
        env.deliverBusEvent(this, replan);
      }
    })();
    this.pending.add(work);
    // try/finally keeps the bookkeeping on the awaited promise itself, so a
    // rejection is reported once (to the caller) and no derived promise is
    // left to reject unobserved.
    try {
      await work;
    } finally {
      this.pending.delete(work);
    }
  }

  /**
   * One-shot OpenAI inference call asking the model for a structured
   * replan. Returns `null` on the "abort" action (no ReplanItem
   * emitted, run ends). Returns a deterministic-skip `ReplanItem` on
   * any inference or JSON-parse error so the run still has a chance
   * to recover.
   */
  async evaluate(failure) {
    const snap = this.opts.snapshot();
    const userPrompt = buildSurgeonPrompt(snap, failure);
    const context = ModelContext.create("surgeon").addContextItem(SystemMessageItem.create(SURGEON_SYSTEM_PROMPT)).addContextItem(UserMessageItem.create(userPrompt));
    try {
      let assistantText = "";
      for await (const item of this.runner.run(context, this.model)) {
        if (item.type === "message" && item.role === "assistant") {
          const json = item.toJSON();
          assistantText += json.content?.[0]?.text ?? "";
        }
      }
      if (!assistantText.trim()) {
        throw new Error("OpenAI returned empty assistant text");
      }
      const verdictJson = extractJsonObject(assistantText);
      const parsed = JSON.parse(verdictJson);
      if (parsed.action === "abort") return null;
      // Only well-formed { id: string, newDependsOn: array } entries are kept.
      const modifiedDeps = /* @__PURE__ */ new Map();
      for (const m of parsed.modifiedDeps ?? []) {
        if (typeof m.id === "string" && Array.isArray(m.newDependsOn)) {
          modifiedDeps.set(m.id, [...m.newDependsOn]);
        }
      }
      return new ReplanItem(
        "surgeon",
        `${parsed.action}: ${parsed.reason ?? ""}`,
        parsed.added ?? [],
        parsed.removed ?? [],
        modifiedDeps
      );
    } catch (err) {
      // Fall back to the deterministic skip, annotating the reason with the error.
      const fallback = surgeonDeterministicReplan(failure);
      return new ReplanItem(
        fallback.source,
        `${fallback.reason} (openai-llm fallback after error: ${err?.message ?? String(err)})`,
        fallback.addedStories,
        fallback.removedStoryIds,
        fallback.modifiedDeps
      );
    }
  }
};
|
|
10596
11163
|
|
|
10597
11164
|
// ../baro-orchestrator/src/tui-protocol.ts
|
|
10598
11165
|
function emit(event) {
|
|
@@ -10604,6 +11171,12 @@ function emit(event) {
|
|
|
10604
11171
|
async function orchestrate(config) {
|
|
10605
11172
|
const env = new BaroEnvironment();
|
|
10606
11173
|
const emitTui = config.emitTuiEvents ?? true;
|
|
11174
|
+
const llm = config.llm ?? "claude";
|
|
11175
|
+
if (llm === "openai") {
|
|
11176
|
+
process.stderr.write(
|
|
11177
|
+
"[orchestrate] llm=openai: Critic + Surgeon route to Mozaik OpenAI; Architect, Planner, StoryAgent still on Claude CLI (per-phase ports in 0.31+).\n"
|
|
11178
|
+
);
|
|
11179
|
+
}
|
|
10607
11180
|
if (config.auditLogPath) {
|
|
10608
11181
|
mkdirSync2(dirname2(config.auditLogPath), { recursive: true });
|
|
10609
11182
|
new Auditor({ path: config.auditLogPath }).join(env);
|
|
@@ -10628,21 +11201,25 @@ async function orchestrate(config) {
|
|
|
10628
11201
|
if (sentry) sentry.join(env);
|
|
10629
11202
|
let surgeon = null;
|
|
10630
11203
|
if (config.withSurgeon) {
|
|
10631
|
-
|
|
10632
|
-
|
|
10633
|
-
|
|
10634
|
-
|
|
10635
|
-
|
|
10636
|
-
|
|
10637
|
-
|
|
10638
|
-
|
|
10639
|
-
|
|
10640
|
-
|
|
10641
|
-
|
|
10642
|
-
|
|
10643
|
-
|
|
10644
|
-
|
|
10645
|
-
|
|
11204
|
+
const snapshot = () => {
|
|
11205
|
+
const current = loadPrd(config.prdPath);
|
|
11206
|
+
return {
|
|
11207
|
+
project: current.project,
|
|
11208
|
+
description: current.description,
|
|
11209
|
+
stories: current.userStories.map((s) => ({
|
|
11210
|
+
id: s.id,
|
|
11211
|
+
title: s.title,
|
|
11212
|
+
description: s.description,
|
|
11213
|
+
dependsOn: s.dependsOn,
|
|
11214
|
+
passes: s.passes
|
|
11215
|
+
}))
|
|
11216
|
+
};
|
|
11217
|
+
};
|
|
11218
|
+
surgeon = llm === "openai" ? new SurgeonOpenAI({
|
|
11219
|
+
snapshot,
|
|
11220
|
+
model: config.surgeonModel ?? "gpt-5.4"
|
|
11221
|
+
}) : new Surgeon({
|
|
11222
|
+
snapshot,
|
|
10646
11223
|
useLlm: config.surgeonUseLlm ?? false,
|
|
10647
11224
|
model: config.surgeonModel ?? "opus"
|
|
10648
11225
|
});
|
|
@@ -10654,7 +11231,10 @@ async function orchestrate(config) {
|
|
|
10654
11231
|
const targets = new Map(
|
|
10655
11232
|
prd.userStories.filter((s) => s.acceptance && s.acceptance.length > 0).map((s) => [s.id, s.acceptance])
|
|
10656
11233
|
);
|
|
10657
|
-
critic = new
|
|
11234
|
+
critic = llm === "openai" ? new CriticOpenAI({
|
|
11235
|
+
targets,
|
|
11236
|
+
model: config.criticModel ?? "gpt-5.4-mini"
|
|
11237
|
+
}) : new Critic({
|
|
10658
11238
|
targets,
|
|
10659
11239
|
model: config.criticModel ?? "haiku"
|
|
10660
11240
|
});
|
|
@@ -10958,6 +11538,7 @@ function parseArgs(argv) {
|
|
|
10958
11538
|
noSentry: false,
|
|
10959
11539
|
withSurgeon: false,
|
|
10960
11540
|
surgeonUseLlm: false,
|
|
11541
|
+
llm: "claude",
|
|
10961
11542
|
help: false
|
|
10962
11543
|
};
|
|
10963
11544
|
for (let i = 0; i < argv.length; i++) {
|
|
@@ -11018,6 +11599,16 @@ function parseArgs(argv) {
|
|
|
11018
11599
|
10
|
|
11019
11600
|
);
|
|
11020
11601
|
break;
|
|
11602
|
+
case "--llm": {
|
|
11603
|
+
const v = required(argv, ++i, "--llm");
|
|
11604
|
+
if (v !== "claude" && v !== "openai") {
|
|
11605
|
+
process.stderr.write(`[cli] --llm must be 'claude' or 'openai', got '${v}'
|
|
11606
|
+
`);
|
|
11607
|
+
process.exit(2);
|
|
11608
|
+
}
|
|
11609
|
+
args.llm = v;
|
|
11610
|
+
break;
|
|
11611
|
+
}
|
|
11021
11612
|
default:
|
|
11022
11613
|
process.stderr.write(`[cli] unknown flag: ${a}
|
|
11023
11614
|
`);
|
|
@@ -11095,10 +11686,16 @@ async function main() {
|
|
|
11095
11686
|
withSurgeon: args.withSurgeon,
|
|
11096
11687
|
surgeonUseLlm: args.surgeonUseLlm,
|
|
11097
11688
|
surgeonModel: args.surgeonModel,
|
|
11098
|
-
intraLevelDelaySecs: args.intraLevelDelaySecs
|
|
11689
|
+
intraLevelDelaySecs: args.intraLevelDelaySecs,
|
|
11690
|
+
llm: args.llm
|
|
11099
11691
|
};
|
|
11692
|
+
if (args.llm === "openai" && !process.env.OPENAI_API_KEY) {
|
|
11693
|
+
process.stderr.write(
|
|
11694
|
+
"[cli] WARNING: --llm openai requested but OPENAI_API_KEY is not set.\n[cli] The current build falls through to Claude behaviour;\n[cli] set OPENAI_API_KEY before phase 3+ OpenAI siblings ship.\n"
|
|
11695
|
+
);
|
|
11696
|
+
}
|
|
11100
11697
|
process.stderr.write(
|
|
11101
|
-
`[cli] starting orchestrator: prd=${prdPath} cwd=${cwd} parallel=${args.parallel} timeout=${args.timeout}s
|
|
11698
|
+
`[cli] starting orchestrator: prd=${prdPath} cwd=${cwd} parallel=${args.parallel} timeout=${args.timeout}s llm=${args.llm}
|
|
11102
11699
|
`
|
|
11103
11700
|
);
|
|
11104
11701
|
const startedAt = Date.now();
|