baro-ai 0.28.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -7160,6 +7160,46 @@ OpenAI.Skills = Skills;
7160
7160
  OpenAI.Videos = Videos;
7161
7161
 
7162
7162
  // ../../node_modules/@mozaik-ai/core/dist/index.mjs
7163
+ var ModelContext = class _ModelContext {
7164
+ constructor(id, projectId, items) {
7165
+ this.projectId = projectId;
7166
+ this.id = id;
7167
+ this.items = items;
7168
+ }
7169
+ addContextItem(item) {
7170
+ this.items.push(item);
7171
+ return this;
7172
+ }
7173
+ applyModelOutput(items) {
7174
+ for (const item of items) {
7175
+ const itemType = item.getType();
7176
+ if (itemType !== "function_call" && itemType !== "message" && itemType !== "reasoning") {
7177
+ throw new Error(`Invalid item type: ${itemType}`);
7178
+ }
7179
+ }
7180
+ this.items.push(...items);
7181
+ return this;
7182
+ }
7183
+ getItems() {
7184
+ return this.items;
7185
+ }
7186
+ getLastItem() {
7187
+ if (this.items.length === 0) {
7188
+ throw new Error("No items in context");
7189
+ }
7190
+ return this.items[this.items.length - 1];
7191
+ }
7192
+ static create(projectId) {
7193
+ const id = crypto.randomUUID();
7194
+ return new _ModelContext(id, projectId, []);
7195
+ }
7196
+ static rehydrate(data) {
7197
+ return new _ModelContext(data.id, data.projectId, data.items);
7198
+ }
7199
+ toJSON() {
7200
+ return this.items.map((item) => item.toJSON());
7201
+ }
7202
+ };
7163
7203
  var ContextItem = class {
7164
7204
  getType() {
7165
7205
  return this.type;
@@ -7188,6 +7228,29 @@ var InputText = class _InputText extends ItemContent {
7188
7228
  ];
7189
7229
  }
7190
7230
  };
7231
/**
 * Context item for a message with role "user".
 * `content` is expected to expose `toJSON()`; the factories build it via InputText.
 */
var UserMessageItem = class _UserMessageItem extends ContextItem {
  /** @param {*} content - Message content object (serialised through its `toJSON()`). */
  constructor(content) {
    super();
    Object.assign(this, { type: "message", role: "user", content });
  }

  /** Builds a user message whose content is `InputText.create(text)`. */
  static create(text) {
    return new _UserMessageItem(InputText.create(text));
  }

  /** Builds a user message whose content is `InputText.rehydrate(data)`. */
  static rehydrate(data) {
    return new _UserMessageItem(InputText.rehydrate(data));
  }

  /** @returns {{type: string, role: string, content: *}} Plain serialisable form. */
  toJSON() {
    const { type, role, content } = this;
    return { type, role, content: content.toJSON() };
  }
};
7191
7254
  var OutputText = class _OutputText extends ItemContent {
7192
7255
  constructor(text) {
7193
7256
  super();
@@ -7245,6 +7308,27 @@ var FunctionCallItem = class _FunctionCallItem extends ContextItem {
7245
7308
  };
7246
7309
  }
7247
7310
  };
7311
/**
 * Context item of type "reasoning".
 *
 * Instances are built either from typed parts (objects exposing `toJSON()`)
 * or, via `rehydrate`, from a raw provider record whose `summary`/`content`
 * entries are plain objects. Serialisation therefore accepts both shapes.
 */
var ReasoningItem = class _ReasoningItem extends ContextItem {
  /**
   * @param {*} content - Optional reasoning content (single part or array of parts).
   * @param {string|undefined} encryptedContent - Optional opaque encrypted payload.
   * @param {Array} [summary=[]] - Summary parts.
   */
  constructor(content, encryptedContent, summary = []) {
    super();
    this.type = "reasoning";
    this.content = content;
    this.encryptedContent = encryptedContent;
    this.summary = summary;
  }

  /**
   * Rebuilds an item from stored data or from a raw response record.
   * Accepts both the camelCase `encryptedContent` key and the snake_case
   * `encrypted_content` key so the payload is not lost when the raw record
   * is passed in directly; a null `summary` falls back to the empty default.
   * @param {object} data
   * @returns {ReasoningItem}
   */
  static rehydrate(data) {
    return new _ReasoningItem(
      data.content,
      data.encryptedContent ?? data.encrypted_content,
      data.summary ?? void 0
    );
  }

  /**
   * @returns {{type: string, content: *, encryptedContent: *, summary: Array}}
   *   Plain form. Parts exposing `toJSON()` are serialised through it; plain
   *   parts are passed through unchanged instead of throwing.
   */
  toJSON() {
    const serialize = (part) => {
      if (part == null) return void 0;
      return typeof part.toJSON === "function" ? part.toJSON() : part;
    };
    const content = Array.isArray(this.content) ? this.content.map(serialize) : serialize(this.content);
    return {
      type: this.type,
      content,
      encryptedContent: this.encryptedContent,
      summary: (this.summary ?? []).map(serialize)
    };
  }
};
7248
7332
  var FunctionCallOutputItem = class _FunctionCallOutputItem extends ContextItem {
7249
7333
  constructor(callId, output) {
7250
7334
  super();
@@ -7267,6 +7351,225 @@ var FunctionCallOutputItem = class _FunctionCallOutputItem extends ContextItem {
7267
7351
  };
7268
7352
  }
7269
7353
  };
7354
/** Result of one inference call: the produced context items plus token accounting. */
var InferenceResponse = class {
  /**
   * @param {Array} contextItems - Items extracted from the model output.
   * @param {*} tokenUsage - Token usage record, or undefined when none was reported.
   */
  constructor(contextItems, tokenUsage) {
    Object.assign(this, { contextItems, tokenUsage });
  }
};
7360
/** Breakdown of input tokens; currently only the cached-token count. */
var InputTokenDetails = class {
  /** @param {*} cached_tokens - Value stored verbatim on the instance. */
  constructor(cached_tokens) {
    Object.assign(this, { cached_tokens });
  }
};
7365
/** Breakdown of output tokens; currently only the reasoning-token count. */
var OutputTokenDetails = class {
  /** @param {*} reasoning_tokens - Value stored verbatim on the instance. */
  constructor(reasoning_tokens) {
    Object.assign(this, { reasoning_tokens });
  }
};
7370
/** Token accounting for one inference call. All arguments are stored verbatim. */
var TokenUsage = class {
  /**
   * @param {*} inputTokens
   * @param {*} outputTokens
   * @param {*} totalTokens
   * @param {*} inputTokenDetails - Detail record for the input side.
   * @param {*} outputTokenDetails - Detail record for the output side.
   */
  constructor(inputTokens, outputTokens, totalTokens, inputTokenDetails, outputTokenDetails) {
    Object.assign(this, {
      inputTokens,
      outputTokens,
      totalTokens,
      inputTokenDetails,
      outputTokenDetails
    });
  }
};
7379
/**
 * Thin adapter that turns an inference request into a `responses.create`
 * call on the OpenAI client and maps the reply back to context items.
 */
var OpenAIResponses = class {
  constructor() {
    this.client = new OpenAI();
  }

  /**
   * Runs one inference call.
   * @param {{model: object, context: object}} inferenceRequest - `model` exposes
   *   `specification`, `getTools()` and `getReasoningEffort()`; `context` exposes `getItems()`.
   * @returns {Promise<InferenceResponse>}
   */
  async infer(inferenceRequest) {
    const { model, context } = inferenceRequest;
    const input = this.mapContextToRequest(context);
    const specification = model.specification;
    const request = {
      model: specification.name,
      input
    };
    if (specification.supportFunctionCalling) {
      // Read the tool list once; only attach `tools` when there is something to send.
      const tools = model.getTools();
      if (tools.length > 0) {
        request.tools = tools.map(({ type, name, description, parameters }) => ({
          type,
          name,
          description,
          parameters
        }));
      }
    }
    if (specification.supportReasoningEffort) {
      request.reasoning = { effort: model.getReasoningEffort() };
    }
    const response = await this.client.responses.create(request);
    const contextItems = this.extractContextItems(response);
    const tokenUsage = this.extractTokenUsage(response);
    return new InferenceResponse(contextItems, tokenUsage);
  }

  /**
   * @param {object} response - Raw response record.
   * @returns {TokenUsage|undefined} Usage record, or undefined when the
   *   response carries no `usage`. Missing detail sub-objects yield
   *   undefined counts instead of throwing.
   */
  extractTokenUsage(response) {
    const usage = response.usage;
    if (!usage) {
      return void 0;
    }
    return new TokenUsage(
      usage.input_tokens,
      usage.output_tokens,
      usage.total_tokens,
      new InputTokenDetails(usage.input_tokens_details?.cached_tokens),
      new OutputTokenDetails(usage.output_tokens_details?.reasoning_tokens)
    );
  }

  /** @returns {Array} The `toJSON()` form of every item in the context. */
  mapContextToRequest(context) {
    return context.getItems().map((item) => item.toJSON());
  }

  /**
   * Maps raw output records to context items.
   * Only assistant messages, function calls and reasoning records are
   * recognised; any other record is skipped so callers never receive
   * `undefined` entries.
   * @param {object} response - Raw response record with an `output` array.
   * @returns {Array}
   */
  extractContextItems(response) {
    const items = [];
    for (const item of response.output ?? []) {
      if (item.type === "message" && item.role === "assistant") {
        // Only the first content part of the message is rehydrated.
        items.push(ModelMessageItem.rehydrate(item.content[0]));
      } else if (item.type === "function_call") {
        items.push(
          FunctionCallItem.rehydrate({
            callId: item.call_id,
            name: item.name,
            args: item.arguments
          })
        );
      } else if (item.type === "reasoning") {
        items.push(ReasoningItem.rehydrate(item));
      }
    }
    return items;
  }
};
7443
/** Pairs the model to call with the context to send to it. */
var InferenceRequest = class {
  /**
   * @param {*} model - Model descriptor used for the call.
   * @param {*} context - Context whose items form the request input.
   */
  constructor(model, context) {
    Object.assign(this, { model, context });
  }
};
7449
/** Mutable holder for a model's reasoning-effort setting. */
var OpenAIReasoningEffort = class {
  /** @param {*} reasoningEffort - Initial effort value. */
  constructor(reasoningEffort) {
    this.reasoningEffort = reasoningEffort;
  }

  /** Replaces the stored effort value. */
  setReasoningEffort(effort) {
    this.reasoningEffort = effort;
  }

  /**
   * @returns {*} The stored effort value.
   * @throws {Error} When the stored value is falsy (unset, empty string, ...).
   */
  getReasoningEffort() {
    const current = this.reasoningEffort;
    if (current) {
      return current;
    }
    throw new Error("Reasoning effort not supported");
  }
};
7463
/**
 * Model descriptor for "gpt-5.4-nano": a static capability specification,
 * a replaceable tool list and a reasoning-effort holder seeded with the
 * specification's default effort.
 */
var Gpt54Nano = class {
  constructor() {
    const defaultReasoningEffort = "none";
    this.specification = {
      name: "gpt-5.4-nano",
      supportReasoningEffort: true,
      defaultReasoningEffort,
      supportStreaming: true,
      contextWindowSize: 400000,
      maxOutputTokens: 128000,
      supportFunctionCalling: true
    };
    this.tools = [];
    this.effort = new OpenAIReasoningEffort(defaultReasoningEffort);
  }

  /** @returns {Array} The currently configured tools. */
  getTools() {
    return this.tools;
  }

  /** Replaces the configured tool list. */
  setTools(tools) {
    this.tools = tools;
  }

  /** @returns {*} The effort value reported by the effort holder. */
  getReasoningEffort() {
    return this.effort.getReasoningEffort();
  }

  /** Forwards the new effort value to the effort holder. */
  setReasoningEffort(effort) {
    this.effort.setReasoningEffort(effort);
  }
};
7492
/**
 * Model descriptor for "gpt-5.4": a static capability specification,
 * a replaceable tool list and a reasoning-effort holder seeded with the
 * specification's default effort.
 */
var Gpt54 = class {
  constructor() {
    const defaultReasoningEffort = "none";
    this.specification = {
      name: "gpt-5.4",
      supportReasoningEffort: true,
      defaultReasoningEffort,
      supportStreaming: true,
      contextWindowSize: 1050000,
      maxOutputTokens: 128000,
      supportFunctionCalling: true
    };
    this.tools = [];
    this.effort = new OpenAIReasoningEffort(defaultReasoningEffort);
  }

  /** @returns {Array} The currently configured tools. */
  getTools() {
    return this.tools;
  }

  /** Replaces the configured tool list. */
  setTools(tools) {
    this.tools = tools;
  }

  /** @returns {*} The effort value reported by the effort holder. */
  getReasoningEffort() {
    return this.effort.getReasoningEffort();
  }

  /** Forwards the new effort value to the effort holder. */
  setReasoningEffort(effort) {
    this.effort.setReasoningEffort(effort);
  }
};
7521
/**
 * Model descriptor for "gpt-5.4-mini": a static capability specification,
 * a replaceable tool list and a reasoning-effort holder seeded with the
 * specification's default effort.
 */
var Gpt54Mini = class {
  constructor() {
    const defaultReasoningEffort = "none";
    this.specification = {
      name: "gpt-5.4-mini",
      supportReasoningEffort: true,
      defaultReasoningEffort,
      supportStreaming: true,
      contextWindowSize: 400000,
      maxOutputTokens: 128000,
      supportFunctionCalling: true
    };
    this.tools = [];
    this.effort = new OpenAIReasoningEffort(defaultReasoningEffort);
  }

  /** @returns {*} The effort value reported by the effort holder. */
  getReasoningEffort() {
    return this.effort.getReasoningEffort();
  }

  /** Forwards the new effort value to the effort holder. */
  setReasoningEffort(effort) {
    this.effort.setReasoningEffort(effort);
  }

  /** @returns {Array} The currently configured tools. */
  getTools() {
    return this.tools;
  }

  /** Replaces the configured tool list. */
  setTools(tools) {
    this.tools = tools;
  }
};
7550
/**
 * Context item for a message with role "system".
 * `content` is expected to expose `toJSON()`; the factories build it via InputText.
 */
var SystemMessageItem = class _SystemMessageItem extends ContextItem {
  /** @param {*} content - Message content object (serialised through its `toJSON()`). */
  constructor(content) {
    super();
    Object.assign(this, { type: "message", role: "system", content });
  }

  /** Builds a system message whose content is `InputText.create(text)`. */
  static create(text) {
    return new _SystemMessageItem(InputText.create(text));
  }

  /** Builds a system message whose content is `InputText.rehydrate(data)`. */
  static rehydrate(data) {
    return new _SystemMessageItem(InputText.rehydrate(data));
  }

  /** @returns {{type: string, role: string, content: *}} Plain serialisable form. */
  toJSON() {
    const { type, role, content } = this;
    return { type, role, content: content.toJSON() };
  }
};
7270
7573
  var Participant = class {
7271
7574
  constructor() {
7272
7575
  this.environments = [];
@@ -7369,6 +7672,46 @@ var AgenticEnvironment = class {
7369
7672
  this.isActive = false;
7370
7673
  }
7371
7674
  };
7675
/**
 * Exposes a single non-streaming inference call as an async generator of
 * context items.
 */
var OpenAIInferenceRunner = class {
  constructor() {
    this.runtime = new OpenAIResponses();
  }

  /**
   * Performs one inference call and yields each resulting item in order.
   * @param {*} context - Context passed to the inference request.
   * @param {*} model - Model passed to the inference request.
   * @param {*} signal - Accepted for interface compatibility; not used by this implementation.
   * @yields {*} Each entry of the response's `contextItems`.
   */
  async *run(context, model, signal) {
    const request = new InferenceRequest(model, context);
    const { contextItems } = await this.runtime.infer(request);
    for (const produced of contextItems) {
      yield produced;
    }
  }
};
7686
/**
 * Model descriptor for "gpt-5.5": a static capability specification,
 * a replaceable tool list and a reasoning-effort holder seeded with the
 * specification's default effort.
 */
var Gpt55 = class {
  constructor() {
    const defaultReasoningEffort = "none";
    this.specification = {
      name: "gpt-5.5",
      supportReasoningEffort: true,
      defaultReasoningEffort,
      supportStreaming: true,
      contextWindowSize: 1050000,
      maxOutputTokens: 128000,
      supportFunctionCalling: true
    };
    this.tools = [];
    this.effort = new OpenAIReasoningEffort(defaultReasoningEffort);
  }

  /** @returns {Array} The currently configured tools. */
  getTools() {
    return this.tools;
  }

  /** Replaces the configured tool list. */
  setTools(tools) {
    this.tools = tools;
  }

  /** @returns {*} The effort value reported by the effort holder. */
  getReasoningEffort() {
    return this.effort.getReasoningEffort();
  }

  /** Forwards the new effort value to the effort holder. */
  setReasoningEffort(effort) {
    this.effort.setReasoningEffort(effort);
  }
};
7372
7715
 
7373
7716
  // ../baro-orchestrator/src/bus.ts
7374
7717
  var BusEvent = class {
@@ -9533,6 +9876,124 @@ function extractVerdictJson(text) {
9533
9876
  throw new Error(`unbalanced JSON object in critic response: ${trimmed.slice(0, 200)}`);
9534
9877
  }
9535
9878
 
9879
+ // ../baro-orchestrator/src/participants/critic-openai.ts
9880
/**
 * Resolves a model name to a fresh model descriptor for the critic.
 * @param {string} name - One of "gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.4-nano".
 * @returns {object} A new descriptor instance.
 * @throws {Error} When the name is not one of the supported models.
 */
function pickModel(name) {
  // Factories are lazy so only the requested descriptor is constructed.
  const factories = new Map([
    ["gpt-5.5", () => new Gpt55()],
    ["gpt-5.4", () => new Gpt54()],
    ["gpt-5.4-mini", () => new Gpt54Mini()],
    ["gpt-5.4-nano", () => new Gpt54Nano()]
  ]);
  const build = factories.get(name);
  if (build === void 0) {
    throw new Error(
      `CriticOpenAI: unknown model "${name}" \u2014 Mozaik 3.9 ships gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.4-nano`
    );
  }
  return build();
}
9896
/**
 * Critic participant backed by an OpenAI model.
 *
 * For every successful ClaudeResultItem whose agent has acceptance criteria
 * in `opts.targets`, it asks the model for a verdict, broadcasts a
 * CritiqueItem to every environment, and on a "fail" verdict additionally
 * sends a corrective AgentTargetedMessageItem, at most
 * `opts.maxEmissionsPerAgent` times per agent.
 */
var CriticOpenAI = class extends BaroParticipant {
  opts;
  model;
  runner = new OpenAIInferenceRunner();
  /** agentId -> number of corrective messages already emitted. */
  emissions = new Map();
  /** agentId -> number of results evaluated so far. */
  turnCount = new Map();
  /** In-flight evaluation promises; drained by `idle()`. */
  pending = new Set();

  /**
   * @param {{targets: Map, model?: string, maxEmissionsPerAgent?: number}} opts
   *   `targets` maps an agent id to its list of criteria. `model` defaults to
   *   "gpt-5.4-mini" and `maxEmissionsPerAgent` to 2.
   * @throws {Error} When `model` is not a name `pickModel` recognises.
   */
  constructor(opts) {
    super();
    this.opts = {
      maxEmissionsPerAgent: opts.maxEmissionsPerAgent ?? 2,
      model: opts.model ?? "gpt-5.4-mini",
      targets: opts.targets
    };
    this.model = pickModel(this.opts.model);
  }

  /** Resolves once every in-flight evaluation has emitted its CritiqueItem. */
  async idle() {
    await Promise.allSettled([...this.pending]);
  }

  /**
   * Bus hook. Starts an evaluation in the background and returns without
   * waiting for it; use `idle()` to wait for completion.
   * @param {*} _source - Unused.
   * @param {*} event - Only non-error ClaudeResultItem events with result text are handled.
   */
  async onExternalBusEvent(_source, event) {
    if (!(event instanceof ClaudeResultItem)) return;
    if (event.isError || !event.resultText) return;
    const criteria = this.opts.targets.get(event.agentId);
    if (!criteria || criteria.length === 0) return;
    const turn = (this.turnCount.get(event.agentId) ?? 0) + 1;
    this.turnCount.set(event.agentId, turn);
    const deliver = (item) => {
      for (const env of this.getEnvironments()) {
        env.deliverBusEvent(this, item);
      }
    };
    const work = (async () => {
      const { verdict, reasoning, violatedCriteria } = await this.evaluate(
        event.resultText,
        criteria
      );
      deliver(
        new CritiqueItem(
          event.agentId,
          verdict,
          reasoning,
          violatedCriteria,
          turn,
          this.opts.model
        )
      );
      if (verdict !== "fail") return;
      const emitted = this.emissions.get(event.agentId) ?? 0;
      if (emitted >= this.opts.maxEmissionsPerAgent) return;
      this.emissions.set(event.agentId, emitted + 1);
      const text = buildCorrectiveMessage(reasoning, violatedCriteria);
      deliver(
        new AgentTargetedMessageItem(event.agentId, text, {
          criticTurn: turn,
          emissionIndex: emitted + 1
        })
      );
    })();
    this.pending.add(work);
    // The work is fire-and-forget, so a failure while building or delivering
    // items must be handled here; otherwise the derived promise would reject
    // with no handler attached and surface as an unhandled rejection.
    work.catch((err) => {
      process.stderr.write(
        `[critic-openai] critique for agent ${event.agentId} failed: ${String(err?.message ?? err)}\n`
      );
    }).finally(() => this.pending.delete(work));
  }

  /**
   * One-shot OpenAI inference call. Builds a ModelContext with the
   * verdict system prompt + the eval prompt, runs the inference, and
   * parses the JSON verdict the model returned. Same prompt and same
   * JSON shape as the Claude version so behaviour stays comparable
   * for benchmarking.
   *
   * Never rejects: any inference or parsing error is reported as a
   * "fail" verdict whose reasoning carries the error message.
   *
   * @param {string} resultText - Agent output to judge.
   * @param {Array} criteria - Acceptance criteria for the agent.
   * @returns {Promise<{verdict: "pass"|"fail", reasoning: string, violatedCriteria: Array}>}
   */
  async evaluate(resultText, criteria) {
    const userPrompt = buildEvalPrompt(criteria, resultText);
    const context = ModelContext.create("critic")
      .addContextItem(SystemMessageItem.create(VERDICT_SYSTEM_PROMPT))
      .addContextItem(UserMessageItem.create(userPrompt));
    try {
      let assistantText = "";
      for await (const item of this.runner.run(context, this.model)) {
        if (item.type === "message" && item.role === "assistant") {
          const json = item.toJSON();
          // Only the first content part of each assistant message is read.
          assistantText += json.content?.[0]?.text ?? "";
        }
      }
      if (!assistantText.trim()) {
        throw new Error("OpenAI returned empty assistant text");
      }
      const verdictJson = extractVerdictJson(assistantText);
      const parsed = JSON.parse(verdictJson);
      return {
        // Anything other than an explicit "pass" counts as a failure.
        verdict: parsed.verdict === "pass" ? "pass" : "fail",
        reasoning: parsed.reasoning ?? "",
        violatedCriteria: Array.isArray(parsed.violated_criteria) ? parsed.violated_criteria : []
      };
    } catch (err) {
      return {
        verdict: "fail",
        reasoning: `Critic (OpenAI) LLM call failed: ${String(err?.message ?? err)}`,
        violatedCriteria: ["[critic-openai error \u2014 could not evaluate]"]
      };
    }
  }
};
9996
+
9536
9997
  // ../baro-orchestrator/src/participants/finalizer.ts
9537
9998
  import { execFile as execFile3 } from "child_process";
9538
9999
  import { promisify as promisify3 } from "util";
@@ -10482,13 +10943,7 @@ var Surgeon = class extends BaroParticipant {
10482
10943
  * skipping (if their only dep is now gone, they become unreachable).
10483
10944
  */
10484
10945
  evaluateDeterministic(failure) {
10485
- return new ReplanItem(
10486
- "surgeon",
10487
- `deterministic skip: ${failure.storyId} exhausted ${failure.attempts} attempts (${failure.error ?? "no reason"})`,
10488
- [],
10489
- [failure.storyId],
10490
- /* @__PURE__ */ new Map()
10491
- );
10946
+ return surgeonDeterministicReplan(failure);
10492
10947
  }
10493
10948
  /**
10494
10949
  * LLM strategy: ask Claude (via CLI subprocess) to propose a replan
@@ -10593,6 +11048,118 @@ function extractJsonObject(text) {
10593
11048
  }
10594
11049
  throw new Error("unbalanced JSON object in surgeon response");
10595
11050
  }
11051
/**
 * Builds the deterministic fallback replan for a failed story: nothing is
 * added, the failed story is removed, and no dependencies are modified.
 * @param {{storyId: string, attempts: *, error?: *}} failure
 * @returns {ReplanItem}
 */
function surgeonDeterministicReplan(failure) {
  const cause = failure.error ?? "no reason";
  const reason = `deterministic skip: ${failure.storyId} exhausted ${failure.attempts} attempts (${cause})`;
  const addedStories = [];
  const removedStoryIds = [failure.storyId];
  return new ReplanItem("surgeon", reason, addedStories, removedStoryIds, new Map());
}
11060
+
11061
+ // ../baro-orchestrator/src/participants/surgeon-openai.ts
11062
/**
 * Resolves a model name to a fresh model descriptor for the surgeon.
 * @param {string} name - One of "gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.4-nano".
 * @returns {object} A new descriptor instance.
 * @throws {Error} When the name is not one of the supported models.
 */
function pickModel2(name) {
  if (name === "gpt-5.5") return new Gpt55();
  if (name === "gpt-5.4") return new Gpt54();
  if (name === "gpt-5.4-mini") return new Gpt54Mini();
  if (name === "gpt-5.4-nano") return new Gpt54Nano();
  throw new Error(
    `SurgeonOpenAI: unknown model "${name}" \u2014 Mozaik 3.9 ships gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.4-nano`
  );
}
11078
/**
 * Surgeon participant backed by an OpenAI model.
 *
 * For every failed StoryResultItem it asks the model for a structured
 * replan and broadcasts the resulting ReplanItem to every environment,
 * emitting at most `opts.maxReplans` replans over its lifetime.
 */
var SurgeonOpenAI = class extends BaroParticipant {
  opts;
  model;
  runner = new OpenAIInferenceRunner();
  /** Number of ReplanItems broadcast so far. */
  replansEmitted = 0;
  /** In-flight evaluation promises; drained by `idle()`. */
  pending = new Set();

  /**
   * @param {{snapshot: Function, model?: string, maxReplans?: number}} opts
   *   `snapshot` is called on every evaluation to obtain the current plan
   *   state. `model` defaults to "gpt-5.4" and `maxReplans` to 10.
   * @throws {Error} When `model` is not a name `pickModel2` recognises.
   */
  constructor(opts) {
    super();
    this.opts = {
      maxReplans: opts.maxReplans ?? 10,
      model: opts.model ?? "gpt-5.4",
      snapshot: opts.snapshot
    };
    this.model = pickModel2(this.opts.model);
  }

  /** Resolves once every in-flight evaluation has settled. */
  async idle() {
    await Promise.allSettled([...this.pending]);
  }

  /**
   * Bus hook. Evaluates a failed story and broadcasts the replan, awaiting
   * completion; an error thrown while delivering propagates to the caller.
   * @param {*} _source - Unused.
   * @param {*} event - Only unsuccessful StoryResultItem events are handled.
   */
  async onExternalBusEvent(_source, event) {
    if (!(event instanceof StoryResultItem)) return;
    if (event.success) return;
    if (this.replansEmitted >= this.opts.maxReplans) return;
    const work = (async () => {
      const replan = await this.evaluate(event);
      if (!replan) return;
      // Re-check the budget: other failures may have been evaluated
      // concurrently and used it up while this call was awaiting the model.
      if (this.replansEmitted >= this.opts.maxReplans) return;
      this.replansEmitted += 1;
      for (const env of this.getEnvironments()) {
        env.deliverBusEvent(this, replan);
      }
    })();
    this.pending.add(work);
    // try/finally instead of a detached `work.finally(...)`: the detached
    // chain would reject with no handler when `work` fails, producing an
    // unhandled rejection in addition to the error surfaced here.
    try {
      await work;
    } finally {
      this.pending.delete(work);
    }
  }

  /**
   * One-shot OpenAI inference call asking the model for a structured
   * replan. Returns `null` on the "abort" action (no ReplanItem
   * emitted, run ends). Returns a deterministic-skip `ReplanItem` on
   * any inference or JSON-parse error so the run still has a chance
   * to recover.
   *
   * `added`, `removed` and `modifiedDeps` are used only when the model
   * returned arrays; any other shape is treated as empty.
   *
   * @param {*} failure - The failed story result.
   * @returns {Promise<ReplanItem|null>}
   */
  async evaluate(failure) {
    const snap = this.opts.snapshot();
    const userPrompt = buildSurgeonPrompt(snap, failure);
    const context = ModelContext.create("surgeon")
      .addContextItem(SystemMessageItem.create(SURGEON_SYSTEM_PROMPT))
      .addContextItem(UserMessageItem.create(userPrompt));
    const arrayOrEmpty = (value) => (Array.isArray(value) ? value : []);
    try {
      let assistantText = "";
      for await (const item of this.runner.run(context, this.model)) {
        if (item.type === "message" && item.role === "assistant") {
          const json = item.toJSON();
          // Only the first content part of each assistant message is read.
          assistantText += json.content?.[0]?.text ?? "";
        }
      }
      if (!assistantText.trim()) {
        throw new Error("OpenAI returned empty assistant text");
      }
      const verdictJson = extractJsonObject(assistantText);
      const parsed = JSON.parse(verdictJson);
      if (parsed.action === "abort") return null;
      const modifiedDeps = new Map();
      for (const m of arrayOrEmpty(parsed.modifiedDeps)) {
        if (typeof m?.id === "string" && Array.isArray(m.newDependsOn)) {
          modifiedDeps.set(m.id, [...m.newDependsOn]);
        }
      }
      return new ReplanItem(
        "surgeon",
        `${parsed.action}: ${parsed.reason ?? ""}`,
        arrayOrEmpty(parsed.added),
        arrayOrEmpty(parsed.removed),
        modifiedDeps
      );
    } catch (err) {
      const fallback = surgeonDeterministicReplan(failure);
      return new ReplanItem(
        fallback.source,
        `${fallback.reason} (openai-llm fallback after error: ${err?.message ?? String(err)})`,
        fallback.addedStories,
        fallback.removedStoryIds,
        fallback.modifiedDeps
      );
    }
  }
};
10596
11163
 
10597
11164
  // ../baro-orchestrator/src/tui-protocol.ts
10598
11165
  function emit(event) {
@@ -10604,6 +11171,12 @@ function emit(event) {
10604
11171
  async function orchestrate(config) {
10605
11172
  const env = new BaroEnvironment();
10606
11173
  const emitTui = config.emitTuiEvents ?? true;
11174
+ const llm = config.llm ?? "claude";
11175
+ if (llm === "openai") {
11176
+ process.stderr.write(
11177
+ "[orchestrate] llm=openai: Critic + Surgeon route to Mozaik OpenAI; Architect, Planner, StoryAgent still on Claude CLI (per-phase ports in 0.31+).\n"
11178
+ );
11179
+ }
10607
11180
  if (config.auditLogPath) {
10608
11181
  mkdirSync2(dirname2(config.auditLogPath), { recursive: true });
10609
11182
  new Auditor({ path: config.auditLogPath }).join(env);
@@ -10628,21 +11201,25 @@ async function orchestrate(config) {
10628
11201
  if (sentry) sentry.join(env);
10629
11202
  let surgeon = null;
10630
11203
  if (config.withSurgeon) {
10631
- surgeon = new Surgeon({
10632
- snapshot: () => {
10633
- const current = loadPrd(config.prdPath);
10634
- return {
10635
- project: current.project,
10636
- description: current.description,
10637
- stories: current.userStories.map((s) => ({
10638
- id: s.id,
10639
- title: s.title,
10640
- description: s.description,
10641
- dependsOn: s.dependsOn,
10642
- passes: s.passes
10643
- }))
10644
- };
10645
- },
11204
+ const snapshot = () => {
11205
+ const current = loadPrd(config.prdPath);
11206
+ return {
11207
+ project: current.project,
11208
+ description: current.description,
11209
+ stories: current.userStories.map((s) => ({
11210
+ id: s.id,
11211
+ title: s.title,
11212
+ description: s.description,
11213
+ dependsOn: s.dependsOn,
11214
+ passes: s.passes
11215
+ }))
11216
+ };
11217
+ };
11218
+ surgeon = llm === "openai" ? new SurgeonOpenAI({
11219
+ snapshot,
11220
+ model: config.surgeonModel ?? "gpt-5.4"
11221
+ }) : new Surgeon({
11222
+ snapshot,
10646
11223
  useLlm: config.surgeonUseLlm ?? false,
10647
11224
  model: config.surgeonModel ?? "opus"
10648
11225
  });
@@ -10654,7 +11231,10 @@ async function orchestrate(config) {
10654
11231
  const targets = new Map(
10655
11232
  prd.userStories.filter((s) => s.acceptance && s.acceptance.length > 0).map((s) => [s.id, s.acceptance])
10656
11233
  );
10657
- critic = new Critic({
11234
+ critic = llm === "openai" ? new CriticOpenAI({
11235
+ targets,
11236
+ model: config.criticModel ?? "gpt-5.4-mini"
11237
+ }) : new Critic({
10658
11238
  targets,
10659
11239
  model: config.criticModel ?? "haiku"
10660
11240
  });
@@ -10958,6 +11538,7 @@ function parseArgs(argv) {
10958
11538
  noSentry: false,
10959
11539
  withSurgeon: false,
10960
11540
  surgeonUseLlm: false,
11541
+ llm: "claude",
10961
11542
  help: false
10962
11543
  };
10963
11544
  for (let i = 0; i < argv.length; i++) {
@@ -11018,6 +11599,16 @@ function parseArgs(argv) {
11018
11599
  10
11019
11600
  );
11020
11601
  break;
11602
+ case "--llm": {
11603
+ const v = required(argv, ++i, "--llm");
11604
+ if (v !== "claude" && v !== "openai") {
11605
+ process.stderr.write(`[cli] --llm must be 'claude' or 'openai', got '${v}'
11606
+ `);
11607
+ process.exit(2);
11608
+ }
11609
+ args.llm = v;
11610
+ break;
11611
+ }
11021
11612
  default:
11022
11613
  process.stderr.write(`[cli] unknown flag: ${a}
11023
11614
  `);
@@ -11095,10 +11686,16 @@ async function main() {
11095
11686
  withSurgeon: args.withSurgeon,
11096
11687
  surgeonUseLlm: args.surgeonUseLlm,
11097
11688
  surgeonModel: args.surgeonModel,
11098
- intraLevelDelaySecs: args.intraLevelDelaySecs
11689
+ intraLevelDelaySecs: args.intraLevelDelaySecs,
11690
+ llm: args.llm
11099
11691
  };
11692
+ if (args.llm === "openai" && !process.env.OPENAI_API_KEY) {
11693
+ process.stderr.write(
11694
+ "[cli] WARNING: --llm openai requested but OPENAI_API_KEY is not set.\n[cli] The current build falls through to Claude behaviour;\n[cli] set OPENAI_API_KEY before phase 3+ OpenAI siblings ship.\n"
11695
+ );
11696
+ }
11100
11697
  process.stderr.write(
11101
- `[cli] starting orchestrator: prd=${prdPath} cwd=${cwd} parallel=${args.parallel} timeout=${args.timeout}s
11698
+ `[cli] starting orchestrator: prd=${prdPath} cwd=${cwd} parallel=${args.parallel} timeout=${args.timeout}s llm=${args.llm}
11102
11699
  `
11103
11700
  );
11104
11701
  const startedAt = Date.now();