open-agents-ai 0.187.370 → 0.187.372

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +298 -13
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -81,6 +81,7 @@ function loadConfigFile() {
81
81
  if (typeof parsed.dryRun === "boolean") result.dryRun = parsed.dryRun;
82
82
  if (typeof parsed.verbose === "boolean") result.verbose = parsed.verbose;
83
83
  if (typeof parsed.dbPath === "string") result.dbPath = parsed.dbPath;
84
+ if (typeof parsed.thinking === "boolean") result.thinking = parsed.thinking;
84
85
  return result;
85
86
  } catch {
86
87
  return {};
@@ -515664,6 +515665,46 @@ ${todoItems}
515664
515665
  </system-reminder>`;
515665
515666
  return { shouldInject: true, content, reason: "injected" };
515666
515667
  }
515668
/**
 * Remove every <think>…</think> block from a model response and trim the
 * remainder. Matching is case-insensitive so it agrees with the tag
 * detection in classifyThinkOutcome (which uses /<think>/i) — otherwise an
 * uppercase-tagged block would be detected but never stripped.
 * @param {string|null|undefined} s2 - raw assistant text
 * @returns {string|null|undefined} the input unchanged when falsy,
 *   otherwise the text with all think blocks removed and trimmed
 */
function stripThinkBlocks(s2) {
  if (!s2)
    return s2;
  return s2.replace(/<think>[\s\S]*?<\/think>/gi, "").trim();
}
515673
/**
 * Decide whether a request should run with think mode enabled.
 * Precedence, first match wins:
 *   1. OA_FORCE_NO_THINK=1           -> off (hard env kill-switch)
 *   2. loop-guard suppression        -> off
 *   3. request carries tools         -> off (tool calls run direct)
 *   4. explicit per-request boolean  -> as requested
 *   5. auto-heuristic over user/system text (unless OA_THINK_AUTO=0) -> on
 *   6. backend default
 * @param {{requestThink?: boolean, defaultThink?: boolean, hasTools?: boolean,
 *          messages?: Array<{role: string, content: unknown}>,
 *          suppressed?: boolean}} params
 * @returns {boolean|undefined} effective think flag (falls through to defaultThink)
 */
function computeEffectiveThink(params) {
  const forcedOff = process.env["OA_FORCE_NO_THINK"] === "1" || params.suppressed || params.hasTools;
  if (forcedOff) {
    return false;
  }
  if (typeof params.requestThink === "boolean") {
    return params.requestThink;
  }
  const autoHeuristicOn = process.env["OA_THINK_AUTO"] !== "0";
  if (autoHeuristicOn && Array.isArray(params.messages)) {
    // Fold user/system text into one lowercase blob; non-string content is ignored.
    const blob = params.messages
      .filter((m2) => m2.role === "user" || m2.role === "system")
      .map((m2) => (typeof m2.content === "string" ? m2.content : ""))
      .join("\n")
      .toLowerCase();
    const wantsReasoning = /\b(plan|decompose|analyze(?:\s+complex)?|step\s*by\s*step|reason through|think through|reason step)\b/.test(blob);
    if (wantsReasoning) {
      return true;
    }
  }
  return params.defaultThink;
}
515690
/**
 * Classify a raw think-mode response into a failure class.
 * @param {string|null|undefined} raw - full assistant text, tags included
 * @returns {"empty_after_strip"|"unclosed_think"|"runaway_think"|null}
 *   null means the response looks healthy.
 */
function classifyThinkOutcome(raw) {
  if (!raw)
    return "empty_after_strip";
  const opened = /<think>/i.test(raw);
  const closed = /<\/think>/i.test(raw);
  // An opening tag with no closing tag usually means generation was cut
  // off mid-reasoning.
  if (opened && !closed)
    return "unclosed_think";
  const visible = stripThinkBlocks(raw);
  if (visible.trim().length < 2)
    return "empty_after_strip";
  if (opened && closed) {
    // Think content dominating the response (>90% of the bytes) while the
    // visible answer stays under 40 chars is treated as a reasoning runaway.
    const hiddenLen = raw.length - visible.length;
    if (hiddenLen > raw.length * 0.9 && visible.trim().length < 40) {
      return "runaway_think";
    }
  }
  return null;
}
515667
515708
  var SYSTEM_PROMPT, SYSTEM_PROMPT_MEDIUM, SYSTEM_PROMPT_SMALL, VISUAL_TOOLS, AUDIO_TOOLS, SOCIAL_TOOLS, SPATIAL_TOOLS, CODE_TOOLS, AgenticRunner, OllamaAgenticBackend;
515668
515709
  var init_agenticRunner = __esm({
515669
515710
  "packages/orchestrator/dist/agenticRunner.js"() {
@@ -516486,6 +516527,40 @@ ${body}`;
516486
516527
  }
516487
516528
  }
516488
516529
  }
516530
+ /**
516531
+ * Think-loop-guard runner hook. Called once per turn at the top of the
516532
+ * agentic loop. Responsibilities:
516533
+ * 1. Consume OA_THINK_GUARD_RESET env var (written by /think reset) to
516534
+ * clear a prior suppression — the CLI can't talk to the backend
516535
+ * directly, so it drops a timestamp in the env and we pick it up.
516536
+ * 2. Emit a one-shot user-visible warning the first turn after the
516537
+ * guard trips, so the user knows why answers suddenly look different.
516538
+ */
516539
+ _lastThinkGuardResetAt = 0;
516540
+ _maybeApplyThinkGuard() {
516541
+ const resetRaw = process.env["OA_THINK_GUARD_RESET"];
516542
+ if (resetRaw) {
516543
+ const ts = Number(resetRaw);
516544
+ if (Number.isFinite(ts) && ts > this._lastThinkGuardResetAt) {
516545
+ this._lastThinkGuardResetAt = ts;
516546
+ if (typeof this.backend.resetThinkGuard === "function") {
516547
+ this.backend.resetThinkGuard();
516548
+ this.emit({
516549
+ type: "status",
516550
+ content: "🧠 Think-guard cleared — reasoning mode will re-enable on the next eligible request.",
516551
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
516552
+ });
516553
+ }
516554
+ }
516555
+ }
516556
+ if (typeof this.backend.consumeSuppressionNotice === "function" && this.backend.consumeSuppressionNotice()) {
516557
+ this.emit({
516558
+ type: "status",
516559
+ content: "⚠ Think-mode auto-suppressed — two consecutive empty/unclosed-<think> responses detected. Continuing with direct answers. Use `/think reset` to retry.",
516560
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
516561
+ });
516562
+ }
516563
+ }
516489
516564
  /**
516490
516565
  * Detect repetition in recent tool calls.
516491
516566
  * Returns a score 0-1 where 1 = fully repetitive (stuck in a loop).
@@ -516764,6 +516839,7 @@ TASK: ${task}` : task;
516764
516839
  }
516765
516840
  for (let turn = 0; turn < this.options.maxTurns; turn++) {
516766
516841
  clearTurnState(this._appState);
516842
+ this._maybeApplyThinkGuard();
516767
516843
  if (this._paused) {
516768
516844
  const shouldContinue = await this.waitIfPaused();
516769
516845
  if (!shouldContinue) {
@@ -518244,6 +518320,7 @@ You have ${this.options.maxTurns} more turns. Continue making progress. Call tas
518244
518320
  messages2.push(...compacted);
518245
518321
  }
518246
518322
  for (let turn = 0; turn < this.options.maxTurns; turn++) {
518323
+ this._maybeApplyThinkGuard();
518247
518324
  if (this._paused) {
518248
518325
  const shouldContinue = await this.waitIfPaused();
518249
518326
  if (!shouldContinue) {
@@ -521100,13 +521177,23 @@ ${description}`
521100
521177
  return resp;
521101
521178
  }
521102
521179
  };
521103
- OllamaAgenticBackend = class {
521180
+ OllamaAgenticBackend = class _OllamaAgenticBackend {
521104
521181
  baseUrl;
521105
521182
  model;
521106
521183
  apiKey;
521107
521184
  thinking;
521108
521185
  /** Abort signal — set by the runner so /stop can cancel in-flight requests */
521109
521186
  _abortSignal = null;
521187
+ // ── Think-loop guard (0.187.372) ──────────────────────────────────────
521188
+ // If the model keeps producing empty / unclosed-think responses, we
521189
+ // assume Qwen3 dual-mode is looping and start suppressing think for
521190
+ // this backend instance. User can clear via /think reset.
521191
+ _thinkFailStreak = 0;
521192
+ _thinkSuccessStreak = 0;
521193
+ _thinkSuppressed = false;
521194
+ _thinkSuppressedNotified = false;
521195
+ static _thinkFailThreshold = 2;
521196
+ static _thinkRecoveryThreshold = 6;
521110
521197
  /** Multi-key pool — round-robin rotation per request for load distribution */
521111
521198
  _keyPool = [];
521112
521199
  _keyIndex = 0;
@@ -521116,7 +521203,7 @@ ${description}`
521116
521203
  this.baseUrl = normalizeBaseUrl(baseUrl);
521117
521204
  this.model = model;
521118
521205
  this.apiKey = apiKey ?? "";
521119
- this.thinking = thinking ?? true;
521206
+ this.thinking = thinking ?? false;
521120
521207
  this._isAnthropic = /api\.anthropic\.com/i.test(baseUrl);
521121
521208
  }
521122
521209
  /** Set multiple API keys for round-robin rotation per request */
@@ -521128,6 +521215,61 @@ ${description}`
521128
521215
  setAbortSignal(signal) {
521129
521216
  this._abortSignal = signal;
521130
521217
  }
521218
/** Is think currently auto-suppressed by the loop-guard?
 * @returns {boolean} true while suppression is active; cleared either by
 *   resetThinkGuard() or by recordThinkOutcome() once enough consecutive
 *   healthy think responses arrive. */
isThinkSuppressed() {
  return this._thinkSuppressed;
}
521222
+ /** Clear the loop-guard — lets think re-enable on the next eligible request. */
521223
+ resetThinkGuard() {
521224
+ this._thinkFailStreak = 0;
521225
+ this._thinkSuccessStreak = 0;
521226
+ this._thinkSuppressed = false;
521227
+ this._thinkSuppressedNotified = false;
521228
+ }
521229
+ /**
521230
+ * Feed a completed assistant response into the loop-guard. We only
521231
+ * update counters on responses that WERE think=true — otherwise
521232
+ * think-off responses (the vast majority) would drive the counters
521233
+ * and mask the failure signal we're trying to detect.
521234
+ *
521235
+ * Failure classes (per classifyThinkOutcome) bump the fail streak.
521236
+ * Healthy think-mode responses bump the success streak and, past a
521237
+ * threshold, clear a prior suppression so think can come back on if
521238
+ * the model is behaving again.
521239
+ *
521240
+ * Returns the classification so callers can decide whether to
521241
+ * emit a warning / retry.
521242
+ */
521243
+ recordThinkOutcome(raw, wasThinkRequested) {
521244
+ if (!wasThinkRequested)
521245
+ return null;
521246
+ const cls = classifyThinkOutcome(raw);
521247
+ if (cls !== null) {
521248
+ this._thinkFailStreak++;
521249
+ this._thinkSuccessStreak = 0;
521250
+ if (this._thinkFailStreak >= _OllamaAgenticBackend._thinkFailThreshold && !this._thinkSuppressed) {
521251
+ this._thinkSuppressed = true;
521252
+ }
521253
+ } else {
521254
+ this._thinkSuccessStreak++;
521255
+ this._thinkFailStreak = 0;
521256
+ if (this._thinkSuppressed && this._thinkSuccessStreak >= _OllamaAgenticBackend._thinkRecoveryThreshold) {
521257
+ this._thinkSuppressed = false;
521258
+ this._thinkSuppressedNotified = false;
521259
+ }
521260
+ }
521261
+ return cls;
521262
+ }
521263
+ /** Pick up the one-shot "notify about suppression" flag. Returns true
521264
+ * the first time it's called after a trip; false thereafter until
521265
+ * the guard resets. Used by the runner to emit a single warning. */
521266
+ consumeSuppressionNotice() {
521267
+ if (this._thinkSuppressed && !this._thinkSuppressedNotified) {
521268
+ this._thinkSuppressedNotified = true;
521269
+ return true;
521270
+ }
521271
+ return false;
521272
+ }
521131
521273
  /** Build auth headers — adapts to provider (Bearer for most, x-api-key for Anthropic).
521132
521274
  * When a key pool is set, round-robins through keys per request. */
521133
521275
  authHeaders() {
@@ -521151,13 +521293,25 @@ ${description}`
521151
521293
  if (this._isAnthropic) {
521152
521294
  return this._anthropicChatCompletion(request);
521153
521295
  }
521296
+ const cleanedMessages = request.messages.map((m2) => m2.role === "assistant" && typeof m2.content === "string" ? { ...m2, content: stripThinkBlocks(m2.content) } : m2);
521297
+ const effectiveThink = computeEffectiveThink({
521298
+ requestThink: request.think,
521299
+ defaultThink: this.thinking,
521300
+ hasTools: Array.isArray(request.tools) && request.tools.length > 0,
521301
+ messages: cleanedMessages,
521302
+ suppressed: this._thinkSuppressed
521303
+ });
521304
+ let effectiveMaxTokens = request.maxTokens;
521305
+ if (effectiveThink === true && (effectiveMaxTokens ?? 0) < 4096) {
521306
+ effectiveMaxTokens = 4096;
521307
+ }
521154
521308
  const body = {
521155
521309
  model: this.model,
521156
- messages: request.messages,
521310
+ messages: cleanedMessages,
521157
521311
  tools: request.tools,
521158
521312
  temperature: request.temperature,
521159
- max_tokens: request.maxTokens,
521160
- think: this.thinking
521313
+ max_tokens: effectiveMaxTokens,
521314
+ think: effectiveThink
521161
521315
  };
521162
521316
  const fetchOpts = {
521163
521317
  method: "POST",
@@ -521176,6 +521330,71 @@ ${description}`
521176
521330
  const data = await resp.json();
521177
521331
  const choices = data.choices ?? [];
521178
521332
  const usage = data.usage;
521333
+ const firstChoice = choices[0];
521334
+ const responseText = firstChoice ? String(firstChoice.message?.content ?? "") : "";
521335
+ const outcome = this.recordThinkOutcome(responseText, effectiveThink === true);
521336
+ if (outcome !== null && effectiveThink === true) {
521337
+ const justSuppressed = this._thinkSuppressed && this._thinkFailStreak === _OllamaAgenticBackend._thinkFailThreshold;
521338
+ if (justSuppressed || outcome === "empty_after_strip" || outcome === "unclosed_think") {
521339
+ const retryBody = {
521340
+ model: this.model,
521341
+ messages: cleanedMessages,
521342
+ tools: request.tools,
521343
+ temperature: request.temperature,
521344
+ max_tokens: request.maxTokens,
521345
+ think: false
521346
+ };
521347
+ try {
521348
+ const retryOpts = {
521349
+ method: "POST",
521350
+ headers: this.authHeaders(),
521351
+ body: JSON.stringify(retryBody)
521352
+ };
521353
+ if (this._abortSignal)
521354
+ retryOpts.signal = this._abortSignal;
521355
+ const retryResp = await fetch(`${this.baseUrl}/v1/chat/completions`, retryOpts);
521356
+ if (retryResp.ok) {
521357
+ const retryData = await retryResp.json();
521358
+ const retryChoices = retryData.choices ?? [];
521359
+ const retryUsage = retryData.usage;
521360
+ if (retryChoices.length > 0) {
521361
+ return {
521362
+ choices: retryChoices.map((c8) => {
521363
+ const msg = c8.message;
521364
+ const toolCalls = msg.tool_calls ?? [];
521365
+ return {
521366
+ message: {
521367
+ content: msg.content || null,
521368
+ toolCalls: toolCalls.length > 0 ? toolCalls.map((tc) => {
521369
+ const fn = tc.function;
521370
+ let args;
521371
+ try {
521372
+ args = typeof fn.arguments === "string" ? JSON.parse(fn.arguments) : fn.arguments ?? {};
521373
+ } catch {
521374
+ const repaired = repairJson(fn.arguments);
521375
+ args = repaired ?? { _raw: fn.arguments };
521376
+ }
521377
+ return {
521378
+ id: tc.id || crypto.randomUUID(),
521379
+ name: fn.name,
521380
+ arguments: args
521381
+ };
521382
+ }) : void 0
521383
+ }
521384
+ };
521385
+ }),
521386
+ usage: retryUsage ? {
521387
+ totalTokens: retryUsage.total_tokens ?? 0,
521388
+ promptTokens: retryUsage.prompt_tokens,
521389
+ completionTokens: retryUsage.completion_tokens
521390
+ } : void 0
521391
+ };
521392
+ }
521393
+ }
521394
+ } catch {
521395
+ }
521396
+ }
521397
+ }
521179
521398
  return {
521180
521399
  choices: choices.map((c8) => {
521181
521400
  const msg = c8.message;
@@ -521314,15 +521533,27 @@ ${description}`
521314
521533
  * The existing chatCompletion() method is completely unmodified.
521315
521534
  */
521316
521535
  async *chatCompletionStream(request) {
521536
+ const cleanedMessages = request.messages.map((m2) => m2.role === "assistant" && typeof m2.content === "string" ? { ...m2, content: stripThinkBlocks(m2.content) } : m2);
521537
+ const effectiveThink = computeEffectiveThink({
521538
+ requestThink: request.think,
521539
+ defaultThink: this.thinking,
521540
+ hasTools: Array.isArray(request.tools) && request.tools.length > 0,
521541
+ messages: cleanedMessages,
521542
+ suppressed: this._thinkSuppressed
521543
+ });
521544
+ let effectiveMaxTokens = request.maxTokens;
521545
+ if (effectiveThink === true && (effectiveMaxTokens ?? 0) < 4096) {
521546
+ effectiveMaxTokens = 4096;
521547
+ }
521317
521548
  const body = {
521318
521549
  model: this.model,
521319
- messages: request.messages,
521550
+ messages: cleanedMessages,
521320
521551
  tools: request.tools,
521321
521552
  temperature: request.temperature,
521322
- max_tokens: request.maxTokens,
521553
+ max_tokens: effectiveMaxTokens,
521323
521554
  stream: true,
521324
521555
  stream_options: { include_usage: true },
521325
- think: this.thinking
521556
+ think: effectiveThink
521326
521557
  };
521327
521558
  const streamFetchOpts = {
521328
521559
  method: "POST",
@@ -521340,6 +521571,9 @@ ${description}`
521340
521571
  }
521341
521572
  let sseBuffer = "";
521342
521573
  const decoder = new TextDecoder();
521574
+ let accumulatedContent = "";
521575
+ let accumulatedThinking = "";
521576
+ let sawReasoningTokens = false;
521343
521577
  for await (const rawChunk of resp.body) {
521344
521578
  sseBuffer += decoder.decode(rawChunk, { stream: true });
521345
521579
  const parts = sseBuffer.split("\n\n");
@@ -521348,8 +521582,10 @@ ${description}`
521348
521582
  const line = part.trim();
521349
521583
  if (!line)
521350
521584
  continue;
521351
- if (line === "data: [DONE]")
521585
+ if (line === "data: [DONE]") {
521586
+ this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
521352
521587
  return;
521588
+ }
521353
521589
  if (!line.startsWith("data: "))
521354
521590
  continue;
521355
521591
  try {
@@ -521373,9 +521609,12 @@ ${description}`
521373
521609
  const finishReason = choice.finish_reason;
521374
521610
  const reasoningToken = delta?.reasoning ?? delta?.reasoning_content;
521375
521611
  if (reasoningToken) {
521612
+ sawReasoningTokens = true;
521613
+ accumulatedThinking += reasoningToken;
521376
521614
  yield { type: "content", content: reasoningToken, thinking: true };
521377
521615
  }
521378
521616
  if (delta?.content) {
521617
+ accumulatedContent += delta.content;
521379
521618
  yield { type: "content", content: delta.content };
521380
521619
  }
521381
521620
  const tcDeltas = delta?.tool_calls;
@@ -521409,6 +521648,23 @@ ${description}`
521409
521648
  }
521410
521649
  }
521411
521650
  }
521651
+ this._finalizeStreamGuard(effectiveThink, accumulatedContent, accumulatedThinking, sawReasoningTokens);
521652
+ }
521653
+ /** Reconstruct a raw-looking assistant response from the streamed
521654
+ * parts, then feed it into the loop-guard. Used at stream end (both
521655
+ * the [DONE] case and the fell-off-the-end case). */
521656
+ _finalizeStreamGuard(thinkRequested, content, thinking, hadReasoningTokens) {
521657
+ if (!thinkRequested) {
521658
+ this.recordThinkOutcome(content, false);
521659
+ return;
521660
+ }
521661
+ let rawLike;
521662
+ if (hadReasoningTokens && thinking) {
521663
+ rawLike = `<think>${thinking}</think>${content}`;
521664
+ } else {
521665
+ rawLike = content;
521666
+ }
521667
+ this.recordThinkOutcome(rawLike, true);
521412
521668
  }
521413
521669
  };
521414
521670
  }
@@ -546123,12 +546379,41 @@ Clone a new voice: /voice clone <wav-file> [name]`);
546123
546379
  return "handled";
546124
546380
  }
546125
546381
  case "think": {
546126
- const isOn = ctx3.thinkToggle();
546382
+ const token = (arg || "").trim().toLowerCase();
546383
+ const desc = (s2) => s2 ? "🧠 models that support reasoning (Qwen3, DeepSeek-R1, etc.) will show their thinking chain; tool calls still run direct" : "⚡ direct-answer mode (reasoning suppressed); recommended for tool-heavy workflows";
546384
+ if (token === "status" || token === "?") {
546385
+ const cur = ctx3.config.thinking ?? false;
546386
+ renderInfo2(`Thinking mode: ${cur ? "on" : "off"} — ${desc(cur)}`);
546387
+ if (process.env["OA_THINK_AUTO"] !== "0") renderInfo2("Auto-heuristic active (set OA_THINK_AUTO=0 to disable) — user messages with plan/decompose/analyze/step-by-step/reason-through auto-flip to think=on, tool calls stay off.");
546388
+ if (process.env["OA_FORCE_NO_THINK"] === "1") renderWarning2("OA_FORCE_NO_THINK=1 forces off regardless of /think setting");
546389
+ return "handled";
546390
+ }
546391
+ if (token === "auto") {
546392
+ process.env["OA_THINK_AUTO"] = "1";
546393
+ renderInfo2("Thinking auto-heuristic enabled (default since 0.187.372). User message containing plan/decompose/analyze/step-by-step/reason-through auto-flips think=on; tool calls still force off. Disable with OA_THINK_AUTO=0.");
546394
+ return "handled";
546395
+ }
546396
+ if (token === "reset" || token === "clear") {
546397
+ process.env["OA_THINK_GUARD_RESET"] = String(Date.now());
546398
+ renderInfo2("Loop-guard reset requested. If think was auto-suppressed after empty/unclosed-think responses, it will re-enable on the next eligible request.");
546399
+ return "handled";
546400
+ }
546401
+ let isOn;
546402
+ if (token === "on" || token === "true" || token === "yes" || token === "1") {
546403
+ isOn = true;
546404
+ ctx3.config.thinking = true;
546405
+ } else if (token === "off" || token === "false" || token === "no" || token === "0") {
546406
+ isOn = false;
546407
+ ctx3.config.thinking = false;
546408
+ } else {
546409
+ isOn = ctx3.thinkToggle();
546410
+ }
546127
546411
  const save2 = hasLocal ? ctx3.saveLocalSettings.bind(ctx3) : ctx3.saveSettings.bind(ctx3);
546128
546412
  save2({ thinking: isOn });
546129
- renderInfo2(
546130
- `Thinking mode: ${isOn ? "on" : "off"}${hasLocal ? " (project-local)" : ""}` + (isOn ? " — models that support reasoning (Qwen3, DeepSeek-R1, etc.) will show their thinking chain" : " — reasoning chain suppressed, model responds directly")
546131
- );
546413
+ renderInfo2(`Thinking mode: ${isOn ? "on" : "off"}${hasLocal ? " (project-local)" : ""} — ${desc(isOn)}`);
546414
+ if (isOn) {
546415
+ renderInfo2("Note: max_tokens will auto-raise to ≥4096 per request to prevent <think> truncation.");
546416
+ }
546132
546417
  return "handled";
546133
546418
  }
546134
546419
  case "tools": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.370",
3
+ "version": "0.187.372",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",