openclaw-hybrid-memory 2026.3.293 → 2026.3.301

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/cmd-verify.ts CHANGED
@@ -368,11 +368,14 @@ export async function runVerifyForCli(
368
368
  model: modelForTest,
369
369
  dimensions: dimensionsForTest,
370
370
  batchSize: cfg.embedding.batchSize ?? 32,
371
+ ...(typeof cfg.embedding.deployment === "string" && cfg.embedding.deployment.trim()
372
+ ? { deployment: cfg.embedding.deployment.trim() }
373
+ : {}),
374
+ ...(cfg.embedding.models?.length ? { models: cfg.embedding.models } : {}),
371
375
  ...(p === "openai" && {
372
376
  apiKey: cfg.embedding.apiKey,
373
- ...(typeof (cfg.embedding as Record<string, unknown>).endpoint === "string" &&
374
- (cfg.embedding as Record<string, unknown>).endpoint
375
- ? { endpoint: (cfg.embedding as Record<string, unknown>).endpoint as string }
377
+ ...(typeof cfg.embedding.endpoint === "string" && cfg.embedding.endpoint.trim()
378
+ ? { endpoint: cfg.embedding.endpoint.trim() }
376
379
  : {}),
377
380
  }),
378
381
  ...(p === "google" && {
@@ -22,6 +22,7 @@ import { registerFrustrationHandlers } from "./stage-frustration.js";
22
22
  import { createSessionState } from "./session-state.js";
23
23
  import type { LifecycleContext, SessionState } from "./types.js";
24
24
  import { capturePluginError } from "../services/error-reporter.js";
25
+ import { isAbortOrTransientLlmError } from "../services/chat.js";
25
26
  import { buildDailyNarrative } from "../src/worker/narratives.js";
26
27
 
27
28
  export type { LifecycleContext } from "./types.js";
@@ -155,12 +156,20 @@ export function createLifecycleHooks(ctx: LifecycleContext) {
155
156
  fallbackModels: [],
156
157
  });
157
158
  } catch (err) {
158
- capturePluginError(err instanceof Error ? err : new Error(String(err)), {
159
- subsystem: "narratives",
160
- operation: "agent-end-build-narrative",
161
- sessionId,
162
- });
163
- api.logger.warn(`memory-hybrid: session narrative build failed: ${String(err)}`);
159
+ const transient = isAbortOrTransientLlmError(err);
160
+ if (!transient) {
161
+ capturePluginError(err instanceof Error ? err : new Error(String(err)), {
162
+ subsystem: "narratives",
163
+ operation: "agent-end-build-narrative",
164
+ sessionId,
165
+ });
166
+ }
167
+ const detail = err instanceof Error ? err.message : String(err);
168
+ if (transient) {
169
+ api.logger.info?.(`memory-hybrid: session narrative skipped (LLM unavailable or aborted): ${detail}`);
170
+ } else {
171
+ api.logger.warn(`memory-hybrid: session narrative build failed: ${String(err)}`);
172
+ }
164
173
  }
165
174
  });
166
175
  };
@@ -18,6 +18,7 @@ import {
18
18
  } from "../services/ambient-retrieval.js";
19
19
  import { capturePluginError } from "../services/error-reporter.js";
20
20
  import { formatNarrativeRange, recallNarrativeSummaries } from "../services/narrative-recall.js";
21
+ import { yieldEventLoop } from "../utils/event-loop-yield.js";
21
22
  import { withTimeout } from "../utils/timeout.js";
22
23
  import { estimateTokens } from "../utils/text.js";
23
24
  import { isConsolidatedDerivedFact } from "../utils/consolidation-controls.js";
@@ -64,6 +65,9 @@ async function runRecall(
64
65
 
65
66
  api.logger.debug?.(`memory-hybrid: auto-recall start (prompt length ${e.prompt.length})`);
66
67
 
68
+ // Let pending gateway I/O (health RPCs, WebSocket) run before heavy sync work (#931).
69
+ await yieldEventLoop();
70
+
67
71
  const fmt = ctx.cfg.autoRecall.injectionFormat;
68
72
  const isProgressive = fmt === "progressive" || fmt === "progressive_hybrid";
69
73
  const searchLimit = isProgressive
@@ -110,6 +114,7 @@ async function runRecall(
110
114
  };
111
115
  const degradedLimit = ctx.cfg.autoRecall.limit;
112
116
  const trimmed = e.prompt.trim();
117
+ await yieldEventLoop();
113
118
  const ftsOnly = ctx.factsDb.search(trimmed, degradedLimit, recallOpts);
114
119
  let hotPart = "";
115
120
  if (ctx.cfg.memoryTiering.enabled && ctx.cfg.memoryTiering.hotMaxTokens > 0) {
@@ -218,6 +223,7 @@ async function runRecall(
218
223
  procedureBlock = block;
219
224
  }
220
225
  }
226
+ await yieldEventLoop();
221
227
  const withProcedures = (s: string) => (procedureBlock ? `${procedureBlock}\n${s}` : s);
222
228
 
223
229
  // HOT block
@@ -233,6 +239,8 @@ async function runRecall(
233
239
  }
234
240
  }
235
241
 
242
+ await yieldEventLoop();
243
+
236
244
  const recallOpts = {
237
245
  tierFilter,
238
246
  scopeFilter,
@@ -315,6 +323,7 @@ async function runRecall(
315
323
  if (extraQueries.length > 0) {
316
324
  const extraResultSets: SearchResult[][] = [candidates];
317
325
  for (const q of extraQueries) {
326
+ await yieldEventLoop();
318
327
  try {
319
328
  const qResults = await runRecallPipelineQuery(q.text, Math.ceil(limit / 2), pipelineDeps, hydeUsedRef, {
320
329
  entity: q.type === "entity" ? q.entity : undefined,
@@ -398,6 +407,8 @@ async function runRecall(
398
407
  }
399
408
  }
400
409
 
410
+ await yieldEventLoop();
411
+
401
412
  const promptLower = e.prompt.toLowerCase();
402
413
  const { entityLookup } = ctx.cfg.autoRecall;
403
414
  if (entityLookup.enabled && entityLookup.entities.length > 0) {
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "openclaw-hybrid-memory",
3
- "version": "2026.3.293",
3
+ "version": "2026.3.301",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "openclaw-hybrid-memory",
9
- "version": "2026.3.293",
9
+ "version": "2026.3.301",
10
10
  "hasInstallScript": true,
11
11
  "dependencies": {
12
12
  "@lancedb/lancedb": "^0.27.1",
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "id": "openclaw-hybrid-memory",
3
3
  "kind": "memory",
4
- "version": "2026.3.293",
4
+ "version": "2026.3.301",
5
5
  "uiHints": {
6
6
  "embedding.provider": {
7
7
  "label": "Embedding Provider",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "openclaw-hybrid-memory",
3
- "version": "2026.3.293",
3
+ "version": "2026.3.301",
4
4
  "type": "module",
5
5
  "description": "Give your OpenClaw agent lasting memory: structured facts, semantic search, auto-capture & recall, decay, optional credential vault. Part of Hybrid Memory v3.",
6
6
  "files": [
package/services/chat.ts CHANGED
@@ -413,6 +413,7 @@ export async function chatComplete(opts: {
413
413
  const msg = error.message.toLowerCase();
414
414
  const isTransient =
415
415
  msg.includes("request was aborted") ||
416
+ msg.includes("operation was aborted") ||
416
417
  msg.includes("request timed out") ||
417
418
  msg.includes("timed out") ||
418
419
  msg.includes("llm request timeout") || // #339: our own timeout message uses "timeout" not "timed out"
@@ -464,6 +465,28 @@ export class LLMRetryError extends Error {
464
465
  }
465
466
  }
466
467
 
468
+ /**
469
+ * Returns true when an LLM call failed because of an abort, gateway loss, or a connection-like error (per isConnectionErrorLike) — not plugin logic.
470
+ * An LLMRetryError is judged solely by its `cause`; any other object with a non-null `cause` also counts when that cause qualifies. Used by session narrative and similar paths to avoid noisy warns when the gateway stops.
471
+ */
472
+ export function isAbortOrTransientLlmError(err: unknown): boolean {
473
+ if (err instanceof LLMRetryError) {
474
+ return isAbortOrTransientLlmError(err.cause);
475
+ }
476
+ if (err && typeof err === "object" && "cause" in err) {
477
+ const c = (err as { cause?: unknown }).cause;
478
+ if (c !== undefined && c !== null && isAbortOrTransientLlmError(c)) return true;
479
+ }
480
+ if (!(err instanceof Error)) {
481
+ return isConnectionErrorLike(err);
482
+ }
483
+ if (err.name === "AbortError") return true;
484
+ const msg = err.message;
485
+ if (/request was aborted|Request was aborted|The operation was aborted|operation was aborted/i.test(msg)) return true;
486
+ if (/gateway client stopped|gateway not reachable|not reachable\.|is it running/i.test(msg)) return true;
487
+ return isConnectionErrorLike(err);
488
+ }
489
+
467
490
  /**
468
491
  * Retry wrapper for LLM calls with exponential backoff.
469
492
  * Retries on failure with increasing delays: 1s, 3s, 9s.
@@ -726,10 +749,8 @@ export async function chatCompleteWithRetry(opts: {
726
749
  const finalIsOOM = isOllamaOOM(finalError); // #387: OOM is expected when model too large for RAM
727
750
  const finalIs429 = is429OrWrapped(finalError); // #397
728
751
  const finalIsContextLength = isContextLengthError(finalError); // #488: input too long for model context window
729
- const finalIsTimeout = /timed out|llm request timeout|request was aborted|Request was aborted/i.test(
730
- finalError.message,
731
- );
732
- const finalIsConnectionError = isConnectionErrorLike(finalError);
752
+ /** Unwraps LLMRetryError so "Request was aborted" in the cause is detected (#935, #936). */
753
+ const finalIsTransientLlm = isAbortOrTransientLlmError(finalError);
733
754
 
734
755
  // When every model failed because provider keys are missing, queue a user-visible chat warning
735
756
  // and skip Sentry (this is a config issue, not a bug).
@@ -755,8 +776,7 @@ export async function chatCompleteWithRetry(opts: {
755
776
  !finalIsOOM &&
756
777
  !finalIsContextLength && // #488: context window exceeded = config issue, not a bug
757
778
  !finalIsUnconfigured &&
758
- !finalIsTimeout &&
759
- !finalIsConnectionError &&
779
+ !finalIsTransientLlm &&
760
780
  !finalIs403 &&
761
781
  !finalIs401 &&
762
782
  !finalIs429
@@ -803,10 +823,8 @@ export async function chatCompleteWithRetry(opts: {
803
823
  "⚠️ Memory plugin: LLM unauthorized (401) — your API key is invalid or expired. Check provider settings. " +
804
824
  "Run: openclaw hybrid-mem verify --test-llm",
805
825
  );
806
- } else if (finalIsTimeout) {
807
- // #339: timeout errors are transient — don't report to GlitchTip
808
- } else if (finalIsConnectionError) {
809
- // #703: OpenAI SDK "Connection error." / APIConnectionError is transient — don't report to GlitchTip
826
+ } else if (finalIsTransientLlm) {
827
+ // #339, #703, #935, #936: abort/timeout/connection (including LLMRetryError-wrapped causes) — don't report
810
828
  } else if (finalIs429) {
811
829
  // #397: rate limit / usage limit — transient provider error, don't report to GlitchTip
812
830
  pendingWarnings?.add(
@@ -11,6 +11,7 @@ import {
11
11
  GOOGLE_EMBED_DEFAULT_MODEL,
12
12
  KNOWN_GOOGLE_EMBED_MODELS,
13
13
  OPENAI_ONLY_EMBED_MODELS,
14
+ isAzureOpenAiCompatibleEndpoint,
14
15
  isAzureOpenAiResourceEndpoint,
15
16
  } from "./shared.js";
16
17
  import { Embeddings } from "./openai-provider.js";
@@ -84,6 +85,24 @@ function openaiEmbeddingClientOpts(
84
85
  return opts;
85
86
  }
86
87
 
88
+ /** Returns true for the known local-only (Ollama/ONNX) embedding ids listed below, matched exactly and case-sensitively; false for undefined/empty or any other id. Such ids must never be used as OpenAI/Azure `model` when falling back from Ollama/ONNX (#932). */
89
+ function isLocalOnlyEmbeddingModelId(model: string | undefined): boolean {
90
+ if (!model) return false;
91
+ switch (model) {
92
+ case "nomic-embed-text":
93
+ case "mxbai-embed-large":
94
+ case "bge-m3":
95
+ case "bge-large":
96
+ case "bge-small-en-v1.5":
97
+ case "snowflake-arctic-embed":
98
+ case "all-minilm":
99
+ case "all-MiniLM-L6-v2":
100
+ return true;
101
+ default:
102
+ return false;
103
+ }
104
+ }
105
+
87
106
  /** API model id(s) for OpenAI-compatible embeddings: optional Azure deployment name overrides logical `model`. */
88
107
  function openAiEmbeddingApiModels(cfg: EmbeddingConfig, forFallback = false): string[] {
89
108
  const { model, models, deployment } = cfg;
@@ -94,6 +113,12 @@ function openAiEmbeddingApiModels(cfg: EmbeddingConfig, forFallback = false): st
94
113
  if (model && OPENAI_ONLY_EMBED_MODELS.has(model)) {
95
114
  return [model];
96
115
  }
116
+ // Fallback path (used for the chain OpenAI arm and for Ollama/ONNX→OpenAI fallback). Azure deployment names
117
+ // are often not in OPENAI_ONLY_EMBED_MODELS, so on an Azure-compatible endpoint keep the configured non-local model instead of substituting text-embedding-3-small (#932).
118
+ const m = typeof model === "string" ? model.trim() : "";
119
+ if (m && isAzureOpenAiCompatibleEndpoint(cfg.endpoint) && !isLocalOnlyEmbeddingModelId(m)) {
120
+ return [m];
121
+ }
97
122
  return ["text-embedding-3-small"];
98
123
  }
99
124
  return models?.length ? models : [model];
@@ -46,7 +46,7 @@ export function isAzureOpenAiResourceEndpoint(endpoint: string | undefined): boo
46
46
  * True when the embedding base URL targets Azure (resource, APIM gateway, Cognitive Services, Foundry),
47
47
  * not public api.openai.com.
48
48
  */
49
- function isAzureOpenAiCompatibleEndpoint(endpoint: string | undefined): boolean {
49
+ export function isAzureOpenAiCompatibleEndpoint(endpoint: string | undefined): boolean {
50
50
  if (typeof endpoint !== "string" || !endpoint.trim()) return false;
51
51
  // Use specific Azure AI/OpenAI domains only — `\.azure\.com` alone is too broad and would
52
52
  // match unrelated Azure services (portal.azure.com, devops.azure.com, etc.).
@@ -24,6 +24,7 @@ import type { EmbeddingProvider } from "./embeddings.js";
24
24
  import { shouldSuppressEmbeddingError } from "./embeddings.js";
25
25
  import { expandQueryWithHyde } from "./hyde-helper.js";
26
26
  import { DEFAULT_INTERACTIVE_RECALL_POLICY, type InteractiveRecallPolicy } from "./retrieval-mode-policy.js";
27
+ import { yieldEventLoop } from "../utils/event-loop-yield.js";
27
28
 
28
29
  async function embedWithAbortRace(
29
30
  embedPromise: Promise<number[]>,
@@ -130,6 +131,9 @@ export async function runRecallPipelineQuery(
130
131
  stageMs.fts = Date.now() - t0;
131
132
  sqliteResults = [...sqliteResults, ...ftsResults];
132
133
 
134
+ // FTS + lookup are synchronous SQLite — yield so gateway WebSocket/health can run (#931).
135
+ await yieldEventLoop();
136
+
133
137
  let lanceResults: SearchResult[] = [];
134
138
  const useSemantic = cfg.retrievalStrategies.includes("semantic");
135
139
 
@@ -232,6 +236,8 @@ export async function runRecallPipelineQuery(
232
236
  }
233
237
  }
234
238
 
239
+ await yieldEventLoop();
240
+
235
241
  t0 = Date.now();
236
242
  let results = mergeResults(sqliteResults, lanceResults, limitNum, factsDb);
237
243
  stageMs.merge = Date.now() - t0;
@@ -2,9 +2,9 @@ import type OpenAI from "openai";
2
2
  import type { EventLog } from "../../backends/event-log.js";
3
3
  import type { NarrativesDB } from "../../backends/narratives-db.js";
4
4
  import type { WorkflowStore } from "../../backends/workflow-store.js";
5
- import { chatCompleteWithRetry } from "../../services/chat.js";
5
+ import { chatCompleteWithRetry, isAbortOrTransientLlmError } from "../../services/chat.js";
6
6
  import { capturePluginError } from "../../services/error-reporter.js";
7
- import { getSessionLogFileSuffix } from "../../utils/constants.js";
7
+ import { getSessionLogFileSuffix, NARRATIVE_CHAT_TIMEOUT_MS } from "../../utils/constants.js";
8
8
  import { fillPrompt, loadPrompt } from "../../utils/prompt-loader.js";
9
9
 
10
10
  /** Session transcript basename for `sessionId` (suffix from OPENCLAW_SESSION_LOG_SUFFIX, default .jsonl). */
@@ -105,6 +105,7 @@ export async function buildDailyNarrative(params: BuildDailyNarrativeParams): Pr
105
105
  fallbackModels: fallbackModels ?? [],
106
106
  label: "memory-hybrid: narrative-summary",
107
107
  feature: "distill",
108
+ timeoutMs: NARRATIVE_CHAT_TIMEOUT_MS,
108
109
  });
109
110
  const normalized = normalizeNarrative(raw);
110
111
  if (!normalized || normalized === "NO_NARRATIVE") return false;
@@ -122,12 +123,20 @@ export async function buildDailyNarrative(params: BuildDailyNarrativeParams): Pr
122
123
  );
123
124
  return true;
124
125
  } catch (err) {
125
- capturePluginError(err instanceof Error ? err : new Error(String(err)), {
126
- subsystem: "narratives",
127
- operation: "build-daily-narrative",
128
- sessionId,
129
- });
130
- logger.warn(`memory-hybrid: narrative build failed for ${sessionId}: ${err}`);
126
+ const transient = isAbortOrTransientLlmError(err);
127
+ if (!transient) {
128
+ capturePluginError(err instanceof Error ? err : new Error(String(err)), {
129
+ subsystem: "narratives",
130
+ operation: "build-daily-narrative",
131
+ sessionId,
132
+ });
133
+ }
134
+ const detail = err instanceof Error ? err.message : String(err);
135
+ if (transient) {
136
+ logger.info?.(`memory-hybrid: narrative skipped (LLM unavailable or aborted) for ${sessionId}: ${detail}`);
137
+ } else {
138
+ logger.warn(`memory-hybrid: narrative build failed for ${sessionId}: ${err}`);
139
+ }
131
140
  return false;
132
141
  }
133
142
  }
@@ -19,7 +19,7 @@ import type { EventLog } from "../backends/event-log.js";
19
19
  import type { NarrativesDB } from "../backends/narratives-db.js";
20
20
  import { categoryToEventType } from "../backends/event-log.js";
21
21
  import type { EmbeddingProvider } from "../services/embeddings.js";
22
- import { AllEmbeddingProvidersFailed } from "../services/embeddings.js";
22
+ import { AllEmbeddingProvidersFailed, shouldSuppressEmbeddingError } from "../services/embeddings.js";
23
23
  import type { EmbeddingRegistry } from "../services/embedding-registry.js";
24
24
  import { toFloat32Array } from "../services/embedding-registry.js";
25
25
  import type { PendingLLMWarnings } from "../services/chat.js";
@@ -1546,6 +1546,9 @@ export function registerMemoryTools(
1546
1546
  // Graceful degradation: store the fact without a vector.
1547
1547
  // The fact is still findable by structured/keyword search.
1548
1548
  api.logger.warn("memory-hybrid: Stored fact without embeddings — all providers unavailable");
1549
+ } else if (shouldSuppressEmbeddingError(err)) {
1550
+ // Ollama circuit breaker, 429, config errors, etc. — expected noise (#937); don't send to GlitchTip.
1551
+ api.logger.warn(`memory-hybrid: embedding skipped (expected): ${err}`);
1549
1552
  } else {
1550
1553
  capturePluginError(err instanceof Error ? err : new Error(String(err)), {
1551
1554
  subsystem: "embeddings",
@@ -75,6 +75,9 @@ export const OLLAMA_COOLDOWN_MS = 5 * 60 * 1000; // 5 minutes
75
75
  /** Default timeout for chat completion requests (ms). */
76
76
  export const DEFAULT_CHAT_TIMEOUT_MS = 45_000;
77
77
 
78
+ /** Daily narrative prompts aggregate many events — allow longer than default chat timeout (#935, #936). */
79
+ export const NARRATIVE_CHAT_TIMEOUT_MS = 120_000;
80
+
78
81
  // VectorDB constants
79
82
  /** Threshold for warning about consecutive optimize failures. */
80
83
  export const VECTORDB_OPTIMIZE_FAILURE_WARN_THRESHOLD = 3;
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Yield to the Node.js event loop (resolves on the next setImmediate) so pending I/O and WebSocket handlers (e.g. gateway health RPCs) can run.
3
+ * Call before and between chunks of heavy synchronous SQLite / merge work on the auto-recall path (#931).
4
+ */
5
+ export async function yieldEventLoop(): Promise<void> {
6
+ await new Promise<void>((resolve) => {
7
+ setImmediate(resolve);
8
+ });
9
+ }