@totalreclaw/totalreclaw 3.3.1-rc.2 → 3.3.1-rc.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/CHANGELOG.md +330 -0
  2. package/SKILL.md +50 -83
  3. package/api-client.ts +18 -11
  4. package/config.ts +117 -3
  5. package/crypto.ts +10 -2
  6. package/dist/api-client.js +226 -0
  7. package/dist/billing-cache.js +100 -0
  8. package/dist/claims-helper.js +606 -0
  9. package/dist/config.js +280 -0
  10. package/dist/consolidation.js +258 -0
  11. package/dist/contradiction-sync.js +1034 -0
  12. package/dist/crypto.js +138 -0
  13. package/dist/digest-sync.js +361 -0
  14. package/dist/download-ux.js +63 -0
  15. package/dist/embedding.js +86 -0
  16. package/dist/extractor.js +1225 -0
  17. package/dist/first-run.js +103 -0
  18. package/dist/fs-helpers.js +563 -0
  19. package/dist/gateway-url.js +197 -0
  20. package/dist/generate-mnemonic.js +13 -0
  21. package/dist/hot-cache-wrapper.js +101 -0
  22. package/dist/import-adapters/base-adapter.js +64 -0
  23. package/dist/import-adapters/chatgpt-adapter.js +238 -0
  24. package/dist/import-adapters/claude-adapter.js +114 -0
  25. package/dist/import-adapters/gemini-adapter.js +201 -0
  26. package/dist/import-adapters/index.js +26 -0
  27. package/dist/import-adapters/mcp-memory-adapter.js +219 -0
  28. package/dist/import-adapters/mem0-adapter.js +158 -0
  29. package/dist/import-adapters/types.js +1 -0
  30. package/dist/index.js +5348 -0
  31. package/dist/llm-client.js +686 -0
  32. package/dist/llm-profile-reader.js +346 -0
  33. package/dist/lsh.js +62 -0
  34. package/dist/onboarding-cli.js +750 -0
  35. package/dist/pair-cli.js +344 -0
  36. package/dist/pair-crypto.js +359 -0
  37. package/dist/pair-http.js +404 -0
  38. package/dist/pair-page.js +826 -0
  39. package/dist/pair-qr.js +107 -0
  40. package/dist/pair-remote-client.js +410 -0
  41. package/dist/pair-session-store.js +566 -0
  42. package/dist/pin.js +542 -0
  43. package/dist/qa-bug-report.js +301 -0
  44. package/dist/relay-headers.js +44 -0
  45. package/dist/reranker.js +442 -0
  46. package/dist/retype-setscope.js +348 -0
  47. package/dist/semantic-dedup.js +75 -0
  48. package/dist/subgraph-search.js +289 -0
  49. package/dist/subgraph-store.js +694 -0
  50. package/dist/tool-gating.js +58 -0
  51. package/download-ux.ts +91 -0
  52. package/embedding.ts +32 -9
  53. package/fs-helpers.ts +124 -0
  54. package/gateway-url.ts +57 -9
  55. package/index.ts +586 -357
  56. package/llm-client.ts +211 -23
  57. package/lsh.ts +7 -2
  58. package/onboarding-cli.ts +114 -1
  59. package/package.json +19 -5
  60. package/pair-cli.ts +76 -8
  61. package/pair-crypto.ts +34 -24
  62. package/pair-page.ts +28 -17
  63. package/pair-qr.ts +152 -0
  64. package/pair-remote-client.ts +540 -0
  65. package/qa-bug-report.ts +381 -0
  66. package/relay-headers.ts +50 -0
  67. package/reranker.ts +73 -0
  68. package/retype-setscope.ts +12 -0
  69. package/subgraph-search.ts +4 -3
  70. package/subgraph-store.ts +109 -16
package/dist/llm-client.js
@@ -0,0 +1,686 @@
+ /**
+  * TotalReclaw Plugin - LLM Client
+  *
+  * Auto-detects the user's LLM provider from OpenClaw's config and derives a
+  * cheap extraction model. Supports OpenAI-compatible APIs and Anthropic's
+  * Messages API. No external dependencies -- uses native fetch().
+  *
+  * Embedding generation has been moved to embedding.ts (local ONNX model via
+  * @huggingface/transformers). No API key needed for embeddings.
+  */
+ import { CONFIG } from './config.js';
+ // ---------------------------------------------------------------------------
+ // Provider mappings
+ // ---------------------------------------------------------------------------
+ /** Maps provider name to CONFIG.llmApiKeys property names to check (in order). */
+ const PROVIDER_KEY_NAMES = {
+     zai: ['zai'],
+     anthropic: ['anthropic'],
+     openai: ['openai'],
+     gemini: ['gemini'],
+     google: ['gemini', 'google'],
+     mistral: ['mistral'],
+     groq: ['groq'],
+     deepseek: ['deepseek'],
+     openrouter: ['openrouter'],
+     xai: ['xai'],
+     together: ['together'],
+     cerebras: ['cerebras'],
+ };
+ /**
+  * zai has TWO public endpoints. The CODING endpoint is what GLM Coding Plan
+  * subscription keys are provisioned against; the STANDARD (PAYG) endpoint
+  * serves pay-as-you-go balances. A coding-plan key that hits the STANDARD
+  * endpoint returns HTTP 429 with body `"Insufficient balance or no resource
+  * package. Please recharge."` — misleading because the subscription is in
+  * good standing. Vice-versa for PAYG keys that accidentally hit CODING.
+  *
+  * 3.3.1-rc.3: exported so the rc.3 auto-fallback (see `chatCompletion`)
+  * can flip between them when the upstream error signature matches.
+  */
+ export const ZAI_CODING_BASE_URL = 'https://api.z.ai/api/coding/paas/v4';
+ export const ZAI_STANDARD_BASE_URL = 'https://api.z.ai/api/paas/v4';
+ /**
+  * Resolve the zai base URL.
+  *
+  * Precedence:
+  *   1. `ZAI_BASE_URL` env var (explicit operator override — read by
+  *      `CONFIG.zaiBaseUrl` via a getter so tests can mutate the env
+  *      between calls)
+  *   2. Default: coding endpoint (coding-plan-biased; the rc.3 auto-fallback
+  *      hops to the standard endpoint on an "Insufficient balance" 429).
+  *
+  * Documented in plugin SKILL.md — Coding-Plan users can leave it unset (or
+  * set it explicitly to `https://api.z.ai/api/coding/paas/v4`). PAYG users
+  * MUST set it to `https://api.z.ai/api/paas/v4` to avoid the auto-fallback
+  * tax on every first call.
+  *
+  * Scanner-isolation note: the env read lives in `config.ts` (which has no
+  * network triggers). This module has network calls, so it cannot touch
+  * env vars directly — both rules 1 (env-harvesting) and 2 (potential-
+  * exfiltration) in check-scanner.mjs would fire.
+  */
+ export function getZaiBaseUrl() {
+     return CONFIG.zaiBaseUrl;
+ }
+ const PROVIDER_BASE_URLS = {
+     // zai: this table entry is only a load-time snapshot; buildConfigForProvider
+     // re-resolves zai via getZaiBaseUrl() on every call, so `ZAI_BASE_URL`
+     // env changes propagate without a module re-import.
+     zai: getZaiBaseUrl(),
+     anthropic: 'https://api.anthropic.com/v1',
+     openai: 'https://api.openai.com/v1',
+     gemini: 'https://generativelanguage.googleapis.com/v1beta/openai',
+     google: 'https://generativelanguage.googleapis.com/v1beta/openai',
+     mistral: 'https://api.mistral.ai/v1',
+     groq: 'https://api.groq.com/openai/v1',
+     deepseek: 'https://api.deepseek.com/v1',
+     openrouter: 'https://openrouter.ai/api/v1',
+     xai: 'https://api.x.ai/v1',
+     together: 'https://api.together.xyz/v1',
+     cerebras: 'https://api.cerebras.ai/v1',
+ };
+ // ---------------------------------------------------------------------------
+ // Cheap model derivation
+ // ---------------------------------------------------------------------------
+ const CHEAP_INDICATORS = ['flash', 'mini', 'nano', 'haiku', 'small', 'lite', 'fast'];
+ /**
+  * Regex that tests whether a model id genuinely mentions a "cheap" tier.
+  * Uses word-boundary + `-` separators so we do NOT match substrings like
+  * "mini" inside "gemini" (real bug caught in 3.3.1 tests — deriveCheapModel
+  * was passing gemini-2.5-pro through unchanged because `.includes('mini')`
+  * matched the letters inside "gemini"). The canonical cheap-tier naming
+  * conventions put the indicator at a hyphen boundary or end of string:
+  * gpt-4.1-mini, claude-haiku-4-5, gemini-flash-lite, glm-4.5-flash, o4-mini
+  */
+ const CHEAP_INDICATOR_RE = new RegExp(`(?:^|[-_/.])(?:${CHEAP_INDICATORS.join('|')})(?:[-_/.]|$)`, 'i');
+ /**
+  * Default cheap extraction model per provider. Exported so callers that
+  * resolve a provider WITHOUT knowing the user's primary model (e.g. the
+  * auth-profiles.json path) can still pick a sensible model.
+  *
+  * 3.3.1 update: haiku is now `claude-haiku-4-5-20251001` (latest cheap
+  * Claude as of 2026-04). glm-4.5-flash stays the zai extraction default.
+  */
+ export const CHEAP_MODEL_BY_PROVIDER = {
+     zai: 'glm-4.5-flash',
+     anthropic: 'claude-haiku-4-5-20251001',
+     openai: 'gpt-4.1-mini',
+     gemini: 'gemini-flash-lite',
+     google: 'gemini-flash-lite',
+     mistral: 'mistral-small-latest',
+     groq: 'llama-3.3-70b-versatile',
+     deepseek: 'deepseek-chat',
+     openrouter: 'anthropic/claude-haiku-4-5-20251001',
+     xai: 'grok-2',
+     together: 'meta-llama/Llama-3.3-70B-Instruct-Turbo',
+     cerebras: 'llama3.3-70b',
+ };
+ /**
+  * Derive a cheap/fast model suitable for fact extraction, given the user's
+  * provider and primary (potentially expensive) model.
+  */
+ export function deriveCheapModel(provider, primaryModel) {
+     // If already on a cheap model, use it as-is.
+     // Word-boundary match to avoid false positives (see CHEAP_INDICATOR_RE).
+     if (CHEAP_INDICATOR_RE.test(primaryModel)) {
+         return primaryModel;
+     }
+     // Derive based on provider naming conventions
+     const fromTable = CHEAP_MODEL_BY_PROVIDER[provider];
+     if (fromTable)
+         return fromTable;
+     // Fallback: use the primary model (best-effort — caller may still work)
+     return primaryModel;
+ }
+ // ---------------------------------------------------------------------------
+ // Module-level state
+ // ---------------------------------------------------------------------------
+ let _cachedConfig = null;
+ let _initialized = false;
+ let _logger = null;
+ // ---------------------------------------------------------------------------
+ // Initialization
+ // ---------------------------------------------------------------------------
+ /**
+  * Build an LLMClientConfig for a known provider + apiKey, picking a
+  * cheap default model if none is specified. Returns null if the
+  * provider is unknown and no baseUrl is available.
+  */
+ function buildConfigForProvider(provider, apiKey, opts = {}) {
+     // zai's base URL is resolved via `getZaiBaseUrl()` (reads CONFIG) so
+     // the `ZAI_BASE_URL` env override takes effect even when this helper is
+     // called with no `baseUrlOverride` (i.e. the env-var fallback tier in
+     // initLLMClient).
+     const defaultForProvider = provider === 'zai' ? getZaiBaseUrl() : PROVIDER_BASE_URLS[provider] ?? '';
+     const baseUrl = (opts.baseUrlOverride ?? defaultForProvider).replace(/\/+$/, '');
+     if (!baseUrl)
+         return null;
+     const model = opts.modelOverride ??
+         (opts.primaryModelHint ? deriveCheapModel(provider, opts.primaryModelHint) : null) ??
+         CHEAP_MODEL_BY_PROVIDER[provider];
+     if (!model)
+         return null;
+     const apiFormat = opts.apiFormatOverride ?? (provider === 'anthropic' ? 'anthropic' : 'openai');
+     return { apiKey, baseUrl, model, apiFormat };
+ }
+ /**
+  * Initialize the LLM client by detecting the provider from OpenClaw's config.
+  * Called once from the plugin's `register()` function.
+  *
+  * 3.3.1 resolution cascade (highest priority first):
+  *   1. Plugin config `extraction.llm` override block (provider/apiKey/baseUrl/model)
+  *   2. `api.config.providers` / `openclawProviders` — SDK-passed
+  *   3. `~/.openclaw/agents/*\/agent/auth-profiles.json` (harvested by caller)
+  *   4. Env var fallback (`ZAI_API_KEY`, `OPENAI_API_KEY`, ...)
+  *   5. No source → disable extraction cleanly (single log at startup, never
+  *      per-turn).
+  *
+  * The `TOTALRECLAW_LLM_MODEL` user-facing override was removed in v1 —
+  * `deriveCheapModel(provider)` covers the 99% case and a model-level knob
+  * was adding config surface for no tangible win.
+  */
+ export function initLLMClient(options) {
+     _logger = options.logger ?? null;
+     _initialized = true;
+     _cachedConfig = null;
+     const { primaryModel, pluginConfig, openclawProviders, authProfileKeys } = options;
+     // Check if extraction is explicitly disabled
+     const extraction = pluginConfig?.extraction;
+     if (extraction?.enabled === false) {
+         _logger?.info?.('TotalReclaw extraction LLM: disabled via plugin config (extraction.enabled=false).');
+         return;
+     }
+     const modelOverride = typeof extraction?.model === 'string' ? extraction.model : undefined;
+     const llmOverrideRaw = extraction?.llm;
+     const llmOverride = typeof llmOverrideRaw === 'object' && llmOverrideRaw !== null ? llmOverrideRaw : undefined;
+     // Derive provider name from primary-model ("anthropic/claude-sonnet-4-5" etc)
+     let providerFromPrimary = '';
+     let modelFromPrimary;
+     if (primaryModel) {
+         const parts = primaryModel.split('/');
+         if (parts.length >= 2) {
+             providerFromPrimary = parts[0].toLowerCase();
+             modelFromPrimary = parts.slice(1).join('/');
+         }
+         else {
+             modelFromPrimary = primaryModel;
+         }
+     }
+     // ---------------------------------------------------------------------
+     // Tier 1 — explicit plugin-config override (highest priority)
+     // Accepts any subset of { provider, model, apiKey, baseUrl }. A bare
+     // `model` override without a provider+apiKey falls through to lower
+     // tiers — matches pre-3.3.1 behaviour.
+     // ---------------------------------------------------------------------
+     if (llmOverride && typeof llmOverride === 'object') {
+         const provider = (llmOverride.provider ?? providerFromPrimary).toLowerCase();
+         const apiKey = typeof llmOverride.apiKey === 'string' && llmOverride.apiKey.trim()
+             ? llmOverride.apiKey.trim()
+             : undefined;
+         if (provider && apiKey) {
+             const cfg = buildConfigForProvider(provider, apiKey, {
+                 baseUrlOverride: llmOverride.baseUrl,
+                 modelOverride: llmOverride.model ?? modelOverride,
+                 primaryModelHint: modelFromPrimary,
+             });
+             if (cfg) {
+                 _cachedConfig = cfg;
+                 _logger?.info?.(`TotalReclaw extraction LLM: resolved ${provider}/${cfg.model} (plugin config override)`);
+                 return;
+             }
+         }
+     }
+     // ---------------------------------------------------------------------
+     // Tier 2 — SDK-passed openclawProviders. Try the primary-model's provider
+     // first, then any other provider that has an apiKey.
+     // ---------------------------------------------------------------------
+     if (openclawProviders) {
+         if (providerFromPrimary) {
+             const ocProvider = openclawProviders[providerFromPrimary];
+             if (ocProvider?.apiKey) {
+                 const cfg = buildConfigForProvider(providerFromPrimary, ocProvider.apiKey, {
+                     baseUrlOverride: ocProvider.baseUrl,
+                     modelOverride,
+                     primaryModelHint: modelFromPrimary,
+                     apiFormatOverride: ocProvider.api === 'anthropic-messages' || providerFromPrimary === 'anthropic'
+                         ? 'anthropic'
+                         : 'openai',
+                 });
+                 if (cfg) {
+                     _cachedConfig = cfg;
+                     _logger?.info?.(`TotalReclaw extraction LLM: resolved ${providerFromPrimary}/${cfg.model} (OpenClaw provider config)`);
+                     return;
+                 }
+             }
+         }
+         for (const [providerName, providerConfig] of Object.entries(openclawProviders)) {
+             if (!providerConfig?.apiKey)
+                 continue;
+             const provider = providerName.toLowerCase();
+             const firstModelId = providerConfig.models?.[0]?.id;
+             const cfg = buildConfigForProvider(provider, providerConfig.apiKey, {
+                 baseUrlOverride: providerConfig.baseUrl,
+                 modelOverride,
+                 primaryModelHint: firstModelId,
+                 apiFormatOverride: providerConfig.api === 'anthropic-messages' || provider === 'anthropic'
+                     ? 'anthropic'
+                     : 'openai',
+             });
+             if (cfg) {
+                 _cachedConfig = cfg;
+                 _logger?.info?.(`TotalReclaw extraction LLM: resolved ${provider}/${cfg.model} (OpenClaw provider config)`);
+                 return;
+             }
+         }
+     }
+     // ---------------------------------------------------------------------
+     // Tier 3 — auth-profiles.json keys harvested by llm-profile-reader.
+     // 3.3.1: new tier. Prefer the primary-model's provider, then any other.
+     // ---------------------------------------------------------------------
+     if (authProfileKeys && authProfileKeys.length > 0) {
+         if (providerFromPrimary) {
+             const hit = authProfileKeys.find((k) => k.provider === providerFromPrimary);
+             if (hit) {
+                 const cfg = buildConfigForProvider(providerFromPrimary, hit.apiKey, {
+                     modelOverride,
+                     primaryModelHint: modelFromPrimary,
+                 });
+                 if (cfg) {
+                     _cachedConfig = cfg;
+                     _logger?.info?.(`TotalReclaw extraction LLM: resolved ${providerFromPrimary}/${cfg.model} (auth-profiles.json)`);
+                     return;
+                 }
+             }
+         }
+         // Try zai / openai / anthropic first (cheapest+most available), then anything else.
+         const priority = ['zai', 'openai', 'anthropic', 'gemini', 'groq', 'deepseek', 'mistral', 'openrouter', 'xai', 'together', 'cerebras'];
+         const ordered = [
+             ...priority.flatMap((p) => authProfileKeys.filter((k) => k.provider === p)),
+             ...authProfileKeys.filter((k) => !priority.includes(k.provider)),
+         ];
+         for (const entry of ordered) {
+             const cfg = buildConfigForProvider(entry.provider, entry.apiKey, {
+                 modelOverride,
+             });
+             if (cfg) {
+                 _cachedConfig = cfg;
+                 _logger?.info?.(`TotalReclaw extraction LLM: resolved ${entry.provider}/${cfg.model} (auth-profiles.json)`);
+                 return;
+             }
+         }
+     }
+     // ---------------------------------------------------------------------
+     // Tier 4 — env var fallback (for dev/test without OpenClaw config)
+     // ---------------------------------------------------------------------
+     const envFallback = [
+         ['zai', 'zai'],
+         ['openai', 'openai'],
+         ['anthropic', 'anthropic'],
+         ['gemini', 'gemini'],
+         ['groq', 'groq'],
+         ['deepseek', 'deepseek'],
+         ['mistral', 'mistral'],
+         ['openrouter', 'openrouter'],
+         ['xai', 'xai'],
+     ];
+     // If primary model hints a specific provider, try it first.
+     if (providerFromPrimary) {
+         const keyNames = PROVIDER_KEY_NAMES[providerFromPrimary];
+         if (keyNames) {
+             const apiKey = keyNames.map((n) => CONFIG.llmApiKeys[n]).find(Boolean);
+             if (apiKey) {
+                 const cfg = buildConfigForProvider(providerFromPrimary, apiKey, {
+                     modelOverride,
+                     primaryModelHint: modelFromPrimary,
+                 });
+                 if (cfg) {
+                     _cachedConfig = cfg;
+                     _logger?.info?.(`TotalReclaw extraction LLM: resolved ${providerFromPrimary}/${cfg.model} (env var)`);
+                     return;
+                 }
+             }
+         }
+     }
+     for (const [provider, keyName] of envFallback) {
+         const apiKey = CONFIG.llmApiKeys[keyName];
+         if (!apiKey)
+             continue;
+         const cfg = buildConfigForProvider(provider, apiKey, { modelOverride });
+         if (cfg) {
+             _cachedConfig = cfg;
+             _logger?.info?.(`TotalReclaw extraction LLM: resolved ${provider}/${cfg.model} (env var)`);
+             return;
+         }
+     }
+     // ---------------------------------------------------------------------
+     // No source — extraction disabled. Single startup log, INFO-level.
+     // NOT a warn: this is the default state for users who have not set up a
+     // provider. Warning per turn is what 3.3.0 did and it was misleading.
+     // ---------------------------------------------------------------------
+     _logger?.info?.('TotalReclaw extraction LLM: not configured — auto-extraction disabled. ' +
+         'To enable, configure a provider in ~/.openclaw/agents/*\/agent/auth-profiles.json ' +
+         'or set an API key env var (ZAI_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, ...).');
+ }
+ // ---------------------------------------------------------------------------
+ // Public API
+ // ---------------------------------------------------------------------------
+ /**
+  * Resolve LLM configuration. Returns the cached config set by `initLLMClient()`,
+  * or falls back to the legacy env-var detection if `initLLMClient()` was never called.
+  */
+ export function resolveLLMConfig() {
+     if (_initialized) {
+         return _cachedConfig;
+     }
+     // Legacy fallback: if initLLMClient() was never called (e.g. running outside
+     // the plugin context), try the config-based approach for backwards compat.
+     const zaiKey = CONFIG.llmApiKeys.zai;
+     const openaiKey = CONFIG.llmApiKeys.openai;
+     const model = zaiKey ? 'glm-4.5-flash' : 'gpt-4.1-mini';
+     if (zaiKey) {
+         return {
+             apiKey: zaiKey,
+             baseUrl: getZaiBaseUrl(),
+             model,
+             apiFormat: 'openai',
+         };
+     }
+     if (openaiKey) {
+         return {
+             apiKey: openaiKey,
+             baseUrl: 'https://api.openai.com/v1',
+             model,
+             apiFormat: 'openai',
+         };
+     }
+     return null;
+ }
+ /**
+  * Default retry budget in ms. Configurable via
+  * `TOTALRECLAW_LLM_RETRY_BUDGET_MS` env var — read by `config.ts`. Callers
+  * can override per-call via `retry.budgetMs`. 60_000ms covers roughly one
+  * minute of backoff: the full 2s→32s schedule sums to ~62s of waiting.
+  *
+  * Scanner-isolation note: the env read lives in `config.ts` so this file
+  * stays clean of env-harvesting triggers.
+  */
+ export const DEFAULT_RETRY_BUDGET_MS = CONFIG.llmRetryBudgetMs;
+ /**
+  * Structured error thrown when the extraction LLM upstream is unreachable
+  * after the full retry budget is exhausted. The extraction pipeline
+  * recognizes this via `err instanceof LLMUpstreamOutageError` and can
+  * choose to:
+  * - queue the message batch for retry next turn,
+  * - surface a one-time notification to the user, or
+  * - simply skip this extraction window silently.
+  */
+ export class LLMUpstreamOutageError extends Error {
+     attempts;
+     lastStatus;
+     constructor(message, attempts, lastStatus) {
+         super(message);
+         this.name = 'LLMUpstreamOutageError';
+         this.attempts = attempts;
+         this.lastStatus = lastStatus;
+     }
+ }
+ /**
+  * Detect the "Insufficient balance" error shape from zai. Matches both
+  * the exact production wording ("Insufficient balance or no resource
+  * package. Please recharge.") and the short "no resource package" variant
+  * we've seen in some historical responses.
+  */
+ export function isZaiBalanceError(errorMessage) {
+     const m = errorMessage.toLowerCase();
+     return m.includes('insufficient balance') || m.includes('no resource package');
+ }
+ /**
+  * Identify the "other" zai endpoint when the current one returns a balance
+  * error — CODING ↔ STANDARD. Returns `null` when the URL is neither of
+  * the two zai endpoints we know about (e.g. a self-hosted proxy), which
+  * means the fallback logic stays put.
+  */
+ export function zaiFallbackBaseUrl(currentBaseUrl) {
+     const normalized = currentBaseUrl.replace(/\/+$/, '');
+     if (normalized === ZAI_CODING_BASE_URL)
+         return ZAI_STANDARD_BASE_URL;
+     if (normalized === ZAI_STANDARD_BASE_URL)
+         return ZAI_CODING_BASE_URL;
+     return null;
+ }
+ /**
+  * Call the LLM chat completion endpoint.
+  *
+  * Supports both OpenAI-compatible format and Anthropic Messages API,
+  * determined by `config.apiFormat`.
+  *
+  * 3.3.1-rc.3 — lifts the retry budget to 5 attempts × (2s/4s/8s/16s/32s),
+  * ~62s total. Configurable via `TOTALRECLAW_LLM_RETRY_BUDGET_MS`. Adds zai
+  * "Insufficient balance" auto-fallback: when a zai 429 carries the balance
+  * error body AND we're on one of the two known zai endpoints, we flip to
+  * the OTHER endpoint and retry ONCE (accounted for separately from the
+  * normal retry loop). On exhaustion, throws `LLMUpstreamOutageError`.
+  *
+  * Non-retryable errors (4xx other than 429, network refused, JSON parse)
+  * fail fast on the first attempt.
+  *
+  * @returns The assistant's response content, or null on failure.
+  */
+ export async function chatCompletion(config, messages, options) {
+     const maxTokens = options?.maxTokens ?? 2048;
+     const temperature = options?.temperature ?? 0; // Deterministic output for dedup (same input → same text → same content fingerprint)
+     const attempts = Math.max(1, options?.retry?.attempts ?? 5);
+     const baseDelayMs = Math.max(100, options?.retry?.baseDelayMs ?? 2000);
+     const budgetMs = Math.max(100, options?.retry?.budgetMs ?? DEFAULT_RETRY_BUDGET_MS);
+     const timeoutMs = options?.timeoutMs ?? 30_000;
+     const logger = options?.logger;
+     // We mutate `activeConfig.baseUrl` in the zai fallback branch so the
+     // retried call hits the other endpoint. Shallow-clone so the caller's
+     // config object stays untouched.
+     const activeConfig = { ...config };
+     // One-shot flag: we only auto-fallback zai once per chatCompletion call
+     // to prevent ping-pong between the two endpoints if both reject.
+     let zaiFallbackAttempted = false;
+     const callOnce = () => activeConfig.apiFormat === 'anthropic'
+         ? chatCompletionAnthropic(activeConfig, messages, maxTokens, temperature, timeoutMs)
+         : chatCompletionOpenAI(activeConfig, messages, maxTokens, temperature, timeoutMs);
+     let lastErr;
+     let cumulativeDelayMs = 0;
+     let lastStatus;
+     for (let attempt = 1; attempt <= attempts; attempt++) {
+         try {
+             return await callOnce();
+         }
+         catch (err) {
+             lastErr = err;
+             const msg = err instanceof Error ? err.message : String(err);
+             lastStatus = parseHttpStatus(msg) ?? lastStatus;
+             // ── zai "Insufficient balance" auto-fallback ──
+             // Fires BEFORE the normal retry accounting. If the error is a zai
+             // balance-shaped 429, flip the baseUrl once and immediately retry:
+             // no backoff, and the rewound loop counter (below) means the
+             // fallback retry never consumes an attempt. Keeps the total
+             // attempt budget reserved for genuine outages.
+             if (!zaiFallbackAttempted && /\b429\b/.test(msg) && isZaiBalanceError(msg)) {
+                 const fallback = zaiFallbackBaseUrl(activeConfig.baseUrl);
+                 if (fallback) {
+                     zaiFallbackAttempted = true;
+                     const oldUrl = activeConfig.baseUrl;
+                     activeConfig.baseUrl = fallback;
+                     logger?.info?.(`chatCompletion: zai endpoint auto-fallback: ${oldUrl} → ${fallback} due to "Insufficient balance" response`);
+                     // Retry immediately, rewinding the loop counter so this
+                     // "extra" attempt is the one free fallback try and does
+                     // not count against the retry budget.
+                     attempt--;
+                     continue;
+                 }
+             }
+             const retryable = isRetryable(msg);
+             const isFinalAttempt = attempt >= attempts;
+             if (!retryable || isFinalAttempt) {
+                 // Fail-fast OR last attempt — rethrow.
+                 if (attempt > 1 || !retryable) {
+                     if (retryable) {
+                         logger?.warn?.(`chatCompletion: giving up after ${attempt} attempts: ${msg.slice(0, 200)}`);
+                     }
+                     // Structured outage error when the retryable error budget is
+                     // fully exhausted — lets downstream recognize vs bail silently.
+                     if (retryable) {
+                         throw new LLMUpstreamOutageError(`LLM upstream outage after ${attempt} attempts: ${msg.slice(0, 200)}`, attempt, lastStatus);
+                     }
+                 }
+                 throw err;
+             }
+             // Compute next delay, but respect the cumulative retry-budget cap.
+             const delayMs = baseDelayMs * Math.pow(2, attempt - 1);
+             if (cumulativeDelayMs + delayMs > budgetMs) {
+                 logger?.warn?.(`chatCompletion: retry budget exhausted (${cumulativeDelayMs}ms used + ${delayMs}ms next > ${budgetMs}ms budget); surfacing outage after ${attempt} attempts: ${msg.slice(0, 160)}`);
+                 throw new LLMUpstreamOutageError(`LLM upstream outage (budget ${budgetMs}ms exhausted after ${attempt} attempts): ${msg.slice(0, 200)}`, attempt, lastStatus);
+             }
+             cumulativeDelayMs += delayMs;
+             // Log only the FIRST retry at INFO to avoid spamming during long
+             // outages; subsequent retries are DEBUG (debounced per outage).
+             if (attempt === 1) {
+                 logger?.info?.(`chatCompletion: retrying after transient failure (attempt ${attempt}/${attempts}, wait ${delayMs}ms): ${msg.slice(0, 160)}`);
+             }
+             else {
+                 logger?.debug?.(`chatCompletion: retry attempt ${attempt}/${attempts} (wait ${delayMs}ms): ${msg.slice(0, 160)}`);
+             }
+             await new Promise((resolve) => setTimeout(resolve, delayMs));
+         }
+     }
+     // Defensive — should never reach here since the loop always throws on the
+     // final attempt when it fails. Keeps TS happy.
+     throw lastErr instanceof Error ? lastErr : new Error(String(lastErr));
+ }
+ /**
+  * Parse the HTTP status code from an error message of the form
+  * `"LLM API 429: rate limit"` or `"Anthropic API 503: ..."`. Returns
+  * `undefined` when the message doesn't follow that shape (e.g. network
+  * refused). Used by `LLMUpstreamOutageError.lastStatus` for downstream
+  * classification.
+  */
+ function parseHttpStatus(errorMessage) {
+     const m = errorMessage.match(/\b(\d{3})\b/);
+     if (!m)
+         return undefined;
+     const code = parseInt(m[1], 10);
+     return code >= 100 && code < 600 ? code : undefined;
+ }
+ /**
+  * Which LLM-call errors are worth retrying. Exported for testability.
+  *
+  * Retryable:
+  * - HTTP 429 (rate limit)
+  * - HTTP 503 / 502 / 504 (gateway transients)
+  * - AbortError / "aborted due to timeout" / "TimeoutError"
+  *
+  * NOT retryable:
+  * - HTTP 400 / 401 / 403 / 404 (auth / request errors — no point retrying)
+  * - JSON parse errors
+  * - DNS / connection refused (usually misconfig, not transient)
+  */
+ export function isRetryable(errorMessage) {
+     const m = errorMessage.toLowerCase();
+     // Rate limit
+     if (/\b429\b/.test(errorMessage) || m.includes('rate limit'))
+         return true;
+     // Transient gateway errors
+     if (/\b50(2|3|4)\b/.test(errorMessage))
+         return true;
+     // Timeouts
+     if (m.includes('timeout') ||
+         m.includes('aborterror') ||
+         m.includes('was aborted') ||
+         m.includes('operation was aborted')) {
+         return true;
+     }
+     return false;
+ }
+ // ---------------------------------------------------------------------------
+ // OpenAI-compatible chat completion
+ // ---------------------------------------------------------------------------
+ async function chatCompletionOpenAI(config, messages, maxTokens, temperature, timeoutMs) {
+     const url = `${config.baseUrl}/chat/completions`;
+     const body = {
+         model: config.model,
+         messages,
+         temperature,
+         max_completion_tokens: maxTokens,
+     };
+     try {
+         const res = await fetch(url, {
+             method: 'POST',
+             headers: {
+                 'Content-Type': 'application/json',
+                 Authorization: `Bearer ${config.apiKey}`,
+             },
+             body: JSON.stringify(body),
+             signal: AbortSignal.timeout(timeoutMs),
+         });
+         if (!res.ok) {
+             const text = await res.text().catch(() => '');
+             throw new Error(`LLM API ${res.status}: ${text.slice(0, 200)}`);
+         }
+         const json = (await res.json());
+         return json.choices?.[0]?.message?.content ?? null;
+     }
+     catch (err) {
+         const msg = err instanceof Error ? err.message : String(err);
+         throw new Error(`LLM call failed: ${msg}`);
+     }
+ }
+ // ---------------------------------------------------------------------------
+ // Anthropic Messages API chat completion
+ // ---------------------------------------------------------------------------
+ async function chatCompletionAnthropic(config, messages, maxTokens, temperature, timeoutMs) {
+     const url = `${config.baseUrl}/messages`;
+     // Anthropic requires system prompt to be a top-level param, not in messages
+     let system;
+     const apiMessages = [];
+     for (const msg of messages) {
+         if (msg.role === 'system') {
+             system = msg.content;
+         }
+         else {
+             apiMessages.push({ role: msg.role, content: msg.content });
+         }
+     }
+     const body = {
+         model: config.model,
+         max_tokens: maxTokens,
+         temperature,
+         messages: apiMessages,
+     };
+     if (system) {
+         body.system = system;
+     }
+     try {
+         const res = await fetch(url, {
+             method: 'POST',
+             headers: {
+                 'Content-Type': 'application/json',
+                 'x-api-key': config.apiKey,
+                 'anthropic-version': '2023-06-01',
+             },
+             body: JSON.stringify(body),
+             signal: AbortSignal.timeout(timeoutMs),
+         });
+         if (!res.ok) {
+             const text = await res.text().catch(() => '');
+             throw new Error(`Anthropic API ${res.status}: ${text.slice(0, 200)}`);
+         }
+         const json = (await res.json());
+         const textBlock = json.content?.find((block) => block.type === 'text');
+         return textBlock?.text ?? null;
+     }
+     catch (err) {
+         const msg = err instanceof Error ? err.message : String(err);
+         throw new Error(`LLM call failed: ${msg}`);
+     }
+ }
+ // ---------------------------------------------------------------------------
+ // Embedding (re-exported from local ONNX module)
+ // ---------------------------------------------------------------------------
+ // Embeddings are now generated locally via @huggingface/transformers
+ // (Harrier-OSS-v1-270M ONNX model). No API key needed.
+ // See embedding.ts for implementation details.
+ export { generateEmbedding, getEmbeddingDims } from './embedding.js';
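
A few of the behaviours in this file are easiest to see with small usage sketches. First, the cheap-model derivation: the word-boundary regex above is the 3.3.1 fix that stops "mini" from matching inside "gemini". A minimal sketch in TypeScript, assuming the exports shown in the diff (the relative import path is illustrative, not part of the package):

import { deriveCheapModel, CHEAP_MODEL_BY_PROVIDER } from './llm-client.js'; // path is illustrative

// Already-cheap ids pass through unchanged: the indicator sits at a [-_/.] boundary.
console.log(deriveCheapModel('openai', 'gpt-4.1-mini'));    // 'gpt-4.1-mini'
console.log(deriveCheapModel('zai', 'glm-4.5-flash'));      // 'glm-4.5-flash'

// The 3.3.1 fix: 'mini' inside 'gemini' no longer counts as cheap, so an
// expensive Gemini model maps to the provider default instead.
console.log(deriveCheapModel('gemini', 'gemini-2.5-pro'));  // CHEAP_MODEL_BY_PROVIDER.gemini
console.log(CHEAP_MODEL_BY_PROVIDER.gemini);                // 'gemini-flash-lite'

// Unknown provider with no table entry: best-effort passthrough of the primary model.
console.log(deriveCheapModel('myproxy', 'some-big-model')); // 'some-big-model'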
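
Second, the zai endpoint helpers are pure functions, so the CODING ↔ STANDARD flip that backs the rc.3 auto-fallback can be shown directly (same assumption about the import path):

import {
    ZAI_CODING_BASE_URL,   // 'https://api.z.ai/api/coding/paas/v4'
    ZAI_STANDARD_BASE_URL, // 'https://api.z.ai/api/paas/v4'
    zaiFallbackBaseUrl,
    isZaiBalanceError,
} from './llm-client.js';

// CODING <-> STANDARD flip; trailing slashes are normalized away first.
zaiFallbackBaseUrl(ZAI_CODING_BASE_URL);         // -> ZAI_STANDARD_BASE_URL
zaiFallbackBaseUrl(ZAI_STANDARD_BASE_URL + '/'); // -> ZAI_CODING_BASE_URL
// Unknown URLs (e.g. a self-hosted proxy) yield null, so the fallback stays put.
zaiFallbackBaseUrl('https://proxy.internal/v4'); // -> null

// The error-shape probe that gates the one-shot fallback in chatCompletion:
isZaiBalanceError('LLM API 429: Insufficient balance or no resource package. Please recharge.'); // true
isZaiBalanceError('LLM API 429: rate limit exceeded');                                           // false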
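
Finally, a hedged sketch of the caller-side retry contract for chatCompletion. The config and option shapes come from the code above; the transcript value and the queue-for-next-turn comment are illustrative assumptions about the surrounding extraction pipeline, not code from the package:

import { chatCompletion, LLMUpstreamOutageError, ZAI_CODING_BASE_URL } from './llm-client.js';

const cfg = {
    apiKey: process.env.ZAI_API_KEY ?? '',
    baseUrl: ZAI_CODING_BASE_URL, // coding-plan-biased default; PAYG keys trip the one-shot fallback
    model: 'glm-4.5-flash',
    apiFormat: 'openai' as const,
};
const transcript = '...conversation text to extract facts from...'; // illustrative

try {
    const text = await chatCompletion(cfg, [
        { role: 'system', content: 'Extract durable facts as JSON.' },
        { role: 'user', content: transcript },
    ], {
        // Defaults made explicit: 5 attempts, 2s base delay doubling per retry,
        // every wait checked against the cumulative budget before sleeping.
        retry: { attempts: 5, baseDelayMs: 2000, budgetMs: 60_000 },
        timeoutMs: 30_000,
    });
    console.log(text ?? '(no content in response)');
} catch (err) {
    if (err instanceof LLMUpstreamOutageError) {
        // Retryable errors (429 / 502 / 503 / 504 / timeouts) exhausted the budget:
        // a caller might queue the batch for next turn instead of failing the turn.
        console.warn(`extraction skipped: ${err.attempts} attempts, last status ${err.lastStatus ?? 'n/a'}`);
    } else {
        throw err; // non-retryable (auth / request / parse errors) fail fast
    }
}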