@totalreclaw/totalreclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/llm-client.ts ADDED
@@ -0,0 +1,418 @@
1
+ /**
2
+ * TotalReclaw Plugin - LLM Client
3
+ *
4
+ * Auto-detects the user's LLM provider from OpenClaw's config and derives a
5
+ * cheap extraction model. Supports OpenAI-compatible APIs and Anthropic's
6
+ * Messages API. No external dependencies -- uses native fetch().
7
+ *
8
+ * Embedding generation has been moved to embedding.ts (local ONNX model via
9
+ * @huggingface/transformers). No API key needed for embeddings.
10
+ */
11
+
12
+ // ---------------------------------------------------------------------------
13
+ // Types
14
+ // ---------------------------------------------------------------------------
15
+
16
/** A single turn in an OpenAI-style conversation. */
interface ChatMessage {
  // 'system' messages are lifted to a top-level param on the Anthropic path.
  role: 'system' | 'user' | 'assistant';
  content: string;
}
20
+
21
/**
 * OpenAI-compatible chat completion response — only the fields this client
 * actually reads (first choice's message content).
 */
interface ChatCompletionResponse {
  choices: Array<{
    message: {
      content: string | null;
    };
  }>;
}
28
+
29
/**
 * Anthropic Messages API response shape — only the fields this client reads.
 * `text` is consumed from the first block whose `type` is 'text'.
 */
interface AnthropicMessagesResponse {
  content: Array<{
    type: string;
    text?: string;
  }>;
}
36
+
37
/** Fully-resolved settings for one LLM endpoint, as produced by initLLMClient(). */
export interface LLMClientConfig {
  /** Sent as `Authorization: Bearer` (openai format) or `x-api-key` (anthropic). */
  apiKey: string;
  /** API base URL without trailing slash, e.g. 'https://api.openai.com/v1'. */
  baseUrl: string;
  /** Model id used for extraction chat completions. */
  model: string;
  /** Which wire format chatCompletion() speaks for this provider. */
  apiFormat: 'openai' | 'anthropic';
}
43
+
44
+ // ---------------------------------------------------------------------------
45
+ // Provider mappings
46
+ // ---------------------------------------------------------------------------
47
+
48
// Env var name(s) probed — in order — for each provider's API key.
const PROVIDER_ENV_VARS: Record<string, string[]> = {
  zai: ['ZAI_API_KEY'],
  anthropic: ['ANTHROPIC_API_KEY'],
  openai: ['OPENAI_API_KEY'],
  gemini: ['GEMINI_API_KEY'],
  google: ['GEMINI_API_KEY', 'GOOGLE_API_KEY'],
  mistral: ['MISTRAL_API_KEY'],
  groq: ['GROQ_API_KEY'],
  deepseek: ['DEEPSEEK_API_KEY'],
  openrouter: ['OPENROUTER_API_KEY'],
  xai: ['XAI_API_KEY'],
  together: ['TOGETHER_API_KEY'],
  cerebras: ['CEREBRAS_API_KEY'],
};
62
+
63
// Default API base URL per provider. Gemini/Google point at Google's
// OpenAI-compatibility endpoint, so they are handled by the 'openai' format.
const PROVIDER_BASE_URLS: Record<string, string> = {
  zai: 'https://api.z.ai/api/paas/v4',
  anthropic: 'https://api.anthropic.com/v1',
  openai: 'https://api.openai.com/v1',
  gemini: 'https://generativelanguage.googleapis.com/v1beta/openai',
  google: 'https://generativelanguage.googleapis.com/v1beta/openai',
  mistral: 'https://api.mistral.ai/v1',
  groq: 'https://api.groq.com/openai/v1',
  deepseek: 'https://api.deepseek.com/v1',
  openrouter: 'https://openrouter.ai/api/v1',
  xai: 'https://api.x.ai/v1',
  together: 'https://api.together.xyz/v1',
  cerebras: 'https://api.cerebras.ai/v1',
};
77
+
78
+ // ---------------------------------------------------------------------------
79
+ // Cheap model derivation
80
+ // ---------------------------------------------------------------------------
81
+
82
+ const CHEAP_INDICATORS = ['flash', 'mini', 'nano', 'haiku', 'small', 'lite', 'fast'];
83
+
84
+ /**
85
+ * Derive a cheap/fast model suitable for fact extraction, given the user's
86
+ * provider and primary (potentially expensive) model.
87
+ */
88
+ function deriveCheapModel(provider: string, primaryModel: string): string {
89
+ // If already on a cheap model, use it as-is
90
+ if (CHEAP_INDICATORS.some((s) => primaryModel.toLowerCase().includes(s))) {
91
+ return primaryModel;
92
+ }
93
+
94
+ // Derive based on provider naming conventions
95
+ switch (provider) {
96
+ case 'zai': {
97
+ // glm-5 -> glm-4.5-flash, glm-4.6 -> glm-4.5-flash
98
+ return 'glm-4.5-flash';
99
+ }
100
+ case 'anthropic': {
101
+ // claude-sonnet-4-5 -> claude-haiku-4-5-20251001
102
+ return 'claude-haiku-4-5-20251001';
103
+ }
104
+ case 'openai': {
105
+ // gpt-4.1 -> gpt-4.1-mini, gpt-4o -> gpt-4.1-mini
106
+ return 'gpt-4.1-mini';
107
+ }
108
+ case 'gemini':
109
+ case 'google': {
110
+ return 'gemini-2.0-flash';
111
+ }
112
+ case 'mistral': {
113
+ return 'mistral-small-latest';
114
+ }
115
+ case 'groq': {
116
+ return 'llama-3.3-70b-versatile';
117
+ }
118
+ case 'deepseek': {
119
+ return 'deepseek-chat';
120
+ }
121
+ case 'openrouter': {
122
+ // Use Anthropic Haiku via OpenRouter (cheap and good at JSON)
123
+ return 'anthropic/claude-haiku-4-5-20251001';
124
+ }
125
+ case 'xai': {
126
+ return 'grok-2';
127
+ }
128
+ default: {
129
+ // Fallback: try the primary model itself
130
+ return primaryModel;
131
+ }
132
+ }
133
+ }
134
+
135
+ // ---------------------------------------------------------------------------
136
+ // Module-level state
137
+ // ---------------------------------------------------------------------------
138
+
139
// Config resolved by initLLMClient(); null when no usable LLM was found.
let _cachedConfig: LLMClientConfig | null = null;
// True once initLLMClient() has run; gates the legacy env-var fallback
// inside resolveLLMConfig().
let _initialized = false;
// Minimal logger injected by the host plugin; null outside the plugin context.
let _logger: { warn: (msg: string) => void } | null = null;
142
+
143
+ // ---------------------------------------------------------------------------
144
+ // Initialization
145
+ // ---------------------------------------------------------------------------
146
+
147
+ /**
148
+ * Initialize the LLM client by detecting the provider from OpenClaw's config.
149
+ * Called once from the plugin's `register()` function.
150
+ *
151
+ * Resolution order (highest priority first):
152
+ * 1. TOTALRECLAW_LLM_MODEL env var (power user override for model)
153
+ * 2. Plugin config `extraction.model` (if provided)
154
+ * 3. Auto-derived from provider heuristic
155
+ * 4. Fallback: try common env vars (ZAI_API_KEY, OPENAI_API_KEY) for dev/test
156
+ */
157
+ export function initLLMClient(options: {
158
+ primaryModel?: string;
159
+ pluginConfig?: Record<string, unknown>;
160
+ logger?: { warn: (msg: string) => void };
161
+ }): void {
162
+ _logger = options.logger ?? null;
163
+ _initialized = true;
164
+ _cachedConfig = null;
165
+
166
+ const { primaryModel, pluginConfig } = options;
167
+
168
+ // Check if extraction is explicitly disabled
169
+ const extraction = pluginConfig?.extraction as Record<string, unknown> | undefined;
170
+ if (extraction?.enabled === false) {
171
+ _logger?.warn('TotalReclaw: LLM extraction explicitly disabled via plugin config.');
172
+ return;
173
+ }
174
+
175
+ // --- Try to resolve from primaryModel (auto-detect path) ---
176
+ if (primaryModel) {
177
+ const parts = primaryModel.split('/');
178
+ const provider = parts.length >= 2 ? parts[0].toLowerCase() : '';
179
+ const modelName = parts.length >= 2 ? parts.slice(1).join('/') : primaryModel;
180
+
181
+ if (provider) {
182
+ // Find the API key for this provider
183
+ const envVarNames = PROVIDER_ENV_VARS[provider];
184
+ const apiKey = envVarNames
185
+ ? envVarNames.map((name) => process.env[name]).find(Boolean)
186
+ : undefined;
187
+
188
+ if (apiKey) {
189
+ const baseUrl = PROVIDER_BASE_URLS[provider];
190
+ if (baseUrl) {
191
+ // Determine model: env override > plugin config > auto-derived
192
+ const model =
193
+ process.env.TOTALRECLAW_LLM_MODEL ??
194
+ (typeof extraction?.model === 'string' ? extraction.model : null) ??
195
+ deriveCheapModel(provider, modelName);
196
+
197
+ const apiFormat: 'openai' | 'anthropic' =
198
+ provider === 'anthropic' ? 'anthropic' : 'openai';
199
+
200
+ _cachedConfig = { apiKey, baseUrl, model, apiFormat };
201
+ return;
202
+ }
203
+ }
204
+ }
205
+ }
206
+
207
+ // --- Fallback: try common env vars (for dev/test without OpenClaw config) ---
208
+ const fallbackProviders: Array<[string, string, string]> = [
209
+ ['zai', 'ZAI_API_KEY', 'glm-4.5-flash'],
210
+ ['openai', 'OPENAI_API_KEY', 'gpt-4.1-mini'],
211
+ ['anthropic', 'ANTHROPIC_API_KEY', 'claude-haiku-4-5-20251001'],
212
+ ['gemini', 'GEMINI_API_KEY', 'gemini-2.0-flash'],
213
+ ];
214
+
215
+ for (const [provider, envVar, defaultModel] of fallbackProviders) {
216
+ const apiKey = process.env[envVar];
217
+ if (apiKey) {
218
+ const model = process.env.TOTALRECLAW_LLM_MODEL ??
219
+ (typeof extraction?.model === 'string' ? extraction.model : null) ??
220
+ defaultModel;
221
+
222
+ const apiFormat: 'openai' | 'anthropic' =
223
+ provider === 'anthropic' ? 'anthropic' : 'openai';
224
+
225
+ _cachedConfig = {
226
+ apiKey,
227
+ baseUrl: PROVIDER_BASE_URLS[provider],
228
+ model,
229
+ apiFormat,
230
+ };
231
+ return;
232
+ }
233
+ }
234
+
235
+ // No LLM available
236
+ _logger?.warn(
237
+ 'TotalReclaw: No LLM available for auto-extraction. ' +
238
+ 'Set an API key for your provider or configure extraction in plugin settings.',
239
+ );
240
+ }
241
+
242
+ // ---------------------------------------------------------------------------
243
+ // Public API
244
+ // ---------------------------------------------------------------------------
245
+
246
+ /**
247
+ * Resolve LLM configuration. Returns the cached config set by `initLLMClient()`,
248
+ * or falls back to the legacy env-var detection if `initLLMClient()` was never called.
249
+ */
250
+ export function resolveLLMConfig(): LLMClientConfig | null {
251
+ if (_initialized) {
252
+ return _cachedConfig;
253
+ }
254
+
255
+ // Legacy fallback: if initLLMClient() was never called (e.g. running outside
256
+ // the plugin context), try the old env-var approach for backwards compat.
257
+ const zaiKey = process.env.ZAI_API_KEY;
258
+ const openaiKey = process.env.OPENAI_API_KEY;
259
+
260
+ const model = process.env.TOTALRECLAW_LLM_MODEL
261
+ ?? (zaiKey ? 'glm-4.5-flash' : 'gpt-4.1-mini');
262
+
263
+ if (zaiKey) {
264
+ return {
265
+ apiKey: zaiKey,
266
+ baseUrl: 'https://api.z.ai/api/paas/v4',
267
+ model,
268
+ apiFormat: 'openai',
269
+ };
270
+ }
271
+
272
+ if (openaiKey) {
273
+ return {
274
+ apiKey: openaiKey,
275
+ baseUrl: 'https://api.openai.com/v1',
276
+ model,
277
+ apiFormat: 'openai',
278
+ };
279
+ }
280
+
281
+ return null;
282
+ }
283
+
284
/**
 * Call the LLM chat completion endpoint.
 *
 * Supports both OpenAI-compatible format and Anthropic Messages API,
 * determined by `config.apiFormat`.
 *
 * @param config - Resolved provider settings (see resolveLLMConfig()).
 * @param messages - Conversation turns; 'system' messages are allowed.
 * @param options - Optional overrides: maxTokens (default 2048) and
 *   temperature (default 0).
 * @returns The assistant's response text, or null when the API response
 *   carried no content.
 * @throws Error (message prefixed "LLM call failed:") on network errors,
 *   the 30s timeout, or a non-2xx HTTP status — both backends rethrow
 *   failures rather than returning null.
 */
export async function chatCompletion(
  config: LLMClientConfig,
  messages: ChatMessage[],
  options?: { maxTokens?: number; temperature?: number },
): Promise<string | null> {
  const maxTokens = options?.maxTokens ?? 2048;
  const temperature = options?.temperature ?? 0; // Deterministic output for dedup (same input → same text → same content fingerprint)

  if (config.apiFormat === 'anthropic') {
    return chatCompletionAnthropic(config, messages, maxTokens, temperature);
  }

  return chatCompletionOpenAI(config, messages, maxTokens, temperature);
}
306
+
307
+ // ---------------------------------------------------------------------------
308
+ // OpenAI-compatible chat completion
309
+ // ---------------------------------------------------------------------------
310
+
311
+ async function chatCompletionOpenAI(
312
+ config: LLMClientConfig,
313
+ messages: ChatMessage[],
314
+ maxTokens: number,
315
+ temperature: number,
316
+ ): Promise<string | null> {
317
+ const url = `${config.baseUrl}/chat/completions`;
318
+
319
+ const body: Record<string, unknown> = {
320
+ model: config.model,
321
+ messages,
322
+ temperature,
323
+ max_completion_tokens: maxTokens,
324
+ };
325
+
326
+ try {
327
+ const res = await fetch(url, {
328
+ method: 'POST',
329
+ headers: {
330
+ 'Content-Type': 'application/json',
331
+ Authorization: `Bearer ${config.apiKey}`,
332
+ },
333
+ body: JSON.stringify(body),
334
+ signal: AbortSignal.timeout(30_000), // 30 second timeout
335
+ });
336
+
337
+ if (!res.ok) {
338
+ const text = await res.text().catch(() => '');
339
+ throw new Error(`LLM API ${res.status}: ${text.slice(0, 200)}`);
340
+ }
341
+
342
+ const json = (await res.json()) as ChatCompletionResponse;
343
+ return json.choices?.[0]?.message?.content ?? null;
344
+ } catch (err) {
345
+ const msg = err instanceof Error ? err.message : String(err);
346
+ throw new Error(`LLM call failed: ${msg}`);
347
+ }
348
+ }
349
+
350
+ // ---------------------------------------------------------------------------
351
+ // Anthropic Messages API chat completion
352
+ // ---------------------------------------------------------------------------
353
+
354
+ async function chatCompletionAnthropic(
355
+ config: LLMClientConfig,
356
+ messages: ChatMessage[],
357
+ maxTokens: number,
358
+ temperature: number,
359
+ ): Promise<string | null> {
360
+ const url = `${config.baseUrl}/messages`;
361
+
362
+ // Anthropic requires system prompt to be a top-level param, not in messages
363
+ let system: string | undefined;
364
+ const apiMessages: Array<{ role: string; content: string }> = [];
365
+
366
+ for (const msg of messages) {
367
+ if (msg.role === 'system') {
368
+ system = msg.content;
369
+ } else {
370
+ apiMessages.push({ role: msg.role, content: msg.content });
371
+ }
372
+ }
373
+
374
+ const body: Record<string, unknown> = {
375
+ model: config.model,
376
+ max_tokens: maxTokens,
377
+ temperature,
378
+ messages: apiMessages,
379
+ };
380
+
381
+ if (system) {
382
+ body.system = system;
383
+ }
384
+
385
+ try {
386
+ const res = await fetch(url, {
387
+ method: 'POST',
388
+ headers: {
389
+ 'Content-Type': 'application/json',
390
+ 'x-api-key': config.apiKey,
391
+ 'anthropic-version': '2023-06-01',
392
+ },
393
+ body: JSON.stringify(body),
394
+ signal: AbortSignal.timeout(30_000),
395
+ });
396
+
397
+ if (!res.ok) {
398
+ const text = await res.text().catch(() => '');
399
+ throw new Error(`Anthropic API ${res.status}: ${text.slice(0, 200)}`);
400
+ }
401
+
402
+ const json = (await res.json()) as AnthropicMessagesResponse;
403
+ const textBlock = json.content?.find((block) => block.type === 'text');
404
+ return textBlock?.text ?? null;
405
+ } catch (err) {
406
+ const msg = err instanceof Error ? err.message : String(err);
407
+ throw new Error(`LLM call failed: ${msg}`);
408
+ }
409
+ }
410
+
411
+ // ---------------------------------------------------------------------------
412
+ // Embedding (re-exported from local ONNX module)
413
+ // ---------------------------------------------------------------------------
414
+
415
+ // Embeddings are now generated locally via @huggingface/transformers
416
+ // (bge-small-en-v1.5 ONNX model). No API key needed.
417
+ // See embedding.ts for implementation details.
418
+ export { generateEmbedding, getEmbeddingDims } from './embedding.js';