plugin-custom-llm 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/dist/client/index.js +9 -0
  2. package/dist/externalVersion.js +16 -7
  3. package/dist/index.js +9 -0
  4. package/dist/server/index.js +9 -0
  5. package/dist/server/llm-providers/custom-llm.js +157 -5
  6. package/dist/server/plugin.js +9 -0
  7. package/dist/swagger.js +39 -0
  8. package/package.json +9 -1
  9. package/src/client/client.d.ts +249 -0
  10. package/src/client/index.tsx +19 -0
  11. package/src/client/llm-providers/custom-llm/ModelSettings.tsx +139 -0
  12. package/src/client/llm-providers/custom-llm/ProviderSettings.tsx +115 -0
  13. package/src/client/llm-providers/custom-llm/index.ts +10 -0
  14. package/src/client/locale.ts +8 -0
  15. package/{dist/client/models/index.d.ts → src/client/models/index.ts} +12 -10
  16. package/src/client/plugin.tsx +10 -0
  17. package/{dist/index.d.ts → src/index.ts} +2 -2
  18. package/src/locale/en-US.json +29 -0
  19. package/src/locale/vi-VN.json +29 -0
  20. package/src/locale/zh-CN.json +16 -0
  21. package/src/server/collections/.gitkeep +0 -0
  22. package/{dist/server/index.d.ts → src/server/index.ts} +1 -1
  23. package/src/server/llm-providers/custom-llm.ts +992 -0
  24. package/src/server/plugin.ts +27 -0
  25. package/src/swagger.ts +9 -0
  26. package/dist/client/index.d.ts +0 -8
  27. package/dist/client/llm-providers/custom-llm/ModelSettings.d.ts +0 -2
  28. package/dist/client/llm-providers/custom-llm/ProviderSettings.d.ts +0 -2
  29. package/dist/client/llm-providers/custom-llm/index.d.ts +0 -2
  30. package/dist/client/locale.d.ts +0 -2
  31. package/dist/client/plugin.d.ts +0 -5
  32. package/dist/server/llm-providers/custom-llm.d.ts +0 -54
  33. package/dist/server/plugin.d.ts +0 -12
@@ -0,0 +1,992 @@
1
+ import { LLMProvider, LLMProviderMeta } from '@nocobase/plugin-ai';
2
+ import { Model } from '@nocobase/database';
3
+ import path from 'node:path';
4
+ import fs from 'node:fs/promises';
5
+ import axios from 'axios';
6
+ import { Context } from '@nocobase/actions';
7
+ import type { ParsedAttachmentResult } from '@nocobase/plugin-ai';
8
+
9
+ // Keepalive marker — zero-width space prefix to distinguish from real content
10
+ const KEEPALIVE_PREFIX = '\u200B\u200B\u200B';
11
+
12
/**
 * Load a module by resolving it against the host application's
 * `node_modules` directory instead of this plugin's own dependency tree.
 *
 * The lookup directory is the `NODE_MODULES_PATH` environment variable when
 * it is set to a non-empty value, otherwise `<cwd>/node_modules`.
 *
 * @param moduleName - Module specifier to resolve (e.g. `@langchain/openai`).
 * @returns Whatever `require` returns for the resolved module.
 */
function requireFromApp(moduleName: string) {
  const searchRoot = process.env.NODE_MODULES_PATH
    ? process.env.NODE_MODULES_PATH
    : path.join(process.cwd(), 'node_modules');
  const modulePath = require.resolve(moduleName, { paths: [searchRoot] });
  return require(modulePath);
}
20
+
21
// Memoized `ChatOpenAI` export; stays null until the first successful load.
let _ChatOpenAI: any = null;

/**
 * Return the `ChatOpenAI` export of `@langchain/openai`, loading it through
 * `requireFromApp` on first use and reusing the cached value afterwards.
 */
function getChatOpenAI() {
  if (_ChatOpenAI) {
    return _ChatOpenAI;
  }
  const { ChatOpenAI } = requireFromApp('@langchain/openai');
  _ChatOpenAI = ChatOpenAI;
  return _ChatOpenAI;
}
29
+
30
// Memoized `ChatGenerationChunk` export; stays null until the first successful load.
let _ChatGenerationChunk: any = null;

/**
 * Return the `ChatGenerationChunk` export of `@langchain/core/outputs`,
 * loading it through `requireFromApp` on first use and caching it afterwards.
 */
function getChatGenerationChunk() {
  if (_ChatGenerationChunk) {
    return _ChatGenerationChunk;
  }
  const { ChatGenerationChunk } = requireFromApp('@langchain/core/outputs');
  _ChatGenerationChunk = ChatGenerationChunk;
  return _ChatGenerationChunk;
}
38
+
39
// Memoized `AIMessageChunk` export; stays null until the first successful load.
let _AIMessageChunk: any = null;

/**
 * Return the `AIMessageChunk` export of `@langchain/core/messages`,
 * loading it through `requireFromApp` on first use and caching it afterwards.
 */
function getAIMessageChunk() {
  if (_AIMessageChunk) {
    return _AIMessageChunk;
  }
  const { AIMessageChunk } = requireFromApp('@langchain/core/messages');
  _AIMessageChunk = AIMessageChunk;
  return _AIMessageChunk;
}
47
+
48
/**
 * Matches raw tool-call marker tokens that some models leak into text output,
 * e.g. `<｜tool▁calls▁begin｜>`.
 *
 * The delimiter may be either the ASCII pipe `|` or the fullwidth vertical
 * line `｜` (U+FF5C); the word separator is `▁` (U+2581). Both are written as
 * escapes so the pattern survives editors and tools that normalise Unicode.
 */
const TOOL_CALL_TAG_PATTERN =
  /<[|\uFF5C]tool\u2581(?:calls\u2581begin|calls\u2581end|call\u2581begin|call\u2581end|sep)[|\uFF5C]>/g;

/**
 * Remove tool-call marker tokens from a piece of model output.
 *
 * @param content - Text to clean. Non-string values are returned unchanged so
 *   callers can pass through `null`/`undefined` chunks.
 * @returns The text without marker tokens, or the original value when it is
 *   not a string.
 */
function stripToolCallTags(content: string): string | null {
  if (typeof content !== 'string') {
    return content;
  }
  return content.replace(TOOL_CALL_TAG_PATTERN, '');
}
54
+
55
/**
 * Pull plain text out of a chunk/message content value.
 *
 * Resolution order:
 *  1. When `contentPath` is given and is not `'auto'`, walk that dot-path
 *     into `content`; a string found there wins.
 *  2. A string `content` is returned as-is.
 *  3. An array is treated as content blocks: the `text` of every block whose
 *     `type` is `'text'` is concatenated.
 *  4. An object with a truthy `text` property yields `String(text)`.
 *  5. Anything else yields the empty string.
 *
 * @param content - Raw content value of unknown shape.
 * @param contentPath - Optional dot-path, or `'auto'` to skip path lookup.
 */
function extractTextContent(content: any, contentPath?: string): string {
  if (contentPath && contentPath !== 'auto') {
    try {
      let cursor = content;
      for (const segment of contentPath.split('.')) {
        if (cursor === null || cursor === undefined) break;
        cursor = cursor[segment];
      }
      if (typeof cursor === 'string') {
        return cursor;
      }
    } catch {
      // Path lookup failed; use the automatic detection below.
    }
  }

  if (typeof content === 'string') {
    return content;
  }

  if (Array.isArray(content)) {
    let joined = '';
    for (const block of content) {
      if (block && block.type === 'text') {
        joined += block.text ?? '';
      }
    }
    return joined;
  }

  const isObjectWithText = content && typeof content === 'object' && content.text;
  return isObjectWithText ? String(content.text) : '';
}
81
+
82
/**
 * `application/*` media types whose payload is text and can be decoded as
 * UTF-8. Built once at module load instead of on every call.
 */
const TEXT_APPLICATION_TYPES: ReadonlySet<string> = new Set([
  'application/json',
  'application/xml',
  'application/xhtml+xml',
  'application/atom+xml',
  'application/rss+xml',
  'application/csv',
  'application/javascript',
  'application/typescript',
  'application/x-javascript',
  'application/x-typescript',
  'application/x-yaml',
  'application/yaml',
  'application/x-json',
  'application/geo+json',
  'application/ld+json',
  'application/manifest+json',
  'application/graphql',
  'application/x-www-form-urlencoded',
  'application/toml',
  'application/x-sh',
  'application/x-shellscript',
  'application/sql',
]);

/**
 * Detect whether a MIME type is text-decodable (UTF-8 safe).
 *
 * The check is made on the bare media type: parameters such as
 * `; charset=utf-8` are ignored and the comparison is case-insensitive, so
 * `Application/JSON; charset=UTF-8` is recognised as text.
 *
 * @param mimetype - Media type string, possibly with parameters.
 * @returns `true` for any `text/*` type and for the known text-based
 *   `application/*` types; `false` otherwise (including empty input).
 */
function isTextMimetype(mimetype?: string): boolean {
  if (!mimetype || typeof mimetype !== 'string') return false;

  // Keep only the "type/subtype" part and normalise its case.
  const essence = mimetype.split(';', 1)[0].trim().toLowerCase();
  if (!essence) return false;

  // All text/* subtypes are UTF-8 decodable
  if (essence.startsWith('text/')) return true;

  return TEXT_APPLICATION_TYPES.has(essence);
}
116
+
117
/**
 * Parse a JSON configuration value defensively, always returning an object.
 *
 * - A non-null object is assumed to be already-parsed config and is returned
 *   unchanged.
 * - A string is parsed; if parsing fails, or the result is not an object
 *   (e.g. `"null"`, `"42"`), a warning is logged and `{}` is returned so that
 *   callers can safely read properties off the result.
 * - Any other value (empty string, `undefined`, numbers, …) yields `{}`.
 *
 * @param str - Raw config value, normally a JSON string.
 * @param fieldName - Name used in warning messages; defaults to "JSON config".
 * @returns The parsed configuration object, or `{}`.
 */
function safeParseJSON(str: any, fieldName?: string): any {
  const label = fieldName || 'JSON config';

  if (str && typeof str === 'object') return str;
  if (!str || typeof str !== 'string') return {};

  try {
    const parsed = JSON.parse(str);
    if (parsed === null || typeof parsed !== 'object') {
      // Valid JSON but not an object: callers would crash on property access.
      console.warn(`[CustomLLM] Failed to parse ${label}: expected a JSON object`);
      return {};
    }
    return parsed;
  } catch (e) {
    // Warn so misconfigured JSON doesn't silently fall through to defaults
    const reason = e instanceof Error ? e.message : String(e);
    console.warn(`[CustomLLM] Failed to parse ${label}: ${reason}`);
    return {};
  }
}
127
+
128
/**
 * Read a nested value out of an object by following a dot-separated path.
 *
 * Example: `getByPath({ a: { b: "hello" } }, "a.b")` returns `"hello"`.
 *
 * Returns `undefined` when `obj` or `dotPath` is falsy, or when a segment
 * has to be read from `null`/`undefined`. A `null` found at the final
 * segment is returned as `null`.
 */
function getByPath(obj: any, dotPath: string): any {
  if (!obj || !dotPath) {
    return undefined;
  }
  const segments = dotPath.split('.');
  let node = obj;
  for (let i = 0; i < segments.length; i += 1) {
    if (node === null || node === undefined) {
      return undefined;
    }
    node = node[segments[i]];
  }
  return node;
}
142
+
143
/**
 * Create a custom fetch that intercepts LLM responses and maps them
 * from a non-standard format to OpenAI-compatible format.
 *
 * responseMapping config example:
 * {
 *   "content": "message.response"  // dot-path to the content field
 *   "role": "message.role"         // optional, dot-path to role (default: "assistant")
 *   "id": "id"                     // optional, dot-path to response id
 * }
 *
 * Behaviour of the returned function:
 * - Non-2xx responses are returned untouched.
 * - Server-sent-event responses (by response `content-type`, or because the
 *   request's `Accept` header asked for `text/event-stream`) are re-emitted
 *   line by line; `data:` payloads containing the mapped content field are
 *   rewritten as `chat.completion.chunk` events, everything else is passed
 *   through. Every emitted line is terminated with a blank line.
 * - JSON responses containing the mapped content field are rewritten as a
 *   `chat.completion` object. When the field is absent, the original body is
 *   returned in a fresh `Response`, because the original one has been read.
 *
 * A mapped value of `null`/`undefined` counts as "absent". Non-string values
 * are serialised with `JSON.stringify`.
 *
 * @param responseMapping - Dot-path mapping; `content` is required.
 * @returns A fetch-compatible function, or `undefined` when no `content`
 *   path is configured (no mapping needed).
 */
function createMappingFetch(responseMapping: Record<string, string>) {
  const contentPath = responseMapping.content;
  if (!contentPath) return undefined; // No mapping needed

  const idPath = responseMapping.id || 'id';
  const rolePath = responseMapping.role || '';

  /** Serialise a mapped content value without producing "[object Object]". */
  const asContentString = (value: unknown): string =>
    typeof value === 'string' ? value : JSON.stringify(value) ?? String(value);

  /** True when the request headers (in any HeadersInit form) ask for SSE. */
  const acceptsEventStream = (init?: RequestInit): boolean => {
    if (!init?.headers) return false;
    try {
      return (new Headers(init.headers).get('accept') ?? '').includes('text/event-stream');
    } catch {
      return false;
    }
  };

  /**
   * Convert one upstream SSE line into the text to emit downstream, or
   * `null` when nothing should be emitted (blank separator lines — framing
   * is re-added on every emitted line).
   */
  const mapSseLine = (rawLine: string): string | null => {
    const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
    if (line === '') return null;
    // Non-data fields (event:, id:, comments) are passed through.
    if (!line.startsWith('data:')) return line + '\n\n';

    const data = line.slice(5).trim();
    if (data === '[DONE]') return 'data: [DONE]\n\n';

    try {
      const parsed = JSON.parse(data);
      const mappedContent = getByPath(parsed, contentPath);
      if (mappedContent === undefined || mappedContent === null) {
        // Pass through unmapped — SSE events must be terminated with \n\n
        return line + '\n\n';
      }
      // Map to OpenAI streaming format
      const mapped = {
        id: getByPath(parsed, idPath) || 'chatcmpl-custom',
        object: 'chat.completion.chunk',
        created: Math.floor(Date.now() / 1000),
        model: 'custom',
        choices: [
          {
            index: 0,
            delta: { content: asContentString(mappedContent), role: 'assistant' },
            finish_reason: null,
          },
        ],
      };
      return `data: ${JSON.stringify(mapped)}\n\n`;
    } catch {
      // Payload is not JSON: preserve it, with SSE framing.
      return line + '\n\n';
    }
  };

  return async (url: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
    const response = await fetch(url, init);

    // Only intercept successful responses
    if (!response.ok) return response;

    const contentType = response.headers.get('content-type') || '';

    // Handle streaming responses (SSE) — transform each line
    if (contentType.includes('text/event-stream') || acceptsEventStream(init)) {
      const reader = response.body?.getReader();
      if (!reader) return response;

      let cancelled = false;
      const stream = new ReadableStream<Uint8Array>({
        async start(controller) {
          const decoder = new TextDecoder();
          const encoder = new TextEncoder();
          let pending = '';

          const emit = (line: string): void => {
            const out = mapSseLine(line);
            if (out !== null) controller.enqueue(encoder.encode(out));
          };

          try {
            while (true) {
              const { done, value } = await reader.read();
              if (done) break;

              pending += decoder.decode(value, { stream: true });
              const lines = pending.split('\n');
              // The last element is an incomplete line; keep it for the next read.
              pending = lines.pop() ?? '';
              for (const line of lines) emit(line);
            }

            // Flush the decoder and any final line that had no trailing newline.
            pending += decoder.decode();
            if (pending) emit(pending);
            if (!cancelled) controller.close();
          } catch (err) {
            if (!cancelled) controller.error(err);
          }
        },
        cancel(reason) {
          // Consumer went away: stop reading from the upstream body.
          cancelled = true;
          return reader.cancel(reason);
        },
      });

      return new Response(stream, {
        status: response.status,
        statusText: response.statusText,
        headers: new Headers({
          'content-type': 'text/event-stream',
        }),
      });
    }

    // Handle non-streaming JSON responses
    if (contentType.includes('application/json')) {
      // Read the body once as text so it can be replayed if no mapping applies.
      const rawBody = await response.text();
      const passThrough = (): Response =>
        new Response(rawBody, {
          status: response.status,
          statusText: response.statusText,
          headers: response.headers,
        });

      let body: any;
      try {
        body = JSON.parse(rawBody);
      } catch {
        return passThrough();
      }

      const mappedContent = getByPath(body, contentPath);
      if (mappedContent === undefined || mappedContent === null) {
        return passThrough();
      }

      const mapped = {
        id: getByPath(body, idPath) || 'chatcmpl-custom',
        object: 'chat.completion',
        created: Math.floor(Date.now() / 1000),
        model: 'custom',
        choices: [
          {
            index: 0,
            message: {
              role: getByPath(body, rolePath) || 'assistant',
              content: asContentString(mappedContent),
            },
            finish_reason: 'stop',
          },
        ],
        // Usage is not available from the upstream format; report zeros.
        usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
      };

      return new Response(JSON.stringify(mapped), {
        status: response.status,
        statusText: response.statusText,
        headers: new Headers({
          'content-type': 'application/json',
        }),
      });
    }

    return response;
  };
}
276
+
277
/**
 * Wrap a chat model's streaming method (`_streamResponseChunks` when
 * present, otherwise `_stream`) so that keepalive chunks are injected during
 * long idle periods (e.g. a model's thinking/reasoning phase).
 *
 * The base stream is consumed by a background task that pushes chunks into a
 * queue. The wrapper drains the queue and, while it is empty, races "a chunk
 * arrived" against a timer; when the timer wins, a keepalive chunk is yielded.
 *
 * Keepalive rules:
 * - Content is always `KEEPALIVE_PREFIX` (zero-width spaces) and the message
 *   carries `additional_kwargs.__keepalive = true`. Per the original author's
 *   notes, the content must be truthy so the downstream SSE writer emits an
 *   event. `options.keepAliveContent` is accepted for interface compatibility
 *   but is not used.
 * - No keepalive is emitted while the most recently forwarded chunk carried
 *   tool-call data; injection resumes once a chunk without tool-call data has
 *   been forwarded.
 * - No keepalive is emitted after the base stream has ended or failed; a
 *   failure is re-thrown once the queued chunks have been forwarded.
 *
 * If the consumer stops iterating early, the base iterator is asked to
 * finish (`return()`) so the upstream is not drained needlessly.
 *
 * @param model - Model instance to patch in place.
 * @param options - `intervalMs` is the idle time before a keepalive; values
 *   that are not finite and positive fall back to 5000 ms.
 * @returns The same `model` instance (unchanged if it has no stream method).
 */
function wrapWithStreamKeepAlive(
  model: any,
  options: { intervalMs: number; keepAliveContent: string },
) {
  const streamMethodName =
    typeof model._streamResponseChunks === 'function' ? '_streamResponseChunks' : '_stream';
  if (typeof model[streamMethodName] !== 'function') {
    // Nothing to wrap; leave the model untouched instead of throwing on bind.
    return model;
  }
  const originalStream = model[streamMethodName].bind(model);

  // Guard against 0/negative/NaN, which would make the timer fire immediately
  // and flood the stream with keepalives.
  const intervalMs =
    Number.isFinite(options.intervalMs) && options.intervalMs > 0 ? options.intervalMs : 5000;

  const carriesToolCall = (chunk: any): boolean => {
    const msg = chunk?.message;
    return Boolean(msg?.tool_call_chunks?.length || msg?.tool_calls?.length);
  };

  model[streamMethodName] = async function* (messages: any[], opts: any, runManager?: any) {
    const ChatGenerationChunk = getChatGenerationChunk();
    const AIMessageChunk = getAIMessageChunk();

    const baseIterator = originalStream(messages, opts, runManager);

    // Queue for chunks from the base stream
    const buffer: any[] = [];
    let streamDone = false;
    let streamFailed = false;
    let streamError: unknown = null;
    let notifyReady: (() => void) | null = null;
    // True while the last chunk handed to the consumer carried tool-call data.
    let inToolCallSequence = false;
    let completed = false;

    // Wake the main loop if it is currently waiting.
    const wake = (): void => {
      const resolve = notifyReady;
      notifyReady = null;
      if (resolve) resolve();
    };

    // Consume the base stream in a background task
    const consumer = (async () => {
      try {
        for await (const chunk of baseIterator) {
          buffer.push(chunk);
          wake();
        }
      } catch (err) {
        streamError = err;
        streamFailed = true;
      } finally {
        streamDone = true;
        wake();
      }
    })();

    try {
      while (!streamDone || buffer.length > 0) {
        // Flush buffered chunks first
        while (buffer.length > 0) {
          const chunk = buffer.shift();
          inToolCallSequence = carriesToolCall(chunk);
          yield chunk;
        }

        if (streamDone) break;

        // Wait for either: new chunk arrives OR keepalive interval expires.
        // No await happens between the emptiness check above and registering
        // `notifyReady`, so a chunk cannot slip in unnoticed.
        let timer: ReturnType<typeof setTimeout> | null = null;
        const outcome = await Promise.race([
          new Promise<'chunk'>((resolve) => {
            notifyReady = () => resolve('chunk');
          }),
          new Promise<'timeout'>((resolve) => {
            timer = setTimeout(() => resolve('timeout'), intervalMs);
          }),
        ]);

        // Clear the timer to prevent leaks
        if (timer) clearTimeout(timer);

        const idle = outcome === 'timeout' && !streamDone && buffer.length === 0;
        if (idle && !inToolCallSequence) {
          yield new ChatGenerationChunk({
            message: new AIMessageChunk({
              content: KEEPALIVE_PREFIX,
              additional_kwargs: { __keepalive: true },
            }),
            text: KEEPALIVE_PREFIX,
          });
        }
        // Otherwise the flush happens at the top of the loop.
      }

      // Re-throw any stream error (tracked by flag so falsy throws propagate too)
      if (streamFailed) {
        throw streamError;
      }
      completed = true;
    } finally {
      if (!completed && !streamDone && typeof baseIterator?.return === 'function') {
        // The consumer stopped early: ask the base stream to finish. Not
        // awaited here — the request is queued behind any pending read and
        // `consumer` settles once the base iterator reports completion.
        Promise.resolve(baseIterator.return(undefined)).catch(() => undefined);
      }
      // Ensure the consumer finishes
      await consumer;
    }
  };

  return model;
}
408
+
409
/**
 * Tell whether a piece of text is a keepalive marker, i.e. a string that
 * begins with `KEEPALIVE_PREFIX`. Non-string values are never markers.
 */
function isKeepAlive(text: string): boolean {
  if (typeof text !== 'string') {
    return false;
  }
  return text.startsWith(KEEPALIVE_PREFIX);
}
415
+
416
/**
 * Patch `model.bindTools` so that tool schemas never carry an empty
 * `properties` object, which (per the original author's notes) Gemini and
 * some other providers reject. Empty `properties` get a single placeholder
 * string property instead.
 *
 * Two passes are applied on every `bindTools` call:
 *  1. Before delegating: tools given as raw JSON Schema (`tool.schema`) or in
 *     OpenAI function format (`tool.function.parameters`) are copied with the
 *     placeholder when their `properties` is empty. Tools whose `schema` has a
 *     `safeParse` function (Zod) are left for pass 2.
 *  2. After delegating: the tools found on the result's `kwargs` (or
 *     `defaultOptions`) are patched in place, including schemas nested under
 *     `anyOf` / `oneOf` / `allOf`.
 *
 * @param model - Model to patch in place; returned untouched if it has no
 *   `bindTools`.
 * @returns The same `model` instance.
 */
function fixEmptyToolProperties(model: any) {
  const boundOriginal = model.bindTools?.bind(model);
  if (!boundOriginal) {
    return model;
  }

  const PLACEHOLDER_PROP = {
    _placeholder: { type: 'string', description: 'No parameters required' },
  };
  const COMBINATOR_KEYS = ['anyOf', 'oneOf', 'allOf'];

  /** True for an object (or array) value that has no own enumerable keys. */
  const isEmptyObject = (value: any): boolean =>
    Boolean(value) && typeof value === 'object' && Object.keys(value).length === 0;

  /** In-place fix of a schema and of any schemas under its combinator keys. */
  const patchSchema = (schema: any): void => {
    if (!schema || typeof schema !== 'object') {
      return;
    }
    if (isEmptyObject(schema.properties)) {
      schema.properties = { ...PLACEHOLDER_PROP };
    }
    for (const key of COMBINATOR_KEYS) {
      const branches = schema[key];
      if (!Array.isArray(branches)) {
        continue;
      }
      for (const branch of branches) {
        patchSchema(branch);
      }
    }
  };

  /** Pass 1: return a patched copy of a raw tool definition, or the tool itself. */
  const patchRawTool = (tool: any): any => {
    if (!tool || typeof tool !== 'object') {
      return tool;
    }

    const { schema } = tool;

    // Zod schemas are converted by bindTools and handled afterwards.
    if (typeof schema?.safeParse === 'function') {
      return tool;
    }

    // Raw JSON Schema on `tool.schema`
    const isPlainSchema = schema && typeof schema === 'object' && !schema.safeParse;
    if (isPlainSchema && isEmptyObject(schema.properties)) {
      return {
        ...tool,
        schema: { ...schema, properties: { ...PLACEHOLDER_PROP } },
      };
    }

    // OpenAI function-calling format
    const parameters = tool.function?.parameters;
    if (parameters?.properties && isEmptyObject(parameters.properties)) {
      return {
        ...tool,
        function: {
          ...tool.function,
          parameters: { ...parameters, properties: { ...PLACEHOLDER_PROP } },
        },
      };
    }

    return tool;
  };

  /** Pass 2: patch the converted tools stored on the bindTools result. */
  const patchBoundResult = (bound: any): void => {
    const settings = bound?.kwargs ?? bound?.defaultOptions;
    if (!(settings?.tools && Array.isArray(settings.tools))) {
      return;
    }
    for (const boundTool of settings.tools) {
      // OpenAI format: { type: 'function', function: { parameters: {...} } }
      if (boundTool?.function?.parameters) {
        patchSchema(boundTool.function.parameters);
      }
      // Direct parameter format
      if (boundTool?.parameters) {
        patchSchema(boundTool.parameters);
      }
    }
  };

  model.bindTools = function (tools: any[], kwargs?: any) {
    const prepared = tools.map(patchRawTool);
    const bound = boundOriginal(prepared, kwargs);
    try {
      patchBoundResult(bound);
    } catch {
      // Inspection of the result is best-effort; never break tool binding.
    }
    return bound;
  };

  return model;
}
534
+
535
+ export class CustomLLMProvider extends LLMProvider {
536
/**
 * This provider declares no built-in endpoint: the getter always yields
 * `null`. `createModel` reads the endpoint from `serviceOptions.baseURL`.
 */
get baseURL() {
  const noBuiltInEndpoint = null;
  return noBuiltInEndpoint;
}
539
+
540
/**
 * Parsed form of `serviceOptions.requestConfig`, obtained through
 * `safeParseJSON`. Re-parsed on every access.
 */
private get requestConfig() {
  const rawRequestConfig = this.serviceOptions?.requestConfig;
  return safeParseJSON(rawRequestConfig, 'requestConfig');
}
543
+
544
/**
 * Parsed form of `serviceOptions.responseConfig`, obtained through
 * `safeParseJSON`. Re-parsed on every access.
 */
private get responseConfig() {
  const rawResponseConfig = this.serviceOptions?.responseConfig;
  return safeParseJSON(rawResponseConfig, 'responseConfig');
}
547
+
548
/**
 * Build the `ChatOpenAI` instance for this provider from `serviceOptions`,
 * `modelOptions` and the parsed request/response configuration.
 *
 * - `modelKwargs` starts with `response_format` (default type `'text'`), is
 *   overlaid with `requestConfig.modelKwargs`, then with
 *   `requestConfig.extraBody` when that is an object.
 * - `disableStream` forces `streaming: false`.
 * - A positive `timeout` (milliseconds) is applied to both the model config
 *   and the client configuration.
 * - `requestConfig.extraHeaders` becomes the client's default headers.
 * - `responseConfig.responseMapping` installs the mapping fetch.
 *
 * The model is always passed through `fixEmptyToolProperties`, and through
 * `wrapWithStreamKeepAlive` when `streamKeepAlive` is on and streaming is
 * not disabled (interval defaults to 5000 ms, content to `'...'`).
 */
createModel() {
  const serviceOptions = this.serviceOptions || {};
  const { apiKey, disableStream, timeout, streamKeepAlive, keepAliveIntervalMs, keepAliveContent } =
    serviceOptions;
  // baseURL comes from core's options.baseURL field
  const baseURL = this.serviceOptions?.baseURL;
  const { responseFormat } = this.modelOptions || {};
  const requestSettings = this.requestConfig;
  const responseSettings = this.responseConfig;

  // Request body extras: response_format first so user-supplied kwargs win.
  const modelKwargs: Record<string, any> = {
    response_format: { type: responseFormat ?? 'text' } as Record<string, any>,
    ...(requestSettings.modelKwargs || {}),
  };
  const extraBody = requestSettings.extraBody;
  if (extraBody && typeof extraBody === 'object') {
    Object.assign(modelKwargs, extraBody);
  }

  const ChatOpenAI = getChatOpenAI();

  const clientConfiguration: Record<string, any> = { baseURL };
  const config: Record<string, any> = {
    apiKey,
    ...this.modelOptions,
    modelKwargs,
    configuration: clientConfiguration,
    verbose: false,
  };

  // Some models with long thinking phases emit empty stream values that end
  // processing early; streaming can be switched off for them.
  if (disableStream) {
    config.streaming = false;
  }

  // Custom timeout in milliseconds, for slow-responding models
  const timeoutMs = Number(timeout);
  if (timeout && timeoutMs > 0) {
    config.timeout = timeoutMs;
    clientConfiguration.timeout = timeoutMs;
  }

  // Extra headers sent with every request
  const extraHeaders = requestSettings.extraHeaders;
  if (extraHeaders && typeof extraHeaders === 'object') {
    clientConfiguration.defaultHeaders = extraHeaders;
  }

  // Response mapping via custom fetch
  if (responseSettings.responseMapping) {
    clientConfiguration.fetch = createMappingFetch(responseSettings.responseMapping);
  }

  // Fix empty tool properties for strict providers
  const model = fixEmptyToolProperties(new ChatOpenAI(config));

  // Keepalive only makes sense when streaming is active
  const useKeepAlive = streamKeepAlive && !disableStream;
  if (!useKeepAlive) {
    return model;
  }
  return wrapWithStreamKeepAlive(model, {
    intervalMs: Number(keepAliveIntervalMs) || 5000,
    keepAliveContent: keepAliveContent || '...',
  });
}
618
+
619
/**
 * Turn a streamed chunk into the text to forward to the client.
 *
 * Text is extracted with `extractTextContent`, honouring
 * `responseConfig.contentPath`. A keepalive marker is returned as the bare
 * `KEEPALIVE_PREFIX` — zero-width spaces, intended to be invisible in the
 * client while still producing a non-empty event. Any other text has
 * tool-call marker tokens removed.
 */
parseResponseChunk(chunk: any): string | null {
  const { contentPath } = this.responseConfig;
  const extracted = extractTextContent(chunk, contentPath);
  return isKeepAlive(extracted) ? KEEPALIVE_PREFIX : stripToolCallTags(extracted);
}
632
+
633
/**
 * Convert a stored message record into the `{ key, content, role }` shape
 * returned to the client.
 *
 * - The record's `content` object is spread and augmented with `messageId`,
 *   `metadata`, `attachments`, `workContext`, and `tool_calls` when present.
 * - Array content is flattened to the concatenated text of its `text` blocks.
 * - String content has every `KEEPALIVE_PREFIX` marker removed (only the
 *   marker itself — text following a marker is preserved) and tool-call
 *   marker tokens stripped.
 * - The internal `__keepalive` flag is removed from
 *   `metadata.additional_kwargs`.
 *
 * NOTE(review): records written by older versions may contain visible
 * keepalive filler after the marker; that filler cannot be told apart from
 * real text here and is left in place — confirm whether a migration is needed.
 */
parseResponseMessage(message: Model) {
  const { content: rawContent, messageId, metadata, role, toolCalls, attachments, workContext } = message;
  const content: Record<string, any> = {
    ...(rawContent ?? {}),
    messageId,
    metadata,
    attachments,
    workContext,
  };

  if (toolCalls) {
    content.tool_calls = toolCalls;
  }

  if (Array.isArray(content.content)) {
    // `block?.` guards against null entries in the stored block list.
    content.content = content.content
      .filter((block: any) => block?.type === 'text')
      .map((block: any) => block.text ?? '')
      .join('');
  }

  if (typeof content.content === 'string') {
    // Remove the markers only. The previous lazy "marker … next marker or
    // end" pattern also swallowed the real text that followed a marker.
    const withoutKeepAlive = content.content.split(KEEPALIVE_PREFIX).join('');
    content.content = stripToolCallTags(withoutKeepAlive);
  }

  // Clean internal keepalive flag from persisted additional_kwargs
  if (content.metadata?.additional_kwargs?.__keepalive !== undefined) {
    const { __keepalive, ...cleanKwargs } = content.metadata.additional_kwargs;
    content.metadata = { ...content.metadata, additional_kwargs: cleanKwargs };
  }

  return {
    key: messageId,
    content,
    role,
  };
}
671
+
672
/**
 * Extract reasoning ("thinking") text from a chunk.
 *
 * The key is `responseConfig.reasoningKey`, defaulting to
 * `reasoning_content`. It is looked up on `chunk.additional_kwargs` first and
 * on `chunk.kwargs.additional_kwargs` second.
 *
 * @returns `{ status: 'streaming', content }` for a non-empty string value,
 *   otherwise `null`.
 */
parseReasoningContent(chunk: any): { status: string; content: string } | null {
  const reasoningKey = this.responseConfig.reasoningKey || 'reasoning_content';

  // Different chunk formats nest additional_kwargs at different depths.
  const direct = chunk?.additional_kwargs?.[reasoningKey];
  const reasoning = direct ?? chunk?.kwargs?.additional_kwargs?.[reasoningKey];

  if (!reasoning || typeof reasoning !== 'string') {
    return null;
  }
  return { status: 'streaming', content: reasoning };
}
686
+
687
/**
 * Extract `[messageId, metadata]` from an LLM result for post-save enrichment.
 *
 * Reads the first generation's message. String ids longer than 128
 * characters are truncated to 128. Metadata contains `finish_reason` and
 * `system_fingerprint` (when `response_metadata` exists) and
 * `usage_metadata` (when present).
 *
 * @returns `[id, metadata]`, or `[null, null]` when there is no generation,
 *   no id, no metadata at all, or anything throws along the way.
 */
parseResponseMetadata(output: any): any {
  const nothing = (): any => [null, null];

  try {
    const firstGeneration = output?.generations?.[0]?.[0];
    if (!firstGeneration) {
      return nothing();
    }

    const message = firstGeneration.message;
    const rawId = message?.id;
    if (!rawId) {
      return nothing();
    }

    // Some providers return very long chatcmpl-xxx / run-xxx ids; cap them.
    const id = typeof rawId === 'string' && rawId.length > 128 ? rawId.slice(0, 128) : rawId;

    const metadata: Record<string, any> = {};
    const responseMeta = message?.response_metadata;
    if (responseMeta) {
      metadata.finish_reason = responseMeta.finish_reason;
      metadata.system_fingerprint = responseMeta.system_fingerprint;
    }
    const usage = message?.usage_metadata;
    if (usage) {
      metadata.usage_metadata = usage;
    }

    if (Object.keys(metadata).length === 0) {
      return nothing();
    }
    return [id, metadata];
  } catch {
    return nothing();
  }
}
719
+
720
/**
 * Produce the error text for a failed LLM call: the error's `message` when
 * it is not null/undefined, otherwise a generic fallback string.
 */
parseResponseError(err: any) {
  const message = err?.message;
  if (message === null || message === undefined) {
    return 'Unexpected LLM service error';
  }
  return message;
}
723
+
724
/**
 * Read an attachment and return its base64-encoded content.
 *
 * The file URL is obtained from the `file-manager` plugin and handled in one
 * of three ways:
 *
 * 1. `http://` / `https://` URL — downloaded with axios (30 s timeout,
 *    forwarding the request's `referer` and `user-agent`). The URL is used
 *    exactly as returned: percent-decoding it first would change encoded
 *    characters in the path or query string (e.g. in signed URLs).
 * 2. URL containing `/api/attachments:stream` — read through
 *    `fileManager.getFileStream`.
 * 3. Anything else — treated as a path on the local filesystem. The
 *    `APP_PUBLIC_PATH` prefix is stripped before joining with the working
 *    directory (the original author notes that plugin-ai's own reader does
 *    not do this, which breaks sub-path deployments), and the resolved path
 *    must stay inside the working directory.
 *
 * @param ctx - Request context; only its `referer`/`user-agent` headers are read.
 * @param attachment - Attachment record understood by the file manager.
 * @returns `base64` content and, for local files only, the resolved
 *   `absPath` so callers can hand the path to other tools without writing
 *   the file to disk again.
 * @throws Error when a local path resolves outside the working directory;
 *   download and file-system errors propagate unchanged.
 */
private async readFileData(
  ctx: Context,
  attachment: any,
): Promise<{ base64: string; absPath?: string }> {
  const fileManager = this.app.pm.get('file-manager') as any;
  const rawUrl: string = await fileManager.getFileURL(attachment);

  if (rawUrl.startsWith('http://') || rawUrl.startsWith('https://')) {
    const referer = ctx.get('referer') || '';
    const ua = ctx.get('user-agent') || '';
    const response = await axios.get(rawUrl, {
      responseType: 'arraybuffer',
      timeout: 30_000,
      headers: { referer, 'User-Agent': ua },
    });
    return { base64: Buffer.from(response.data).toString('base64') };
  }

  // Decode for path handling only. A stray '%' makes decodeURIComponent
  // throw; in that case the raw value is used as-is.
  let url = rawUrl;
  try {
    url = decodeURIComponent(rawUrl);
  } catch {
    url = rawUrl;
  }

  // Internal API stream URL (e.g. s3-private-storage proxy) — read directly via fileManager
  if (url.includes('/api/attachments:stream')) {
    const { stream } = await fileManager.getFileStream(attachment);
    const chunks: Buffer[] = [];
    for await (const chunk of stream) {
      chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
    }
    return { base64: Buffer.concat(chunks).toString('base64') };
  }

  // Local file — strip APP_PUBLIC_PATH prefix before joining with cwd
  let localPath = url;
  const appPublicPath = (process.env.APP_PUBLIC_PATH || '/').replace(/\/+$/, '');
  if (appPublicPath && localPath.startsWith(appPublicPath + '/')) {
    localPath = localPath.slice(appPublicPath.length);
  }

  // Resolve and guard against path traversal
  const storageRoot = path.resolve(process.cwd());
  const absPath = path.resolve(storageRoot, localPath.replace(/^\//, ''));
  if (!absPath.startsWith(storageRoot + path.sep) && absPath !== storageRoot) {
    throw new Error(`Attachment path escapes storage root: ${localPath}`);
  }

  const data = await fs.readFile(absPath);
  return { base64: Buffer.from(data).toString('base64'), absPath };
}
786
+
787
+ /**
788
+ * Override parseAttachment to convert all attachments into formats that
789
+ * generic OpenAI-compatible endpoints actually support:
790
+ *
791
+ * - Images → image_url block with base64 data URI (vision models)
792
+ * - Text files → text block with decoded UTF-8 content
793
+ * - Binary → text block with base64 data URI (multi-modal or fallback)
794
+ *
795
+ * The base-class implementation returns a LangChain ContentBlock.Multimodal.File
796
+ * (`type: 'file'`) for non-image attachments. LangChain serialises this as the
797
+ * newer OpenAI Files API format which most custom/local endpoints do NOT understand,
798
+ * causing file content to be silently dropped.
799
+ *
800
+ * This method is entirely self-contained — it does not call super — so it is
801
+ * safe to use without modifying plugin-ai core.
802
+ */
803
+ /**
804
+ * Try to extract text from an attachment using DocPixie (if available and
805
+ * the file type is supported). Returns null if DocPixie is unavailable,
806
+ * not ready, or the file type is not supported.
807
+ */
808
/**
 * Tell whether the AI employee behind this request has the
 * `docpixie.query.document` skill configured.
 *
 * The employee username is read from `ctx.action.params.values.aiEmployee`;
 * the matching `aiEmployees` record is loaded and its
 * `skillSettings.skills` list is searched by name. A missing username or any
 * failure counts as "no skill".
 *
 * The answer is stored on `ctx.state._docPixieActive` and reused by later
 * calls that receive the same `ctx`.
 */
private async hasDocPixieSkill(ctx: Context): Promise<boolean> {
  const cached = ctx.state._docPixieActive;
  if (cached !== undefined) return cached as boolean;

  let active = false;
  try {
    const employeeUsername = ctx.action?.params?.values?.aiEmployee;
    if (employeeUsername) {
      const employee = await ctx.db.getRepository('aiEmployees').findOne({
        filter: { username: String(employeeUsername) },
        fields: ['skillSettings'],
      });
      const skills: Array<{ name: string }> = (employee?.get?.('skillSettings') as any)?.skills ?? [];
      active = skills.some((skill) => skill.name === 'docpixie.query.document');
    }
  } catch {
    // Lookup problems are treated as "skill not configured".
    active = false;
  }

  ctx.state._docPixieActive = active;
  return active;
}
840
+
841
/**
 * Run the DocPixie document-processing service on an attachment and return a
 * `<processed_document>` context block for the LLM: the document summary
 * plus an instruction to call `docpixie.query.document` with the returned
 * document id for details.
 *
 * When `fileData.absPath` is set, the path is handed to
 * `processDocumentFromPath`; otherwise the base64 payload is decoded and
 * passed to `processDocumentFromBuffer`. The current user's id, when
 * available on `ctx.state`, is forwarded as `userId`.
 *
 * The filename is escaped before being placed in the `filename="…"`
 * attribute so that quotes or angle brackets in an uploaded file's name
 * cannot break out of the attribute.
 *
 * @returns The context block, or `null` when the `docpixie` plugin is
 *   missing, its service is not ready, or processing throws (the failure is
 *   logged as a warning).
 */
private async tryDocPixieFullProcess(
  fileData: { base64: string; absPath?: string },
  filename: string,
  ctx: Context,
): Promise<string | null> {
  try {
    const docpixie = this.app.pm.get('docpixie') as any;
    if (!docpixie?.service?.isReady?.()) return null;

    const userId: number | undefined = ctx.state?.currentUser?.id;
    let result: { documentId: number; summary: string; pageCount: number };

    if (fileData.absPath) {
      result = await docpixie.service.processDocumentFromPath(fileData.absPath, filename, { userId });
    } else {
      const buffer = Buffer.from(fileData.base64, 'base64');
      result = await docpixie.service.processDocumentFromBuffer(buffer, filename, { userId });
    }

    const { documentId, summary, pageCount } = result;
    const summaryText = summary?.trim() || 'No summary available.';

    // Attribute-safe rendering of the user-supplied filename.
    const safeFilename = String(filename)
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');

    return (
      `<processed_document id="${documentId}" filename="${safeFilename}" pages="${pageCount}">\n` +
      `<summary>\n${summaryText}\n</summary>\n` +
      `<rag_instruction>This document is fully indexed. ` +
      `Call docpixie.query.document with documentId=${documentId} to retrieve specific details.</rag_instruction>\n` +
      `</processed_document>`
    );
  } catch (e) {
    // Best-effort feature: report the failure, then let the caller fall back.
    const reason = e instanceof Error ? e.message : String(e);
    console.warn(`[CustomLLM] DocPixie processing failed for attachment: ${reason}`);
    return null;
  }
}
884
+
885
/**
 * Asks the DocPixie plugin for the plain text of an attachment.
 *
 * With `fileData.absPath` set, the path is handed to `extractTextFromPath`
 * (presumably so DocPixie can read the file itself; confirm against the
 * DocPixie service). Otherwise the base64 payload is decoded into a Buffer and
 * handed to `extractTextFromBuffer`.
 *
 * @param fileData Attachment payload: base64 content plus an optional absolute path.
 * @param filename Attachment name, forwarded to the extractor.
 * @returns The extracted text, or `null` when the plugin exposes no service,
 *   the extractor throws, or it yields an empty/falsy result.
 */
private async tryDocPixieExtract(
  fileData: { base64: string; absPath?: string },
  filename: string,
): Promise<string | null> {
  try {
    const service = (this.app.pm.get('docpixie') as any)?.service;
    if (!service) {
      return null;
    }

    const text: string = fileData.absPath
      ? await service.extractTextFromPath(fileData.absPath, filename)
      : await service.extractTextFromBuffer(Buffer.from(fileData.base64, 'base64'), filename);

    return text ? text : null;
  } catch {
    // Extraction is optional; callers fall back to other representations on null.
    return null;
  }
}
911
+
912
/**
 * Converts an uploaded attachment into a single content block for the model.
 *
 * Resolution order:
 *  1. PDF or image, and the requesting AI employee has the DocPixie skill:
 *     run `tryDocPixieFullProcess` and return its block when it is non-null.
 *  2. PDF or image (skill absent, or step 1 returned null): run
 *     `tryDocPixieExtract` and return the text wrapped in `<attachment>`.
 *  3. Image with no extracted text: return an `image_url` block carrying a
 *     base64 data URI.
 *  4. Mimetype accepted by `isTextMimetype`: decode the base64 payload as
 *     UTF-8 and wrap it in `<attachment>`.
 *  5. Anything else (including a PDF that could not be extracted): wrap a
 *     base64 data URI in `<attachment>`.
 *
 * `filename` and `mimetype` come from the attachment record, so both are
 * escaped before being written into the `<attachment …>` attributes. Without
 * this, a value containing `"`, `<`, `>` or a line break could close the
 * attribute and inject markup or instructions into the prompt.
 *
 * Errors thrown by `readFileData` are not caught here and propagate to the caller.
 *
 * @param ctx Request context, forwarded to the file reader and DocPixie helpers.
 * @param attachment Attachment record; `mimetype`, `filename` and `name` are read.
 * @returns A result with `placement: 'contentBlocks'` holding a text or image block.
 */
async parseAttachment(ctx: Context, attachment: any): Promise<ParsedAttachmentResult> {
  const mimetype: string = attachment.mimetype || 'application/octet-stream';
  const filename: string = attachment.filename || attachment.name || 'file';
  const fileData = await this.readFileData(ctx, attachment);
  const { base64: data } = fileData;

  // Minimal attribute-value escaping; line breaks are flattened so the tag
  // header always stays on a single line.
  const escapeAttr = (value: string): string =>
    String(value)
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/[\r\n]+/g, ' ');

  const wrapAttachment = (body: string): string =>
    `<attachment filename="${escapeAttr(filename)}" type="${escapeAttr(mimetype)}">\n${body}\n</attachment>`;

  const asTextBlock = (text: string): ParsedAttachmentResult => ({
    placement: 'contentBlocks',
    content: { type: 'text', text },
  });

  const isImage = mimetype.startsWith('image/');
  const isDocPixieSupported = mimetype === 'application/pdf' || isImage;

  if (isDocPixieSupported) {
    // Path A: DocPixie skill active -> full processing; the model receives a
    // summary plus a documentId it can pass to the DocPixie query tool.
    if (await this.hasDocPixieSkill(ctx)) {
      const contextBlock = await this.tryDocPixieFullProcess(fileData, filename, ctx);
      if (contextBlock) {
        return asTextBlock(contextBlock);
      }
      // Full processing unavailable or failed -> continue with Path B.
    }

    // Path B: transient text extraction. Reached when the skill is absent and
    // also when Path A produced nothing.
    const extracted = await this.tryDocPixieExtract(fileData, filename);
    if (extracted) {
      return asTextBlock(wrapAttachment(extracted));
    }
  }

  if (isImage) {
    // Final image fallback: vision-capable models consume image_url natively.
    return {
      placement: 'contentBlocks',
      content: {
        type: 'image_url',
        image_url: { url: `data:${mimetype};base64,${data}` },
      },
    };
  }

  if (isTextMimetype(mimetype)) {
    // Decode to readable UTF-8 so the model can actually read the content.
    return asTextBlock(wrapAttachment(Buffer.from(data, 'base64').toString('utf-8')));
  }

  // Binary non-image (or a PDF that could not be extracted): embed as a data
  // URI; text-only models at minimum see the filename and type.
  return asTextBlock(wrapAttachment(`data:${mimetype};base64,${data}`));
}
987
+ }
988
+
989
/**
 * Provider metadata for the custom LLM integration: the display title and
 * the `CustomLLMProvider` class that implements it.
 */
export const customLLMProviderOptions: LLMProviderMeta = {
  title: 'Custom LLM (OpenAI Compatible)',
  provider: CustomLLMProvider,
};