plugin-custom-llm 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,992 +1,1183 @@
1
- import { LLMProvider, LLMProviderMeta } from '@nocobase/plugin-ai';
2
- import { Model } from '@nocobase/database';
3
- import path from 'node:path';
4
- import fs from 'node:fs/promises';
5
- import axios from 'axios';
6
- import { Context } from '@nocobase/actions';
7
- import type { ParsedAttachmentResult } from '@nocobase/plugin-ai';
8
-
9
- // Keepalive marker — zero-width space prefix to distinguish from real content
10
- const KEEPALIVE_PREFIX = '\u200B\u200B\u200B';
11
-
12
/**
 * Load a module by resolving it against the host application's
 * `node_modules` directory instead of this plugin's own dependency tree.
 *
 * The search root is `NODE_MODULES_PATH` when that environment variable is
 * set, otherwise `<cwd>/node_modules`.
 *
 * @param moduleName Module specifier to resolve and load.
 * @returns Whatever `require` returns for the resolved module.
 */
function requireFromApp(moduleName: string) {
  const searchRoot = process.env.NODE_MODULES_PATH || path.join(process.cwd(), 'node_modules');
  const modulePath = require.resolve(moduleName, { paths: [searchRoot] });
  return require(modulePath);
}
20
-
21
/** Cached `ChatOpenAI` export; stays `null` until the first lookup. */
let _ChatOpenAI: any = null;

/**
 * Return the `ChatOpenAI` export of `@langchain/openai`, loading the module
 * through `requireFromApp` on first use and reusing the cached value after.
 */
function getChatOpenAI() {
  if (_ChatOpenAI) {
    return _ChatOpenAI;
  }
  _ChatOpenAI = requireFromApp('@langchain/openai').ChatOpenAI;
  return _ChatOpenAI;
}
29
-
30
/** Cached `ChatGenerationChunk` export; stays `null` until the first lookup. */
let _ChatGenerationChunk: any = null;

/**
 * Return the `ChatGenerationChunk` export of `@langchain/core/outputs`,
 * loading the module through `requireFromApp` on first use.
 */
function getChatGenerationChunk() {
  if (_ChatGenerationChunk) {
    return _ChatGenerationChunk;
  }
  _ChatGenerationChunk = requireFromApp('@langchain/core/outputs').ChatGenerationChunk;
  return _ChatGenerationChunk;
}
38
-
39
/** Cached `AIMessageChunk` export; stays `null` until the first lookup. */
let _AIMessageChunk: any = null;

/**
 * Return the `AIMessageChunk` export of `@langchain/core/messages`,
 * loading the module through `requireFromApp` on first use.
 */
function getAIMessageChunk() {
  if (_AIMessageChunk) {
    return _AIMessageChunk;
  }
  _AIMessageChunk = requireFromApp('@langchain/core/messages').AIMessageChunk;
  return _AIMessageChunk;
}
47
-
48
/**
 * Remove tool-call control tokens (`<|tool▁calls▁begin|>`, `<|tool▁call▁end|>`,
 * `<|tool▁sep|>`, …) that some models leak into their text output.
 *
 * Both the ASCII bar `|` and the fullwidth bar `｜` (U+FF5C) are accepted as
 * delimiters. DeepSeek's special tokens use the fullwidth form, which a
 * character class containing only the ASCII bar never matches. The word
 * separator is `▁` (U+2581). Escapes are used so the pattern survives
 * re-encoding of this file.
 *
 * @param content Text to clean.
 * @returns The cleaned text; a non-string input is returned unchanged.
 */
function stripToolCallTags(content: string): string | null {
  if (typeof content !== 'string') {
    return content;
  }
  return content.replace(
    /<[|\uFF5C]tool\u2581(?:calls\u2581begin|calls\u2581end|call\u2581begin|call\u2581end|sep)[|\uFF5C]>/g,
    '',
  );
}
54
-
55
/**
 * Pull plain text out of a message/chunk payload.
 *
 * Resolution order:
 *  1. If `contentPath` is given and is not `'auto'`, walk it as a dot-path;
 *     a string found there is returned as-is.
 *  2. A string payload is returned unchanged.
 *  3. An array payload yields the concatenated `text` of its `type: 'text'` blocks.
 *  4. An object payload with a truthy `text` yields `String(text)`.
 *  5. Anything else yields `''`.
 *
 * @param content     Payload to inspect.
 * @param contentPath Optional dot-path (or `'auto'`) pointing at the text.
 */
function extractTextContent(content: any, contentPath?: string): string {
  const useExplicitPath = Boolean(contentPath) && contentPath !== 'auto';
  if (useExplicitPath) {
    try {
      const resolved = (contentPath as string)
        .split('.')
        .reduce((node: any, segment: string) => (node == null ? node : node[segment]), content);
      if (typeof resolved === 'string') {
        return resolved;
      }
    } catch {
      // Path lookup failed; continue with automatic detection below.
    }
  }

  if (typeof content === 'string') {
    return content;
  }

  if (Array.isArray(content)) {
    const parts: any[] = [];
    for (const block of content) {
      if (block && block.type === 'text') {
        parts.push(block.text ?? '');
      }
    }
    return parts.join('');
  }

  const isObjectWithText = content && typeof content === 'object' && content.text;
  return isObjectWithText ? String(content.text) : '';
}
81
-
82
/**
 * Detect whether a MIME type is text-decodable (UTF-8 safe).
 *
 * The value is normalised before matching: parameters such as
 * `; charset=utf-8` are dropped and the type is lower-cased, so
 * `application/json; charset=utf-8` and `TEXT/PLAIN` are recognised.
 *
 * @param mimetype MIME type string, optionally with parameters.
 * @returns `true` for any `text/*` type and for the listed text-based
 *          `application/*` types; `false` otherwise or when empty.
 */
function isTextMimetype(mimetype?: string): boolean {
  if (!mimetype) return false;

  // Keep only "type/subtype": strip parameters, whitespace and case.
  const essence = (mimetype.split(';', 1)[0] ?? '').trim().toLowerCase();
  if (!essence) return false;

  // All text/* subtypes are UTF-8 decodable
  if (essence.startsWith('text/')) return true;

  // Common text-based application types
  const TEXT_APPLICATION_TYPES = new Set([
    'application/json',
    'application/xml',
    'application/xhtml+xml',
    'application/atom+xml',
    'application/rss+xml',
    'application/csv',
    'application/javascript',
    'application/typescript',
    'application/x-javascript',
    'application/x-typescript',
    'application/x-yaml',
    'application/yaml',
    'application/x-json',
    'application/geo+json',
    'application/ld+json',
    'application/manifest+json',
    'application/graphql',
    'application/x-www-form-urlencoded',
    'application/toml',
    'application/x-sh',
    'application/x-shellscript',
    'application/sql',
  ]);
  return TEXT_APPLICATION_TYPES.has(essence);
}
116
-
117
/**
 * Parse a JSON configuration string without ever throwing.
 *
 * Callers read properties straight off the result, so this always returns a
 * plain object. Valid JSON that is not an object (`null`, a number, a string,
 * an array) is rejected with a warning instead of being passed through;
 * `null` in particular would make the first property access throw.
 *
 * @param str       Raw value; anything that is not a non-empty string yields `{}`.
 * @param fieldName Label used in warning messages (defaults to "JSON config").
 * @returns The parsed object, or `{}` when the input is missing or unusable.
 */
function safeParseJSON(str: any, fieldName?: string): any {
  if (!str || typeof str !== 'string') return {};

  const label = fieldName || 'JSON config';
  try {
    const parsed: unknown = JSON.parse(str);
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      console.warn(`[CustomLLM] Ignoring ${label}: expected a JSON object`);
      return {};
    }
    return parsed;
  } catch (e) {
    // Warn so misconfigured JSON doesn't silently fall through to defaults
    const reason = e instanceof Error ? e.message : String(e);
    console.warn(`[CustomLLM] Failed to parse ${label}: ${reason}`);
    return {};
  }
}
127
-
128
/**
 * Read a nested value using a dot-separated path.
 * Example: `getByPath({ a: { b: "hello" } }, "a.b")` gives `"hello"`.
 *
 * @param obj     Root object; a falsy root yields `undefined`.
 * @param dotPath Dot-separated keys; an empty path yields `undefined`.
 * @returns The value at the path, or `undefined` when an intermediate
 *          segment is `null`/`undefined`.
 */
function getByPath(obj: any, dotPath: string): any {
  if (!obj || !dotPath) {
    return undefined;
  }
  return dotPath
    .split('.')
    .reduce((node: any, segment: string) => (node == null ? undefined : node[segment]), obj);
}
142
-
143
/**
 * Create a custom fetch that intercepts LLM responses and maps them
 * from a non-standard format to OpenAI-compatible format.
 *
 * responseMapping config example:
 * {
 *   "content": "message.response"  // dot-path to the content field
 *   "role": "message.role"         // optional, dot-path to role (default: "assistant")
 *   "id": "id"                     // optional, dot-path to response id
 * }
 *
 * Behaviour:
 *  - Non-OK responses and responses that are neither SSE nor JSON pass through untouched.
 *  - SSE responses are re-emitted line by line; `data:` payloads that contain
 *    the mapped content are rewritten as `chat.completion.chunk` events.
 *  - JSON responses that contain the mapped content are rewritten as a
 *    `chat.completion` object. The body is read from a clone, so the original
 *    response is still readable when no mapping applies.
 *
 * @param responseMapping Mapping of OpenAI fields to dot-paths in the provider response.
 * @returns A fetch-compatible function, or `undefined` when no `content` path is configured.
 */
function createMappingFetch(responseMapping: Record<string, string>) {
  const contentPath = responseMapping.content;
  if (!contentPath) return undefined; // No mapping needed

  const idPath = responseMapping.id || 'id';

  /** True when the request explicitly asked for an event stream via its Accept header. */
  const requestsEventStream = (init?: RequestInit): boolean => {
    if (!init?.headers) return false;
    try {
      // `Headers` normalises plain objects, tuples and Headers instances and
      // makes the lookup case-insensitive.
      return new Headers(init.headers).get('accept') === 'text/event-stream';
    } catch {
      return false;
    }
  };

  /**
   * Translate one upstream SSE line into the text to emit downstream.
   * Every emitted line is terminated with a blank line to preserve SSE framing.
   */
  const mapSseLine = (line: string): string => {
    if (!line.startsWith('data: ')) {
      return line + '\n\n';
    }
    const data = line.slice(6).trim();
    if (data === '[DONE]') {
      return 'data: [DONE]\n\n';
    }
    try {
      const parsed = JSON.parse(data);
      const mappedContent = getByPath(parsed, contentPath);
      if (mappedContent === undefined) {
        // Pass through unmapped SSE events unchanged
        return line + '\n\n';
      }
      // Map to OpenAI streaming format
      const mapped = {
        id: getByPath(parsed, idPath) || 'chatcmpl-custom',
        object: 'chat.completion.chunk',
        created: Math.floor(Date.now() / 1000),
        model: 'custom',
        choices: [
          {
            index: 0,
            delta: { content: String(mappedContent), role: 'assistant' },
            finish_reason: null,
          },
        ],
      };
      return `data: ${JSON.stringify(mapped)}\n\n`;
    } catch {
      // Payload is not JSON; forward it as received
      return line + '\n\n';
    }
  };

  return async (url: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
    const response = await fetch(url, init);

    // Only intercept successful responses
    if (!response.ok) return response;

    const contentType = response.headers.get('content-type') || '';

    // Handle streaming responses (SSE): transform each line
    if (contentType.includes('text/event-stream') || requestsEventStream(init)) {
      const reader = response.body?.getReader();
      if (!reader) return response;

      let cancelled = false;
      const stream = new ReadableStream<Uint8Array>({
        async start(controller) {
          const decoder = new TextDecoder();
          const encoder = new TextEncoder();
          let buffer = '';

          try {
            while (true) {
              const { done, value } = await reader.read();
              if (done) break;

              buffer += decoder.decode(value, { stream: true });
              const lines = buffer.split('\n');
              // The last element is an incomplete line; keep it for the next read
              buffer = lines.pop() || '';

              for (const line of lines) {
                controller.enqueue(encoder.encode(mapSseLine(line)));
              }
            }

            if (cancelled) return;

            // Flush the decoder and any final line that had no trailing newline,
            // otherwise the last event of the stream would be lost.
            buffer += decoder.decode();
            if (buffer) {
              controller.enqueue(encoder.encode(mapSseLine(buffer)));
            }
            controller.close();
          } catch (err) {
            if (!cancelled) controller.error(err);
          }
        },
        cancel(reason) {
          // Release the upstream connection when the consumer stops reading
          cancelled = true;
          return reader.cancel(reason);
        },
      });

      return new Response(stream, {
        status: response.status,
        statusText: response.statusText,
        headers: new Headers({
          'content-type': 'text/event-stream',
        }),
      });
    }

    // Handle non-streaming JSON responses
    if (contentType.includes('application/json')) {
      let body: any;
      try {
        // Read from a clone: a body can be consumed only once, and the original
        // response must stay readable if it is returned unmapped below.
        body = await response.clone().json();
      } catch {
        return response;
      }

      const mappedContent = getByPath(body, contentPath);
      if (mappedContent !== undefined) {
        const mapped = {
          id: getByPath(body, idPath) || 'chatcmpl-custom',
          object: 'chat.completion',
          created: Math.floor(Date.now() / 1000),
          model: 'custom',
          choices: [
            {
              index: 0,
              message: {
                role: getByPath(body, responseMapping.role || '') || 'assistant',
                content: String(mappedContent),
              },
              finish_reason: 'stop',
            },
          ],
          usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
        };

        return new Response(JSON.stringify(mapped), {
          status: response.status,
          statusText: response.statusText,
          headers: new Headers({
            'content-type': 'application/json',
          }),
        });
      }
    }

    return response;
  };
}
276
-
277
/**
 * Wrap a chat model's streaming method so that keepalive chunks are injected
 * during long idle periods (e.g. a model's thinking/reasoning phase).
 *
 * The method patched is `_streamResponseChunks` when the model has one,
 * otherwise `_stream`. The base stream is drained by a background task into a
 * queue; the wrapper races "a chunk arrived" against a timer and yields a
 * keepalive chunk whenever the timer wins.
 *
 * Keepalives are suppressed while the most recently received chunk carried
 * tool-call data, so they are not interleaved into a tool-call sequence.
 *
 * @param model   Model instance to patch in place.
 * @param options `intervalMs` is the idle time before a keepalive is sent.
 *                `keepAliveContent` is accepted for compatibility but not used:
 *                the emitted chunk always carries `KEEPALIVE_PREFIX`.
 * @returns The same `model` instance.
 */
function wrapWithStreamKeepAlive(
  model: any,
  options: { intervalMs: number; keepAliveContent: string },
) {
  const streamMethodName =
    typeof model._streamResponseChunks === 'function' ? '_streamResponseChunks' : '_stream';
  const originalStream = model[streamMethodName].bind(model);
  const { intervalMs } = options;

  model[streamMethodName] = async function* (messages: any[], opts: any, runManager?: any) {
    const ChatGenerationChunk = getChatGenerationChunk();
    const AIMessageChunk = getAIMessageChunk();

    const baseIterator = originalStream(messages, opts, runManager);

    // Queue for chunks from the base stream
    const buffer: any[] = [];
    let streamDone = false;
    let streamError: unknown = null;
    // Set once the base stream has thrown; no keepalive is sent afterwards
    let hasErrored = false;
    let notifyReady: (() => void) | null = null;
    // True while the latest chunk from the base stream carried tool-call data.
    // It is deliberately NOT reset when the queue is flushed: the keepalive
    // branch only runs with an empty queue, so a flag cleared on flush could
    // never be observed as true there.
    let inToolCallSequence = false;

    /** Wake the main loop if it is currently waiting for a chunk. */
    const wake = () => {
      const resolve = notifyReady;
      notifyReady = null;
      resolve?.();
    };

    // Consume the base stream in a background task
    const consumer = (async () => {
      try {
        for await (const chunk of baseIterator) {
          const msg = chunk?.message;
          inToolCallSequence = Boolean(msg?.tool_call_chunks?.length || msg?.tool_calls?.length);
          buffer.push(chunk);
          wake();
        }
      } catch (err) {
        streamError = err;
        hasErrored = true;
      } finally {
        // Also wakes the main loop so completion and errors propagate promptly
        streamDone = true;
        wake();
      }
    })();

    try {
      while (!streamDone || buffer.length > 0) {
        // Flush buffered chunks first
        while (buffer.length > 0) {
          yield buffer.shift();
        }

        if (streamDone) break;

        // Wait for either: new chunk arrives OR keepalive interval expires
        const waitForChunk = new Promise<void>((resolve) => {
          notifyReady = resolve;
        });

        let timer: ReturnType<typeof setTimeout> | undefined;
        const result = await Promise.race([
          waitForChunk.then(() => 'chunk' as const),
          new Promise<'timeout'>((resolve) => {
            timer = setTimeout(() => resolve('timeout'), intervalMs);
          }),
        ]);

        // Clear the timer so it does not outlive this iteration
        clearTimeout(timer);

        if (result === 'timeout' && !streamDone && buffer.length === 0) {
          // Don't emit keepalive if the stream has errored; propagate immediately
          if (hasErrored) break;
          // Don't emit keepalive during an active tool call sequence
          if (inToolCallSequence) continue;

          // The keepalive content must be truthy so downstream `if (content)`
          // style checks emit an event; KEEPALIVE_PREFIX is zero-width spaces,
          // so it is invisible when rendered.
          yield new ChatGenerationChunk({
            message: new AIMessageChunk({
              content: KEEPALIVE_PREFIX,
              additional_kwargs: { __keepalive: true },
            }),
            text: KEEPALIVE_PREFIX,
          });
        }
        // If result === 'chunk', the flush happens at the top of the loop
      }

      // Re-throw any stream error
      if (streamError) {
        throw streamError;
      }
    } finally {
      // Ensure the consumer finishes
      await consumer;
    }
  };

  return model;
}
408
-
409
/**
 * Tell whether `text` is a keepalive marker, i.e. a string that begins with
 * `KEEPALIVE_PREFIX`. Non-string values are never keepalive markers.
 */
function isKeepAlive(text: string): boolean {
  if (typeof text !== 'string') {
    return false;
  }
  return text.startsWith(KEEPALIVE_PREFIX);
}
415
-
416
/**
 * Patch `model.bindTools` so that tool schemas with an empty `properties`
 * object receive a single placeholder property. Some providers (Gemini among
 * them) reject tools declared with `properties: {}`.
 *
 * The patch is applied at two points:
 *  1. Before delegating: raw JSON-Schema tools (`tool.schema`) and
 *     OpenAI-style tools (`tool.function.parameters`) are copied with the
 *     placeholder filled in. Zod-backed tools are left for step 2.
 *  2. After delegating: the tools found on `result.kwargs` (or
 *     `result.defaultOptions`) are fixed in place, including schemas nested
 *     under `anyOf` / `oneOf` / `allOf`.
 *
 * @param model Model to patch in place; returned unchanged when it has no `bindTools`.
 * @returns The same `model` instance.
 */
function fixEmptyToolProperties(model: any) {
  const originalBind = model.bindTools?.bind(model);
  if (!originalBind) return model;

  const PLACEHOLDER_PROP = {
    _placeholder: { type: 'string', description: 'No parameters required' },
  };

  /** True for a truthy object value that has no own enumerable keys. */
  const isEmptyObject = (value: any): boolean =>
    Boolean(value) && typeof value === 'object' && Object.keys(value).length === 0;

  /** Fill empty `properties` on `schema` and on any schema nested in its combinators. */
  function fixPropertiesInSchema(schema: any): void {
    if (!schema || typeof schema !== 'object') return;

    if (isEmptyObject(schema.properties)) {
      schema.properties = { ...PLACEHOLDER_PROP };
    }

    for (const combinator of ['anyOf', 'oneOf', 'allOf']) {
      const branches = schema[combinator];
      if (!Array.isArray(branches)) continue;
      for (const branch of branches) {
        fixPropertiesInSchema(branch);
      }
    }
  }

  /** Step 1: return `tool` itself, or a shallow copy with the placeholder filled in. */
  const patchRawTool = (tool: any): any => {
    if (!tool || typeof tool !== 'object') return tool;

    // Zod schemas are converted by the original bindTools; fix them afterwards
    if (typeof tool.schema?.safeParse === 'function') return tool;

    // Raw JSON-Schema definition
    const rawSchema = tool.schema;
    const isPlainSchema = rawSchema && typeof rawSchema === 'object' && !rawSchema.safeParse;
    if (isPlainSchema && isEmptyObject(rawSchema.properties)) {
      return {
        ...tool,
        schema: { ...rawSchema, properties: { ...PLACEHOLDER_PROP } },
      };
    }

    // OpenAI function-calling definition
    const fnParams = tool.function?.parameters;
    if (isEmptyObject(fnParams?.properties)) {
      return {
        ...tool,
        function: {
          ...tool.function,
          parameters: { ...fnParams, properties: { ...PLACEHOLDER_PROP } },
        },
      };
    }

    return tool;
  };

  model.bindTools = function (tools: any[], kwargs?: any) {
    const bound = originalBind(tools.map(patchRawTool), kwargs);

    // Step 2: inspect what the original bindTools produced
    try {
      const boundConfig = bound?.kwargs ?? bound?.defaultOptions;
      const boundTools = boundConfig?.tools;
      if (boundTools && Array.isArray(boundTools)) {
        for (const tool of boundTools) {
          // OpenAI format: { type: 'function', function: { parameters: { properties: {} } } }
          if (tool?.function?.parameters) {
            fixPropertiesInSchema(tool.function.parameters);
          }
          // Direct parameter format
          if (tool?.parameters) {
            fixPropertiesInSchema(tool.parameters);
          }
        }
      }
    } catch {
      // Tool binding must not fail because the post-fix inspection failed
    }

    return bound;
  };

  return model;
}
534
-
535
- export class CustomLLMProvider extends LLMProvider {
536
/**
 * This provider declares no fixed base URL; `createModel` reads the endpoint
 * from `serviceOptions.baseURL` instead.
 */
get baseURL() {
  return null;
}
539
-
540
/** Request-side settings, parsed from the `requestConfig` JSON string in the service options. */
private get requestConfig() {
  const raw = this.serviceOptions?.requestConfig;
  return safeParseJSON(raw, 'requestConfig');
}
543
-
544
/** Response-side settings, parsed from the `responseConfig` JSON string in the service options. */
private get responseConfig() {
  const raw = this.serviceOptions?.responseConfig;
  return safeParseJSON(raw, 'responseConfig');
}
547
-
548
/**
 * Build the chat model for this provider from the service and model options.
 *
 * Steps, in order:
 *  - assemble `modelKwargs` from the response format, `requestConfig.modelKwargs`
 *    and `requestConfig.extraBody`;
 *  - assemble the client configuration (base URL, timeout, extra headers,
 *    response-mapping fetch);
 *  - construct `ChatOpenAI`, patch its tool binding for empty tool schemas,
 *    and wrap its stream with keepalive injection when `streamKeepAlive` is on
 *    and streaming is not disabled.
 */
createModel() {
  const { apiKey, disableStream, timeout, streamKeepAlive, keepAliveIntervalMs, keepAliveContent } =
    this.serviceOptions || {};
  // baseURL comes from core's options.baseURL field
  const baseURL = this.serviceOptions?.baseURL;
  const { responseFormat } = this.modelOptions || {};
  const reqConfig = this.requestConfig;
  const resConfig = this.responseConfig;

  const modelKwargs: Record<string, any> = {
    response_format: { type: responseFormat ?? 'text' },
    ...(reqConfig.modelKwargs || {}),
  };
  const { extraBody, extraHeaders } = reqConfig;
  if (extraBody && typeof extraBody === 'object') {
    Object.assign(modelKwargs, extraBody);
  }

  const ChatOpenAI = getChatOpenAI();

  // Falsy or non-numeric timeouts convert to 0/NaN and fail the `> 0` test
  const timeoutMs = Number(timeout);
  const hasTimeout = timeoutMs > 0;

  const configuration: Record<string, any> = { baseURL };
  if (hasTimeout) {
    configuration.timeout = timeoutMs;
  }
  if (extraHeaders && typeof extraHeaders === 'object') {
    configuration.defaultHeaders = extraHeaders;
  }
  if (resConfig.responseMapping) {
    // Response mapping is applied through a custom fetch
    configuration.fetch = createMappingFetch(resConfig.responseMapping);
  }

  const config: Record<string, any> = {
    apiKey,
    ...this.modelOptions,
    modelKwargs,
    configuration,
    verbose: false,
  };
  if (disableStream) {
    // For models with long thinking phases whose empty stream values would
    // otherwise terminate processing
    config.streaming = false;
  }
  if (hasTimeout) {
    config.timeout = timeoutMs;
  }

  const model = fixEmptyToolProperties(new ChatOpenAI(config));

  const useKeepAlive = streamKeepAlive && !disableStream;
  if (!useKeepAlive) {
    return model;
  }
  return wrapWithStreamKeepAlive(model, {
    intervalMs: Number(keepAliveIntervalMs) || 5000,
    keepAliveContent: keepAliveContent || '...',
  });
}
618
-
619
/**
 * Turn a streamed chunk into the text to emit.
 *
 * Keepalive chunks are reported as the bare `KEEPALIVE_PREFIX` so that an
 * event is still written for them; the prefix is zero-width spaces and so is
 * invisible when rendered. All other text has tool-call control tokens removed.
 */
parseResponseChunk(chunk: any): string | null {
  const text = extractTextContent(chunk, this.responseConfig.contentPath);
  return isKeepAlive(text) ? KEEPALIVE_PREFIX : stripToolCallTags(text);
}
632
-
633
/**
 * Convert a stored message record into the `{ key, content, role }` shape.
 *
 * Besides copying fields, this cleans the text content:
 *  - array content is flattened to the concatenated text of its `text` blocks;
 *  - keepalive markers are removed. Only the marker itself is stripped (the
 *    zero-width prefix, plus the visible placeholder that older records
 *    presumably appended to it; TODO confirm the legacy format), so real text
 *    that follows a marker is preserved;
 *  - tool-call control tokens are removed;
 *  - the internal `__keepalive` flag is dropped from `metadata.additional_kwargs`.
 *
 * Known trade-off: real text that starts with the placeholder immediately
 * after a marker loses that leading placeholder.
 */
parseResponseMessage(message: Model) {
  const { content: rawContent, messageId, metadata, role, toolCalls, attachments, workContext } = message;
  const content: Record<string, any> = {
    ...(rawContent ?? {}),
    messageId,
    metadata,
    attachments,
    workContext,
  };

  if (toolCalls) {
    content.tool_calls = toolCalls;
  }

  if (Array.isArray(content.content)) {
    content.content = content.content
      .filter((block: any) => block?.type === 'text')
      .map((block: any) => block.text)
      .join('');
  }

  if (typeof content.content === 'string') {
    const escapeRegExp = (value: string) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Same default as createModel uses for keepAliveContent
    const legacyPlaceholder = String(this.serviceOptions?.keepAliveContent || '...');
    const keepAliveMarker = new RegExp(
      escapeRegExp(KEEPALIVE_PREFIX) + '(?:' + escapeRegExp(legacyPlaceholder) + ')?',
      'g',
    );
    content.content = stripToolCallTags(content.content.replace(keepAliveMarker, ''));
  }

  // Clean internal keepalive flag from persisted additional_kwargs
  if (content.metadata?.additional_kwargs?.__keepalive !== undefined) {
    const { __keepalive, ...cleanKwargs } = content.metadata.additional_kwargs;
    content.metadata = { ...content.metadata, additional_kwargs: cleanKwargs };
  }

  return {
    key: messageId,
    content,
    role,
  };
}
671
-
672
/**
 * Extract reasoning text from a chunk.
 *
 * The key is `responseConfig.reasoningKey`, defaulting to `reasoning_content`.
 * It is looked up in `chunk.additional_kwargs` first and then in
 * `chunk.kwargs.additional_kwargs`, since chunk formats nest it differently.
 *
 * @returns `{ status: 'streaming', content }` for a non-empty string, else `null`.
 */
parseReasoningContent(chunk: any): { status: string; content: string } | null {
  const key = this.responseConfig.reasoningKey || 'reasoning_content';
  const candidate = chunk?.additional_kwargs?.[key] ?? chunk?.kwargs?.additional_kwargs?.[key];

  if (typeof candidate !== 'string' || !candidate) {
    return null;
  }
  return { status: 'streaming', content: candidate };
}
686
-
687
/**
 * Extract `[messageId, metadata]` from an LLM result for post-save enrichment.
 *
 * Only the first generation is inspected. IDs longer than 128 characters are
 * truncated. `[null, null]` is returned when there is no generation, no
 * message id, no metadata to report, or when inspection throws.
 */
parseResponseMetadata(output: any): any {
  const nothing = () => [null, null];

  try {
    const message = output?.generations?.[0]?.[0]?.message;
    const rawId = message?.id;
    // `message` is only reachable through an existing generation, so a missing
    // generation also ends up here.
    if (!output?.generations?.[0]?.[0] || !rawId) {
      return nothing();
    }

    // Some providers return very long chatcmpl-xxx / run-xxx identifiers
    const id = typeof rawId === 'string' && rawId.length > 128 ? rawId.substring(0, 128) : rawId;

    const metadata: Record<string, any> = {};
    const responseMeta = message?.response_metadata;
    if (responseMeta) {
      metadata.finish_reason = responseMeta.finish_reason;
      metadata.system_fingerprint = responseMeta.system_fingerprint;
    }
    const usage = message?.usage_metadata;
    if (usage) {
      metadata.usage_metadata = usage;
    }

    if (Object.keys(metadata).length === 0) {
      return nothing();
    }
    return [id, metadata];
  } catch {
    return nothing();
  }
}
719
-
720
/** Return the error's `message`, or a generic text when it has none. */
parseResponseError(err: any) {
  const detail = err?.message;
  return detail ?? 'Unexpected LLM service error';
}
723
-
724
/**
 * Read an attachment and return its content base64-encoded.
 *
 * Three sources are handled, chosen from the URL the file manager reports:
 *  - `http(s)://…`: downloaded with axios, forwarding the request's
 *    `referer` and `user-agent`. The URL is used exactly as reported;
 *    decoding it first would corrupt percent-encoded reserved characters
 *    (for example in signed query strings).
 *  - a URL containing `/api/attachments:stream`: read through the file
 *    manager's stream.
 *  - anything else: treated as a local path. The `APP_PUBLIC_PATH` prefix is
 *    stripped before joining with the working directory, because a path that
 *    still carries the sub-path prefix does not exist on disk.
 *
 * @returns `base64` content and, for local files only, the resolved `absPath`
 *          so callers can hand the path to other tools without re-writing the file.
 * @throws Error when a local path resolves outside the working directory.
 */
private async readFileData(
  ctx: Context,
  attachment: any,
): Promise<{ base64: string; absPath?: string }> {
  const fileManager = this.app.pm.get('file-manager') as any;
  const rawUrl: string = await fileManager.getFileURL(attachment);

  if (rawUrl.startsWith('http://') || rawUrl.startsWith('https://')) {
    const referer = ctx.get('referer') || '';
    const ua = ctx.get('user-agent') || '';
    const response = await axios.get(rawUrl, {
      responseType: 'arraybuffer',
      timeout: 30_000,
      headers: { referer, 'User-Agent': ua },
    });
    return { base64: Buffer.from(response.data).toString('base64') };
  }

  // Decoded form is needed for filesystem paths and for matching the stream
  // route; a malformed escape sequence falls back to the raw value.
  let url = rawUrl;
  try {
    url = decodeURIComponent(rawUrl);
  } catch {
    url = rawUrl;
  }

  // Internal API stream URL (e.g. a private-storage proxy): read via fileManager
  if (url.includes('/api/attachments:stream')) {
    const { stream } = await fileManager.getFileStream(attachment);
    const chunks: Buffer[] = [];
    for await (const chunk of stream) {
      chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
    }
    return { base64: Buffer.concat(chunks).toString('base64') };
  }

  // Local file: strip APP_PUBLIC_PATH prefix before joining with cwd
  let localPath = url;
  const appPublicPath = (process.env.APP_PUBLIC_PATH || '/').replace(/\/+$/, '');
  if (appPublicPath && localPath.startsWith(appPublicPath + '/')) {
    localPath = localPath.slice(appPublicPath.length);
  }

  // Resolve and guard against path traversal
  const storageRoot = path.resolve(process.cwd());
  const absPath = path.resolve(storageRoot, localPath.replace(/^\//, ''));
  if (!absPath.startsWith(storageRoot + path.sep) && absPath !== storageRoot) {
    throw new Error(`Attachment path escapes storage root: ${localPath}`);
  }

  const data = await fs.readFile(absPath);
  return { base64: Buffer.from(data).toString('base64'), absPath };
}
786
-
787
- /**
788
- * Override parseAttachment to convert all attachments into formats that
789
- * generic OpenAI-compatible endpoints actually support:
790
- *
791
- * - Images → image_url block with base64 data URI (vision models)
792
- * - Text files → text block with decoded UTF-8 content
793
- * - Binary → text block with base64 data URI (multi-modal or fallback)
794
- *
795
- * The base-class implementation returns a LangChain ContentBlock.Multimodal.File
796
- * (`type: 'file'`) for non-image attachments. LangChain serialises this as the
797
- * newer OpenAI Files API format which most custom/local endpoints do NOT understand,
798
- * causing file content to be silently dropped.
799
- *
800
- * This method is entirely self-contained — it does not call super — so it is
801
- * safe to use without modifying plugin-ai core.
802
- */
803
- /**
804
- * Try to extract text from an attachment using DocPixie (if available and
805
- * the file type is supported). Returns null if DocPixie is unavailable,
806
- * not ready, or the file type is not supported.
807
- */
808
/**
 * Tell whether the AI employee behind this request has the DocPixie skill
 * (`docpixie.query.document`) among its configured skills.
 *
 * The employee username is read from `ctx.action.params.values.aiEmployee`
 * and its `skillSettings` are loaded from the `aiEmployees` repository. The
 * answer is cached on `ctx.state._docPixieActive`, so the lookup runs at most
 * once per context. A missing username or any lookup failure counts as `false`.
 */
private async hasDocPixieSkill(ctx: Context): Promise<boolean> {
  const cached = ctx.state._docPixieActive;
  if (cached !== undefined) return cached as boolean;

  let active = false;
  try {
    const employeeUsername = ctx.action?.params?.values?.aiEmployee;
    if (employeeUsername) {
      const employee = await ctx.db.getRepository('aiEmployees').findOne({
        filter: { username: String(employeeUsername) },
        fields: ['skillSettings'],
      });
      const skills: Array<{ name: string }> = (employee?.get?.('skillSettings') as any)?.skills ?? [];
      active = skills.some((skill) => skill.name === 'docpixie.query.document');
    }
  } catch {
    active = false;
  }

  ctx.state._docPixieActive = active;
  return active;
}
840
-
841
/**
 * Run the DocPixie document-processing service on an attachment and return a
 * `<processed_document>` context block for the LLM: the document summary plus
 * an instruction to call the `docpixie.query.document` tool with the returned
 * `documentId` for details.
 *
 * Local files are passed by `absPath`; other files are passed as a Buffer
 * decoded from `base64`.
 *
 * The filename comes from the uploaded attachment, so it is escaped before
 * being placed inside the quoted `filename` attribute; otherwise a name
 * containing `"` or `>` could break out of the attribute and alter the block.
 *
 * @returns The context block, or `null` when DocPixie is missing, not ready,
 *          or processing fails (the failure is logged, not thrown).
 */
private async tryDocPixieFullProcess(
  fileData: { base64: string; absPath?: string },
  filename: string,
  ctx: Context,
): Promise<string | null> {
  try {
    const docpixie = this.app.pm.get('docpixie') as any;
    if (!docpixie?.service?.isReady?.()) return null;

    const userId: number | undefined = ctx.state?.currentUser?.id;
    let result: { documentId: number; summary: string; pageCount: number };

    if (fileData.absPath) {
      result = await docpixie.service.processDocumentFromPath(fileData.absPath, filename, { userId });
    } else {
      const buffer = Buffer.from(fileData.base64, 'base64');
      result = await docpixie.service.processDocumentFromBuffer(buffer, filename, { userId });
    }

    const { documentId, summary, pageCount } = result;
    const summaryText = summary?.trim() || 'No summary available.';
    const safeFilename = String(filename)
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');

    return (
      `<processed_document id="${documentId}" filename="${safeFilename}" pages="${pageCount}">\n` +
      `<summary>\n${summaryText}\n</summary>\n` +
      `<rag_instruction>This document is fully indexed. ` +
      `Call docpixie.query.document with documentId=${documentId} to retrieve specific details.</rag_instruction>\n` +
      `</processed_document>`
    );
  } catch (e) {
    // Best-effort: the caller falls back to another path, but leave a trace
    const reason = e instanceof Error ? e.message : String(e);
    console.warn(`[CustomLLM] DocPixie processing failed for attachment: ${reason}`);
    return null;
  }
}
884
-
885
- /**
886
- * Try to extract text from an attachment using DocPixie (transient — no DB indexing).
887
- * When `absPath` is provided (local-storage file), DocPixie reads the file
888
- * directly — no Buffer decode/re-encode or extra temp-file write.
889
- * Falls back to `extractTextFromBuffer` for remote/S3 files.
890
- * Returns null if DocPixie is unavailable, not ready, or file type unsupported.
891
- */
892
- private async tryDocPixieExtract(
893
- fileData: { base64: string; absPath?: string },
894
- filename: string,
895
- ): Promise<string | null> {
896
- try {
897
- const docpixie = this.app.pm.get('docpixie') as any;
898
- if (!docpixie?.service) return null;
899
- let text: string;
900
- if (fileData.absPath) {
901
- text = await docpixie.service.extractTextFromPath(fileData.absPath, filename);
902
- } else {
903
- const buffer = Buffer.from(fileData.base64, 'base64');
904
- text = await docpixie.service.extractTextFromBuffer(buffer, filename);
905
- }
906
- return text || null;
907
- } catch {
908
- return null;
909
- }
910
- }
911
-
912
- async parseAttachment(ctx: Context, attachment: any): Promise<ParsedAttachmentResult> {
913
- const mimetype: string = attachment.mimetype || 'application/octet-stream';
914
- const filename: string = attachment.filename || attachment.name || 'file';
915
- const fileData = await this.readFileData(ctx, attachment);
916
- const { base64: data } = fileData;
917
-
918
- const isDocPixieSupported = mimetype === 'application/pdf' || mimetype.startsWith('image/');
919
-
920
- // ── Path A: DocPixie skill active → full ingestion pipeline ──────────────
921
- // Runs processDocument (extract pages + generate summary + DB index) so the
922
- // LLM gets a rich summary + documentId it can pass to the RAG tool for specifics.
923
- if (isDocPixieSupported && (await this.hasDocPixieSkill(ctx))) {
924
- const contextBlock = await this.tryDocPixieFullProcess(fileData, filename, ctx);
925
- if (contextBlock) {
926
- return {
927
- placement: 'contentBlocks',
928
- content: { type: 'text', text: contextBlock },
929
- };
930
- }
931
- // DocPixie not configured / failed → fall through to Path B
932
- }
933
-
934
- // ── Path B: DocPixie skill absent or Path A failed → transient extraction (no DB) ──
935
- if (mimetype === 'application/pdf') {
936
- const extracted = await this.tryDocPixieExtract(fileData, filename);
937
- if (extracted) {
938
- return {
939
- placement: 'contentBlocks',
940
- content: {
941
- type: 'text',
942
- text: `<attachment filename="${filename}" type="${mimetype}">\n${extracted}\n</attachment>`,
943
- },
944
- };
945
- }
946
- // DocPixie unavailable → fall through to base64 data-URI
947
- }
948
-
949
- if (mimetype.startsWith('image/')) {
950
- // Transient DocPixie extraction (e.g. OCR); fallback to image_url for vision models
951
- const extracted = await this.tryDocPixieExtract(fileData, filename);
952
- if (extracted) {
953
- return {
954
- placement: 'contentBlocks',
955
- content: {
956
- type: 'text',
957
- text: `<attachment filename="${filename}" type="${mimetype}">\n${extracted}\n</attachment>`,
958
- },
959
- };
960
- }
961
- // Final fallback — vision-capable models handle image_url natively
962
- return {
963
- placement: 'contentBlocks',
964
- content: {
965
- type: 'image_url',
966
- image_url: { url: `data:${mimetype};base64,${data}` },
967
- },
968
- };
969
- }
970
-
971
- let textContent: string;
972
- if (isTextMimetype(mimetype)) {
973
- // Decode to readable UTF-8 so the model can actually read the content
974
- const decoded = Buffer.from(data, 'base64').toString('utf-8');
975
- textContent = `<attachment filename="${filename}" type="${mimetype}">\n${decoded}\n</attachment>`;
976
- } else {
977
- // Binary non-image: embed as data-URI; multi-modal models may process it,
978
- // text-only models at minimum see the filename and type
979
- textContent = `<attachment filename="${filename}" type="${mimetype}">\ndata:${mimetype};base64,${data}\n</attachment>`;
980
- }
981
-
982
- return {
983
- placement: 'contentBlocks',
984
- content: { type: 'text', text: textContent },
985
- };
986
- }
987
- }
988
-
989
- export const customLLMProviderOptions: LLMProviderMeta = {
990
- title: 'Custom LLM (OpenAI Compatible)',
991
- provider: CustomLLMProvider,
992
- };
1
+ /**
2
+ * This file is part of the NocoBase (R) project.
3
+ * Copyright (c) 2020-2024 NocoBase Co., Ltd.
4
+ * Authors: NocoBase Team.
5
+ *
6
+ * This project is dual-licensed under AGPL-3.0 and NocoBase Commercial License.
7
+ * For more information, please refer to: https://www.nocobase.com/agreement.
8
+ */
9
+
10
+ import { LLMProvider, LLMProviderMeta } from '@nocobase/plugin-ai';
11
+ import { Model } from '@nocobase/database';
12
+ import path from 'node:path';
13
+ import fs from 'node:fs/promises';
14
+ import axios from 'axios';
15
+ import { Context } from '@nocobase/actions';
16
+ import type { ParsedAttachmentResult } from '@nocobase/plugin-ai';
17
+
18
+ // Keepalive marker — zero-width space prefix to distinguish from real content
19
+ const KEEPALIVE_PREFIX = '\u200B\u200B\u200B';
20
+
21
+ /**
22
+ * Resolve a module from the main NocoBase app's node_modules.
23
+ */
24
+ function requireFromApp(moduleName: string) {
25
+ const appNodeModules = process.env.NODE_MODULES_PATH || path.join(process.cwd(), 'node_modules');
26
+ const resolved = require.resolve(moduleName, { paths: [appNodeModules] });
27
+ return require(resolved);
28
+ }
29
+
30
+ let _ChatOpenAI: any = null;
31
+ function getChatOpenAI() {
32
+ if (!_ChatOpenAI) {
33
+ const mod = requireFromApp('@langchain/openai');
34
+ _ChatOpenAI = mod.ChatOpenAI;
35
+ }
36
+ return _ChatOpenAI;
37
+ }
38
+
39
+ /**
40
+ * Lazy-load ChatOpenAICompletions — the lower-level class used as the base
41
+ * for ReasoningChatOpenAI so we can support reasoning_content round-trips
42
+ * required by models like DeepSeek-R1.
43
+ */
44
+ let _ChatOpenAICompletions: any = null;
45
+ function getChatOpenAICompletions() {
46
+ if (!_ChatOpenAICompletions) {
47
+ const mod = requireFromApp('@langchain/openai');
48
+ _ChatOpenAICompletions = mod.ChatOpenAICompletions;
49
+ }
50
+ return _ChatOpenAICompletions;
51
+ }
52
+
53
+ /**
54
+ * Build tool_calls key for reasoning content map lookup.
55
+ */
56
+ function getToolCallsKey(toolCalls: Array<{ id?: string; name?: string; function?: { name?: string } }> = []): string {
57
+ return toolCalls
58
+ .map((tc) => {
59
+ const id = tc?.id ?? '';
60
+ const name = tc?.name ?? tc?.function?.name ?? '';
61
+ return `${id}:${name}`;
62
+ })
63
+ .join('|');
64
+ }
65
+
66
+ /**
67
+ * Collect reasoning_content from history messages keyed by their tool_calls.
68
+ * This is needed because some APIs (DeepSeek) require reasoning_content to
69
+ * be present in assistant messages that precede tool results.
70
+ */
71
+ function collectReasoningMap(messages: any[]): Map<string, string> {
72
+ const reasoningMap = new Map<string, string>();
73
+ for (const message of messages ?? []) {
74
+ if (message?.getType?.() !== 'ai' && message?._getType?.() !== 'ai') continue;
75
+ if (!message?.tool_calls?.length) continue;
76
+ const reasoningContent = message?.additional_kwargs?.reasoning_content;
77
+ if (typeof reasoningContent !== 'string' || !reasoningContent) continue;
78
+ const key = getToolCallsKey(message.tool_calls);
79
+ if (key) reasoningMap.set(key, reasoningContent);
80
+ }
81
+ return reasoningMap;
82
+ }
83
+
84
+ /**
85
+ * Patch request messages to restore reasoning_content on assistant messages
86
+ * that have tool_calls — needed for APIs that require it on round-trip.
87
+ */
88
+ function patchRequestMessagesReasoning(request: any, reasoningMap?: Map<string, string>): void {
89
+ if (!reasoningMap?.size || !Array.isArray(request?.messages)) return;
90
+ const lastMsg = request.messages.at(-1);
91
+ if (lastMsg?.role !== 'tool') return;
92
+ for (const msg of request.messages) {
93
+ if (msg?.role !== 'assistant') continue;
94
+ if (!Array.isArray(msg.tool_calls) || msg.tool_calls.length === 0) continue;
95
+ if (msg.reasoning_content) continue;
96
+ const key = getToolCallsKey(msg.tool_calls);
97
+ const rc = key ? reasoningMap.get(key) : undefined;
98
+ if (rc) msg.reasoning_content = rc;
99
+ }
100
+ }
101
+
102
+ const REASONING_MAP_KEY = '__nb_reasoning_map';
103
+
104
+ /**
105
+ * Create a ReasoningChatOpenAI class that extends ChatOpenAICompletions.
106
+ * This patches reasoning_content into the request messages before sending
107
+ * to the API, which is required for models like DeepSeek-R1 that need
108
+ * reasoning_content present in assistant messages during tool call cycles.
109
+ */
110
+ function createReasoningChatClass() {
111
+ const ChatOpenAICompletions = getChatOpenAICompletions();
112
+ if (!ChatOpenAICompletions) {
113
+ // Fallback — completions class not available, use plain ChatOpenAI
114
+ return getChatOpenAI();
115
+ }
116
+
117
+ return class ReasoningChatOpenAI extends ChatOpenAICompletions {
118
+ async _generate(messages: any[], options: any, runManager?: any) {
119
+ const reasoningMap = collectReasoningMap(messages);
120
+ return super._generate(messages, { ...(options || {}), [REASONING_MAP_KEY]: reasoningMap }, runManager);
121
+ }
122
+
123
+ async *_streamResponseChunks(messages: any[], options: any, runManager?: any) {
124
+ const reasoningMap =
125
+ options?.[REASONING_MAP_KEY] instanceof Map
126
+ ? (options[REASONING_MAP_KEY] as Map<string, string>)
127
+ : collectReasoningMap(messages);
128
+ yield* super._streamResponseChunks(messages, { ...(options || {}), [REASONING_MAP_KEY]: reasoningMap }, runManager);
129
+ }
130
+
131
+ _convertCompletionsDeltaToBaseMessageChunk(delta: any, rawResponse: any, defaultRole?: any) {
132
+ const messageChunk = super._convertCompletionsDeltaToBaseMessageChunk(delta, rawResponse, defaultRole);
133
+ if (delta?.reasoning_content) {
134
+ messageChunk.additional_kwargs = {
135
+ ...(messageChunk.additional_kwargs || {}),
136
+ reasoning_content: delta.reasoning_content,
137
+ };
138
+ }
139
+ return messageChunk;
140
+ }
141
+
142
+ _convertCompletionsMessageToBaseMessage(message: any, rawResponse: any) {
143
+ const langChainMessage = super._convertCompletionsMessageToBaseMessage(message, rawResponse);
144
+ if (message?.reasoning_content) {
145
+ langChainMessage.additional_kwargs = {
146
+ ...(langChainMessage.additional_kwargs || {}),
147
+ reasoning_content: message.reasoning_content,
148
+ };
149
+ }
150
+ return langChainMessage;
151
+ }
152
+
153
+ async completionWithRetry(request: any, requestOptions?: any): Promise<any> {
154
+ const reasoningMap = requestOptions?.[REASONING_MAP_KEY] as Map<string, string> | undefined;
155
+ patchRequestMessagesReasoning(request, reasoningMap);
156
+ return super.completionWithRetry(request, requestOptions) as any;
157
+ }
158
+ };
159
+ }
160
+
161
+ let _ChatGenerationChunk: any = null;
162
+ function getChatGenerationChunk() {
163
+ if (!_ChatGenerationChunk) {
164
+ const mod = requireFromApp('@langchain/core/outputs');
165
+ _ChatGenerationChunk = mod.ChatGenerationChunk;
166
+ }
167
+ return _ChatGenerationChunk;
168
+ }
169
+
170
+ let _AIMessageChunk: any = null;
171
+ function getAIMessageChunk() {
172
+ if (!_AIMessageChunk) {
173
+ const mod = requireFromApp('@langchain/core/messages');
174
+ _AIMessageChunk = mod.AIMessageChunk;
175
+ }
176
+ return _AIMessageChunk;
177
+ }
178
+
179
+ function stripToolCallTags(content: string): string | null { // removes DeepSeek-style tool-call marker tokens from model text
179
+ if (typeof content !== 'string') {
180
+ return content; // non-string input is returned unchanged
181
+ }
182
+ return content.replace(/<[|\uFF5C]tool▁(?:calls▁begin|calls▁end|call▁begin|call▁end|sep)[|\uFF5C]>/g, ''); // delimiter is ASCII '|' or fullwidth bar U+FF5C
183
+ }
185
+
186
+ function extractTextContent(content: any, contentPath?: string): string {
187
+ if (contentPath && contentPath !== 'auto') {
188
+ try {
189
+ const keys = contentPath.split('.');
190
+ let result = content;
191
+ for (const key of keys) {
192
+ if (result == null) break;
193
+ result = result[key];
194
+ }
195
+ if (typeof result === 'string') return result;
196
+ } catch {
197
+ // Fall through to auto
198
+ }
199
+ }
200
+ if (typeof content === 'string') return content;
201
+ if (Array.isArray(content)) {
202
+ return content
203
+ .filter((block: any) => block && block.type === 'text')
204
+ .map((block: any) => block.text ?? '')
205
+ .join('');
206
+ }
207
+ if (content && typeof content === 'object' && content.text) {
208
+ return String(content.text);
209
+ }
210
+ return '';
211
+ }
212
+
213
+ /**
214
+ * Detect whether a MIME type is text-decodable (UTF-8 safe).
215
+ */
216
+ function isTextMimetype(mimetype?: string): boolean {
217
+ if (!mimetype) return false;
218
+ // All text/* subtypes are UTF-8 decodable
219
+ if (mimetype.startsWith('text/')) return true;
220
+ // Common text-based application types
221
+ const TEXT_APPLICATION_TYPES = new Set([
222
+ 'application/json',
223
+ 'application/xml',
224
+ 'application/xhtml+xml',
225
+ 'application/atom+xml',
226
+ 'application/rss+xml',
227
+ 'application/csv',
228
+ 'application/javascript',
229
+ 'application/typescript',
230
+ 'application/x-javascript',
231
+ 'application/x-typescript',
232
+ 'application/x-yaml',
233
+ 'application/yaml',
234
+ 'application/x-json',
235
+ 'application/geo+json',
236
+ 'application/ld+json',
237
+ 'application/manifest+json',
238
+ 'application/graphql',
239
+ 'application/x-www-form-urlencoded',
240
+ 'application/toml',
241
+ 'application/x-sh',
242
+ 'application/x-shellscript',
243
+ 'application/sql',
244
+ ]);
245
+ return TEXT_APPLICATION_TYPES.has(mimetype);
246
+ }
247
+
248
+ function safeParseJSON(str: any, fieldName?: string): any { // parses a JSON config string; returns {} instead of throwing
249
+ if (!str || typeof str !== 'string') return {}; // empty or non-string input yields an empty config object
250
+ try {
251
+ return JSON.parse(str); // note: the result is whatever the JSON encodes, not necessarily an object
252
+ } catch (e) {
253
+ // Warn so misconfigured JSON doesn't silently fall through to defaults; fieldName labels the offending setting
254
+ console.warn(`[CustomLLM] Failed to parse ${fieldName || 'JSON config'}: ${(e as Error).message}`);
255
+ return {};
256
+ }
257
+ }
258
+
259
+ /**
260
+ * Get a nested value from an object using a dot-path string.
261
+ * e.g. getByPath({a:{b:"hello"}}, "a.b") => "hello"
262
+ */
263
+ function getByPath(obj: any, dotPath: string): any {
264
+ if (!obj || !dotPath) return undefined;
265
+ const keys = dotPath.split('.');
266
+ let current = obj;
267
+ for (const key of keys) {
268
+ if (current == null) return undefined;
269
+ current = current[key];
270
+ }
271
+ return current;
272
+ }
273
+
274
+ /**
275
+ * Create a custom fetch that intercepts LLM responses and maps them
276
+ * from a non-standard format to OpenAI-compatible format.
277
+ *
278
+ * responseMapping config example:
279
+ * {
280
+ * "content": "message.response" // dot-path to the content field
281
+ * "role": "message.role" // optional, dot-path to role (default: "assistant")
282
+ * "id": "id" // optional, dot-path to response id
283
+ * "tool_calls": "message.tool_calls" // optional, dot-path to tool_calls array
284
+ * "finish_reason": "finish_reason" // optional, dot-path to finish_reason
285
+ * }
286
+ */
287
+ function createMappingFetch(responseMapping: Record<string, string>, timeoutMs?: number) {
288
+ const contentPath = responseMapping.content;
289
+ if (!contentPath) return undefined; // No mapping needed
290
+
291
+ // Resolve path for tool_calls — if not set, try the standard OpenAI paths as fallback
292
+ const toolCallsPath = responseMapping.tool_calls;
293
+ const finishReasonPath = responseMapping.finish_reason;
294
+
295
+ return async (url: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
296
+ // Apply timeout via AbortController if configured (Issue #7)
297
+ let response: Response;
298
+ if (timeoutMs && timeoutMs > 0) {
299
+ const controller = new AbortController();
300
+ const timer = setTimeout(() => controller.abort(), timeoutMs);
301
+ try {
302
+ response = await fetch(url, { ...init, signal: controller.signal });
303
+ } finally {
304
+ clearTimeout(timer);
305
+ }
306
+ } else {
307
+ response = await fetch(url, init);
308
+ }
309
+
310
+ // Only intercept successful JSON responses
311
+ if (!response.ok) return response;
312
+
313
+ const contentType = response.headers.get('content-type') || '';
314
+
315
+ // Handle streaming responses (SSE) — transform each chunk
316
+ if (contentType.includes('text/event-stream') || init?.headers?.['Accept'] === 'text/event-stream') {
317
+ const reader = response.body?.getReader();
318
+ if (!reader) return response;
319
+
320
+ const stream = new ReadableStream({
321
+ async start(controller) {
322
+ const decoder = new TextDecoder();
323
+ const encoder = new TextEncoder();
324
+ let buffer = '';
325
+
326
+ try {
327
+ // eslint-disable-next-line no-constant-condition
328
+ while (true) {
329
+ const { done, value } = await reader.read();
330
+ if (done) {
331
+ controller.close();
332
+ break;
333
+ }
334
+
335
+ buffer += decoder.decode(value, { stream: true });
336
+ const lines = buffer.split('\n');
337
+ buffer = lines.pop() || '';
338
+
339
+ for (const line of lines) {
340
+ if (line.startsWith('data: ')) {
341
+ const data = line.slice(6).trim();
342
+ if (data === '[DONE]') {
343
+ controller.enqueue(encoder.encode('data: [DONE]\n\n'));
344
+ continue;
345
+ }
346
+ try {
347
+ const parsed = JSON.parse(data);
348
+ const mappedContent = getByPath(parsed, contentPath);
349
+
350
+ // Extract tool_calls from the response (Issue #1)
351
+ // Try custom path first, then fall back to standard OpenAI chunk paths
352
+ const mappedToolCalls = toolCallsPath
353
+ ? getByPath(parsed, toolCallsPath)
354
+ : getByPath(parsed, 'choices.0.delta.tool_calls') ?? getByPath(parsed, 'delta.tool_calls');
355
+
356
+ const mappedFinishReason = finishReasonPath
357
+ ? getByPath(parsed, finishReasonPath)
358
+ : getByPath(parsed, 'choices.0.finish_reason') ?? getByPath(parsed, 'finish_reason');
359
+
360
+ if (mappedContent !== undefined || mappedToolCalls) {
361
+ // Build the delta — include both content and tool_calls
362
+ const delta: Record<string, any> = { role: 'assistant' };
363
+ if (mappedContent !== undefined) {
364
+ delta.content = String(mappedContent);
365
+ }
366
+ if (mappedToolCalls) {
367
+ delta.tool_calls = mappedToolCalls;
368
+ }
369
+
370
+ const mapped = {
371
+ id: getByPath(parsed, responseMapping.id || 'id') || 'chatcmpl-custom',
372
+ object: 'chat.completion.chunk',
373
+ created: Math.floor(Date.now() / 1000),
374
+ model: 'custom',
375
+ choices: [
376
+ {
377
+ index: 0,
378
+ delta,
379
+ finish_reason: mappedFinishReason ?? null,
380
+ },
381
+ ],
382
+ };
383
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(mapped)}\n\n`));
384
+ } else {
385
+ // Pass through unmapped — SSE events must be terminated with \n\n
386
+ controller.enqueue(encoder.encode(line + '\n\n'));
387
+ }
388
+ } catch {
389
+ // Preserve SSE framing: each event line needs \n\n terminator
390
+ controller.enqueue(encoder.encode(line + '\n\n'));
391
+ }
392
+ } else {
393
+ controller.enqueue(encoder.encode(line + '\n\n'));
394
+ }
395
+ }
396
+ }
397
+ } catch (err) {
398
+ controller.error(err);
399
+ }
400
+ },
401
+ });
402
+
403
+ return new Response(stream, {
404
+ status: response.status,
405
+ statusText: response.statusText,
406
+ headers: new Headers({
407
+ 'content-type': 'text/event-stream',
408
+ }),
409
+ });
410
+ }
411
+
412
+ // Handle non-streaming JSON responses
413
+ if (contentType.includes('application/json')) {
414
+ const body = await response.json();
415
+ const mappedContent = getByPath(body, contentPath);
416
+
417
+ // Extract tool_calls for non-streaming (Issue #1)
418
+ const mappedToolCalls = toolCallsPath
419
+ ? getByPath(body, toolCallsPath)
420
+ : getByPath(body, 'choices.0.message.tool_calls') ?? getByPath(body, 'message.tool_calls');
421
+
422
+ const mappedFinishReason = finishReasonPath
423
+ ? getByPath(body, finishReasonPath)
424
+ : getByPath(body, 'choices.0.finish_reason') ?? getByPath(body, 'finish_reason');
425
+
426
+ if (mappedContent !== undefined || mappedToolCalls) {
427
+ const message: Record<string, any> = {
428
+ role: getByPath(body, responseMapping.role || '') || 'assistant',
429
+ };
430
+ if (mappedContent !== undefined) {
431
+ message.content = String(mappedContent);
432
+ } else {
433
+ // When only tool_calls, content should be null (OpenAI convention)
434
+ message.content = null;
435
+ }
436
+ if (mappedToolCalls) {
437
+ message.tool_calls = mappedToolCalls;
438
+ }
439
+
440
+ const mapped = {
441
+ id: getByPath(body, responseMapping.id || 'id') || 'chatcmpl-custom',
442
+ object: 'chat.completion',
443
+ created: Math.floor(Date.now() / 1000),
444
+ model: 'custom',
445
+ choices: [
446
+ {
447
+ index: 0,
448
+ message,
449
+ finish_reason: mappedFinishReason ?? (mappedToolCalls ? 'tool_calls' : 'stop'),
450
+ },
451
+ ],
452
+ usage: body.usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
453
+ };
454
+
455
+ return new Response(JSON.stringify(mapped), {
456
+ status: response.status,
457
+ statusText: response.statusText,
458
+ headers: new Headers({
459
+ 'content-type': 'application/json',
460
+ }),
461
+ });
462
+ }
463
+ }
464
+
465
+ return response;
466
+ };
467
+ }
468
+
469
+ /**
470
+ * Wrap a ChatOpenAI model's stream method (`_streamResponseChunks` when present, otherwise `_stream`) to inject keepalive chunks
471
+ * during long idle periods (e.g., model thinking/reasoning phase).
472
+ *
473
+ * This runs the base stream in a background task and uses Promise.race
474
+ * to send keepalive chunks when no real data arrives within the interval.
475
+ */
476
+ function wrapWithStreamKeepAlive(model: any, options: { intervalMs: number; keepAliveContent: string }) {
477
+ const streamMethodName = typeof model._streamResponseChunks === 'function' ? '_streamResponseChunks' : '_stream';
478
+ const originalStream = model[streamMethodName].bind(model);
479
+ const { intervalMs, keepAliveContent } = options;
480
+
481
+ model[streamMethodName] = async function* (messages: any[], opts: any, runManager?: any) {
482
+ const ChatGenerationChunk = getChatGenerationChunk();
483
+ const AIMessageChunk = getAIMessageChunk();
484
+
485
+ const baseIterator = originalStream(messages, opts, runManager);
486
+
487
+ // Queue for chunks from the base stream
488
+ const buffer: any[] = [];
489
+ let streamDone = false;
490
+ let streamError: Error | null = null;
491
+ let notifyReady: (() => void) | null = null;
492
+ // Track whether tool call chunks are present in the current batch
493
+ // to avoid injecting keepalive during tool calling sequences
494
+ let hasToolCallChunks = false;
495
+ // Phase 6: Track error state to prevent further keepalive after errors
496
+ let hasErrored = false;
497
+
498
+ // Consume the base stream in a background task
499
+ const consumer = (async () => {
500
+ try {
501
+ for await (const chunk of baseIterator) {
502
+ // Detect tool call activity — keepalive must not be injected
503
+ // while tool call chunks are streaming in
504
+ const msg = chunk?.message;
505
+ if (msg?.tool_call_chunks?.length || msg?.tool_calls?.length) {
506
+ hasToolCallChunks = true;
507
+ }
508
+ buffer.push(chunk);
509
+ // Wake up the main loop
510
+ if (notifyReady) {
511
+ notifyReady();
512
+ notifyReady = null;
513
+ }
514
+ }
515
+ } catch (err) {
516
+ streamError = err as Error;
517
+ hasErrored = true;
518
+ // Wake up main loop immediately for prompt error propagation
519
+ if (notifyReady) {
520
+ notifyReady();
521
+ notifyReady = null;
522
+ }
523
+ } finally {
524
+ streamDone = true;
525
+ if (notifyReady) {
526
+ notifyReady();
527
+ notifyReady = null;
528
+ }
529
+ }
530
+ })();
531
+
532
+ try {
533
+ while (!streamDone || buffer.length > 0) {
534
+ // Flush buffered chunks first
535
+ while (buffer.length > 0) {
536
+ yield buffer.shift();
537
+ }
538
+ // Reset tool call flag after flushing — if tool calling has
539
+ // completed, keepalive may resume on the next idle interval
540
+ hasToolCallChunks = false;
541
+
542
+ if (streamDone) break;
543
+
544
+ // Wait for either: new chunk arrives OR keepalive interval expires
545
+ const waitForChunk = new Promise<void>((resolve) => {
546
+ notifyReady = resolve;
547
+ });
548
+
549
+ let timer: ReturnType<typeof setTimeout> | null = null;
550
+ const result = await Promise.race([
551
+ waitForChunk.then(() => 'chunk' as const),
552
+ new Promise<'timeout'>((resolve) => {
553
+ timer = setTimeout(() => resolve('timeout'), intervalMs);
554
+ }),
555
+ ]);
556
+
557
+ // Clear the timer to prevent leaks
558
+ if (timer) clearTimeout(timer);
559
+
560
+ if (result === 'timeout' && !streamDone && buffer.length === 0) {
561
+ // Don't emit keepalive if stream has errored — propagate immediately
562
+ if (streamError || hasErrored) break;
563
+ // Don't emit keepalive during active tool call sequences
564
+ if (hasToolCallChunks) continue;
565
+ // Send keepalive with KEEPALIVE_PREFIX as content.
566
+ // Must be truthy so plugin-ai's `if (chunk.content)` check passes
567
+ // and protocol.content() writes an SSE event to prevent proxy timeouts.
568
+ // KEEPALIVE_PREFIX is zero-width spaces — invisible in client UI.
569
+ // parseResponseChunk returns it, protocol.content() emits it.
570
+ // gathered.content accumulates ZWS but parseResponseMessage strips them.
571
+ const keepAliveChunk = new ChatGenerationChunk({
572
+ message: new AIMessageChunk({
573
+ content: KEEPALIVE_PREFIX,
574
+ additional_kwargs: { __keepalive: true },
575
+ }),
576
+ text: KEEPALIVE_PREFIX,
577
+ });
578
+ yield keepAliveChunk;
579
+ }
580
+ // If result === 'chunk', flush happens at top of loop
581
+ }
582
+
583
+ // Re-throw any stream error
584
+ if (streamError) {
585
+ throw streamError;
586
+ }
587
+ } finally {
588
+ // Ensure the consumer finishes
589
+ await consumer;
590
+ }
591
+ };
592
+
593
+ return model;
594
+ }
595
+
596
+ /**
597
+ * Check if a text string is a keepalive marker.
598
+ */
599
+ function isKeepAlive(text: string): boolean {
600
+ return typeof text === 'string' && text.startsWith(KEEPALIVE_PREFIX);
601
+ }
602
+
603
+ /**
604
+ * Wrap bindTools on the model to fix empty tool properties.
605
+ * Gemini and some providers reject tools with `properties: {}`.
606
+ * This ensures empty properties objects get a placeholder property.
607
+ *
608
+ * The fix works at TWO levels:
609
+ * 1. Pre-conversion: Fix raw tool definitions before LangChain converts them
610
+ * 2. Post-conversion: Fix the converted OpenAI-format tools after bindTools
611
+ * returns, catching cases where Zod `z.object({})` schemas get converted
612
+ * to `{ properties: {} }` by LangChain's _convertToOpenAITool
613
+ */
614
+ function fixEmptyToolProperties(model: any) {
615
+ const originalBind = model.bindTools?.bind(model);
616
+ if (!originalBind) return model;
617
+
618
+ const PLACEHOLDER_PROP = {
619
+ _placeholder: { type: 'string', description: 'No parameters required' },
620
+ };
621
+
622
+ /**
623
+ * Recursively fix empty properties in a JSON Schema-like object.
624
+ * Handles: top-level properties, function.parameters.properties,
625
+ * and nested anyOf/oneOf/allOf schemas.
626
+ */
627
+ function fixPropertiesInSchema(schema: any): void {
628
+ if (!schema || typeof schema !== 'object') return;
629
+
630
+ // Fix direct properties
631
+ if (schema.properties && typeof schema.properties === 'object' && Object.keys(schema.properties).length === 0) {
632
+ schema.properties = { ...PLACEHOLDER_PROP };
633
+ }
634
+
635
+ // Recurse into nested schemas
636
+ for (const key of ['anyOf', 'oneOf', 'allOf']) {
637
+ if (Array.isArray(schema[key])) {
638
+ schema[key].forEach((sub: any) => fixPropertiesInSchema(sub));
639
+ }
640
+ }
641
+ }
642
+
643
+ model.bindTools = function (tools: any[], kwargs?: any) {
644
+ // Phase 1: Pre-conversion fix for raw JSON Schema tool definitions
645
+ const fixedTools = tools.map((tool: any) => {
646
+ if (!tool || typeof tool !== 'object') return tool;
647
+
648
+ // Skip Zod schema tools — they'll be handled post-conversion
649
+ if (typeof tool.schema?.safeParse === 'function') {
650
+ return tool;
651
+ }
652
+
653
+ // Handle raw schema objects (already JSON Schema)
654
+ const schema = tool.schema;
655
+ if (schema && typeof schema === 'object' && !schema.safeParse) {
656
+ const props = schema.properties;
657
+ if (props && typeof props === 'object' && Object.keys(props).length === 0) {
658
+ return {
659
+ ...tool,
660
+ schema: {
661
+ ...schema,
662
+ properties: { ...PLACEHOLDER_PROP },
663
+ },
664
+ };
665
+ }
666
+ }
667
+
668
+ // Handle function-calling style definitions (OpenAI format)
669
+ const funcParams = tool.function?.parameters;
670
+ if (funcParams?.properties) {
671
+ if (typeof funcParams.properties === 'object' && Object.keys(funcParams.properties).length === 0) {
672
+ return {
673
+ ...tool,
674
+ function: {
675
+ ...tool.function,
676
+ parameters: {
677
+ ...funcParams,
678
+ properties: { ...PLACEHOLDER_PROP },
679
+ },
680
+ },
681
+ };
682
+ }
683
+ }
684
+
685
+ return tool;
686
+ });
687
+
688
+ // Call the original bindTools — this converts Zod → JSON Schema internally
689
+ const result = originalBind(fixedTools, kwargs);
690
+
691
+ // Phase 2: Post-conversion fix — patch the converted tools in the result
692
+ // LangChain's bindTools returns a RunnableBinding or the model itself with
693
+ // tools stored in defaultOptions or bound config
694
+ try {
695
+ const config = result?.kwargs ?? result?.defaultOptions;
696
+ if (config?.tools && Array.isArray(config.tools)) {
697
+ for (const tool of config.tools) {
698
+ // OpenAI format: { type: 'function', function: { parameters: { properties: {} } } }
699
+ if (tool?.function?.parameters) {
700
+ fixPropertiesInSchema(tool.function.parameters);
701
+ }
702
+ // Direct parameter format (some providers)
703
+ if (tool?.parameters) {
704
+ fixPropertiesInSchema(tool.parameters);
705
+ }
706
+ }
707
+ }
708
+ } catch {
709
+ // Don't break tool binding if post-fix inspection fails
710
+ }
711
+
712
+ return result;
713
+ };
714
+
715
+ return model;
716
+ }
717
+
718
/**
 * LLM provider for generic OpenAI-compatible endpoints.
 *
 * Builds a LangChain chat model from the configured service/model options,
 * normalises streamed chunks and stored messages, and converts attachments
 * into content blocks (text or `image_url`) for the model request.
 */
export class CustomLLMProvider extends LLMProvider {
  /**
   * Always `null`: the provider has no fixed endpoint. `createModel` reads the
   * endpoint from `serviceOptions.baseURL` instead.
   */
  get baseURL() {
    return null;
  }

  /**
   * `serviceOptions.requestConfig` parsed through `safeParseJSON`.
   * NOTE(review): callers dereference the result without a null check, so
   * `safeParseJSON` presumably returns an object even on failure — confirm.
   */
  private get requestConfig() {
    return safeParseJSON(this.serviceOptions?.requestConfig, 'requestConfig');
  }

  /**
   * `serviceOptions.responseConfig` parsed through `safeParseJSON`.
   * NOTE(review): same assumption as `requestConfig` — confirm the failure value.
   */
  private get responseConfig() {
    return safeParseJSON(this.serviceOptions?.responseConfig, 'responseConfig');
  }

  /**
   * Build the chat model instance from service and model options.
   *
   * @returns The model, wrapped with the stream keepalive proxy when
   *   `streamKeepAlive` is enabled and streaming is not disabled.
   */
  createModel() {
    const {
      apiKey,
      disableStream,
      timeout,
      streamKeepAlive,
      keepAliveIntervalMs,
      keepAliveContent,
      enableReasoning,
    } = this.serviceOptions || {};
    // baseURL comes from core's options.baseURL field
    const baseURL = this.serviceOptions?.baseURL;
    const { responseFormat } = this.modelOptions || {};
    const reqConfig = this.requestConfig;
    const resConfig = this.responseConfig;

    // Precedence (lowest → highest): default response_format, modelKwargs, extraBody.
    const modelKwargs: Record<string, any> = {
      response_format: { type: responseFormat ?? 'text' },
      ...(reqConfig.modelKwargs || {}),
    };
    if (reqConfig.extraBody && typeof reqConfig.extraBody === 'object') {
      Object.assign(modelKwargs, reqConfig.extraBody);
    }

    // Issue #4: Use ReasoningChatOpenAI when enableReasoning is set.
    // This ensures reasoning_content is preserved and patched back into
    // assistant messages during tool call round-trips (required by DeepSeek-R1, etc.)
    const ChatClass = enableReasoning ? createReasoningChatClass() : getChatOpenAI();
    const config: Record<string, any> = {
      apiKey,
      ...this.modelOptions,
      modelKwargs,
      configuration: {
        baseURL,
      },
      verbose: false,
    };

    // Disable streaming for models with long thinking phases
    // that return empty stream values causing processing to terminate
    if (disableStream) {
      config.streaming = false;
    }

    // Apply custom timeout (in milliseconds) for slow-responding models.
    // Non-numeric, zero or negative values mean "no custom timeout".
    const timeoutMs = timeout && Number(timeout) > 0 ? Number(timeout) : 0;
    if (timeoutMs) {
      config.timeout = timeoutMs;
      config.configuration.timeout = timeoutMs;
    }

    // Apply extra headers
    if (reqConfig.extraHeaders && typeof reqConfig.extraHeaders === 'object') {
      config.configuration.defaultHeaders = reqConfig.extraHeaders;
    }

    // Apply response mapping via custom fetch — pass timeout for fetch-level protection (Issue #7)
    if (resConfig.responseMapping) {
      config.configuration.fetch = createMappingFetch(resConfig.responseMapping, timeoutMs || 120_000);
    }

    let model = new ChatClass(config);

    // Fix empty tool properties for strict providers (Gemini, etc.)
    model = fixEmptyToolProperties(model);

    // Wrap with keepalive proxy if enabled (and streaming is not disabled)
    if (streamKeepAlive && !disableStream) {
      return wrapWithStreamKeepAlive(model, {
        intervalMs: Number(keepAliveIntervalMs) || 5000,
        keepAliveContent: keepAliveContent || '...',
      });
    }

    return model;
  }

  /**
   * Extract the text of a streamed chunk.
   *
   * @param chunk Streamed chunk as produced by the model.
   * @returns `KEEPALIVE_PREFIX` for keepalive chunks, otherwise the chunk text
   *   with tool-call tags stripped (may be `null`).
   */
  parseResponseChunk(chunk: any): string | null {
    const resConfig = this.responseConfig;
    const text = extractTextContent(chunk, resConfig.contentPath);

    // Return keepalive prefix as-is so protocol.content() emits SSE event.
    // The zero-width spaces are invisible in the client UI but keep
    // proxy/gateway connections alive during long model thinking phases.
    if (isKeepAlive(text)) {
      return KEEPALIVE_PREFIX;
    }

    return stripToolCallTags(text);
  }

  /**
   * Convert a stored message record into the `{ key, content, role }` shape.
   *
   * Array content is flattened to the concatenation of its text blocks;
   * string content has keepalive markers and tool-call tags removed; the
   * internal `__keepalive` flag is dropped from `metadata.additional_kwargs`.
   *
   * @param message Persisted message record.
   */
  parseResponseMessage(message: Model) {
    const { content: rawContent, messageId, metadata, role, toolCalls, attachments, workContext } = message;
    const content: Record<string, any> = {
      ...(rawContent ?? {}),
      messageId,
      metadata,
      attachments,
      workContext,
    };

    if (toolCalls) {
      content.tool_calls = toolCalls;
    }

    if (Array.isArray(content.content)) {
      // `block?.` tolerates null/undefined entries instead of throwing on them.
      content.content = content.content
        .filter((block: any) => block?.type === 'text')
        .map((block: any) => block.text)
        .join('');
    }

    if (typeof content.content === 'string') {
      // Issue #2: Strip keepalive markers safely — use simple replaceAll instead of a
      // greedy regex that could accidentally eat real content between two markers.
      content.content = content.content.replaceAll(KEEPALIVE_PREFIX, '');
      content.content = stripToolCallTags(content.content);
    }

    // Clean internal keepalive flag from persisted additional_kwargs
    // (copy instead of mutating the record's own metadata object).
    if (content.metadata?.additional_kwargs?.__keepalive !== undefined) {
      const { __keepalive, ...cleanKwargs } = content.metadata.additional_kwargs;
      content.metadata = { ...content.metadata, additional_kwargs: cleanKwargs };
    }

    return {
      key: messageId,
      content,
      role,
    };
  }

  /**
   * Extract reasoning text from a chunk.
   *
   * The key defaults to `reasoning_content` and can be overridden through
   * `responseConfig.reasoningKey`.
   *
   * @returns `{ status: 'streaming', content }` when a non-empty string is
   *   found, otherwise `null`.
   */
  parseReasoningContent(chunk: any): { status: string; content: string } | null {
    const resConfig = this.responseConfig;
    const reasoningKey = resConfig.reasoningKey || 'reasoning_content';

    // Check multiple paths — different providers/chunk formats nest reasoning differently
    const reasoning = chunk?.additional_kwargs?.[reasoningKey] ?? chunk?.kwargs?.additional_kwargs?.[reasoningKey];

    if (reasoning && typeof reasoning === 'string') {
      return { status: 'streaming', content: reasoning };
    }
    return null;
  }

  /**
   * Extract response metadata from LLM output for post-save enrichment.
   * Sanitizes overly long message IDs from Gemini or other providers.
   *
   * @returns `[id, metadata]`, or `[null, null]` when there is no generation,
   *   no message id, no metadata keys, or reading the output throws.
   */
  parseResponseMetadata(output: any): any {
    try {
      const generation = output?.generations?.[0]?.[0];
      if (!generation) return [null, null];

      const message = generation.message;
      let id = message?.id;
      if (!id) return [null, null];

      // Sanitize overly long IDs (Gemini can return very long chatcmpl-xxx or run-xxx IDs)
      if (typeof id === 'string' && id.length > 128) {
        id = id.substring(0, 128);
      }

      const metadata: Record<string, any> = {};
      if (message?.response_metadata) {
        metadata.finish_reason = message.response_metadata.finish_reason;
        metadata.system_fingerprint = message.response_metadata.system_fingerprint;
      }
      if (message?.usage_metadata) {
        metadata.usage_metadata = message.usage_metadata;
      }

      return Object.keys(metadata).length > 0 ? [id, metadata] : [null, null];
    } catch {
      return [null, null];
    }
  }

  /** Map a thrown error to a user-facing message, with a generic fallback. */
  parseResponseError(err: any) {
    return err?.message ?? 'Unexpected LLM service error';
  }

  /**
   * Escape a value for use inside a double-quoted attribute of the
   * pseudo-XML blocks sent to the model, so that a crafted filename or
   * mimetype cannot close the attribute or the tag.
   */
  private static escapeAttributeValue(value: unknown): string {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  }

  /**
   * Self-contained file reading that correctly handles the APP_PUBLIC_PATH prefix.
   *
   * plugin-ai's encodeLocalFile does path.join(cwd, url) without stripping
   * APP_PUBLIC_PATH, so when the app is deployed under a sub-path (e.g. /my-app)
   * the resolved path becomes '{cwd}/my-app/storage/uploads/…' which does not exist.
   * We cannot fix that in plugin-ai (core), so we re-implement file reading here
   * with the prefix stripped before the cwd join.
   *
   * Reads the attachment and returns its base64-encoded content plus, when the
   * file lives on the local filesystem, the resolved absolute path so callers
   * can hand that path directly to tools like DocPixie and avoid a second
   * write-to-disk round-trip.
   *
   * @param ctx Request context; its `referer` and `user-agent` headers are
   *   forwarded when the file is fetched over HTTP(S).
   * @param attachment Attachment record understood by the file-manager plugin.
   * @throws Error when the resolved local path is outside the working directory.
   */
  private async readFileData(ctx: Context, attachment: any): Promise<{ base64: string; absPath?: string }> {
    const fileManager = this.app.pm.get('file-manager') as any;
    const rawUrl: string = await fileManager.getFileURL(attachment);

    // The decoded form is used for inspection and local path resolution only.
    // decodeURIComponent throws URIError on a stray '%'; keep the raw value then.
    let url = rawUrl;
    try {
      url = decodeURIComponent(rawUrl);
    } catch {
      url = rawUrl;
    }

    if (url.startsWith('http://') || url.startsWith('https://')) {
      // Request the URL as issued by the file manager. Decoding it first turns
      // %23/%3F/%26/%2B into '#', '?', '&', '+', which truncates or re-splits the
      // URL and invalidates signed query strings.
      const target = /^https?:\/\//.test(rawUrl) ? rawUrl : url;
      const referer = ctx.get('referer') || '';
      const ua = ctx.get('user-agent') || '';
      const response = await axios.get(target, {
        responseType: 'arraybuffer',
        timeout: 30_000,
        headers: { referer, 'User-Agent': ua },
      });
      return { base64: Buffer.from(response.data).toString('base64') };
    }

    // Internal API stream URL (e.g. s3-private-storage proxy) — read directly via fileManager
    if (url.includes('/api/attachments:stream')) {
      const { stream } = await fileManager.getFileStream(attachment);
      const chunks: Buffer[] = [];
      for await (const chunk of stream) {
        chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
      }
      return { base64: Buffer.concat(chunks).toString('base64') };
    }

    // Local file — strip APP_PUBLIC_PATH prefix before joining with cwd
    let localPath = url;
    const appPublicPath = (process.env.APP_PUBLIC_PATH || '/').replace(/\/+$/, '');
    if (appPublicPath && localPath.startsWith(appPublicPath + '/')) {
      localPath = localPath.slice(appPublicPath.length);
    }

    // Resolve and guard against path traversal. path.relative is used instead of a
    // string-prefix test so the check also works when cwd is the filesystem root.
    // NOTE(review): the root is the process working directory, not the upload
    // directory, so any file below cwd is readable — consider narrowing.
    const storageRoot = path.resolve(process.cwd());
    const absPath = path.resolve(storageRoot, localPath.replace(/^\//, ''));
    const relative = path.relative(storageRoot, absPath);
    if (relative === '..' || relative.startsWith('..' + path.sep) || path.isAbsolute(relative)) {
      throw new Error(`Attachment path escapes storage root: ${localPath}`);
    }

    // fs.readFile without an encoding already yields a Buffer — no extra copy needed.
    const data = await fs.readFile(absPath);
    // Return absPath so parseAttachment can pass it directly to DocPixie
    return { base64: data.toString('base64'), absPath };
  }

  /**
   * Check whether the DocPixie skill (`docpixie.query.document`) is configured
   * on the AI employee that initiated this request.
   *
   * Looks for the employee username in the action params or request state,
   * then reads the employee's `skillSettings` from the `aiEmployees`
   * repository. The result is cached on `ctx.state._docPixieActive` for the
   * request lifetime. Any lookup failure is treated as "not configured".
   */
  private async hasDocPixieSkill(ctx: Context): Promise<boolean> {
    if (ctx.state._docPixieActive !== undefined) return ctx.state._docPixieActive as boolean;

    let active = false;
    try {
      // Issue #6: Try multiple sources for the AI employee username.
      // The field may be placed differently depending on whether the request
      // comes from sendMessages action, workflow invoke, or direct API call.
      const employeeUsername =
        ctx.action?.params?.values?.aiEmployee ??
        ctx.action?.params?.aiEmployee ??
        ctx.state?.currentAiEmployee;

      if (employeeUsername) {
        const employee = await ctx.db.getRepository('aiEmployees').findOne({
          filter: { username: String(employeeUsername) },
          fields: ['skillSettings'],
        });

        const settings = employee?.get?.('skillSettings') as any;
        const skills: Array<{ name?: string } | null> = Array.isArray(settings?.skills) ? settings.skills : [];
        // `s?.` keeps one malformed entry from hiding a valid match.
        active = skills.some((s) => s?.name === 'docpixie.query.document');
      }
    } catch {
      // Best-effort lookup: treat failures as "skill not configured".
      active = false;
    }

    ctx.state._docPixieActive = active;
    return active;
  }

  /**
   * Run the full DocPixie ingestion pipeline (extract pages → generate summary → index).
   * Returns a formatted `<processed_document>` context block the LLM can use immediately,
   * plus a clear instruction to call the RAG tool with the returned documentId for details.
   *
   * Prefers passing `absPath` directly for local-storage files to avoid a second
   * write-to-disk round-trip. Falls back to Buffer for remote / S3 files.
   *
   * Returns null if the DocPixie service is missing or not ready, or if processing fails.
   */
  private async tryDocPixieFullProcess(
    fileData: { base64: string; absPath?: string },
    filename: string,
    ctx: Context,
  ): Promise<string | null> {
    try {
      const docpixie = this.app.pm.get('docpixie') as any;
      if (!docpixie?.service?.isReady?.()) return null;

      const userId: number | undefined = ctx.state?.currentUser?.id;
      let result: { documentId: number; summary: string; pageCount: number };

      if (fileData.absPath) {
        result = await docpixie.service.processDocumentFromPath(fileData.absPath, filename, { userId });
      } else {
        const buffer = Buffer.from(fileData.base64, 'base64');
        result = await docpixie.service.processDocumentFromBuffer(buffer, filename, { userId });
      }

      const { documentId, summary, pageCount } = result;
      const summaryText = summary?.trim() || 'No summary available.';
      // The filename is user-controlled; escape it before embedding it in an attribute.
      const safeName = CustomLLMProvider.escapeAttributeValue(filename);

      return (
        `<processed_document id="${documentId}" filename="${safeName}" pages="${pageCount}">\n` +
        `<summary>\n${summaryText}\n</summary>\n` +
        `<rag_instruction>This document is fully indexed. ` +
        `Call docpixie.query.document with documentId=${documentId} to retrieve specific details.</rag_instruction>\n` +
        `</processed_document>`
      );
    } catch {
      // Best-effort: the caller falls back to transient extraction.
      return null;
    }
  }

  /**
   * Try to extract text from an attachment using DocPixie (transient — no DB indexing).
   * When `absPath` is provided (local-storage file), DocPixie reads the file
   * directly — no Buffer decode/re-encode or extra temp-file write.
   * Falls back to `extractTextFromBuffer` for remote/S3 files.
   *
   * Returns null if the DocPixie service is missing, extraction throws, or the
   * extracted text is empty. Unlike `tryDocPixieFullProcess`, this does not
   * check `service.isReady()`.
   */
  private async tryDocPixieExtract(
    fileData: { base64: string; absPath?: string },
    filename: string,
  ): Promise<string | null> {
    try {
      const docpixie = this.app.pm.get('docpixie') as any;
      if (!docpixie?.service) return null;
      let text: string;
      if (fileData.absPath) {
        text = await docpixie.service.extractTextFromPath(fileData.absPath, filename);
      } else {
        const buffer = Buffer.from(fileData.base64, 'base64');
        text = await docpixie.service.extractTextFromBuffer(buffer, filename);
      }
      return text || null;
    } catch {
      return null;
    }
  }

  /**
   * Override parseAttachment to convert all attachments into formats that
   * generic OpenAI-compatible endpoints actually support:
   *
   * - Images     → image_url block with base64 data URI (vision models)
   * - Text files → text block with decoded UTF-8 content
   * - Binary     → text block with base64 data URI (multi-modal or fallback)
   *
   * PDFs and images are first offered to DocPixie: the full ingestion pipeline
   * when the requesting AI employee has the DocPixie skill, otherwise (or when
   * that fails) transient text extraction. Either result is returned as a
   * text block.
   *
   * The base-class implementation returns a LangChain ContentBlock.Multimodal.File
   * (`type: 'file'`) for non-image attachments. LangChain serialises this as the
   * newer OpenAI Files API format which most custom/local endpoints do NOT understand,
   * causing file content to be silently dropped.
   *
   * This method is entirely self-contained — it does not call super — so it is
   * safe to use without modifying plugin-ai core.
   *
   * @param ctx Request context.
   * @param attachment Attachment record (`mimetype`, `filename`/`name` are read).
   */
  async parseAttachment(ctx: Context, attachment: any): Promise<ParsedAttachmentResult> {
    const mimetype: string = attachment.mimetype || 'application/octet-stream';
    const filename: string = attachment.filename || attachment.name || 'file';
    const fileData = await this.readFileData(ctx, attachment);
    const { base64: data } = fileData;

    const isImage = mimetype.startsWith('image/');
    const isDocPixieSupported = isImage || mimetype === 'application/pdf';

    // filename and mimetype come from the attachment record; escape them so they
    // cannot break out of the attribute they are embedded in.
    const safeName = CustomLLMProvider.escapeAttributeValue(filename);
    const safeType = CustomLLMProvider.escapeAttributeValue(mimetype);
    const wrapAttachment = (body: string): string =>
      `<attachment filename="${safeName}" type="${safeType}">\n${body}\n</attachment>`;
    const textBlock = (text: string): ParsedAttachmentResult => ({
      placement: 'contentBlocks',
      content: { type: 'text', text },
    });

    if (isDocPixieSupported) {
      // ── Path A: DocPixie skill active → full ingestion pipeline ──────────────
      // Runs processDocument (extract pages + generate summary + DB index) so the
      // LLM gets a rich summary + documentId it can pass to the RAG tool for specifics.
      if (await this.hasDocPixieSkill(ctx)) {
        const contextBlock = await this.tryDocPixieFullProcess(fileData, filename, ctx);
        if (contextBlock) {
          return textBlock(contextBlock);
        }
        // DocPixie not configured / failed → fall through to Path B
      }

      // ── Path B: skill absent or Path A failed → transient extraction (no DB) ─
      // Covers PDF text extraction and image extraction (e.g. OCR).
      const extracted = await this.tryDocPixieExtract(fileData, filename);
      if (extracted) {
        return textBlock(wrapAttachment(extracted));
      }
      // DocPixie unavailable — fall through to the generic handling below
    }

    if (isImage) {
      // Final fallback — vision-capable models handle image_url natively
      return {
        placement: 'contentBlocks',
        content: {
          type: 'image_url',
          image_url: { url: `data:${mimetype};base64,${data}` },
        },
      };
    }

    // Text: decode to readable UTF-8 so the model can actually read the content.
    // Binary non-image: embed as data-URI; multi-modal models may process it,
    // text-only models at minimum see the filename and type.
    // NOTE(review): the data-URI fallback inlines the whole file as text and can be
    // very large for big binaries — consider a size cap.
    const body = isTextMimetype(mimetype)
      ? Buffer.from(data, 'base64').toString('utf-8')
      : `data:${mimetype};base64,${data}`;

    return textBlock(wrapAttachment(body));
  }
}
1179
+
1180
/**
 * Provider metadata: pairs the display title with the class that implements
 * the OpenAI-compatible provider.
 */
export const customLLMProviderOptions: LLMProviderMeta = {
  title: 'Custom LLM (OpenAI Compatible)',
  provider: CustomLLMProvider,
};