stelo 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cortex.d.ts +120 -0
- package/dist/cortex.d.ts.map +1 -1
- package/dist/cortex.js +1354 -4
- package/dist/cortex.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +23 -2
- package/dist/index.js.map +1 -1
- package/package.json +10 -10
package/dist/cortex.js
CHANGED
|
@@ -9,22 +9,171 @@
|
|
|
9
9
|
// ██ ██ ██ ██ ██ ██ ██ ██ ██
|
|
10
10
|
// ██████ ██████ ██ ██ ██ ███████ ██ ██
|
|
11
11
|
//
|
|
12
|
-
// The missing layer between
|
|
12
|
+
// The missing layer between AI models and the physical computer.
|
|
13
13
|
// Cortex gives a model eyes (screen), ears (audio), hands (mouse/keyboard),
|
|
14
14
|
// and a voice — all flowing bidirectionally in real time.
|
|
15
15
|
//
|
|
16
|
-
// Provider-agnostic: works with
|
|
17
|
-
//
|
|
18
|
-
//
|
|
16
|
+
// Provider-agnostic: works with EVERY major AI provider out of the box.
|
|
17
|
+
// One unified interface, every model on Earth.
|
|
18
|
+
//
|
|
19
|
+
// ┌─────────────────────────────────────────────────────────────────────┐
|
|
20
|
+
// │ REAL-TIME (WebSocket, bidirectional audio) │
|
|
21
|
+
// │ • geminiLiveProvider() — Google Gemini Live API │
|
|
22
|
+
// │ • openAIRealtimeProvider() — OpenAI Realtime API │
|
|
23
|
+
// │ │
|
|
24
|
+
// │ HTTP STREAMING (SSE, text + vision + tool calling) │
|
|
25
|
+
// │ • geminiProvider() — Google Gemini (generateContent) │
|
|
26
|
+
// │ • openAIChatProvider() — OpenAI Chat Completions │
|
|
27
|
+
// │ • openAIResponsesProvider()— OpenAI Responses API │
|
|
28
|
+
// │ • anthropicProvider() — Anthropic Claude │
|
|
29
|
+
// │ • mistralProvider() — Mistral AI │
|
|
30
|
+
// │ • xaiProvider() — xAI / Grok │
|
|
31
|
+
// │ • deepSeekProvider() — DeepSeek │
|
|
32
|
+
// │ • groqProvider() — Groq (fast inference) │
|
|
33
|
+
// │ • togetherProvider() — Together AI │
|
|
34
|
+
// │ • fireworksProvider() — Fireworks AI │
|
|
35
|
+
// │ • perplexityProvider() — Perplexity │
|
|
36
|
+
// │ • sambanovaProvider() — SambaNova │
|
|
37
|
+
// │ • cerebrasProvider() — Cerebras │
|
|
38
|
+
// │ │
|
|
39
|
+
// │ CUSTOM │
|
|
40
|
+
// │ • customProvider() — Any WebSocket model │
|
|
41
|
+
// │ • openAIChatProvider({ — Any OpenAI-compatible endpoint │
|
|
42
|
+
// │ baseUrl: '...' (Ollama, LM Studio, vLLM, etc.) │
|
|
43
|
+
// │ }) │
|
|
44
|
+
// └─────────────────────────────────────────────────────────────────────┘
|
|
19
45
|
//
|
|
20
46
|
// ============================================================================
|
|
21
47
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
22
48
|
exports.Cortex = void 0;
|
|
49
|
+
exports.buildScreenContext = buildScreenContext;
|
|
23
50
|
exports.geminiLiveProvider = geminiLiveProvider;
|
|
24
51
|
exports.openAIRealtimeProvider = openAIRealtimeProvider;
|
|
25
52
|
exports.customProvider = customProvider;
|
|
53
|
+
exports.openAIChatProvider = openAIChatProvider;
|
|
54
|
+
exports.geminiProvider = geminiProvider;
|
|
55
|
+
exports.mistralProvider = mistralProvider;
|
|
56
|
+
exports.xaiProvider = xaiProvider;
|
|
57
|
+
exports.deepSeekProvider = deepSeekProvider;
|
|
58
|
+
exports.groqProvider = groqProvider;
|
|
59
|
+
exports.togetherProvider = togetherProvider;
|
|
60
|
+
exports.fireworksProvider = fireworksProvider;
|
|
61
|
+
exports.perplexityProvider = perplexityProvider;
|
|
62
|
+
exports.sambanovaProvider = sambanovaProvider;
|
|
63
|
+
exports.cerebrasProvider = cerebrasProvider;
|
|
64
|
+
exports.openAIResponsesProvider = openAIResponsesProvider;
|
|
65
|
+
exports.anthropicProvider = anthropicProvider;
|
|
26
66
|
exports.createCortex = createCortex;
|
|
27
67
|
const native = require('../index.js');
|
|
68
|
+
// ═════════════════════════════════════════════════════════════════════════════
|
|
69
|
+
// SEMANTIC SCREEN CONTEXT
|
|
70
|
+
// Converts the live screen into a structured textual description that ANY
|
|
71
|
+
// model can understand — including text-only models with no vision capability.
|
|
72
|
+
// Like giving a model "eyes" that output semantic meaning, not raw pixels.
|
|
73
|
+
// ═════════════════════════════════════════════════════════════════════════════
|
|
74
|
+
/**
 * Build a structured, text-only description of the current screen state.
 *
 * Best-effort gathers: the active window, the list of open windows, the
 * screen resolution, and OCR results (full screen or a sub-region), then
 * renders them as markdown-style sections that any model — including
 * text-only models with no vision capability — can consume.
 *
 * Every native probe is individually guarded: a capability missing on the
 * current platform simply leaves its section out of the report.
 *
 * @param {{x:number,y:number,width:number,height:number}} [region]
 *        Optional screen rectangle to OCR; omitted means full screen.
 * @returns {{context:string,activeWindow:(object|null),textElements:Array,fullText:string}}
 */
function buildScreenContext(region) {
    // Probe the native layer defensively — any capability may be absent.
    let activeWindow = null;
    try {
        activeWindow = native.windowGetActive();
    }
    catch { /* not on all platforms */ }
    let windows = [];
    try {
        windows = native.windowGetAll() ?? [];
    }
    catch { /* not on all platforms */ }
    let screenSize = { width: 0, height: 0 };
    try {
        const sz = native.screenGetSize();
        // Native layer may report either {width,height} or {w,h}.
        screenSize = { width: sz.width ?? sz.w ?? 0, height: sz.height ?? sz.h ?? 0 };
    }
    catch { /* fallback */ }
    let ocrResult = {};
    try {
        ocrResult = region
            ? native.ocrRecognize(region.x, region.y, region.width, region.height)
            : native.ocrRecognize();
    }
    catch { /* OCR might not be available */ }
    const out = [];
    const emit = (...parts) => out.push(...parts);
    // Header section.
    emit('## SCREEN CONTEXT');
    if (screenSize.width > 0) {
        emit(`Resolution: ${screenSize.width}×${screenSize.height}`);
    }
    if (region) {
        emit(`Region: (${region.x},${region.y}) ${region.width}×${region.height}`);
    }
    emit('');
    // Active window section.
    if (activeWindow) {
        emit(`### Active Window`);
        emit(`Title: "${activeWindow.title}"`);
        if (activeWindow.processName)
            emit(`App: ${activeWindow.processName}`);
        if (activeWindow.width && activeWindow.height) {
            emit(`Size: ${activeWindow.width}×${activeWindow.height} at (${activeWindow.x ?? 0}, ${activeWindow.y ?? 0})`);
        }
        emit('');
    }
    // Up to 8 other windows; visibility is filtered only when an active
    // window is known (mirrors the original behavior).
    const otherWindows = (activeWindow
        ? windows.filter((w) => w.id !== activeWindow?.id && w.isVisible !== false)
        : windows).slice(0, 8);
    if (otherWindows.length > 0) {
        emit(`### Open Windows`);
        for (const w of otherWindows) {
            emit(` - "${w.title}"${w.processName ? ` (${w.processName})` : ''}`);
        }
        emit('');
    }
    // OCR: whole-capture text plus per-word bounding boxes.
    const fullText = ocrResult.text ?? '';
    const words = ocrResult.words ?? [];
    if (fullText.trim()) {
        emit(`### Visible Text`);
        emit(fullText.trim());
        emit('');
    }
    // Clickable elements: the first 60 OCR words with their center points.
    const textElements = [];
    if (words.length > 0) {
        emit(`### UI Elements (text → click coordinates)`);
        for (const w of words.slice(0, 60)) {
            const cx = Math.round(w.x + (w.width ?? 0) / 2);
            const cy = Math.round(w.y + (w.height ?? 0) / 2);
            textElements.push({ text: w.text, x: w.x, y: w.y, centerX: cx, centerY: cy });
            emit(` "${w.text}" → (${cx}, ${cy})`);
        }
        if (words.length > 60)
            emit(` ... and ${words.length - 60} more elements`);
        emit('');
    }
    // Closing hint telling the model which tools act on this context.
    emit(`### How to interact`);
    emit(`- Click: use mouse_click at the (x,y) coordinates above`);
    emit(`- Find & click by text: use screen_find_and_click with the visible label`);
    emit(`- Type into active element: use keyboard_type`);
    emit(`- After acting, call screen_context again to see updated state`);
    return {
        context: out.join('\n'),
        activeWindow,
        textElements,
        fullText,
    };
}
|
|
28
177
|
class Cortex {
|
|
29
178
|
config = null;
|
|
30
179
|
transport = null;
|
|
@@ -565,6 +714,207 @@ class Cortex {
|
|
|
565
714
|
},
|
|
566
715
|
nonBlocking: true,
|
|
567
716
|
});
|
|
717
|
+
// ── Semantic Screen Understanding ─────────────────────────────────────
|
|
718
|
+
// These tools give the model a HUMAN-LIKE understanding of the screen —
|
|
719
|
+
// not raw pixels, but structured semantic context: what windows are open,
|
|
720
|
+
// what text is visible, where UI elements are, what changed.
|
|
721
|
+
// Works for every model, including text-only models with no vision.
|
|
722
|
+
this.registerTool('screen_context', (region) => {
|
|
723
|
+
return buildScreenContext(region);
|
|
724
|
+
}, {
|
|
725
|
+
description: 'Get a full semantic understanding of the current screen — visible text, active window, all open windows, and UI element positions. Use this to understand what is on screen before clicking or typing. Works without vision capability.',
|
|
726
|
+
parameters: {
|
|
727
|
+
region: {
|
|
728
|
+
type: 'object',
|
|
729
|
+
description: 'Optional screen region to read (x, y, width, height). Omit for full screen.',
|
|
730
|
+
properties: {
|
|
731
|
+
x: { type: 'number' },
|
|
732
|
+
y: { type: 'number' },
|
|
733
|
+
width: { type: 'number' },
|
|
734
|
+
height: { type: 'number' },
|
|
735
|
+
},
|
|
736
|
+
},
|
|
737
|
+
},
|
|
738
|
+
});
|
|
739
|
+
this.registerTool('screen_find_text', (text) => {
|
|
740
|
+
try {
|
|
741
|
+
const result = native.ocrFindText(text);
|
|
742
|
+
if (!result)
|
|
743
|
+
return { found: false, text };
|
|
744
|
+
const cx = Math.round(result.x + result.width / 2);
|
|
745
|
+
const cy = Math.round(result.y + result.height / 2);
|
|
746
|
+
return {
|
|
747
|
+
found: true,
|
|
748
|
+
text,
|
|
749
|
+
x: result.x,
|
|
750
|
+
y: result.y,
|
|
751
|
+
width: result.width,
|
|
752
|
+
height: result.height,
|
|
753
|
+
centerX: cx,
|
|
754
|
+
centerY: cy,
|
|
755
|
+
clickTarget: `(${cx}, ${cy})`,
|
|
756
|
+
};
|
|
757
|
+
}
|
|
758
|
+
catch {
|
|
759
|
+
return { found: false, text, error: 'OCR unavailable' };
|
|
760
|
+
}
|
|
761
|
+
}, {
|
|
762
|
+
description: 'Find specific text on screen and get its exact pixel position. Returns center coordinates ready to click. Use this to locate buttons, labels, or any visible text.',
|
|
763
|
+
parameters: {
|
|
764
|
+
text: { type: 'string', description: 'The text to find on screen', required: true },
|
|
765
|
+
},
|
|
766
|
+
});
|
|
767
|
+
this.registerTool('screen_find_and_click', async (text) => {
|
|
768
|
+
try {
|
|
769
|
+
const result = native.ocrFindText(text);
|
|
770
|
+
if (!result)
|
|
771
|
+
return { success: false, error: `Text "${text}" not found on screen` };
|
|
772
|
+
const cx = Math.round(result.x + result.width / 2);
|
|
773
|
+
const cy = Math.round(result.y + result.height / 2);
|
|
774
|
+
native.mouseClickAt(cx, cy, 'left', false);
|
|
775
|
+
return { success: true, clicked: text, x: cx, y: cy };
|
|
776
|
+
}
|
|
777
|
+
catch {
|
|
778
|
+
return { success: false, error: 'OCR unavailable' };
|
|
779
|
+
}
|
|
780
|
+
}, {
|
|
781
|
+
description: 'Find text on screen by OCR and click it directly. The easiest way to click buttons and UI elements by their visible label.',
|
|
782
|
+
parameters: {
|
|
783
|
+
text: { type: 'string', description: 'Visible text of the element to click', required: true },
|
|
784
|
+
},
|
|
785
|
+
});
|
|
786
|
+
this.registerTool('text_select_all', () => {
|
|
787
|
+
native.keyboardHotkey(['ctrl', 'a']);
|
|
788
|
+
return { success: true };
|
|
789
|
+
}, {
|
|
790
|
+
description: 'Select all text in the focused element (Ctrl+A)',
|
|
791
|
+
});
|
|
792
|
+
this.registerTool('text_select_word', () => {
|
|
793
|
+
native.mouseDoubleClick('left');
|
|
794
|
+
return { success: true };
|
|
795
|
+
}, {
|
|
796
|
+
description: 'Double-click to select a single word under the cursor',
|
|
797
|
+
});
|
|
798
|
+
this.registerTool('text_select_line', () => {
|
|
799
|
+
native.keyboardPress('Home');
|
|
800
|
+
native.keyboardHotkey(['shift', 'End']);
|
|
801
|
+
return { success: true };
|
|
802
|
+
}, {
|
|
803
|
+
description: 'Select the entire current line (Home then Shift+End)',
|
|
804
|
+
});
|
|
805
|
+
this.registerTool('text_cut', () => {
|
|
806
|
+
native.keyboardHotkey(['ctrl', 'x']);
|
|
807
|
+
return { success: true };
|
|
808
|
+
}, {
|
|
809
|
+
description: 'Cut selected text to clipboard (Ctrl+X)',
|
|
810
|
+
});
|
|
811
|
+
this.registerTool('text_copy', () => {
|
|
812
|
+
native.keyboardHotkey(['ctrl', 'c']);
|
|
813
|
+
return { success: true };
|
|
814
|
+
}, {
|
|
815
|
+
description: 'Copy selected text to clipboard (Ctrl+C)',
|
|
816
|
+
});
|
|
817
|
+
this.registerTool('text_paste', () => {
|
|
818
|
+
native.keyboardHotkey(['ctrl', 'v']);
|
|
819
|
+
return { success: true };
|
|
820
|
+
}, {
|
|
821
|
+
description: 'Paste clipboard contents (Ctrl+V)',
|
|
822
|
+
});
|
|
823
|
+
this.registerTool('text_undo', () => {
|
|
824
|
+
native.keyboardHotkey(['ctrl', 'z']);
|
|
825
|
+
return { success: true };
|
|
826
|
+
}, {
|
|
827
|
+
description: 'Undo the last action (Ctrl+Z)',
|
|
828
|
+
});
|
|
829
|
+
this.registerTool('text_redo', () => {
|
|
830
|
+
native.keyboardHotkey(['ctrl', 'y']);
|
|
831
|
+
return { success: true };
|
|
832
|
+
}, {
|
|
833
|
+
description: 'Redo the last undone action (Ctrl+Y)',
|
|
834
|
+
});
|
|
835
|
+
this.registerTool('text_replace', (find, replace) => {
|
|
836
|
+
// Read clipboard state, write find text, then type replacement
|
|
837
|
+
// Use clipboard to avoid slow key-by-key typing for find dialogs
|
|
838
|
+
const original = (() => { try {
|
|
839
|
+
return native.clipboardRead();
|
|
840
|
+
}
|
|
841
|
+
catch {
|
|
842
|
+
return '';
|
|
843
|
+
} })();
|
|
844
|
+
native.clipboardWrite(replace);
|
|
845
|
+
// Try Ctrl+H (most apps: find+replace)
|
|
846
|
+
native.keyboardHotkey(['ctrl', 'h']);
|
|
847
|
+
return { success: true, hint: `Opened Find & Replace. Find: "${find}", Replace: "${replace}" staged in clipboard.` };
|
|
848
|
+
}, {
|
|
849
|
+
description: 'Open the Find & Replace dialog in the active window and stage replacement text in clipboard',
|
|
850
|
+
parameters: {
|
|
851
|
+
find: { type: 'string', description: 'Text to find', required: true },
|
|
852
|
+
replace: { type: 'string', description: 'Replacement text', required: true },
|
|
853
|
+
},
|
|
854
|
+
});
|
|
855
|
+
this.registerTool('window_screenshot_context', (titleSubstring) => {
|
|
856
|
+
try {
|
|
857
|
+
// Get a specific window's context if title given, otherwise active
|
|
858
|
+
const windows = native.windowGetAll();
|
|
859
|
+
const target = titleSubstring
|
|
860
|
+
? windows.find((w) => w.title.toLowerCase().includes(titleSubstring.toLowerCase()) && w.isVisible)
|
|
861
|
+
: native.windowGetActive();
|
|
862
|
+
if (!target)
|
|
863
|
+
return { error: 'Window not found' };
|
|
864
|
+
// OCR the window region
|
|
865
|
+
const ocr = (() => {
|
|
866
|
+
try {
|
|
867
|
+
return native.ocrRecognize(target.x, target.y, target.width, target.height);
|
|
868
|
+
}
|
|
869
|
+
catch {
|
|
870
|
+
return { text: '', words: [] };
|
|
871
|
+
}
|
|
872
|
+
})();
|
|
873
|
+
return {
|
|
874
|
+
title: target.title,
|
|
875
|
+
process: target.processName,
|
|
876
|
+
bounds: { x: target.x, y: target.y, width: target.width, height: target.height },
|
|
877
|
+
text: ocr.text ?? '',
|
|
878
|
+
wordCount: (ocr.words ?? []).length,
|
|
879
|
+
words: (ocr.words ?? []).slice(0, 50).map((w) => ({
|
|
880
|
+
text: w.text,
|
|
881
|
+
x: w.x,
|
|
882
|
+
y: w.y,
|
|
883
|
+
})),
|
|
884
|
+
};
|
|
885
|
+
}
|
|
886
|
+
catch (e) {
|
|
887
|
+
return { error: e.message };
|
|
888
|
+
}
|
|
889
|
+
}, {
|
|
890
|
+
description: 'Get OCR text and element positions from a specific window. More focused than screen_context for a single app.',
|
|
891
|
+
parameters: {
|
|
892
|
+
titleSubstring: { type: 'string', description: 'Part of the window title to target. Omit for active window.' },
|
|
893
|
+
},
|
|
894
|
+
});
|
|
895
|
+
this.registerTool('scroll_to_text', async (text) => {
|
|
896
|
+
// Try to scroll until the text appears
|
|
897
|
+
for (let attempt = 0; attempt < 8; attempt++) {
|
|
898
|
+
try {
|
|
899
|
+
const result = native.ocrFindText(text);
|
|
900
|
+
if (result) {
|
|
901
|
+
const cx = Math.round(result.x + result.width / 2);
|
|
902
|
+
const cy = Math.round(result.y + result.height / 2);
|
|
903
|
+
return { found: true, x: cx, y: cy };
|
|
904
|
+
}
|
|
905
|
+
}
|
|
906
|
+
catch { /* ocr miss */ }
|
|
907
|
+
native.mouseScroll(3, 'down');
|
|
908
|
+
await new Promise(r => setTimeout(r, 300));
|
|
909
|
+
}
|
|
910
|
+
return { found: false, text };
|
|
911
|
+
}, {
|
|
912
|
+
description: 'Scroll down the page until text becomes visible, then return its position',
|
|
913
|
+
parameters: {
|
|
914
|
+
text: { type: 'string', description: 'Text to scroll until visible', required: true },
|
|
915
|
+
},
|
|
916
|
+
nonBlocking: true,
|
|
917
|
+
});
|
|
568
918
|
}
|
|
569
919
|
/**
|
|
570
920
|
* Get all registered tool names.
|
|
@@ -1546,6 +1896,1006 @@ function customProvider(custom) {
|
|
|
1546
1896
|
};
|
|
1547
1897
|
}
|
|
1548
1898
|
// ═════════════════════════════════════════════════════════════════════════════
|
|
1899
|
+
// SCHEMA TRANSLATION — Universal tool definition → provider-native format
|
|
1900
|
+
// ═════════════════════════════════════════════════════════════════════════════
|
|
1901
|
+
/**
 * Translate Stelo-style tool parameters into a JSON Schema "object" envelope.
 *
 * Stelo marks mandatory parameters with a per-property `required: boolean`;
 * JSON Schema instead wants a `required: string[]` on the object. This strips
 * the boolean flag from each property and rebuilds the spec-compliant array
 * (only included when non-empty).
 *
 * @param {Object<string,object>|undefined} params - Stelo parameter specs keyed by name.
 * @returns {object} JSON Schema object with `type`, `properties`, optional `required`.
 */
function buildJsonSchemaObject(params) {
    const entries = params ? Object.entries(params) : [];
    if (entries.length === 0) {
        return { type: 'object', properties: {} };
    }
    const requiredKeys = entries
        .filter(([, spec]) => spec.required === true)
        .map(([name]) => name);
    const properties = Object.fromEntries(entries.map(([name, spec]) => {
        // Drop Stelo's boolean flag; everything else passes through as-is.
        const { required: _flag, ...schema } = spec;
        return [name, schema];
    }));
    const result = { type: 'object', properties };
    if (requiredKeys.length > 0) {
        result.required = requiredKeys;
    }
    return result;
}
|
|
1923
|
+
/**
 * Convert ToolDefinition[] → the OpenAI Chat Completions `tools` format
 * (each entry wrapped as `{ type: 'function', function: {...} }`).
 * Internal pseudo-tools (names prefixed "__") are never sent to the API.
 */
function toolsToOpenAIChat(tools) {
    const converted = [];
    for (const tool of tools) {
        if (tool.name.startsWith('__'))
            continue;
        converted.push({
            type: 'function',
            function: {
                name: tool.name,
                description: tool.description,
                parameters: buildJsonSchemaObject(tool.parameters),
            },
        });
    }
    return converted;
}
|
|
1936
|
+
/**
 * Convert ToolDefinition[] → the OpenAI Responses API `tools` format.
 * Unlike Chat Completions, this format is flat: name/description/parameters
 * sit next to `type: 'function'` with no inner `function` wrapper.
 * Internal pseudo-tools (names prefixed "__") are excluded.
 */
function toolsToOpenAIResponses(tools) {
    const converted = [];
    for (const tool of tools) {
        if (tool.name.startsWith('__'))
            continue;
        converted.push({
            type: 'function',
            name: tool.name,
            description: tool.description,
            parameters: buildJsonSchemaObject(tool.parameters),
        });
    }
    return converted;
}
|
|
1947
|
+
/**
 * Convert ToolDefinition[] → the Anthropic Claude `tools` format, which
 * names its schema field `input_schema` and uses no `type` wrapper.
 * Internal pseudo-tools (names prefixed "__") are excluded.
 */
function toolsToAnthropic(tools) {
    const converted = [];
    for (const tool of tools) {
        if (tool.name.startsWith('__'))
            continue;
        converted.push({
            name: tool.name,
            description: tool.description,
            input_schema: buildJsonSchemaObject(tool.parameters),
        });
    }
    return converted;
}
|
|
1957
|
+
// ═════════════════════════════════════════════════════════════════════════════
|
|
1958
|
+
// HTTP SSE STREAMING INFRASTRUCTURE
|
|
1959
|
+
// ═════════════════════════════════════════════════════════════════════════════
|
|
1960
|
+
/**
 * Async generator that parses Server-Sent Events from a fetch Response.
 *
 * Handles both OpenAI-style (data-only) and Anthropic-style (event + data)
 * streams. Each `data:` line is JSON-parsed and yielded; the sentinel
 * `[DONE]` terminates the stream; non-JSON data lines are skipped. When a
 * preceding `event:` line named the event and the parsed object has no
 * `type` of its own, the event name is stored as `parsed.type`.
 *
 * Note: any bytes left in the buffer after the reader reports done are
 * discarded (an SSE stream always ends each event with a newline).
 *
 * @param {Response} response - fetch Response with a readable body stream.
 * @yields {object} one parsed JSON payload per `data:` line.
 */
async function* parseSSE(response) {
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let pending = '';
    let eventName;
    for (;;) {
        const { done, value } = await reader.read();
        if (done)
            break;
        pending += decoder.decode(value, { stream: true });
        const complete = pending.split('\n');
        // The final fragment may be a partial line — keep it for next chunk.
        pending = complete.pop() ?? '';
        for (const raw of complete) {
            const line = raw.trim();
            if (!line) {
                // Blank line terminates one SSE event; forget the event name.
                eventName = undefined;
            }
            else if (line.startsWith('event:')) {
                eventName = line.slice(line.indexOf(':') + 1).trim();
            }
            else if (line.startsWith('data:')) {
                const payload = line.slice(line.indexOf(':') + 1).trim();
                if (payload === '[DONE]')
                    return;
                try {
                    const parsed = JSON.parse(payload);
                    if (eventName && !parsed.type)
                        parsed.type = eventName;
                    yield parsed;
                }
                catch { /* skip non-JSON data lines */ }
            }
        }
    }
}
|
|
2003
|
+
// ═════════════════════════════════════════════════════════════════════════════
|
|
2004
|
+
// OPENAI-COMPATIBLE CHAT PROVIDER (HTTP + SSE Streaming)
|
|
2005
|
+
// ═════════════════════════════════════════════════════════════════════════════
|
|
2006
|
+
//
|
|
2007
|
+
// Works with: OpenAI, Mistral, xAI/Grok, DeepSeek, Groq, Together, Fireworks,
|
|
2008
|
+
// Perplexity, SambaNova, Cerebras, LM Studio, Ollama, vLLM, and any endpoint
|
|
2009
|
+
// that speaks the OpenAI Chat Completions format.
|
|
2010
|
+
//
|
|
2011
|
+
/**
|
|
2012
|
+
* Create a provider for any OpenAI-compatible Chat Completions API.
|
|
2013
|
+
* This is the universal HTTP provider — one implementation, every endpoint.
|
|
2014
|
+
*
|
|
2015
|
+
* @example
|
|
2016
|
+
* ```typescript
|
|
2017
|
+
* // OpenAI
|
|
2018
|
+
* await cortex.connect({
|
|
2019
|
+
* provider: openAIChatProvider(),
|
|
2020
|
+
* connection: { apiKey: 'sk-...', model: 'gpt-4.1' },
|
|
2021
|
+
* });
|
|
2022
|
+
*
|
|
2023
|
+
* // Mistral
|
|
2024
|
+
* await cortex.connect({
|
|
2025
|
+
* provider: openAIChatProvider({ baseUrl: 'https://api.mistral.ai/v1', name: 'mistral' }),
|
|
2026
|
+
* connection: { apiKey: 'your-key', model: 'mistral-large-latest' },
|
|
2027
|
+
* });
|
|
2028
|
+
*
|
|
2029
|
+
* // Local Ollama
|
|
2030
|
+
* await cortex.connect({
|
|
2031
|
+
* provider: openAIChatProvider({ baseUrl: 'http://localhost:11434/v1', name: 'ollama' }),
|
|
2032
|
+
* connection: { apiKey: 'ollama', model: 'llama3.1' },
|
|
2033
|
+
* });
|
|
2034
|
+
* ```
|
|
2035
|
+
*/
|
|
2036
|
+
function openAIChatProvider(options) {
    return {
        // Callers may rebrand the provider (e.g. 'mistral', 'ollama').
        name: options?.name ?? 'openai-chat',
        async connect(config, handlers) {
            // Factory options win over per-connection providerOptions; trailing
            // slash is stripped so `${baseUrl}/chat/completions` is well-formed.
            const baseUrl = (options?.baseUrl ?? config.providerOptions?.baseUrl ?? 'https://api.openai.com/v1').replace(/\/$/, '');
            const maxTokens = config.providerOptions?.maxTokens ?? 8192;
            const temperature = config.providerOptions?.temperature;
            // Full conversation history; HTTP Chat Completions is stateless, so
            // the whole array is resent on every request.
            const messages = [];
            if (config.systemInstruction) {
                messages.push({ role: 'system', content: config.systemInstruction });
            }
            const tools = config.tools ? toolsToOpenAIChat(config.tools) : [];
            // Only the in-flight request's controller; null between requests.
            let abortController = null;
            let closed = false;
            // Last frame handed to sendVideo(); attached to the next user text.
            let pendingImage = null;
            // Tool response batching for parallel tool calls
            const toolResponseQueue = [];
            let toolFlushTimer = null;
            // Send the accumulated history and stream the model's reply,
            // translating SSE chunks into handler callbacks.
            async function makeRequest() {
                if (closed)
                    return;
                abortController = new AbortController();
                const body = {
                    model: config.model,
                    messages,
                    stream: true,
                    // Ask the server to append a usage chunk to the stream.
                    stream_options: { include_usage: true },
                    max_tokens: maxTokens,
                };
                if (tools.length > 0)
                    body.tools = tools;
                if (temperature !== undefined)
                    body.temperature = temperature;
                if (config.providerOptions?.parallelToolCalls !== undefined) {
                    body.parallel_tool_calls = config.providerOptions.parallelToolCalls;
                }
                const headers = {
                    'Content-Type': 'application/json',
                    'Authorization': `Bearer ${config.apiKey}`,
                    // Extra headers (e.g. org/project ids) merge over defaults.
                    ...(config.providerOptions?.headers ?? {}),
                };
                try {
                    const response = await fetch(`${baseUrl}/chat/completions`, {
                        method: 'POST',
                        headers,
                        body: JSON.stringify(body),
                        signal: abortController.signal,
                    });
                    if (!response.ok) {
                        const errText = await response.text();
                        handlers.error(new Error(`${baseUrl} ${response.status}: ${errText}`));
                        return;
                    }
                    // Streamed tool calls arrive as fragments keyed by index;
                    // accumulate id/name/arguments until finish_reason fires.
                    const accToolCalls = new Map();
                    let assistantText = '';
                    for await (const chunk of parseSSE(response)) {
                        const delta = chunk.choices?.[0]?.delta;
                        const finishReason = chunk.choices?.[0]?.finish_reason;
                        if (delta?.content) {
                            // Second arg false = partial text, not end-of-turn.
                            handlers.text(delta.content, false);
                            assistantText += delta.content;
                        }
                        // Accumulate streamed tool calls
                        if (delta?.tool_calls) {
                            for (const tc of delta.tool_calls) {
                                const idx = tc.index ?? 0;
                                if (!accToolCalls.has(idx))
                                    accToolCalls.set(idx, { id: '', name: '', args: '' });
                                const acc = accToolCalls.get(idx);
                                if (tc.id)
                                    acc.id = tc.id;
                                if (tc.function?.name)
                                    acc.name = tc.function.name;
                                if (tc.function?.arguments)
                                    acc.args += tc.function.arguments;
                            }
                        }
                        if (finishReason === 'stop') {
                            // Flush end-of-text, then record the reply in history.
                            handlers.text('', true);
                            if (assistantText) {
                                messages.push({ role: 'assistant', content: assistantText });
                            }
                        }
                        else if (finishReason === 'tool_calls' || finishReason === 'function_call') {
                            // All tool calls complete — add assistant message, fire handlers
                            const assistantMsg = { role: 'assistant', content: null, tool_calls: [] };
                            // Sort by stream index so history order is stable.
                            for (const [, tc] of [...accToolCalls.entries()].sort((a, b) => a[0] - b[0])) {
                                assistantMsg.tool_calls.push({
                                    id: tc.id,
                                    type: 'function',
                                    function: { name: tc.name, arguments: tc.args },
                                });
                                try {
                                    handlers.toolCall(tc.id, tc.name, JSON.parse(tc.args || '{}'));
                                }
                                catch {
                                    // Malformed JSON args degrade to an empty object.
                                    handlers.toolCall(tc.id, tc.name, {});
                                }
                            }
                            messages.push(assistantMsg);
                            accToolCalls.clear();
                        }
                        if (chunk.usage) {
                            handlers.usage(chunk.usage.total_tokens ?? 0, chunk.usage.prompt_tokens ?? 0, chunk.usage.completion_tokens ?? 0);
                        }
                    }
                    handlers.turnComplete();
                }
                catch (err) {
                    // An abort (interrupt()/close()) is deliberate, not an error.
                    if (err.name !== 'AbortError') {
                        handlers.error(err instanceof Error ? err : new Error(String(err)));
                    }
                }
                finally {
                    abortController = null;
                }
            }
            // Drain queued tool results into history as `tool` messages, then
            // kick off the follow-up request (fire-and-forget; errors surface
            // through handlers.error inside makeRequest).
            function flushToolResponses() {
                if (toolResponseQueue.length === 0)
                    return;
                for (const { callId, result } of toolResponseQueue) {
                    messages.push({
                        role: 'tool',
                        tool_call_id: callId,
                        content: typeof result === 'string' ? result : JSON.stringify(result),
                    });
                }
                toolResponseQueue.length = 0;
                makeRequest();
            }
            const transport = {
                sendAudio(_pcm, _sampleRate) {
                    // HTTP Chat Completions does not support native audio streaming.
                    // Use OpenAI Realtime or Gemini Live for audio.
                },
                sendVideo(jpeg) {
                    // Held until the next sendText so image + prompt travel in
                    // one multimodal user message. Presumably `jpeg` is a
                    // Buffer/Uint8Array — toString('base64') is used below.
                    pendingImage = jpeg;
                },
                sendText(text, role) {
                    if (closed)
                        return;
                    if (role === 'system') {
                        messages.push({ role: 'system', content: text });
                    }
                    else if (pendingImage) {
                        messages.push({
                            role: 'user',
                            content: [
                                { type: 'image_url', image_url: { url: `data:image/jpeg;base64,${pendingImage.toString('base64')}` } },
                                { type: 'text', text },
                            ],
                        });
                        pendingImage = null;
                    }
                    else {
                        messages.push({ role: 'user', content: text });
                    }
                    makeRequest();
                },
                sendToolResponse(callId, result) {
                    if (closed)
                        return;
                    toolResponseQueue.push({ callId, result });
                    // Debounce 50ms so parallel tool results batch into one
                    // follow-up request instead of one request per result.
                    if (toolFlushTimer)
                        clearTimeout(toolFlushTimer);
                    toolFlushTimer = setTimeout(flushToolResponses, 50);
                },
                // Activity signaling is a realtime-transport concept; no-op here.
                sendActivityStart() { },
                sendActivityEnd() { },
                interrupt() {
                    // Cancel the in-flight stream, if any, and notify the caller.
                    if (abortController) {
                        abortController.abort();
                        handlers.interrupted();
                    }
                },
                async close() {
                    closed = true;
                    if (abortController)
                        abortController.abort();
                    if (toolFlushTimer)
                        clearTimeout(toolFlushTimer);
                    handlers.close('closed');
                },
            };
            return transport;
        },
    };
}
|
|
2224
|
+
// ═════════════════════════════════════════════════════════════════════════════
|
|
2225
|
+
// GOOGLE GEMINI PROVIDER (HTTP + SSE Streaming)
|
|
2226
|
+
// ═════════════════════════════════════════════════════════════════════════════
|
|
2227
|
+
//
|
|
2228
|
+
// For the generateContent / streamGenerateContent REST API.
|
|
2229
|
+
// Use this when you don't need real-time audio (for audio, use geminiLiveProvider).
|
|
2230
|
+
//
|
|
2231
|
+
/**
 * Google Gemini HTTP streaming provider (generateContent API).
 * Uses SSE streaming. Supports tool calling, vision, thinking, and all Gemini models.
 *
 * For real-time audio use geminiLiveProvider instead; this transport's
 * sendAudio is a no-op.
 *
 * @example
 * ```typescript
 * await cortex.connect({
 *   provider: geminiProvider(),
 *   connection: {
 *     apiKey: 'YOUR_GEMINI_KEY',
 *     model: 'gemini-2.5-flash',
 *     systemInstruction: 'You are a desktop assistant.',
 *   },
 * });
 * ```
 */
function geminiProvider() {
    return {
        name: 'gemini',
        async connect(config, handlers) {
            const apiVersion = config.providerOptions?.apiVersion ?? 'v1beta';
            const host = config.providerOptions?.host ?? 'generativelanguage.googleapis.com';
            const baseUrl = `https://${host}/${apiVersion}`;
            // Conversation history (Gemini uses "user" and "model" roles)
            const contents = [];
            let abortController = null;
            let closed = false;
            // Last frame received via sendVideo; attached to the next sendText turn.
            let pendingImage = null;
            // Build tool declarations. The sentinel names __googleSearch /
            // __codeExecution request Gemini's built-in tools instead of
            // user-defined function declarations.
            const toolsBlock = [];
            if (config.tools && config.tools.length > 0) {
                const functionTools = config.tools.filter(t => t.name !== '__googleSearch' && t.name !== '__codeExecution');
                const useGoogleSearch = config.tools.some(t => t.name === '__googleSearch');
                const useCodeExecution = config.tools.some(t => t.name === '__codeExecution');
                if (functionTools.length > 0) {
                    toolsBlock.push({
                        function_declarations: functionTools.map(t => ({
                            name: t.name,
                            description: t.description,
                            parameters: t.parameters ? {
                                type: 'OBJECT',
                                properties: Object.fromEntries(Object.entries(t.parameters).map(([k, v]) => [k, cleanSchemaForGemini(v)])),
                                required: Object.entries(t.parameters).filter(([, v]) => v.required).map(([k]) => k),
                            } : undefined,
                        })),
                    });
                }
                if (useGoogleSearch)
                    toolsBlock.push({ googleSearch: {} });
                if (useCodeExecution)
                    toolsBlock.push({ codeExecution: {} });
            }
            // Tool response batching: responses arriving within 50ms are sent
            // back to Gemini as a single user turn.
            const toolResponseQueue = [];
            let toolFlushTimer = null;
            // Map callId → function name (Gemini matches tool responses by name,
            // not by id, so we synthesize ids locally and translate back).
            const callIdToName = new Map();
            // Monotonic counter so two calls to the same function inside the
            // same millisecond still get distinct callIds (Date.now() alone
            // collides for parallel function calls in a single chunk).
            let callSeq = 0;
            async function makeRequest() {
                if (closed || contents.length === 0)
                    return;
                abortController = new AbortController();
                const body = { contents };
                if (config.systemInstruction) {
                    body.system_instruction = { parts: [{ text: config.systemInstruction }] };
                }
                if (toolsBlock.length > 0)
                    body.tools = toolsBlock;
                const generationConfig = {};
                if (config.providerOptions?.maxTokens)
                    generationConfig.maxOutputTokens = config.providerOptions.maxTokens;
                if (config.providerOptions?.temperature !== undefined)
                    generationConfig.temperature = config.providerOptions.temperature;
                if (config.providerOptions?.thinkingLevel) {
                    generationConfig.thinking_config = { thinking_level: config.providerOptions.thinkingLevel };
                }
                if (Object.keys(generationConfig).length > 0)
                    body.generationConfig = generationConfig;
                const url = `${baseUrl}/models/${config.model}:streamGenerateContent?key=${config.apiKey}&alt=sse`;
                try {
                    const response = await fetch(url, {
                        method: 'POST',
                        headers: { 'Content-Type': 'application/json' },
                        body: JSON.stringify(body),
                        signal: abortController.signal,
                    });
                    if (!response.ok) {
                        const errText = await response.text();
                        handlers.error(new Error(`Gemini ${response.status}: ${errText}`));
                        return;
                    }
                    const modelParts = [];
                    let textBuffer = '';
                    for await (const chunk of parseSSE(response)) {
                        const candidate = chunk.candidates?.[0];
                        if (!candidate)
                            continue;
                        if (candidate.content?.parts) {
                            for (const part of candidate.content.parts) {
                                if (part.text !== undefined) {
                                    handlers.text(part.text, false);
                                    textBuffer += part.text;
                                }
                                if (part.functionCall) {
                                    const callId = `gemini_${Date.now()}_${++callSeq}_${part.functionCall.name}`;
                                    callIdToName.set(callId, part.functionCall.name);
                                    modelParts.push({ functionCall: part.functionCall });
                                    handlers.toolCall(callId, part.functionCall.name, part.functionCall.args ?? {});
                                }
                                if (part.thought) {
                                    // NOTE(review): shape of `thought` parts is assumed to be
                                    // either a string or { text } — confirm against the API.
                                    handlers.thought(typeof part.thought === 'string' ? part.thought : part.thought.text ?? '');
                                }
                            }
                        }
                        if (candidate.finishReason) {
                            if (textBuffer) {
                                modelParts.push({ text: textBuffer });
                                handlers.text('', true);
                            }
                        }
                        if (chunk.usageMetadata) {
                            handlers.usage(chunk.usageMetadata.totalTokenCount ?? 0, chunk.usageMetadata.promptTokenCount ?? 0, chunk.usageMetadata.candidatesTokenCount ?? 0);
                        }
                    }
                    // Add model turn to conversation history so the next request
                    // carries the full dialog (including pending functionCalls).
                    if (modelParts.length > 0) {
                        contents.push({ role: 'model', parts: modelParts });
                    }
                    handlers.turnComplete();
                }
                catch (err) {
                    // Normalize before inspecting: a thrown non-object would
                    // otherwise crash on `.name` inside this catch.
                    const error = err instanceof Error ? err : new Error(String(err));
                    if (error.name !== 'AbortError') {
                        handlers.error(error);
                    }
                }
                finally {
                    abortController = null;
                }
            }
            function flushToolResponses() {
                if (toolResponseQueue.length === 0)
                    return;
                // Gemini: tool responses go as a user turn with functionResponse parts
                const parts = toolResponseQueue.map(({ name, result }) => ({
                    functionResponse: {
                        name,
                        response: typeof result === 'string' ? { output: result } : { output: JSON.stringify(result) },
                    },
                }));
                toolResponseQueue.length = 0;
                contents.push({ role: 'user', parts });
                makeRequest();
            }
            const transport = {
                sendAudio(_pcm, _sampleRate) {
                    // HTTP Gemini doesn't support streaming audio. Use geminiLiveProvider.
                },
                sendVideo(jpeg) { pendingImage = jpeg; },
                sendText(text, role) {
                    if (closed)
                        return;
                    const parts = [];
                    if (pendingImage) {
                        parts.push({
                            inlineData: {
                                mimeType: 'image/jpeg',
                                data: pendingImage.toString('base64'),
                            },
                        });
                        pendingImage = null;
                    }
                    // Gemini has no system role mid-conversation; prefix instead.
                    parts.push({ text: role === 'system' ? `[System]: ${text}` : text });
                    contents.push({ role: 'user', parts });
                    makeRequest();
                },
                sendToolResponse(callId, result) {
                    if (closed)
                        return;
                    const name = callIdToName.get(callId) ?? callId;
                    callIdToName.delete(callId);
                    toolResponseQueue.push({ name, result });
                    if (toolFlushTimer)
                        clearTimeout(toolFlushTimer);
                    toolFlushTimer = setTimeout(flushToolResponses, 50);
                },
                sendActivityStart() { },
                sendActivityEnd() { },
                interrupt() {
                    if (abortController) {
                        abortController.abort();
                        handlers.interrupted();
                    }
                },
                async close() {
                    closed = true;
                    if (abortController)
                        abortController.abort();
                    if (toolFlushTimer)
                        clearTimeout(toolFlushTimer);
                    handlers.close('closed');
                },
            };
            return transport;
        },
    };
}
|
|
2436
|
+
// ─── Pre-configured OpenAI-compatible provider shortcuts ────────────────────
|
|
2437
|
+
/** Mistral AI provider (OpenAI-compatible Chat Completions endpoint). */
function mistralProvider() {
    const endpoint = { baseUrl: 'https://api.mistral.ai/v1', name: 'mistral' };
    return openAIChatProvider(endpoint);
}
|
|
2441
|
+
/** xAI / Grok provider (OpenAI-compatible Chat Completions endpoint). */
function xaiProvider() {
    const endpoint = { baseUrl: 'https://api.x.ai/v1', name: 'xai' };
    return openAIChatProvider(endpoint);
}
|
|
2445
|
+
/** DeepSeek provider (OpenAI-compatible Chat Completions endpoint). */
function deepSeekProvider() {
    const endpoint = { baseUrl: 'https://api.deepseek.com', name: 'deepseek' };
    return openAIChatProvider(endpoint);
}
|
|
2449
|
+
/** Groq provider — fast inference (OpenAI-compatible Chat Completions endpoint). */
function groqProvider() {
    const endpoint = { baseUrl: 'https://api.groq.com/openai/v1', name: 'groq' };
    return openAIChatProvider(endpoint);
}
|
|
2453
|
+
/** Together AI provider (OpenAI-compatible Chat Completions endpoint). */
function togetherProvider() {
    const endpoint = { baseUrl: 'https://api.together.xyz/v1', name: 'together' };
    return openAIChatProvider(endpoint);
}
|
|
2457
|
+
/** Fireworks AI provider (OpenAI-compatible Chat Completions endpoint). */
function fireworksProvider() {
    const endpoint = { baseUrl: 'https://api.fireworks.ai/inference/v1', name: 'fireworks' };
    return openAIChatProvider(endpoint);
}
|
|
2461
|
+
/** Perplexity provider (OpenAI-compatible Chat Completions endpoint). */
function perplexityProvider() {
    const endpoint = { baseUrl: 'https://api.perplexity.ai', name: 'perplexity' };
    return openAIChatProvider(endpoint);
}
|
|
2465
|
+
/** SambaNova provider (OpenAI-compatible Chat Completions endpoint). */
function sambanovaProvider() {
    const endpoint = { baseUrl: 'https://api.sambanova.ai/v1', name: 'sambanova' };
    return openAIChatProvider(endpoint);
}
|
|
2469
|
+
/** Cerebras provider — fast inference (OpenAI-compatible Chat Completions endpoint). */
function cerebrasProvider() {
    const endpoint = { baseUrl: 'https://api.cerebras.ai/v1', name: 'cerebras' };
    return openAIChatProvider(endpoint);
}
|
|
2473
|
+
// ═════════════════════════════════════════════════════════════════════════════
|
|
2474
|
+
// OPENAI RESPONSES API PROVIDER (HTTP + SSE Streaming)
|
|
2475
|
+
// ═════════════════════════════════════════════════════════════════════════════
|
|
2476
|
+
/**
 * Provider for OpenAI's newer Responses API (stateful conversation chaining).
 * State is kept server-side via previous_response_id; only the new input is
 * sent on each request.
 *
 * @example
 * ```typescript
 * await cortex.connect({
 *   provider: openAIResponsesProvider(),
 *   connection: { apiKey: 'sk-...', model: 'gpt-4.1' },
 * });
 * ```
 */
function openAIResponsesProvider() {
    return {
        name: 'openai-responses',
        async connect(config, handlers) {
            const baseUrl = (config.providerOptions?.baseUrl ?? 'https://api.openai.com/v1').replace(/\/$/, '');
            const tools = config.tools ? toolsToOpenAIResponses(config.tools) : [];
            let abortController = null;
            let closed = false;
            // Server-side conversation chaining: id of the last completed response.
            let previousResponseId = null;
            // Last frame received via sendVideo; attached to the next sendText turn.
            let pendingImage = null;
            // Tool response batching: responses arriving within 50ms are sent
            // back in a single request.
            const toolResponseQueue = [];
            let toolFlushTimer = null;
            async function makeRequest(input) {
                if (closed)
                    return;
                abortController = new AbortController();
                const body = {
                    model: config.model,
                    input,
                    stream: true,
                };
                if (config.systemInstruction)
                    body.instructions = config.systemInstruction;
                if (tools.length > 0)
                    body.tools = tools;
                if (previousResponseId)
                    body.previous_response_id = previousResponseId;
                if (config.providerOptions?.maxTokens)
                    body.max_output_tokens = config.providerOptions.maxTokens;
                if (config.providerOptions?.temperature !== undefined)
                    body.temperature = config.providerOptions.temperature;
                const headers = {
                    'Content-Type': 'application/json',
                    'Authorization': `Bearer ${config.apiKey}`,
                };
                try {
                    const response = await fetch(`${baseUrl}/responses`, {
                        method: 'POST',
                        headers,
                        body: JSON.stringify(body),
                        signal: abortController.signal,
                    });
                    if (!response.ok) {
                        const errText = await response.text();
                        handlers.error(new Error(`OpenAI Responses ${response.status}: ${errText}`));
                        return;
                    }
                    // Map item_id → { callId, name } for function calls
                    const itemToCallId = new Map();
                    // call_ids already dispatched to handlers.toolCall. The stream
                    // emits BOTH response.function_call_arguments.done and
                    // response.output_item.done for each function call; without
                    // this guard every tool call fired twice.
                    const dispatchedCalls = new Set();
                    for await (const event of parseSSE(response)) {
                        switch (event.type) {
                            case 'response.output_item.added':
                                if (event.item?.type === 'function_call') {
                                    itemToCallId.set(event.item.id, {
                                        callId: event.item.call_id,
                                        name: event.item.name,
                                    });
                                }
                                break;
                            case 'response.text.delta':
                            case 'response.output_text.delta':
                                if (event.delta)
                                    handlers.text(event.delta, false);
                                break;
                            case 'response.text.done':
                            case 'response.output_text.done':
                                handlers.text('', true);
                                break;
                            case 'response.output_item.done':
                                if (event.item?.type === 'function_call' && !dispatchedCalls.has(event.item.call_id)) {
                                    dispatchedCalls.add(event.item.call_id);
                                    try {
                                        const args = JSON.parse(event.item.arguments ?? '{}');
                                        handlers.toolCall(event.item.call_id, event.item.name, args);
                                    }
                                    catch {
                                        // Malformed arguments JSON: still surface the call.
                                        handlers.toolCall(event.item.call_id, event.item.name ?? '', {});
                                    }
                                }
                                break;
                            case 'response.function_call_arguments.done': {
                                // Primary dispatch path (this event arrives first);
                                // output_item.done above is the fallback.
                                const mapping = itemToCallId.get(event.item_id);
                                if (mapping && !dispatchedCalls.has(mapping.callId)) {
                                    dispatchedCalls.add(mapping.callId);
                                    try {
                                        const args = JSON.parse(event.arguments ?? '{}');
                                        handlers.toolCall(mapping.callId, mapping.name, args);
                                    }
                                    catch {
                                        handlers.toolCall(mapping.callId, mapping.name, {});
                                    }
                                }
                                break;
                            }
                            case 'response.completed':
                                if (event.response?.id)
                                    previousResponseId = event.response.id;
                                if (event.response?.usage) {
                                    handlers.usage(event.response.usage.total_tokens ?? 0, event.response.usage.input_tokens ?? 0, event.response.usage.output_tokens ?? 0);
                                }
                                handlers.turnComplete();
                                break;
                            case 'response.failed':
                            case 'error':
                                handlers.error(new Error(`OpenAI Responses: ${event.error?.message ?? event.response?.error?.message ?? JSON.stringify(event)}`));
                                break;
                            case 'response.audio.delta':
                                if (event.delta)
                                    handlers.audio(Buffer.from(event.delta, 'base64'), 24000);
                                break;
                            case 'response.audio_transcript.delta':
                                if (event.delta)
                                    handlers.transcript(event.delta, 'model', false);
                                break;
                            case 'response.audio_transcript.done':
                                if (event.transcript)
                                    handlers.transcript(event.transcript, 'model', true);
                                break;
                        }
                    }
                }
                catch (err) {
                    // Normalize before inspecting: a thrown non-object would
                    // otherwise crash on `.name` inside this catch.
                    const error = err instanceof Error ? err : new Error(String(err));
                    if (error.name !== 'AbortError') {
                        handlers.error(error);
                    }
                }
                finally {
                    abortController = null;
                }
            }
            function flushToolResponses() {
                if (toolResponseQueue.length === 0)
                    return;
                const input = toolResponseQueue.map(({ callId, result }) => ({
                    type: 'function_call_output',
                    call_id: callId,
                    output: typeof result === 'string' ? result : JSON.stringify(result),
                }));
                toolResponseQueue.length = 0;
                makeRequest(input);
            }
            const transport = {
                sendAudio(_pcm, _sampleRate) {
                    // Responses API does not support realtime audio streaming.
                    // Use OpenAI Realtime provider for audio.
                },
                sendVideo(jpeg) { pendingImage = jpeg; },
                sendText(text, role) {
                    if (closed)
                        return;
                    const input = [];
                    if (pendingImage) {
                        input.push({
                            role: role === 'system' ? 'developer' : 'user',
                            content: [
                                { type: 'input_image', image_url: `data:image/jpeg;base64,${pendingImage.toString('base64')}` },
                                { type: 'input_text', text },
                            ],
                        });
                        pendingImage = null;
                    }
                    else {
                        input.push({ role: role === 'system' ? 'developer' : 'user', content: text });
                    }
                    makeRequest(input);
                },
                sendToolResponse(callId, result) {
                    if (closed)
                        return;
                    toolResponseQueue.push({ callId, result });
                    if (toolFlushTimer)
                        clearTimeout(toolFlushTimer);
                    toolFlushTimer = setTimeout(flushToolResponses, 50);
                },
                sendActivityStart() { },
                sendActivityEnd() { },
                interrupt() {
                    if (abortController) {
                        abortController.abort();
                        handlers.interrupted();
                    }
                },
                async close() {
                    closed = true;
                    if (abortController)
                        abortController.abort();
                    if (toolFlushTimer)
                        clearTimeout(toolFlushTimer);
                    handlers.close('closed');
                },
            };
            return transport;
        },
    };
}
|
|
2682
|
+
// ═════════════════════════════════════════════════════════════════════════════
|
|
2683
|
+
// ANTHROPIC CLAUDE PROVIDER (HTTP + SSE Streaming)
|
|
2684
|
+
// ═════════════════════════════════════════════════════════════════════════════
|
|
2685
|
+
/**
 * Anthropic Claude Messages API provider.
 * Supports tool calling, vision (images), and extended thinking.
 *
 * Streams via SSE; accumulates assistant content blocks locally and replays
 * the full message history on every request (the Messages API is stateless).
 *
 * @example
 * ```typescript
 * await cortex.connect({
 *   provider: anthropicProvider(),
 *   connection: {
 *     apiKey: 'sk-ant-...',
 *     model: 'claude-sonnet-4-20250514',
 *     systemInstruction: 'You are a desktop assistant.',
 *     providerOptions: { maxTokens: 4096 },
 *   },
 * });
 * ```
 */
function anthropicProvider() {
    return {
        name: 'anthropic',
        async connect(config, handlers) {
            const baseUrl = (config.providerOptions?.baseUrl ?? 'https://api.anthropic.com').replace(/\/$/, '');
            const apiVersion = config.providerOptions?.apiVersion ?? '2023-06-01';
            // max_tokens is mandatory on the Messages API; default generously.
            const maxTokens = config.providerOptions?.maxTokens ?? 8192;
            // Full conversation history, replayed on every request.
            const messages = [];
            const tools = config.tools ? toolsToAnthropic(config.tools) : [];
            let abortController = null;
            let closed = false;
            // Last frame received via sendVideo; attached to the next sendText turn.
            let pendingImage = null;
            // Tool response batching (Claude requires all tool_results in one user message)
            const toolResponseQueue = [];
            let toolFlushTimer = null;
            // Issue one streaming request carrying the whole history; parse the
            // SSE event stream and fold the assistant's reply back into `messages`.
            async function makeRequest() {
                if (closed || messages.length === 0)
                    return;
                abortController = new AbortController();
                const body = {
                    model: config.model,
                    max_tokens: maxTokens,
                    messages,
                    stream: true,
                };
                if (config.systemInstruction)
                    body.system = config.systemInstruction;
                if (tools.length > 0)
                    body.tools = tools;
                if (config.providerOptions?.temperature !== undefined)
                    body.temperature = config.providerOptions.temperature;
                const headers = {
                    'Content-Type': 'application/json',
                    'x-api-key': config.apiKey,
                    'anthropic-version': apiVersion,
                    // Caller-supplied headers last so they can add beta flags etc.
                    ...(config.providerOptions?.headers ?? {}),
                };
                try {
                    const response = await fetch(`${baseUrl}/v1/messages`, {
                        method: 'POST',
                        headers,
                        body: JSON.stringify(body),
                        signal: abortController.signal,
                    });
                    if (!response.ok) {
                        const errText = await response.text();
                        handlers.error(new Error(`Anthropic ${response.status}: ${errText}`));
                        return;
                    }
                    // Per-stream state: the tool_use block currently being
                    // assembled (id/name + accumulating JSON args) and the text
                    // block currently being streamed.
                    let currentToolId = '';
                    let currentToolName = '';
                    let toolJsonBuffer = '';
                    let textBuffer = '';
                    // Completed content blocks, pushed into `messages` at message_stop.
                    const assistantContent = [];
                    for await (const event of parseSSE(response)) {
                        switch (event.type) {
                            case 'content_block_start':
                                if (event.content_block?.type === 'tool_use') {
                                    currentToolId = event.content_block.id;
                                    currentToolName = event.content_block.name;
                                    toolJsonBuffer = '';
                                }
                                else if (event.content_block?.type === 'text') {
                                    textBuffer = '';
                                }
                                break;
                            case 'content_block_delta':
                                if (event.delta?.type === 'text_delta') {
                                    handlers.text(event.delta.text, false);
                                    textBuffer += event.delta.text;
                                }
                                else if (event.delta?.type === 'input_json_delta') {
                                    // Tool arguments stream as JSON fragments; parsed at block stop.
                                    toolJsonBuffer += event.delta.partial_json;
                                }
                                else if (event.delta?.type === 'thinking_delta') {
                                    // NOTE(review): thinking blocks are surfaced to handlers but
                                    // not recorded into assistantContent, so they are not replayed
                                    // on later turns — confirm this is acceptable with tool use +
                                    // extended thinking.
                                    handlers.thought(event.delta.thinking);
                                }
                                break;
                            case 'content_block_stop':
                                // A non-empty currentToolId means the block that just
                                // closed was a tool_use block; otherwise it was text.
                                if (currentToolId) {
                                    try {
                                        const args = toolJsonBuffer ? JSON.parse(toolJsonBuffer) : {};
                                        assistantContent.push({
                                            type: 'tool_use',
                                            id: currentToolId,
                                            name: currentToolName,
                                            input: args,
                                        });
                                        handlers.toolCall(currentToolId, currentToolName, args);
                                    }
                                    catch { /* parse error */ }
                                    currentToolId = '';
                                    currentToolName = '';
                                    toolJsonBuffer = '';
                                }
                                else if (textBuffer) {
                                    assistantContent.push({ type: 'text', text: textBuffer });
                                    // Empty delta with done=true signals end-of-text to handlers.
                                    handlers.text('', true);
                                    textBuffer = '';
                                }
                                break;
                            case 'message_delta':
                                if (event.usage) {
                                    // Anthropic reports input/output separately; total is derived.
                                    handlers.usage((event.usage.input_tokens ?? 0) + (event.usage.output_tokens ?? 0), event.usage.input_tokens ?? 0, event.usage.output_tokens ?? 0);
                                }
                                break;
                            case 'message_stop':
                                if (assistantContent.length > 0) {
                                    messages.push({ role: 'assistant', content: assistantContent });
                                }
                                handlers.turnComplete();
                                break;
                            case 'error':
                                handlers.error(new Error(`Anthropic: ${event.error?.message ?? JSON.stringify(event)}`));
                                break;
                        }
                    }
                }
                catch (err) {
                    // AbortError is expected on interrupt()/close(); don't report it.
                    if (err.name !== 'AbortError') {
                        handlers.error(err instanceof Error ? err : new Error(String(err)));
                    }
                }
                finally {
                    abortController = null;
                }
            }
            // Drain the queued tool results into a single user message and re-request.
            function flushToolResponses() {
                if (toolResponseQueue.length === 0)
                    return;
                // Anthropic: all tool results go in one user message as tool_result blocks
                const content = toolResponseQueue.map(({ callId, result }) => ({
                    type: 'tool_result',
                    tool_use_id: callId,
                    content: typeof result === 'string' ? result : JSON.stringify(result),
                }));
                toolResponseQueue.length = 0;
                messages.push({ role: 'user', content });
                makeRequest();
            }
            const transport = {
                sendAudio(_pcm, _sampleRate) {
                    // Claude does not process raw audio natively.
                },
                // Buffer the latest frame; it is attached to the next text turn.
                sendVideo(jpeg) { pendingImage = jpeg; },
                sendText(text, role) {
                    if (closed)
                        return;
                    const content = [];
                    if (pendingImage) {
                        content.push({
                            type: 'image',
                            source: {
                                type: 'base64',
                                media_type: 'image/jpeg',
                                data: pendingImage.toString('base64'),
                            },
                        });
                        pendingImage = null;
                    }
                    // Mid-conversation system messages are prefixed into a user turn
                    // (the Messages API only takes a top-level `system` field).
                    content.push({
                        type: 'text',
                        text: role === 'system' ? `[System]: ${text}` : text,
                    });
                    messages.push({ role: 'user', content });
                    makeRequest();
                },
                // Queue a tool result; flushed (batched) after a 50ms debounce so
                // parallel tool calls land in one user message, as Claude requires.
                sendToolResponse(callId, result) {
                    if (closed)
                        return;
                    toolResponseQueue.push({ callId, result });
                    if (toolFlushTimer)
                        clearTimeout(toolFlushTimer);
                    toolFlushTimer = setTimeout(flushToolResponses, 50);
                },
                sendActivityStart() { },
                sendActivityEnd() { },
                // Abort the in-flight request, if any, and notify the handler.
                interrupt() {
                    if (abortController) {
                        abortController.abort();
                        handlers.interrupted();
                    }
                },
                // Tear down: stop future requests, abort in-flight work, cancel timers.
                async close() {
                    closed = true;
                    if (abortController)
                        abortController.abort();
                    if (toolFlushTimer)
                        clearTimeout(toolFlushTimer);
                    handlers.close('closed');
                },
            };
            return transport;
        },
    };
}
|
|
2898
|
+
// ═════════════════════════════════════════════════════════════════════════════
|
|
1549
2899
|
// FACTORY
|
|
1550
2900
|
// ═════════════════════════════════════════════════════════════════════════════
|
|
1551
2901
|
/** Create a new Cortex instance. */
|