cactus-react-native 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/android/src/main/java/com/cactus/Cactus.java +35 -0
- package/android/src/main/java/com/cactus/LlamaContext.java +5 -0
- package/android/src/main/jni.cpp +46 -15
- package/android/src/main/jniLibs/arm64-v8a/libcactus.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_i8mm.so +0 -0
- package/android/src/newarch/java/com/cactus/CactusModule.java +5 -0
- package/android/src/oldarch/java/com/cactus/CactusModule.java +5 -0
- package/ios/Cactus.mm +21 -0
- package/ios/CactusContext.h +1 -0
- package/ios/CactusContext.mm +6 -2
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +0 -12
- package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +0 -12
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus_ffi.h +0 -12
- package/ios/cactus.xcframework/tvos-arm64/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +0 -12
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
- package/lib/commonjs/NativeCactus.js +0 -1
- package/lib/commonjs/NativeCactus.js.map +1 -1
- package/lib/commonjs/chat.js +36 -0
- package/lib/commonjs/chat.js.map +1 -1
- package/lib/commonjs/index.js +0 -23
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/lm.js +102 -57
- package/lib/commonjs/lm.js.map +1 -1
- package/lib/commonjs/tools.js +0 -7
- package/lib/commonjs/tools.js.map +1 -1
- package/lib/commonjs/tts.js +1 -4
- package/lib/commonjs/tts.js.map +1 -1
- package/lib/commonjs/vlm.js +100 -31
- package/lib/commonjs/vlm.js.map +1 -1
- package/lib/module/NativeCactus.js +0 -3
- package/lib/module/NativeCactus.js.map +1 -1
- package/lib/module/chat.js +34 -0
- package/lib/module/chat.js.map +1 -1
- package/lib/module/index.js +1 -10
- package/lib/module/index.js.map +1 -1
- package/lib/module/lm.js +102 -57
- package/lib/module/lm.js.map +1 -1
- package/lib/module/tools.js +0 -7
- package/lib/module/tools.js.map +1 -1
- package/lib/module/tts.js +1 -4
- package/lib/module/tts.js.map +1 -1
- package/lib/module/vlm.js +100 -31
- package/lib/module/vlm.js.map +1 -1
- package/lib/typescript/NativeCactus.d.ts +1 -142
- package/lib/typescript/NativeCactus.d.ts.map +1 -1
- package/lib/typescript/chat.d.ts +11 -0
- package/lib/typescript/chat.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +2 -4
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/lm.d.ts +12 -6
- package/lib/typescript/lm.d.ts.map +1 -1
- package/lib/typescript/tools.d.ts.map +1 -1
- package/lib/typescript/tts.d.ts.map +1 -1
- package/lib/typescript/vlm.d.ts +10 -2
- package/lib/typescript/vlm.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeCactus.ts +6 -175
- package/src/chat.ts +46 -1
- package/src/index.ts +6 -17
- package/src/lm.ts +112 -58
- package/src/tools.ts +0 -5
- package/src/tts.ts +1 -4
- package/src/vlm.ts +113 -31
- package/android/src/main/jniLibs/x86_64/libcactus.so +0 -0
- package/android/src/main/jniLibs/x86_64/libcactus_x86_64.so +0 -0
- package/lib/commonjs/grammar.js +0 -560
- package/lib/commonjs/grammar.js.map +0 -1
- package/lib/module/grammar.js +0 -553
- package/lib/module/grammar.js.map +0 -1
- package/lib/typescript/grammar.d.ts +0 -37
- package/lib/typescript/grammar.d.ts.map +0 -1
- package/src/grammar.ts +0 -854
package/src/lm.ts
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import { initLlama, LlamaContext } from './index'
|
|
2
|
+
// @ts-ignore
|
|
3
|
+
import { Platform } from 'react-native'
|
|
2
4
|
import type {
|
|
3
5
|
ContextParams,
|
|
4
6
|
CompletionParams,
|
|
@@ -7,8 +9,10 @@ import type {
|
|
|
7
9
|
EmbeddingParams,
|
|
8
10
|
NativeEmbeddingResult,
|
|
9
11
|
} from './index'
|
|
12
|
+
|
|
10
13
|
import { Telemetry } from './telemetry'
|
|
11
14
|
import { setCactusToken, getVertexAIEmbedding } from './remote'
|
|
15
|
+
import { ConversationHistoryManager } from './chat'
|
|
12
16
|
|
|
13
17
|
interface CactusLMReturn {
|
|
14
18
|
lm: CactusLM | null
|
|
@@ -16,10 +20,20 @@ interface CactusLMReturn {
|
|
|
16
20
|
}
|
|
17
21
|
|
|
18
22
|
export class CactusLM {
|
|
19
|
-
|
|
23
|
+
protected context: LlamaContext
|
|
24
|
+
protected conversationHistoryManager: ConversationHistoryManager
|
|
25
|
+
|
|
26
|
+
// the initPromise enables an "async singleton" initialization pattern which
|
|
27
|
+
// protects against a race condition in the event of multiple init attempts
|
|
28
|
+
private static _initCache: Map<string, Promise<CactusLMReturn>> = new Map();
|
|
29
|
+
|
|
30
|
+
private static getCacheKey(params: ContextParams, cactusToken?: string, retryOptions?: { maxRetries?: number; delayMs?: number }): string {
|
|
31
|
+
return JSON.stringify({ params, cactusToken, retryOptions });
|
|
32
|
+
}
|
|
20
33
|
|
|
21
|
-
|
|
34
|
+
protected constructor(context: LlamaContext) {
|
|
22
35
|
this.context = context
|
|
36
|
+
this.conversationHistoryManager = new ConversationHistoryManager()
|
|
23
37
|
}
|
|
24
38
|
|
|
25
39
|
static async init(
|
|
@@ -28,72 +42,108 @@ export class CactusLM {
|
|
|
28
42
|
cactusToken?: string,
|
|
29
43
|
retryOptions?: { maxRetries?: number; delayMs?: number },
|
|
30
44
|
): Promise<CactusLMReturn> {
|
|
45
|
+
|
|
31
46
|
if (cactusToken) {
|
|
32
47
|
setCactusToken(cactusToken);
|
|
33
48
|
}
|
|
34
49
|
|
|
35
|
-
const
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
{ ...params, n_gpu_layers: 0 }
|
|
41
|
-
];
|
|
42
|
-
|
|
43
|
-
const sleep = (ms: number): Promise<void> => {
|
|
44
|
-
return new Promise(resolve => {
|
|
45
|
-
const start = Date.now();
|
|
46
|
-
const wait = () => {
|
|
47
|
-
if (Date.now() - start >= ms) {
|
|
48
|
-
resolve();
|
|
49
|
-
} else {
|
|
50
|
-
Promise.resolve().then(wait);
|
|
51
|
-
}
|
|
52
|
-
};
|
|
53
|
-
wait();
|
|
54
|
-
});
|
|
55
|
-
};
|
|
50
|
+
const key = CactusLM.getCacheKey(params, cactusToken, retryOptions);
|
|
51
|
+
if (CactusLM._initCache.has(key)) {
|
|
52
|
+
// concurrent initialization calls with the same params all get the same cached Promise
|
|
53
|
+
return CactusLM._initCache.get(key)!;
|
|
54
|
+
}
|
|
56
55
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
56
|
+
const initPromise = (async () => {
|
|
57
|
+
const maxRetries = retryOptions?.maxRetries ?? 3;
|
|
58
|
+
const delayMs = retryOptions?.delayMs ?? 1000;
|
|
59
|
+
|
|
60
|
+
const configs = [
|
|
61
|
+
params,
|
|
62
|
+
{ ...params, n_gpu_layers: 0 }
|
|
63
|
+
];
|
|
64
|
+
|
|
65
|
+
const sleep = (ms: number): Promise<void> => {
|
|
66
|
+
return new Promise(resolve => {
|
|
67
|
+
const start = Date.now();
|
|
68
|
+
const wait = () => {
|
|
69
|
+
if (Date.now() - start >= ms) {
|
|
70
|
+
resolve();
|
|
71
|
+
} else {
|
|
72
|
+
Promise.resolve().then(wait);
|
|
73
|
+
}
|
|
74
|
+
};
|
|
75
|
+
wait();
|
|
76
|
+
});
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
for (const config of configs) {
|
|
80
|
+
let lastError: Error | null = null;
|
|
81
|
+
|
|
82
|
+
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
83
|
+
try {
|
|
84
|
+
const context = await initLlama(config, onProgress);
|
|
85
|
+
return { lm: new CactusLM(context), error: null };
|
|
86
|
+
} catch (e) {
|
|
87
|
+
lastError = e as Error;
|
|
88
|
+
const isLastConfig = configs.indexOf(config) === configs.length - 1;
|
|
89
|
+
const isLastAttempt = attempt === maxRetries;
|
|
90
|
+
|
|
91
|
+
Telemetry.error(e as Error, {
|
|
92
|
+
n_gpu_layers: config.n_gpu_layers ?? null,
|
|
93
|
+
n_ctx: config.n_ctx ?? null,
|
|
94
|
+
model: config.model ?? null,
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
if (!isLastAttempt) {
|
|
98
|
+
const delay = delayMs * Math.pow(2, attempt - 1);
|
|
99
|
+
await sleep(delay);
|
|
100
|
+
} else if (!isLastConfig) {
|
|
101
|
+
break;
|
|
102
|
+
}
|
|
80
103
|
}
|
|
81
104
|
}
|
|
105
|
+
|
|
106
|
+
if (configs.indexOf(config) === configs.length - 1 && lastError) {
|
|
107
|
+
return { lm: null, error: lastError };
|
|
108
|
+
}
|
|
82
109
|
}
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
110
|
+
return { lm: null, error: new Error('Failed to initialize CactusLM after all retries') };
|
|
111
|
+
})();
|
|
112
|
+
|
|
113
|
+
CactusLM._initCache.set(key, initPromise);
|
|
114
|
+
|
|
115
|
+
const result = await initPromise;
|
|
116
|
+
if (result.error) {
|
|
117
|
+
CactusLM._initCache.delete(key); // Reset on failure to allow retries
|
|
87
118
|
}
|
|
88
|
-
return
|
|
119
|
+
return result;
|
|
89
120
|
}
|
|
90
121
|
|
|
91
|
-
async
|
|
122
|
+
completion = async (
|
|
92
123
|
messages: CactusOAICompatibleMessage[],
|
|
93
124
|
params: CompletionParams = {},
|
|
94
125
|
callback?: (data: any) => void,
|
|
95
|
-
): Promise<NativeCompletionResult> {
|
|
96
|
-
|
|
126
|
+
): Promise<NativeCompletionResult> => {
|
|
127
|
+
const { newMessages, requiresReset } =
|
|
128
|
+
this.conversationHistoryManager.processNewMessages(messages);
|
|
129
|
+
|
|
130
|
+
if (requiresReset) {
|
|
131
|
+
this.context?.rewind();
|
|
132
|
+
this.conversationHistoryManager.reset();
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
if (newMessages.length === 0) {
|
|
136
|
+
console.warn('No messages to complete!');
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
const result = await this.context.completion({ messages: newMessages, ...params }, callback);
|
|
140
|
+
|
|
141
|
+
this.conversationHistoryManager.update(newMessages, {
|
|
142
|
+
role: 'assistant',
|
|
143
|
+
content: result.content,
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
return result;
|
|
97
147
|
}
|
|
98
148
|
|
|
99
149
|
async embedding(
|
|
@@ -136,23 +186,27 @@ export class CactusLM {
|
|
|
136
186
|
return result;
|
|
137
187
|
}
|
|
138
188
|
|
|
139
|
-
|
|
189
|
+
protected async _handleLocalEmbedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult> {
|
|
140
190
|
return this.context.embedding(text, params);
|
|
141
191
|
}
|
|
142
192
|
|
|
143
|
-
|
|
193
|
+
protected async _handleRemoteEmbedding(text: string): Promise<NativeEmbeddingResult> {
|
|
144
194
|
const embeddingValues = await getVertexAIEmbedding(text);
|
|
145
195
|
return {
|
|
146
196
|
embedding: embeddingValues,
|
|
147
197
|
};
|
|
148
198
|
}
|
|
149
199
|
|
|
150
|
-
async
|
|
151
|
-
// @ts-ignore
|
|
200
|
+
rewind = async (): Promise<void> => {
|
|
152
201
|
return this.context?.rewind()
|
|
153
202
|
}
|
|
154
203
|
|
|
155
204
|
async release(): Promise<void> {
|
|
156
205
|
return this.context.release()
|
|
157
206
|
}
|
|
207
|
+
|
|
208
|
+
async stopCompletion(): Promise<void> {
|
|
209
|
+
return await this.context.stopCompletion()
|
|
210
|
+
}
|
|
211
|
+
|
|
158
212
|
}
|
package/src/tools.ts
CHANGED
|
@@ -56,22 +56,18 @@ export class Tools {
|
|
|
56
56
|
|
|
57
57
|
export async function parseAndExecuteTool(result: NativeCompletionResult, tools: Tools): Promise<{toolCalled: boolean, toolName?: string, toolInput?: any, toolOutput?: any}> {
|
|
58
58
|
if (!result.tool_calls || result.tool_calls.length === 0) {
|
|
59
|
-
// console.log('No tool calls found');
|
|
60
59
|
return {toolCalled: false};
|
|
61
60
|
}
|
|
62
61
|
|
|
63
62
|
try {
|
|
64
63
|
const toolCall = result.tool_calls[0];
|
|
65
64
|
if (!toolCall) {
|
|
66
|
-
// console.log('No tool call found');
|
|
67
65
|
return {toolCalled: false};
|
|
68
66
|
}
|
|
69
67
|
const toolName = toolCall.function.name;
|
|
70
68
|
const toolInput = JSON.parse(toolCall.function.arguments);
|
|
71
69
|
|
|
72
|
-
// console.log('Calling tool:', toolName, toolInput);
|
|
73
70
|
const toolOutput = await tools.execute(toolName, toolInput);
|
|
74
|
-
// console.log('Tool called result:', toolOutput);
|
|
75
71
|
|
|
76
72
|
return {
|
|
77
73
|
toolCalled: true,
|
|
@@ -80,7 +76,6 @@ export async function parseAndExecuteTool(result: NativeCompletionResult, tools:
|
|
|
80
76
|
toolOutput
|
|
81
77
|
};
|
|
82
78
|
} catch (error) {
|
|
83
|
-
// console.error('Error parsing tool call:', error);
|
|
84
79
|
return {toolCalled: false};
|
|
85
80
|
}
|
|
86
81
|
}
|
package/src/tts.ts
CHANGED
|
@@ -31,10 +31,7 @@ export class CactusTTS {
|
|
|
31
31
|
speakerJsonStr,
|
|
32
32
|
textToSpeak,
|
|
33
33
|
)
|
|
34
|
-
//
|
|
35
|
-
// the main model would be generated and passed to decodeAudioTokens.
|
|
36
|
-
// For now, we are assuming a direct path which may not be fully functional
|
|
37
|
-
// without the main model's token output for TTS.
|
|
34
|
+
// TODO: Fix
|
|
38
35
|
const tokens = (await this.context.tokenize(formatted_prompt)).tokens
|
|
39
36
|
return decodeAudioTokens(this.context.id, tokens)
|
|
40
37
|
}
|
package/src/vlm.ts
CHANGED
|
@@ -3,15 +3,15 @@ import {
|
|
|
3
3
|
initMultimodal,
|
|
4
4
|
multimodalCompletion,
|
|
5
5
|
LlamaContext,
|
|
6
|
+
type ContextParams,
|
|
7
|
+
type CompletionParams,
|
|
8
|
+
type CactusOAICompatibleMessage,
|
|
9
|
+
type NativeCompletionResult,
|
|
6
10
|
} from './index'
|
|
7
|
-
|
|
8
|
-
ContextParams,
|
|
9
|
-
CompletionParams,
|
|
10
|
-
CactusOAICompatibleMessage,
|
|
11
|
-
NativeCompletionResult,
|
|
12
|
-
} from './index'
|
|
11
|
+
|
|
13
12
|
import { Telemetry } from './telemetry'
|
|
14
13
|
import { setCactusToken, getTextCompletion, getVisionCompletion } from './remote'
|
|
14
|
+
import { ConversationHistoryManager } from './chat'
|
|
15
15
|
|
|
16
16
|
interface CactusVLMReturn {
|
|
17
17
|
vlm: CactusVLM | null
|
|
@@ -29,43 +29,101 @@ export type VLMCompletionParams = Omit<CompletionParams, 'prompt'> & {
|
|
|
29
29
|
|
|
30
30
|
export class CactusVLM {
|
|
31
31
|
private context: LlamaContext
|
|
32
|
-
|
|
32
|
+
protected conversationHistoryManager: ConversationHistoryManager
|
|
33
|
+
|
|
34
|
+
// see CactusLM for detailed docs
|
|
35
|
+
private static _initCache: Map<string, Promise<CactusVLMReturn>> = new Map();
|
|
36
|
+
|
|
37
|
+
private static getCacheKey(params: VLMContextParams, cactusToken?: string, retryOptions?: { maxRetries?: number; delayMs?: number }): string {
|
|
38
|
+
return JSON.stringify({ params, cactusToken, retryOptions });
|
|
39
|
+
}
|
|
40
|
+
|
|
33
41
|
private constructor(context: LlamaContext) {
|
|
34
42
|
this.context = context
|
|
43
|
+
this.conversationHistoryManager = new ConversationHistoryManager()
|
|
35
44
|
}
|
|
36
45
|
|
|
37
46
|
static async init(
|
|
38
47
|
params: VLMContextParams,
|
|
39
48
|
onProgress?: (progress: number) => void,
|
|
40
49
|
cactusToken?: string,
|
|
50
|
+
retryOptions?: { maxRetries?: number; delayMs?: number },
|
|
41
51
|
): Promise<CactusVLMReturn> {
|
|
42
52
|
if (cactusToken) {
|
|
43
53
|
setCactusToken(cactusToken);
|
|
44
54
|
}
|
|
45
55
|
|
|
46
|
-
const
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
56
|
+
const key = CactusVLM.getCacheKey(params, cactusToken, retryOptions);
|
|
57
|
+
if (CactusVLM._initCache.has(key)) {
|
|
58
|
+
return CactusVLM._initCache.get(key)!;
|
|
59
|
+
}
|
|
50
60
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
+
const initPromise = (async () => {
|
|
62
|
+
const maxRetries = retryOptions?.maxRetries ?? 3;
|
|
63
|
+
const delayMs = retryOptions?.delayMs ?? 1000;
|
|
64
|
+
|
|
65
|
+
const configs = [
|
|
66
|
+
params,
|
|
67
|
+
{ ...params, n_gpu_layers: 0 }
|
|
68
|
+
];
|
|
69
|
+
|
|
70
|
+
const sleep = (ms: number): Promise<void> => {
|
|
71
|
+
return new Promise(resolve => {
|
|
72
|
+
const start = Date.now();
|
|
73
|
+
const wait = () => {
|
|
74
|
+
if (Date.now() - start >= ms) {
|
|
75
|
+
resolve();
|
|
76
|
+
} else {
|
|
77
|
+
Promise.resolve().then(wait);
|
|
78
|
+
}
|
|
79
|
+
};
|
|
80
|
+
wait();
|
|
61
81
|
});
|
|
62
|
-
|
|
63
|
-
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
for (const config of configs) {
|
|
85
|
+
let lastError: Error | null = null;
|
|
86
|
+
|
|
87
|
+
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
88
|
+
try {
|
|
89
|
+
const context = await initLlama(config, onProgress)
|
|
90
|
+
await initMultimodal(context.id, params.mmproj, false)
|
|
91
|
+
return {vlm: new CactusVLM(context), error: null}
|
|
92
|
+
} catch (e) {
|
|
93
|
+
lastError = e as Error;
|
|
94
|
+
const isLastConfig = configs.indexOf(config) === configs.length - 1;
|
|
95
|
+
const isLastAttempt = attempt === maxRetries;
|
|
96
|
+
|
|
97
|
+
Telemetry.error(e as Error, {
|
|
98
|
+
n_gpu_layers: config.n_gpu_layers ?? null,
|
|
99
|
+
n_ctx: config.n_ctx ?? null,
|
|
100
|
+
model: config.model ?? null,
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
if (!isLastAttempt) {
|
|
104
|
+
const delay = delayMs * Math.pow(2, attempt - 1);
|
|
105
|
+
await sleep(delay);
|
|
106
|
+
} else if (!isLastConfig) {
|
|
107
|
+
break;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
if (configs.indexOf(config) === configs.length - 1 && lastError) {
|
|
113
|
+
return {vlm: null, error: lastError}
|
|
64
114
|
}
|
|
65
115
|
}
|
|
66
|
-
}
|
|
67
116
|
|
|
68
|
-
|
|
117
|
+
return {vlm: null, error: new Error('Failed to initialize CactusVLM')}
|
|
118
|
+
})();
|
|
119
|
+
|
|
120
|
+
CactusVLM._initCache.set(key, initPromise);
|
|
121
|
+
|
|
122
|
+
const result = await initPromise;
|
|
123
|
+
if (result.error) {
|
|
124
|
+
CactusVLM._initCache.delete(key);
|
|
125
|
+
}
|
|
126
|
+
return result;
|
|
69
127
|
}
|
|
70
128
|
|
|
71
129
|
async completion(
|
|
@@ -111,26 +169,47 @@ export class CactusVLM {
|
|
|
111
169
|
return result;
|
|
112
170
|
}
|
|
113
171
|
|
|
114
|
-
private async
|
|
172
|
+
private _handleLocalCompletion = async(
|
|
115
173
|
messages: CactusOAICompatibleMessage[],
|
|
116
174
|
params: VLMCompletionParams,
|
|
117
175
|
callback?: (data: any) => void,
|
|
118
|
-
): Promise<NativeCompletionResult> {
|
|
176
|
+
): Promise<NativeCompletionResult> => {
|
|
177
|
+
const { newMessages, requiresReset } =
|
|
178
|
+
this.conversationHistoryManager.processNewMessages(messages);
|
|
179
|
+
|
|
180
|
+
if (requiresReset) {
|
|
181
|
+
this.context?.rewind();
|
|
182
|
+
this.conversationHistoryManager.reset();
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
if (newMessages.length === 0) {
|
|
186
|
+
console.warn('No messages to complete!');
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
let result: NativeCompletionResult;
|
|
190
|
+
|
|
119
191
|
if (params.images && params.images.length > 0) {
|
|
120
|
-
const formattedPrompt = await this.context.getFormattedChat(
|
|
192
|
+
const formattedPrompt = await this.context.getFormattedChat(newMessages)
|
|
121
193
|
const prompt =
|
|
122
194
|
typeof formattedPrompt === 'string'
|
|
123
195
|
? formattedPrompt
|
|
124
196
|
: formattedPrompt.prompt
|
|
125
|
-
|
|
197
|
+
result = await multimodalCompletion(
|
|
126
198
|
this.context.id,
|
|
127
199
|
prompt,
|
|
128
200
|
params.images,
|
|
129
201
|
{ ...params, prompt, emit_partial_completion: !!callback },
|
|
130
202
|
)
|
|
131
203
|
} else {
|
|
132
|
-
|
|
204
|
+
result = await this.context.completion({ messages: newMessages, ...params }, callback)
|
|
133
205
|
}
|
|
206
|
+
|
|
207
|
+
this.conversationHistoryManager.update(newMessages, {
|
|
208
|
+
role: 'assistant',
|
|
209
|
+
content: result.content || result.text,
|
|
210
|
+
});
|
|
211
|
+
|
|
212
|
+
return result;
|
|
134
213
|
}
|
|
135
214
|
|
|
136
215
|
private async _handleRemoteCompletion(
|
|
@@ -181,11 +260,14 @@ export class CactusVLM {
|
|
|
181
260
|
}
|
|
182
261
|
|
|
183
262
|
async rewind(): Promise<void> {
|
|
184
|
-
// @ts-ignore
|
|
185
263
|
return this.context?.rewind()
|
|
186
264
|
}
|
|
187
265
|
|
|
188
266
|
async release(): Promise<void> {
|
|
189
267
|
return this.context.release()
|
|
190
268
|
}
|
|
269
|
+
|
|
270
|
+
async stopCompletion(): Promise<void> {
|
|
271
|
+
return await this.context.stopCompletion()
|
|
272
|
+
}
|
|
191
273
|
}
|
|
Binary file
|
|
Binary file
|