cactus-react-native 0.1.4 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +550 -721
- package/android/src/main/java/com/cactus/Cactus.java +41 -0
- package/android/src/main/java/com/cactus/LlamaContext.java +19 -0
- package/android/src/main/jni.cpp +36 -11
- package/android/src/main/jniLibs/arm64-v8a/libcactus.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/libcactus.so +0 -0
- package/android/src/main/jniLibs/x86_64/libcactus_x86_64.so +0 -0
- package/android/src/newarch/java/com/cactus/CactusModule.java +5 -0
- package/android/src/oldarch/java/com/cactus/CactusModule.java +5 -0
- package/ios/Cactus.mm +14 -0
- package/ios/CactusContext.h +1 -0
- package/ios/CactusContext.mm +18 -0
- package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
- package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
- package/lib/commonjs/NativeCactus.js.map +1 -1
- package/lib/commonjs/index.js +92 -6
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/lm.js +64 -21
- package/lib/commonjs/lm.js.map +1 -1
- package/lib/commonjs/projectId.js +8 -0
- package/lib/commonjs/projectId.js.map +1 -0
- package/lib/commonjs/remote.js +153 -0
- package/lib/commonjs/remote.js.map +1 -0
- package/lib/commonjs/telemetry.js +11 -5
- package/lib/commonjs/telemetry.js.map +1 -1
- package/lib/commonjs/vlm.js +90 -23
- package/lib/commonjs/vlm.js.map +1 -1
- package/lib/module/NativeCactus.js.map +1 -1
- package/lib/module/index.js +48 -5
- package/lib/module/index.js.map +1 -1
- package/lib/module/lm.js +63 -21
- package/lib/module/lm.js.map +1 -1
- package/lib/module/projectId.js +4 -0
- package/lib/module/projectId.js.map +1 -0
- package/lib/module/remote.js +144 -0
- package/lib/module/remote.js.map +1 -0
- package/lib/module/telemetry.js +11 -5
- package/lib/module/telemetry.js.map +1 -1
- package/lib/module/vlm.js +90 -23
- package/lib/module/vlm.js.map +1 -1
- package/lib/typescript/NativeCactus.d.ts +7 -0
- package/lib/typescript/NativeCactus.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +3 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/lm.d.ts +4 -3
- package/lib/typescript/lm.d.ts.map +1 -1
- package/lib/typescript/projectId.d.ts +2 -0
- package/lib/typescript/projectId.d.ts.map +1 -0
- package/lib/typescript/remote.d.ts +7 -0
- package/lib/typescript/remote.d.ts.map +1 -0
- package/lib/typescript/telemetry.d.ts +7 -3
- package/lib/typescript/telemetry.d.ts.map +1 -1
- package/lib/typescript/vlm.d.ts +4 -2
- package/lib/typescript/vlm.d.ts.map +1 -1
- package/package.json +4 -4
- package/scripts/postInstall.js +33 -0
- package/src/NativeCactus.ts +7 -0
- package/src/index.ts +58 -5
- package/src/lm.ts +66 -28
- package/src/projectId.ts +1 -0
- package/src/remote.ts +175 -0
- package/src/telemetry.ts +27 -12
- package/src/vlm.ts +104 -25
package/src/remote.ts
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
// Module-level bearer token used to authorize remote Vertex AI calls.
// Set via setCactusToken() and cleared again when a request returns 401.
let _cactusToken: string | null = null;

// Stores (or, when passed null, clears) the bearer token used by the
// remote completion and embedding helpers in this module.
export function setCactusToken(token: string | null): void {
  _cactusToken = token;
}
|
|
6
|
+
|
|
7
|
+
export async function getVertexAIEmbedding(text: string): Promise<number[]> {
|
|
8
|
+
if (_cactusToken === null) {
|
|
9
|
+
throw new Error('CactusToken not set. Please call CactusLM.init with cactusToken parameter.');
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
const projectId = 'cactus-v1-452518';
|
|
13
|
+
const location = 'us-central1';
|
|
14
|
+
const modelId = 'text-embedding-005';
|
|
15
|
+
|
|
16
|
+
const endpoint = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:predict`;
|
|
17
|
+
|
|
18
|
+
const headers = {
|
|
19
|
+
'Authorization': `Bearer ${_cactusToken}`,
|
|
20
|
+
'Content-Type': 'application/json',
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
const requestBody = {
|
|
24
|
+
instances: [{ content: text }]
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
const response = await fetch(endpoint, {
|
|
28
|
+
method: 'POST',
|
|
29
|
+
headers,
|
|
30
|
+
body: JSON.stringify(requestBody),
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
if (response.status === 401) {
|
|
34
|
+
_cactusToken = null;
|
|
35
|
+
throw new Error('Authentication failed. Please update your cactusToken.');
|
|
36
|
+
} else if (!response.ok) {
|
|
37
|
+
const errorText = await response.text();
|
|
38
|
+
throw new Error(`HTTP ${response.status}: ${errorText}`);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
const responseBody = await response.json();
|
|
42
|
+
|
|
43
|
+
if (responseBody.error) {
|
|
44
|
+
throw new Error(`API Error: ${responseBody.error.message}`);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
const predictions = responseBody.predictions;
|
|
48
|
+
if (!predictions || predictions.length === 0) {
|
|
49
|
+
throw new Error('No predictions in response');
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
const embeddings = predictions[0].embeddings;
|
|
53
|
+
const values = embeddings.values;
|
|
54
|
+
|
|
55
|
+
return values;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
export async function getVertexAICompletion(
|
|
59
|
+
textPrompt: string,
|
|
60
|
+
imageData?: string,
|
|
61
|
+
imagePath?: string,
|
|
62
|
+
mimeType?: string,
|
|
63
|
+
): Promise<string> {
|
|
64
|
+
if (_cactusToken === null) {
|
|
65
|
+
throw new Error('CactusToken not set. Please call CactusVLM.init with cactusToken parameter.');
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
const projectId = 'cactus-v1-452518';
|
|
69
|
+
const location = 'global';
|
|
70
|
+
const modelId = 'gemini-2.5-flash-lite-preview-06-17';
|
|
71
|
+
|
|
72
|
+
const endpoint = `https://aiplatform.googleapis.com/v1/projects/${projectId}/locations/${location}/publishers/google/models/${modelId}:generateContent`;
|
|
73
|
+
|
|
74
|
+
const headers = {
|
|
75
|
+
'Authorization': `Bearer ${_cactusToken}`,
|
|
76
|
+
'Content-Type': 'application/json',
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
const parts: any[] = [];
|
|
80
|
+
|
|
81
|
+
if (imageData) {
|
|
82
|
+
const detectedMimeType = mimeType || 'image/jpeg';
|
|
83
|
+
parts.push({
|
|
84
|
+
inlineData: {
|
|
85
|
+
mimeType: detectedMimeType,
|
|
86
|
+
data: imageData
|
|
87
|
+
}
|
|
88
|
+
});
|
|
89
|
+
} else if (imagePath) {
|
|
90
|
+
const detectedMimeType = mimeType || detectMimeType(imagePath);
|
|
91
|
+
const RNFS = require('react-native-fs');
|
|
92
|
+
const base64Data = await RNFS.readFile(imagePath, 'base64');
|
|
93
|
+
parts.push({
|
|
94
|
+
inlineData: {
|
|
95
|
+
mimeType: detectedMimeType,
|
|
96
|
+
data: base64Data
|
|
97
|
+
}
|
|
98
|
+
});
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
parts.push({ text: textPrompt });
|
|
102
|
+
|
|
103
|
+
const requestBody = {
|
|
104
|
+
contents: {
|
|
105
|
+
role: 'user',
|
|
106
|
+
parts: parts,
|
|
107
|
+
}
|
|
108
|
+
};
|
|
109
|
+
|
|
110
|
+
const response = await fetch(endpoint, {
|
|
111
|
+
method: 'POST',
|
|
112
|
+
headers,
|
|
113
|
+
body: JSON.stringify(requestBody),
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
if (response.status === 401) {
|
|
117
|
+
_cactusToken = null;
|
|
118
|
+
throw new Error('Authentication failed. Please update your cactusToken.');
|
|
119
|
+
} else if (!response.ok) {
|
|
120
|
+
const errorText = await response.text();
|
|
121
|
+
throw new Error(`HTTP ${response.status}: ${errorText}`);
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
const responseBody = await response.json();
|
|
125
|
+
|
|
126
|
+
if (Array.isArray(responseBody)) {
|
|
127
|
+
throw new Error('Unexpected response format: received array instead of object');
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
if (responseBody.error) {
|
|
131
|
+
throw new Error(`API Error: ${responseBody.error.message}`);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
const candidates = responseBody.candidates;
|
|
135
|
+
if (!candidates || candidates.length === 0) {
|
|
136
|
+
throw new Error('No candidates in response');
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
const content = candidates[0].content;
|
|
140
|
+
const responseParts = content.parts;
|
|
141
|
+
if (!responseParts || responseParts.length === 0) {
|
|
142
|
+
throw new Error('No parts in response');
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
return responseParts[0].text || '';
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
export async function getTextCompletion(prompt: string): Promise<string> {
|
|
149
|
+
return getVertexAICompletion(prompt);
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
export async function getVisionCompletion(prompt: string, imagePath: string): Promise<string> {
|
|
153
|
+
return getVertexAICompletion(prompt, undefined, imagePath);
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
export async function getVisionCompletionFromData(prompt: string, imageData: string, mimeType?: string): Promise<string> {
|
|
157
|
+
return getVertexAICompletion(prompt, imageData, undefined, mimeType);
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
function detectMimeType(filePath: string): string {
|
|
161
|
+
const extension = filePath.toLowerCase().split('.').pop();
|
|
162
|
+
switch (extension) {
|
|
163
|
+
case 'jpg':
|
|
164
|
+
case 'jpeg':
|
|
165
|
+
return 'image/jpeg';
|
|
166
|
+
case 'png':
|
|
167
|
+
return 'image/png';
|
|
168
|
+
case 'gif':
|
|
169
|
+
return 'image/gif';
|
|
170
|
+
case 'webp':
|
|
171
|
+
return 'image/webp';
|
|
172
|
+
default:
|
|
173
|
+
return 'image/jpeg';
|
|
174
|
+
}
|
|
175
|
+
}
|
package/src/telemetry.ts
CHANGED
|
@@ -1,9 +1,19 @@
|
|
|
1
1
|
import { Platform } from 'react-native'
|
|
2
|
-
import type { ContextParams } from './index';
|
|
3
2
|
// Import package.json to get version
|
|
4
3
|
const packageJson = require('../package.json');
|
|
4
|
+
import { PROJECT_ID } from './projectId';
|
|
5
|
+
|
|
6
|
+
// Model/context options attached to every telemetry record.
// All fields may be null when the corresponding option was not supplied
// (callers coalesce missing config values with `?? null`).
export interface TelemetryParams {
  n_gpu_layers: number | null
  n_ctx: number | null
  model: string | null
}
|
|
5
11
|
|
|
6
12
|
interface TelemetryRecord {
|
|
13
|
+
project_id: string;
|
|
14
|
+
device_id?: string;
|
|
15
|
+
device_manufacturer?: string;
|
|
16
|
+
device_model?: string;
|
|
7
17
|
os: 'iOS' | 'Android';
|
|
8
18
|
os_version: string;
|
|
9
19
|
framework: string;
|
|
@@ -11,9 +21,9 @@ interface TelemetryRecord {
|
|
|
11
21
|
telemetry_payload?: Record<string, any>;
|
|
12
22
|
error_payload?: Record<string, any>;
|
|
13
23
|
timestamp: string;
|
|
14
|
-
model_filename: string;
|
|
15
|
-
n_ctx
|
|
16
|
-
n_gpu_layers
|
|
24
|
+
model_filename: string | null;
|
|
25
|
+
n_ctx: number | null;
|
|
26
|
+
n_gpu_layers: number | null;
|
|
17
27
|
}
|
|
18
28
|
|
|
19
29
|
interface TelemetryConfig {
|
|
@@ -56,37 +66,41 @@ export class Telemetry {
|
|
|
56
66
|
}
|
|
57
67
|
}
|
|
58
68
|
|
|
59
|
-
static track(payload: Record<string, any>, options:
|
|
69
|
+
static track(payload: Record<string, any>, options: TelemetryParams, deviceMetadata?: Record<string, any>): void {
|
|
60
70
|
if (!Telemetry.instance) {
|
|
61
71
|
Telemetry.autoInit();
|
|
62
72
|
}
|
|
63
|
-
Telemetry.instance!.trackInternal(payload, options);
|
|
73
|
+
Telemetry.instance!.trackInternal(payload, options, deviceMetadata);
|
|
64
74
|
}
|
|
65
75
|
|
|
66
|
-
static error(error: Error, options:
|
|
76
|
+
static error(error: Error, options: TelemetryParams): void {
|
|
67
77
|
if (!Telemetry.instance) {
|
|
68
78
|
Telemetry.autoInit();
|
|
69
79
|
}
|
|
70
80
|
Telemetry.instance!.errorInternal(error, options);
|
|
71
81
|
}
|
|
72
82
|
|
|
73
|
-
private trackInternal(payload: Record<string, any>, options:
|
|
83
|
+
private trackInternal(payload: Record<string, any>, options: TelemetryParams, deviceMetadata?: Record<string, any>): void {
|
|
74
84
|
const record: TelemetryRecord = {
|
|
85
|
+
project_id: PROJECT_ID,
|
|
86
|
+
device_id: deviceMetadata?.deviceId,
|
|
87
|
+
device_manufacturer: deviceMetadata?.make,
|
|
88
|
+
device_model: deviceMetadata?.model,
|
|
75
89
|
os: Platform.OS === 'ios' ? 'iOS' : 'Android',
|
|
76
90
|
os_version: Platform.Version.toString(),
|
|
77
91
|
framework: 'react-native',
|
|
78
92
|
framework_version: packageJson.version,
|
|
79
93
|
telemetry_payload: payload,
|
|
80
94
|
timestamp: new Date().toISOString(),
|
|
81
|
-
model_filename: Telemetry.getFilename(options.model),
|
|
95
|
+
model_filename: Telemetry.getFilename(options.model || ''),
|
|
82
96
|
n_ctx: options.n_ctx,
|
|
83
|
-
n_gpu_layers: options.n_gpu_layers
|
|
97
|
+
n_gpu_layers: options.n_gpu_layers,
|
|
84
98
|
};
|
|
85
99
|
|
|
86
100
|
this.sendRecord(record).catch(() => {});
|
|
87
101
|
}
|
|
88
102
|
|
|
89
|
-
private errorInternal(error: Error, options:
|
|
103
|
+
private errorInternal(error: Error, options: TelemetryParams): void {
|
|
90
104
|
const errorPayload = {
|
|
91
105
|
message: error.message,
|
|
92
106
|
stack: error.stack,
|
|
@@ -94,13 +108,14 @@ export class Telemetry {
|
|
|
94
108
|
};
|
|
95
109
|
|
|
96
110
|
const record: TelemetryRecord = {
|
|
111
|
+
project_id: PROJECT_ID,
|
|
97
112
|
os: Platform.OS === 'ios' ? 'iOS' : 'Android',
|
|
98
113
|
os_version: Platform.Version.toString(),
|
|
99
114
|
framework: 'react-native',
|
|
100
115
|
framework_version: packageJson.version,
|
|
101
116
|
error_payload: errorPayload,
|
|
102
117
|
timestamp: new Date().toISOString(),
|
|
103
|
-
model_filename: Telemetry.getFilename(options.model),
|
|
118
|
+
model_filename: Telemetry.getFilename(options.model || ''),
|
|
104
119
|
n_ctx: options.n_ctx,
|
|
105
120
|
n_gpu_layers: options.n_gpu_layers
|
|
106
121
|
};
|
package/src/vlm.ts
CHANGED
|
@@ -11,6 +11,7 @@ import type {
|
|
|
11
11
|
NativeCompletionResult,
|
|
12
12
|
} from './index'
|
|
13
13
|
import { Telemetry } from './telemetry'
|
|
14
|
+
import { setCactusToken, getTextCompletion, getVisionCompletion } from './remote'
|
|
14
15
|
|
|
15
16
|
interface CactusVLMReturn {
|
|
16
17
|
vlm: CactusVLM | null
|
|
@@ -23,21 +24,25 @@ export type VLMContextParams = ContextParams & {
|
|
|
23
24
|
|
|
24
25
|
// Options for CactusVLM.completion(): the usual completion parameters
// (minus `prompt`, which is built from the chat messages) plus optional
// image paths and an execution mode ('local' | 'remote' | 'localfirst' |
// 'remotefirst'; defaults to 'local').
export type VLMCompletionParams = Omit<CompletionParams, 'prompt'> & {
  images?: string[]
  mode?: string
}
|
|
27
29
|
|
|
28
30
|
export class CactusVLM {
|
|
29
31
|
private context: LlamaContext
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
// Instances are only created through CactusVLM.init(), which owns context
// setup, multimodal-projector initialization, and init error reporting.
private constructor(context: LlamaContext) {
  this.context = context
}
|
|
36
36
|
|
|
37
37
|
static async init(
|
|
38
38
|
params: VLMContextParams,
|
|
39
39
|
onProgress?: (progress: number) => void,
|
|
40
|
+
cactusToken?: string,
|
|
40
41
|
): Promise<CactusVLMReturn> {
|
|
42
|
+
if (cactusToken) {
|
|
43
|
+
setCactusToken(cactusToken);
|
|
44
|
+
}
|
|
45
|
+
|
|
41
46
|
const configs = [
|
|
42
47
|
params,
|
|
43
48
|
{ ...params, n_gpu_layers: 0 }
|
|
@@ -46,11 +51,14 @@ export class CactusVLM {
|
|
|
46
51
|
for (const config of configs) {
|
|
47
52
|
try {
|
|
48
53
|
const context = await initLlama(config, onProgress)
|
|
49
|
-
// Explicitly disable GPU for the multimodal projector for stability.
|
|
50
54
|
await initMultimodal(context.id, params.mmproj, false)
|
|
51
|
-
return {vlm: new CactusVLM(context
|
|
55
|
+
return {vlm: new CactusVLM(context), error: null}
|
|
52
56
|
} catch (e) {
|
|
53
|
-
Telemetry.error(e as Error,
|
|
57
|
+
Telemetry.error(e as Error, {
|
|
58
|
+
n_gpu_layers: config.n_gpu_layers ?? null,
|
|
59
|
+
n_ctx: config.n_ctx ?? null,
|
|
60
|
+
model: config.model ?? null,
|
|
61
|
+
});
|
|
54
62
|
if (configs.indexOf(config) === configs.length - 1) {
|
|
55
63
|
return {vlm: null, error: e as Error}
|
|
56
64
|
}
|
|
@@ -65,40 +73,111 @@ export class CactusVLM {
|
|
|
65
73
|
params: VLMCompletionParams = {},
|
|
66
74
|
callback?: (data: any) => void,
|
|
67
75
|
): Promise<NativeCompletionResult> {
|
|
68
|
-
const
|
|
69
|
-
let firstTokenTime: number | null = null;
|
|
70
|
-
|
|
71
|
-
const wrappedCallback = callback ? (data: any) => {
|
|
72
|
-
if (firstTokenTime === null) firstTokenTime = Date.now();
|
|
73
|
-
callback(data);
|
|
74
|
-
} : undefined;
|
|
76
|
+
const mode = params.mode || 'local';
|
|
75
77
|
|
|
76
78
|
let result: NativeCompletionResult;
|
|
79
|
+
let lastError: Error | null = null;
|
|
80
|
+
|
|
81
|
+
if (mode === 'remote') {
|
|
82
|
+
result = await this._handleRemoteCompletion(messages, params, callback);
|
|
83
|
+
} else if (mode === 'local') {
|
|
84
|
+
result = await this._handleLocalCompletion(messages, params, callback);
|
|
85
|
+
} else if (mode === 'localfirst') {
|
|
86
|
+
try {
|
|
87
|
+
result = await this._handleLocalCompletion(messages, params, callback);
|
|
88
|
+
} catch (e) {
|
|
89
|
+
lastError = e as Error;
|
|
90
|
+
try {
|
|
91
|
+
result = await this._handleRemoteCompletion(messages, params, callback);
|
|
92
|
+
} catch (remoteError) {
|
|
93
|
+
throw lastError;
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
} else if (mode === 'remotefirst') {
|
|
97
|
+
try {
|
|
98
|
+
result = await this._handleRemoteCompletion(messages, params, callback);
|
|
99
|
+
} catch (e) {
|
|
100
|
+
lastError = e as Error;
|
|
101
|
+
try {
|
|
102
|
+
result = await this._handleLocalCompletion(messages, params, callback);
|
|
103
|
+
} catch (localError) {
|
|
104
|
+
throw lastError;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
} else {
|
|
108
|
+
throw new Error('Invalid mode: ' + mode + '. Must be "local", "remote", "localfirst", or "remotefirst"');
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
return result;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
private async _handleLocalCompletion(
|
|
115
|
+
messages: CactusOAICompatibleMessage[],
|
|
116
|
+
params: VLMCompletionParams,
|
|
117
|
+
callback?: (data: any) => void,
|
|
118
|
+
): Promise<NativeCompletionResult> {
|
|
77
119
|
if (params.images && params.images.length > 0) {
|
|
78
120
|
const formattedPrompt = await this.context.getFormattedChat(messages)
|
|
79
121
|
const prompt =
|
|
80
122
|
typeof formattedPrompt === 'string'
|
|
81
123
|
? formattedPrompt
|
|
82
124
|
: formattedPrompt.prompt
|
|
83
|
-
|
|
125
|
+
return await multimodalCompletion(
|
|
84
126
|
this.context.id,
|
|
85
127
|
prompt,
|
|
86
128
|
params.images,
|
|
87
129
|
{ ...params, prompt, emit_partial_completion: !!callback },
|
|
88
130
|
)
|
|
89
131
|
} else {
|
|
90
|
-
|
|
132
|
+
return await this.context.completion({ messages, ...params }, callback)
|
|
91
133
|
}
|
|
92
|
-
|
|
93
|
-
Telemetry.track({
|
|
94
|
-
event: 'completion',
|
|
95
|
-
tok_per_sec: (result as any).timings?.predicted_per_second,
|
|
96
|
-
toks_generated: (result as any).timings?.predicted_n,
|
|
97
|
-
ttft: firstTokenTime ? firstTokenTime - startTime : null,
|
|
98
|
-
num_images: params.images?.length,
|
|
99
|
-
}, this.initParams);
|
|
134
|
+
}
|
|
100
135
|
|
|
101
|
-
|
|
136
|
+
private async _handleRemoteCompletion(
|
|
137
|
+
messages: CactusOAICompatibleMessage[],
|
|
138
|
+
params: VLMCompletionParams,
|
|
139
|
+
callback?: (data: any) => void,
|
|
140
|
+
): Promise<NativeCompletionResult> {
|
|
141
|
+
const prompt = messages.map((m) => `${m.role}: ${m.content}`).join('\n');
|
|
142
|
+
const imagePath = params.images && params.images.length > 0 ? params.images[0] : '';
|
|
143
|
+
|
|
144
|
+
let responseText: string;
|
|
145
|
+
if (imagePath) {
|
|
146
|
+
responseText = await getVisionCompletion(prompt, imagePath);
|
|
147
|
+
} else {
|
|
148
|
+
responseText = await getTextCompletion(prompt);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
if (callback) {
|
|
152
|
+
for (let i = 0; i < responseText.length; i++) {
|
|
153
|
+
callback({ token: responseText[i] });
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
return {
|
|
158
|
+
text: responseText,
|
|
159
|
+
reasoning_content: '',
|
|
160
|
+
tool_calls: [],
|
|
161
|
+
content: responseText,
|
|
162
|
+
tokens_predicted: responseText.split(' ').length,
|
|
163
|
+
tokens_evaluated: prompt.split(' ').length,
|
|
164
|
+
truncated: false,
|
|
165
|
+
stopped_eos: true,
|
|
166
|
+
stopped_word: '',
|
|
167
|
+
stopped_limit: 0,
|
|
168
|
+
stopping_word: '',
|
|
169
|
+
tokens_cached: 0,
|
|
170
|
+
timings: {
|
|
171
|
+
prompt_n: prompt.split(' ').length,
|
|
172
|
+
prompt_ms: 0,
|
|
173
|
+
prompt_per_token_ms: 0,
|
|
174
|
+
prompt_per_second: 0,
|
|
175
|
+
predicted_n: responseText.split(' ').length,
|
|
176
|
+
predicted_ms: 0,
|
|
177
|
+
predicted_per_token_ms: 0,
|
|
178
|
+
predicted_per_second: 0,
|
|
179
|
+
},
|
|
180
|
+
};
|
|
102
181
|
}
|
|
103
182
|
|
|
104
183
|
async rewind(): Promise<void> {
|