@elizaos/app-core 2.0.11-beta.5 → 2.0.11-beta.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elizaos/app-core",
3
- "version": "2.0.11-beta.5",
3
+ "version": "2.0.11-beta.6",
4
4
  "description": "Shared application core for elizaOS white-label agent apps.",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -150,22 +150,22 @@
150
150
  "three": "^0.184.0"
151
151
  },
152
152
  "optionalDependencies": {
153
- "@elizaos/capacitor-appblocker": "2.0.11-beta.5",
154
- "@elizaos/capacitor-bun-runtime": "2.0.11-beta.5",
155
- "@elizaos/capacitor-camera": "2.0.11-beta.5",
156
- "@elizaos/capacitor-canvas": "2.0.11-beta.5",
157
- "@elizaos/capacitor-contacts": "2.0.11-beta.5",
158
- "@elizaos/capacitor-gateway": "2.0.11-beta.5",
159
- "@elizaos/capacitor-location": "2.0.11-beta.5",
160
- "@elizaos/capacitor-messages": "2.0.11-beta.5",
161
- "@elizaos/capacitor-mobile-agent-bridge": "2.0.11-beta.5",
162
- "@elizaos/capacitor-mobile-signals": "2.0.11-beta.5",
163
- "@elizaos/capacitor-phone": "2.0.11-beta.5",
164
- "@elizaos/capacitor-screencapture": "2.0.11-beta.5",
165
- "@elizaos/capacitor-swabble": "2.0.11-beta.5",
166
- "@elizaos/capacitor-system": "2.0.11-beta.5",
167
- "@elizaos/capacitor-talkmode": "2.0.11-beta.5",
168
- "@elizaos/capacitor-websiteblocker": "2.0.11-beta.5",
153
+ "@elizaos/capacitor-appblocker": "2.0.11-beta.6",
154
+ "@elizaos/capacitor-bun-runtime": "2.0.11-beta.6",
155
+ "@elizaos/capacitor-camera": "2.0.11-beta.6",
156
+ "@elizaos/capacitor-canvas": "2.0.11-beta.6",
157
+ "@elizaos/capacitor-contacts": "2.0.11-beta.6",
158
+ "@elizaos/capacitor-gateway": "2.0.11-beta.6",
159
+ "@elizaos/capacitor-location": "2.0.11-beta.6",
160
+ "@elizaos/capacitor-messages": "2.0.11-beta.6",
161
+ "@elizaos/capacitor-mobile-agent-bridge": "2.0.11-beta.6",
162
+ "@elizaos/capacitor-mobile-signals": "2.0.11-beta.6",
163
+ "@elizaos/capacitor-phone": "2.0.11-beta.6",
164
+ "@elizaos/capacitor-screencapture": "2.0.11-beta.6",
165
+ "@elizaos/capacitor-swabble": "2.0.11-beta.6",
166
+ "@elizaos/capacitor-system": "2.0.11-beta.6",
167
+ "@elizaos/capacitor-talkmode": "2.0.11-beta.6",
168
+ "@elizaos/capacitor-websiteblocker": "2.0.11-beta.6",
169
169
  "sharp": "^0.34.5"
170
170
  },
171
171
  "dependencies": {
@@ -179,23 +179,23 @@
179
179
  "@capacitor/preferences": "^8.0.1",
180
180
  "@capacitor/push-notifications": "^8.0.0",
181
181
  "@clack/prompts": "^1.0.0",
182
- "@elizaos/core": "2.0.11-beta.5",
183
- "@elizaos/plugin-anthropic": "2.0.11-beta.5",
184
- "@elizaos/plugin-browser": "2.0.11-beta.5",
185
- "@elizaos/plugin-edge-tts": "2.0.11-beta.5",
186
- "@elizaos/plugin-elizacloud": "2.0.11-beta.5",
187
- "@elizaos/plugin-groq": "2.0.11-beta.5",
188
- "@elizaos/plugin-health": "2.0.11-beta.5",
189
- "@elizaos/plugin-local-inference": "2.0.11-beta.5",
190
- "@elizaos/plugin-openai": "2.0.11-beta.5",
191
- "@elizaos/plugin-registry": "2.0.11-beta.5",
192
- "@elizaos/plugin-sql": "2.0.11-beta.5",
193
- "@elizaos/plugin-wechat": "2.0.11-beta.5",
194
- "@elizaos/plugin-workflow": "2.0.11-beta.5",
195
- "@elizaos/shared": "2.0.11-beta.5",
196
- "@elizaos/skills": "2.0.11-beta.5",
197
- "@elizaos/ui": "2.0.11-beta.5",
198
- "@elizaos/vault": "2.0.11-beta.5",
182
+ "@elizaos/core": "2.0.11-beta.6",
183
+ "@elizaos/plugin-anthropic": "2.0.11-beta.6",
184
+ "@elizaos/plugin-browser": "2.0.11-beta.6",
185
+ "@elizaos/plugin-edge-tts": "2.0.11-beta.6",
186
+ "@elizaos/plugin-elizacloud": "2.0.11-beta.6",
187
+ "@elizaos/plugin-groq": "2.0.11-beta.6",
188
+ "@elizaos/plugin-health": "2.0.11-beta.6",
189
+ "@elizaos/plugin-local-inference": "2.0.11-beta.6",
190
+ "@elizaos/plugin-openai": "2.0.11-beta.6",
191
+ "@elizaos/plugin-registry": "2.0.11-beta.6",
192
+ "@elizaos/plugin-sql": "2.0.11-beta.6",
193
+ "@elizaos/plugin-wechat": "2.0.11-beta.6",
194
+ "@elizaos/plugin-workflow": "2.0.11-beta.6",
195
+ "@elizaos/shared": "2.0.11-beta.6",
196
+ "@elizaos/skills": "2.0.11-beta.6",
197
+ "@elizaos/ui": "2.0.11-beta.6",
198
+ "@elizaos/vault": "2.0.11-beta.6",
199
199
  "@node-rs/argon2": "^2.0.2",
200
200
  "@upstash/redis": "^1.37.0",
201
201
  "chalk": "^5.3.0",
@@ -216,5 +216,5 @@
216
216
  },
217
217
  "module": "./index.js",
218
218
  "types": "./index.d.ts",
219
- "gitHead": "3d7c571a6384249fb4c6cea103920adedd8f7b5f"
219
+ "gitHead": "61487df01a63da719a96b3b368cc63c45e22c092"
220
220
  }
@@ -63,12 +63,37 @@ add_library(elizainference SHARED IMPORTED)
63
63
  set_target_properties(elizainference PROPERTIES
64
64
  IMPORTED_LOCATION "${ELIZAVOICE_PREBUILT_SO}")
65
65
 
66
# Optional sibling libraries of libelizainference.so.
#
# The DYNAMIC-Vulkan variant of libelizainference.so links its ggml/llama/mtmd
# backends as separate shared objects, so that libggml-vulkan.so can dlopen the
# device's libvulkan.so at runtime (only possible in the bionic app process,
# never the musl agent). Each sibling that is staged next to the prebuilt is
# imported here so the elizavoicejni link can resolve libelizainference's
# DT_NEEDED entries; the bionic loader pulls them in when
# System.loadLibrary("elizainference") runs.
#
# A CPU-only static-fused libelizainference.so ships none of these siblings,
# which is why every import is conditional on the file being present: the same
# list serves both variants. ELIZAVOICE_DEP_TARGETS ends up holding the imported
# targets that were actually found (possibly none).
set(ELIZAVOICE_LIB_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../jniLibs/${ANDROID_ABI}")
set(ELIZAVOICE_DEP_TARGETS "")
set(eliza_dep_names
  ggml
  ggml-base
  ggml-cpu
  ggml-vulkan
  llama
  llama-common
  mtmd
)
foreach(eliza_dep_name IN LISTS eliza_dep_names)
  set(eliza_dep_path "${ELIZAVOICE_LIB_DIR}/lib${eliza_dep_name}.so")
  if(NOT EXISTS "${eliza_dep_path}")
    # Not staged for this variant/ABI: nothing to import.
    continue()
  endif()
  add_library(eliza_dep_${eliza_dep_name} SHARED IMPORTED)
  set_property(TARGET eliza_dep_${eliza_dep_name}
    PROPERTY IMPORTED_LOCATION "${eliza_dep_path}")
  list(APPEND ELIZAVOICE_DEP_TARGETS eliza_dep_${eliza_dep_name})
endforeach()
# Scratch variables only; keep the directory scope clean.
unset(eliza_dep_path)
unset(eliza_dep_names)
84
+
66
85
  find_library(LOG_LIB log)
67
86
 
68
87
  add_library(elizavoicejni SHARED elizavoice-jni.cpp)
69
88
  target_include_directories(elizavoicejni PRIVATE "${ELIZAVOICE_FFI_INCLUDE_DIR}")
70
89
  target_compile_options(elizavoicejni PRIVATE -O2 -DNDEBUG)
71
- target_link_libraries(elizavoicejni PRIVATE elizainference ${LOG_LIB})
90
# Link the JNI shim against the prebuilt inference library, whichever sibling
# backend libraries were imported above (the list may be empty), and liblog.
target_link_libraries(elizavoicejni
  PRIVATE
    elizainference
    ${ELIZAVOICE_DEP_TARGETS}
    ${LOG_LIB}
)

# Why --allow-shlib-undefined: libggml-vulkan.so has a NEEDED entry for the
# device's libvulkan.so, which is not a link input (it resolves from
# /system/lib64 at runtime in the app process). The shim itself never calls
# Vulkan symbols directly, so leaving that transitive NEEDED unresolved at link
# time is correct.
target_link_options(elizavoicejni
  PRIVATE
    "-Wl,--allow-shlib-undefined"
)
72
97
 
73
98
  message(STATUS "[elizavoice-jni] building bionic JNI bridge for ${ANDROID_ABI}")
74
99
  message(STATUS "[elizavoice-jni] FFI header: ${ELIZAVOICE_FFI_INCLUDE_DIR}")
@@ -1042,4 +1042,308 @@ Java_ai_elizaos_app_ElizaVoiceNative_nativeVadSelfTest(JNIEnv* env, jclass,
1042
1042
  return to_jstring(env, j);
1043
1043
  }
1044
1044
 
1045
+ // ── Text generation (LLM) ops — the GPU-accelerated text path ────────────
1046
+ //
1047
+ // Wrap the fused streaming-LLM ABI (eliza_inference_llm_stream_*), pooled
1048
+ // embeddings (eliza_inference_embed), end-of-turn scoring
1049
+ // (eliza_inference_llm_eot_score, ABI v11), and the tokenizer. When this JNI
1050
+ // host is built against the DYNAMIC-Vulkan libelizainference (libggml-vulkan.so
1051
+ // staged alongside it), llm_stream_open offloads the model to the GPU in the
1052
+ // bionic app process automatically — the path the musl bun agent cannot take.
1053
+
1054
+ JNIEXPORT jint JNICALL
1055
+ Java_ai_elizaos_app_ElizaVoiceNative_nativeLlmStreamSupported(JNIEnv*, jclass) {
1056
+ return static_cast<jint>(eliza_inference_llm_stream_supported());
1057
+ }
1058
+
1059
+ JNIEXPORT jint JNICALL
1060
+ Java_ai_elizaos_app_ElizaVoiceNative_nativeEmbedSupported(JNIEnv*, jclass) {
1061
+ return static_cast<jint>(eliza_inference_embed_supported());
1062
+ }
1063
+
1064
+ JNIEXPORT jint JNICALL
1065
+ Java_ai_elizaos_app_ElizaVoiceNative_nativeEotSupported(JNIEnv*, jclass) {
1066
+ return static_cast<jint>(eliza_inference_llm_eot_supported());
1067
+ }
1068
+
1069
+ // Tokenize text -> int[] token ids. addSpecial adds BOS; parseSpecial renders
1070
+ // special tokens (<|im_start|> etc.) from the input.
1071
+ JNIEXPORT jintArray JNICALL
1072
+ Java_ai_elizaos_app_ElizaVoiceNative_nativeTokenize(JNIEnv* env, jclass,
1073
+ jlong ctxHandle,
1074
+ jstring jText,
1075
+ jboolean addSpecial,
1076
+ jboolean parseSpecial) {
1077
+ auto* ctx = reinterpret_cast<EliInferenceContext*>(ctxHandle);
1078
+ const std::string text = from_jstring(env, jText);
1079
+ int* toks = nullptr;
1080
+ size_t n = 0;
1081
+ char* outError = nullptr;
1082
+ const int rc = eliza_inference_tokenize(
1083
+ ctx, text.c_str(), text.size(), addSpecial ? 1 : 0,
1084
+ parseSpecial ? 1 : 0, &toks, &n, &outError);
1085
+ if (rc != ELIZA_OK) {
1086
+ throw_runtime(env, "tokenize", outError);
1087
+ return nullptr;
1088
+ }
1089
+ jintArray out = env->NewIntArray(static_cast<jsize>(n));
1090
+ if (out && n > 0) {
1091
+ env->SetIntArrayRegion(out, 0, static_cast<jsize>(n),
1092
+ reinterpret_cast<const jint*>(toks));
1093
+ }
1094
+ if (toks) eliza_inference_free_tokens(toks);
1095
+ return out;
1096
+ }
1097
+
1098
+ // Pooled, L2-normalized sentence embedding (pooling: 1=MEAN default) ->
1099
+ // float[n_embd].
1100
+ JNIEXPORT jfloatArray JNICALL
1101
+ Java_ai_elizaos_app_ElizaVoiceNative_nativeEmbed(JNIEnv* env, jclass,
1102
+ jlong ctxHandle, jstring jText,
1103
+ jint pooling) {
1104
+ auto* ctx = reinterpret_cast<EliInferenceContext*>(ctxHandle);
1105
+ const std::string text = from_jstring(env, jText);
1106
+ std::vector<float> out(4096, 0.0f);
1107
+ int dim = 0;
1108
+ char* outError = nullptr;
1109
+ const int rc = eliza_inference_embed(ctx, text.c_str(), text.size(),
1110
+ pooling > 0 ? pooling : 1, out.data(),
1111
+ out.size(), &dim, &outError);
1112
+ if (rc != ELIZA_OK) {
1113
+ throw_runtime(env, "embed", outError);
1114
+ return nullptr;
1115
+ }
1116
+ jfloatArray ja = env->NewFloatArray(dim);
1117
+ if (ja && dim > 0) env->SetFloatArrayRegion(ja, 0, dim, out.data());
1118
+ return ja;
1119
+ }
1120
+
1121
+ // End-of-turn score: next-token P(targetToken | tokens) -> float.
1122
+ JNIEXPORT jfloat JNICALL
1123
+ Java_ai_elizaos_app_ElizaVoiceNative_nativeEotScore(JNIEnv* env, jclass,
1124
+ jlong ctxHandle,
1125
+ jintArray jTokens,
1126
+ jint targetToken) {
1127
+ auto* ctx = reinterpret_cast<EliInferenceContext*>(ctxHandle);
1128
+ const jsize n = env->GetArrayLength(jTokens);
1129
+ std::vector<int32_t> toks(static_cast<size_t>(n));
1130
+ if (n > 0) {
1131
+ env->GetIntArrayRegion(jTokens, 0, n,
1132
+ reinterpret_cast<jint*>(toks.data()));
1133
+ }
1134
+ float prob = 0.0f, topProb = 0.0f;
1135
+ int32_t topTok = -1;
1136
+ char* outError = nullptr;
1137
+ const int rc = eliza_inference_llm_eot_score(ctx, toks.data(), toks.size(),
1138
+ targetToken, &prob, &topTok,
1139
+ &topProb, &outError);
1140
+ if (rc != ELIZA_OK) {
1141
+ throw_runtime(env, "eot_score", outError);
1142
+ return 0.0f;
1143
+ }
1144
+ return prob;
1145
+ }
1146
+
1147
+ // Open a streaming-LLM session. nGpuLayers: -1 = all-GPU (default), 0 = CPU
1148
+ // (the lib ignores 0 when libggml-vulkan is linked; the CPU/GPU choice is the
1149
+ // staged LIB VARIANT, see the per-device selection). drafterPath ("" = none)
1150
+ // enables MTP speculative decoding.
1151
+ JNIEXPORT jlong JNICALL
1152
+ Java_ai_elizaos_app_ElizaVoiceNative_nativeLlmStreamOpen(
1153
+ JNIEnv* env, jclass, jlong ctxHandle, jint maxTokens, jfloat temperature,
1154
+ jfloat topP, jint topK, jint nGpuLayers, jstring jDrafterPath) {
1155
+ auto* ctx = reinterpret_cast<EliInferenceContext*>(ctxHandle);
1156
+ const std::string drafter = from_jstring(env, jDrafterPath);
1157
+ eliza_llm_stream_config_t cfg;
1158
+ std::memset(&cfg, 0, sizeof(cfg));
1159
+ cfg.max_tokens = maxTokens;
1160
+ cfg.temperature = temperature;
1161
+ cfg.top_p = topP > 0 ? topP : 1.0f;
1162
+ cfg.top_k = topK;
1163
+ cfg.repeat_penalty = 1.0f;
1164
+ cfg.n_gpu_layers = nGpuLayers;
1165
+ cfg.mtp_drafter_path = drafter.empty() ? nullptr : drafter.c_str();
1166
+ char* outError = nullptr;
1167
+ EliLlmStream* s = eliza_inference_llm_stream_open(ctx, &cfg, &outError);
1168
+ if (!s) {
1169
+ throw_runtime(env, "llm_stream_open returned null", outError);
1170
+ return 0;
1171
+ }
1172
+ return reinterpret_cast<jlong>(s);
1173
+ }
1174
+
1175
+ JNIEXPORT void JNICALL
1176
+ Java_ai_elizaos_app_ElizaVoiceNative_nativeLlmStreamPrefill(JNIEnv* env, jclass,
1177
+ jlong streamHandle,
1178
+ jintArray jTokens) {
1179
+ auto* s = reinterpret_cast<EliLlmStream*>(streamHandle);
1180
+ const jsize n = env->GetArrayLength(jTokens);
1181
+ std::vector<int32_t> toks(static_cast<size_t>(n));
1182
+ if (n > 0) {
1183
+ env->GetIntArrayRegion(jTokens, 0, n,
1184
+ reinterpret_cast<jint*>(toks.data()));
1185
+ }
1186
+ char* outError = nullptr;
1187
+ const int rc = eliza_inference_llm_stream_prefill(s, toks.data(),
1188
+ toks.size(), &outError);
1189
+ if (rc != ELIZA_OK) throw_runtime(env, "llm_stream_prefill", outError);
1190
+ }
1191
+
1192
+ // Pull the next decode step. Returns JSON {text, done, drafted, accepted}:
1193
+ // `text` is the detokenized chunk (may span multiple committed tokens via MTP),
1194
+ // `done` true at the final step. `text` is JSON-escaped.
1195
+ JNIEXPORT jstring JNICALL
1196
+ Java_ai_elizaos_app_ElizaVoiceNative_nativeLlmStreamNext(JNIEnv* env, jclass,
1197
+ jlong streamHandle) {
1198
+ auto* s = reinterpret_cast<EliLlmStream*>(streamHandle);
1199
+ int32_t toks[256];
1200
+ char text[4096];
1201
+ size_t nout = 0;
1202
+ int32_t drafted = 0, accepted = 0;
1203
+ char* outError = nullptr;
1204
+ const int rc = eliza_inference_llm_stream_next(
1205
+ s, toks, 256, &nout, text, sizeof(text), &drafted, &accepted, &outError);
1206
+ if (rc < 0) {
1207
+ throw_runtime(env, "llm_stream_next", outError);
1208
+ return nullptr;
1209
+ }
1210
+ std::string esc;
1211
+ for (const char* p = text; *p; ++p) {
1212
+ switch (*p) {
1213
+ case '"': esc += "\\\""; break;
1214
+ case '\\': esc += "\\\\"; break;
1215
+ case '\n': esc += "\\n"; break;
1216
+ case '\r': esc += "\\r"; break;
1217
+ case '\t': esc += "\\t"; break;
1218
+ default:
1219
+ if (static_cast<unsigned char>(*p) < 0x20) {
1220
+ char buf[8];
1221
+ std::snprintf(buf, sizeof(buf), "\\u%04x",
1222
+ static_cast<unsigned char>(*p));
1223
+ esc += buf;
1224
+ } else {
1225
+ esc += *p;
1226
+ }
1227
+ }
1228
+ }
1229
+ std::string json = "{\"text\":\"" + esc +
1230
+ "\",\"done\":" + (rc == 1 ? "true" : "false") +
1231
+ ",\"drafted\":" + std::to_string(drafted) +
1232
+ ",\"accepted\":" + std::to_string(accepted) + "}";
1233
+ return to_jstring(env, json);
1234
+ }
1235
+
1236
+ JNIEXPORT void JNICALL
1237
+ Java_ai_elizaos_app_ElizaVoiceNative_nativeLlmStreamClose(JNIEnv*, jclass,
1238
+ jlong streamHandle) {
1239
+ eliza_inference_llm_stream_close(
1240
+ reinterpret_cast<EliLlmStream*>(streamHandle));
1241
+ }
1242
+
1243
+ // ── LLM self-test (one native call: ctx→tokenize→stream→generate) ─────────
1244
+ //
1245
+ // THE KEYSTONE PROOF: runs a whole greedy text generation in ONE native call,
1246
+ // in the bionic app process, against whatever libelizainference.so is staged
1247
+ // into jniLibs. When that lib is the dynamic-Vulkan variant, ggml-vulkan logs
1248
+ // "Found 1 Vulkan devices: Mali-G715" + "offloaded N/N layers to GPU" to
1249
+ // logcat (the in-process GPU evidence). Returns JSON {ok,text,tokens,ms,tokS}.
1250
+ JNIEXPORT jstring JNICALL
1251
+ Java_ai_elizaos_app_ElizaVoiceNative_nativeLlmSelfTest(JNIEnv* env, jclass,
1252
+ jstring jBundleDir,
1253
+ jstring jPrompt,
1254
+ jint maxTokens) {
1255
+ const std::string bundleDir = from_jstring(env, jBundleDir);
1256
+ const std::string prompt = from_jstring(env, jPrompt);
1257
+ const int genCap = maxTokens > 0 ? maxTokens : 32;
1258
+ char* outError = nullptr;
1259
+
1260
+ EliInferenceContext* ctx =
1261
+ eliza_inference_create(bundleDir.c_str(), &outError);
1262
+ if (!ctx) { throw_runtime(env, "llmSelfTest: create", outError); return nullptr; }
1263
+
1264
+ int* tok = nullptr; size_t tn = 0;
1265
+ if (eliza_inference_tokenize(ctx, prompt.c_str(), prompt.size(), 1, 1, &tok,
1266
+ &tn, &outError) != ELIZA_OK) {
1267
+ eliza_inference_destroy(ctx);
1268
+ throw_runtime(env, "llmSelfTest: tokenize", outError);
1269
+ return nullptr;
1270
+ }
1271
+
1272
+ eliza_llm_stream_config_t cfg;
1273
+ std::memset(&cfg, 0, sizeof(cfg));
1274
+ cfg.max_tokens = genCap;
1275
+ cfg.temperature = 0.0f; // greedy, deterministic
1276
+ cfg.top_k = 1;
1277
+ cfg.top_p = 1.0f;
1278
+ cfg.repeat_penalty = 1.0f;
1279
+ cfg.n_gpu_layers = -1; // all-GPU when the vulkan lib is staged
1280
+ EliLlmStream* s = eliza_inference_llm_stream_open(ctx, &cfg, &outError);
1281
+ if (!s) {
1282
+ if (tok) eliza_inference_free_tokens(tok);
1283
+ eliza_inference_destroy(ctx);
1284
+ throw_runtime(env, "llmSelfTest: stream_open", outError);
1285
+ return nullptr;
1286
+ }
1287
+
1288
+ const double t0 = []() {
1289
+ timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts);
1290
+ return ts.tv_sec * 1000.0 + ts.tv_nsec / 1e6;
1291
+ }();
1292
+ if (eliza_inference_llm_stream_prefill(s, reinterpret_cast<int32_t*>(tok),
1293
+ tn, &outError) != ELIZA_OK) {
1294
+ eliza_inference_llm_stream_close(s);
1295
+ if (tok) eliza_inference_free_tokens(tok);
1296
+ eliza_inference_destroy(ctx);
1297
+ throw_runtime(env, "llmSelfTest: prefill", outError);
1298
+ return nullptr;
1299
+ }
1300
+
1301
+ std::string text;
1302
+ int produced = 0;
1303
+ while (produced < genCap) {
1304
+ int32_t toks[256]; char chunk[4096]; size_t nout = 0;
1305
+ int32_t dd = 0, da = 0;
1306
+ const int rc = eliza_inference_llm_stream_next(
1307
+ s, toks, 256, &nout, chunk, sizeof(chunk), &dd, &da, &outError);
1308
+ if (rc < 0) break;
1309
+ text += chunk;
1310
+ produced += static_cast<int>(nout);
1311
+ if (rc == 1) break;
1312
+ }
1313
+ const double t1 = []() {
1314
+ timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts);
1315
+ return ts.tv_sec * 1000.0 + ts.tv_nsec / 1e6;
1316
+ }();
1317
+ eliza_inference_llm_stream_close(s);
1318
+ if (tok) eliza_inference_free_tokens(tok);
1319
+ eliza_inference_destroy(ctx);
1320
+
1321
+ const double ms = t1 - t0;
1322
+ const double tokS = ms > 0 ? produced * 1000.0 / ms : 0.0;
1323
+ LOGI("LLM SELFTEST: generated %d tokens in %.0fms (%.2f tok/s) — \"%.80s\"",
1324
+ produced, ms, tokS, text.c_str());
1325
+
1326
+ // JSON-escape the generated text.
1327
+ std::string esc;
1328
+ for (char c : text) {
1329
+ switch (c) {
1330
+ case '"': esc += "\\\""; break;
1331
+ case '\\': esc += "\\\\"; break;
1332
+ case '\n': esc += "\\n"; break;
1333
+ case '\r': esc += "\\r"; break;
1334
+ case '\t': esc += "\\t"; break;
1335
+ default:
1336
+ if (static_cast<unsigned char>(c) < 0x20) {
1337
+ char b[8]; std::snprintf(b, sizeof(b), "\\u%04x",
1338
+ static_cast<unsigned char>(c));
1339
+ esc += b;
1340
+ } else esc += c;
1341
+ }
1342
+ }
1343
+ std::string json = "{\"ok\":true,\"tokens\":" + std::to_string(produced) +
1344
+ ",\"ms\":" + std::to_string(ms) + ",\"tokS\":" +
1345
+ std::to_string(tokS) + ",\"text\":\"" + esc + "\"}";
1346
+ return to_jstring(env, json);
1347
+ }
1348
+
1045
1349
  } // extern "C"
@@ -157,4 +157,49 @@ final class ElizaVoiceNative {
157
157
 
158
158
  /** Run the whole pipeline (ctx→open→feed→flush) on one PCM buffer in one call. */
159
159
  static native String nativePipelineSelfTest(String bundleDir, float[] pcm, int feedSamples);
160
+
161
// ── Text generation (LLM) — the GPU-accelerated text path ────────────
//
// When this host is built against the dynamic-Vulkan libelizainference
// (libggml-vulkan.so staged alongside), llm_stream_open offloads the model
// to the GPU in the bionic app process — the path the musl bun agent can't
// take. nGpuLayers=-1 means all-GPU (default); the CPU/GPU choice is the
// staged LIB variant, not this flag.
//
// Handles (ctxHandle, streamHandle) are native pointers carried as longs.

/** {@code eliza_inference_llm_stream_supported()}, returned as an int. */
static native int nativeLlmStreamSupported();

/** {@code eliza_inference_embed_supported()}, returned as an int. */
static native int nativeEmbedSupported();

/** {@code eliza_inference_llm_eot_supported()} (ABI v11), returned as an int. */
static native int nativeEotSupported();

/**
 * Tokenize text → int[] token ids. {@code addSpecial} adds BOS;
 * {@code parseSpecial} renders special tokens (e.g. {@code <|im_start|>})
 * from the input.
 */
static native int[] nativeTokenize(long ctxHandle, String text, boolean addSpecial, boolean parseSpecial);

/**
 * Pooled, L2-normalized sentence embedding → float[n_embd].
 * {@code pooling} 1 = MEAN; any value {@code <= 0} is treated as 1.
 */
static native float[] nativeEmbed(long ctxHandle, String text, int pooling);

/** End-of-turn score: next-token P(targetToken | tokens). */
static native float nativeEotScore(long ctxHandle, int[] tokens, int targetToken);

/**
 * Open a streaming-LLM session (nGpuLayers=-1 all-GPU; drafterPath ""=none).
 * A {@code topP <= 0} is treated as 1.0. Returns the session handle to pass
 * to the other {@code nativeLlmStream*} calls.
 */
static native long nativeLlmStreamOpen(long ctxHandle, int maxTokens, float temperature, float topP, int topK, int nGpuLayers, String drafterPath);

/** Feed pre-tokenized prompt tokens into the session KV before the first next(). */
static native void nativeLlmStreamPrefill(long streamHandle, int[] tokens);

/**
 * Pull the next decode step → JSON {text, done, drafted, accepted}.
 * {@code text} is JSON-escaped; {@code done} is true at the final step.
 */
static native String nativeLlmStreamNext(long streamHandle);

/** Close a session opened with {@link #nativeLlmStreamOpen}. */
static native void nativeLlmStreamClose(long streamHandle);

/**
 * KEYSTONE proof: run a whole greedy text generation in one native call,
 * in the bionic app process. With the dynamic-Vulkan lib staged, ggml-vulkan
 * logs the Mali device + layer offload to logcat. Returns JSON
 * {ok, text, tokens, ms, tokS}. A {@code maxTokens <= 0} falls back to 32.
 */
static native String nativeLlmSelfTest(String bundleDir, String prompt, int maxTokens);
160
205
  }
@@ -164,6 +164,54 @@ public class ElizaVoicePlugin extends Plugin {
164
164
  }
165
165
  }
166
166
 
167
+ // ── Text generation (LLM) — GPU-accelerated path in the bionic app process ──
168
+
169
+ /**
170
+ * Capability probe for the text path. With the dynamic-Vulkan
171
+ * libelizainference staged, llmStream is supported and runs on the Mali GPU
172
+ * in THIS process (the musl bun agent can't reach libvulkan).
173
+ */
174
+ @PluginMethod
175
+ public void llmAbiProbe(PluginCall call) {
176
+ if (!ensureLoadedOrReject(call)) return;
177
+ try {
178
+ JSObject r = new JSObject();
179
+ r.put("loaded", true);
180
+ r.put("abi", ElizaVoiceNative.nativeVoiceAbiVersion());
181
+ r.put("llmStream", ElizaVoiceNative.nativeLlmStreamSupported());
182
+ r.put("embed", ElizaVoiceNative.nativeEmbedSupported());
183
+ r.put("eot", ElizaVoiceNative.nativeEotSupported());
184
+ Log.i(TAG, "llmAbiProbe " + r.toString());
185
+ call.resolve(r);
186
+ } catch (Throwable e) {
187
+ call.reject("llmAbiProbe failed: " + e.getMessage());
188
+ }
189
+ }
190
+
191
+ /**
192
+ * KEYSTONE proof: run a whole greedy generation in one native call, in the
193
+ * bionic app process. ggml-vulkan logs the Mali device + layer offload to
194
+ * logcat; the returned JSON carries {ok, text, tokens, ms, tokS}.
195
+ */
196
+ @PluginMethod
197
+ public void llmSelfTest(PluginCall call) {
198
+ if (!ensureLoadedOrReject(call)) return;
199
+ String bundleDir = resolveBundleDir(call.getString("bundleDir"));
200
+ String prompt = call.getString("prompt",
201
+ "<|im_start|>user\nWrite one sentence about the ocean.<|im_end|>\n<|im_start|>assistant\n");
202
+ Integer maxTokens = call.getInt("maxTokens", 48);
203
+ try {
204
+ String json = ElizaVoiceNative.nativeLlmSelfTest(
205
+ bundleDir, prompt, maxTokens != null ? maxTokens : 48);
206
+ Log.i(TAG, "llmSelfTest(" + bundleDir + ") -> " + json);
207
+ JSObject r = new JSObject();
208
+ r.put("result", json);
209
+ call.resolve(r);
210
+ } catch (Throwable e) {
211
+ call.reject("llmSelfTest failed: " + e.getMessage());
212
+ }
213
+ }
214
+
167
215
  private String resolveBundleDir(String requested) {
168
216
  if (requested != null && !requested.isEmpty()) return requested;
169
217
  Context context = getContext();
@@ -162,7 +162,7 @@ public class ResourceProbePlugin extends Plugin {
162
162
  ticksPerSecond = 100; // POSIX-conventional default on Android
163
163
  }
164
164
  return ((utime + stime) * 1000.0) / ticksPerSecond;
165
- } catch (IOException | NumberFormatException | RuntimeException e) {
165
+ } catch (IOException | RuntimeException e) {
166
166
  return JSONObject.NULL;
167
167
  }
168
168
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elizaos/electrobun",
3
- "version": "2.0.11-beta.5",
3
+ "version": "2.0.11-beta.6",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "assets",
@@ -25,12 +25,12 @@
25
25
  "build:native-effects": "bash scripts/build-macos-effects.sh"
26
26
  },
27
27
  "dependencies": {
28
- "@elizaos/agent": "2.0.11-beta.5",
29
- "@elizaos/app-core": "2.0.11-beta.5",
30
- "@elizaos/plugin-browser": "2.0.11-beta.5",
31
- "@elizaos/plugin-registry": "2.0.11-beta.5",
32
- "@elizaos/plugin-remote-manifest": "2.0.11-beta.5",
33
- "@elizaos/shared": "2.0.11-beta.5",
28
+ "@elizaos/agent": "2.0.11-beta.6",
29
+ "@elizaos/app-core": "2.0.11-beta.6",
30
+ "@elizaos/plugin-browser": "2.0.11-beta.6",
31
+ "@elizaos/plugin-registry": "2.0.11-beta.6",
32
+ "@elizaos/plugin-remote-manifest": "2.0.11-beta.6",
33
+ "@elizaos/shared": "2.0.11-beta.6",
34
34
  "bonjour-service": "1.3.0",
35
35
  "electrobun": "^1.18.1"
36
36
  },
@@ -44,5 +44,5 @@
44
44
  "publishConfig": {
45
45
  "access": "public"
46
46
  },
47
- "gitHead": "3d7c571a6384249fb4c6cea103920adedd8f7b5f"
47
+ "gitHead": "61487df01a63da719a96b3b368cc63c45e22c092"
48
48
  }