@elizaos/app-core 2.0.0-beta.3 → 2.0.11-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agent-bridge.d.ts +27 -0
- package/agent-bridge.d.ts.map +1 -0
- package/agent-bridge.js +26 -0
- package/api/__tests__/sandbox-test-helpers.js +1 -1
- package/api/auth/audit.js +1 -1
- package/api/auth/auth-context.js +1 -1
- package/api/auth/bootstrap-token.js +2 -2
- package/api/auth/index.d.ts +9 -10
- package/api/auth/index.d.ts.map +1 -1
- package/api/auth/index.js +9 -10
- package/api/auth/passwords.js +2 -2
- package/api/auth/sensitive-rate-limit.d.ts +1 -4
- package/api/auth/sensitive-rate-limit.d.ts.map +1 -1
- package/api/auth/sensitive-rate-limit.js +6 -6
- package/api/auth/sessions.js +2 -2
- package/api/auth-bootstrap-routes.d.ts +6 -13
- package/api/auth-bootstrap-routes.d.ts.map +1 -1
- package/api/auth-bootstrap-routes.js +14 -27
- package/api/auth-pairing-routes.d.ts +17 -0
- package/api/auth-pairing-routes.d.ts.map +1 -0
- package/api/auth-pairing-routes.js +300 -0
- package/api/auth-session-routes.d.ts.map +1 -1
- package/api/auth-session-routes.js +36 -15
- package/api/auth.d.ts +12 -19
- package/api/auth.d.ts.map +1 -1
- package/api/auth.js +32 -27
- package/api/automations-compat-routes.d.ts.map +1 -1
- package/api/automations-compat-routes.js +5 -5
- package/api/background-tasks-routes.d.ts +4 -0
- package/api/background-tasks-routes.d.ts.map +1 -0
- package/api/background-tasks-routes.js +63 -0
- package/api/catalog-routes.js +3 -3
- package/api/cloud-pair-route.d.ts +26 -0
- package/api/cloud-pair-route.d.ts.map +1 -0
- package/api/cloud-pair-route.js +222 -0
- package/api/cloud-voice-routes.d.ts +52 -0
- package/api/cloud-voice-routes.d.ts.map +1 -0
- package/api/cloud-voice-routes.js +50 -0
- package/api/compat-route-shared.d.ts +2 -2
- package/api/compat-route-shared.d.ts.map +1 -1
- package/api/compat-route-shared.js +11 -7
- package/api/credential-resolver.d.ts +2 -2
- package/api/credential-resolver.d.ts.map +1 -1
- package/api/credential-resolver.js +8 -2
- package/api/database-rows-compat-routes.d.ts.map +1 -1
- package/api/database-rows-compat-routes.js +69 -31
- package/api/dev-boot-history.d.ts +26 -0
- package/api/dev-boot-history.d.ts.map +1 -0
- package/api/dev-boot-history.js +69 -0
- package/api/dev-compat-routes.d.ts +5 -0
- package/api/dev-compat-routes.d.ts.map +1 -1
- package/api/dev-compat-routes.js +127 -4
- package/api/dev-console-log.d.ts +2 -2
- package/api/dev-console-log.d.ts.map +1 -1
- package/api/dev-console-log.js +8 -5
- package/api/dev-route-catalog.d.ts +58 -0
- package/api/dev-route-catalog.d.ts.map +1 -0
- package/api/dev-route-catalog.js +447 -0
- package/api/dev-stack.d.ts.map +1 -1
- package/api/dev-stack.js +6 -9
- package/api/first-run-routes.d.ts +4 -0
- package/api/first-run-routes.d.ts.map +1 -0
- package/api/first-run-routes.js +208 -0
- package/api/first-run-tts-route.d.ts +19 -0
- package/api/first-run-tts-route.d.ts.map +1 -0
- package/api/first-run-tts-route.js +59 -0
- package/api/internal-routes.d.ts +23 -0
- package/api/internal-routes.d.ts.map +1 -0
- package/api/internal-routes.js +203 -0
- package/api/ios-local-agent-transport.d.ts +36 -0
- package/api/ios-local-agent-transport.d.ts.map +1 -0
- package/api/ios-local-agent-transport.js +566 -0
- package/api/onboarding-voice-lines.d.ts +23 -0
- package/api/onboarding-voice-lines.d.ts.map +1 -0
- package/api/onboarding-voice-lines.js +8 -0
- package/api/perf-instrument.d.ts +43 -0
- package/api/perf-instrument.d.ts.map +1 -0
- package/api/perf-instrument.js +113 -0
- package/api/response.d.ts.map +1 -1
- package/api/response.js +14 -14
- package/api/runtime-mode-routes.d.ts.map +1 -1
- package/api/runtime-mode-routes.js +2 -2
- package/api/secrets-inventory-routes.js +2 -2
- package/api/secrets-manager-routes.d.ts +1 -1
- package/api/secrets-manager-routes.d.ts.map +1 -1
- package/api/secrets-manager-routes.js +9 -10
- package/api/sensitive-request-routes.js +5 -5
- package/api/server-cors.d.ts.map +1 -1
- package/api/server-cors.js +13 -2
- package/api/server-first-run-helpers.d.ts +26 -0
- package/api/server-first-run-helpers.d.ts.map +1 -0
- package/api/server-first-run-helpers.js +271 -0
- package/api/server-security.js +1 -1
- package/api/server-startup.d.ts.map +1 -1
- package/api/server-startup.js +3 -4
- package/api/server-wallet-trade.js +1 -1
- package/api/server.d.ts +4 -4
- package/api/server.d.ts.map +1 -1
- package/api/server.js +222 -88
- package/api/setup-contract.d.ts +63 -0
- package/api/setup-contract.d.ts.map +1 -0
- package/api/setup-contract.js +39 -0
- package/api/training-benchmarks.d.ts +97 -0
- package/api/training-benchmarks.d.ts.map +1 -0
- package/api/training-benchmarks.js +307 -0
- package/api/workbench-compat-routes.js +2 -2
- package/benchmark/cerebras-autowire.d.ts +28 -0
- package/benchmark/cerebras-autowire.d.ts.map +1 -0
- package/benchmark/cerebras-autowire.js +62 -0
- package/benchmark/lifeops-bench-handler.d.ts +36 -0
- package/benchmark/lifeops-bench-handler.d.ts.map +1 -1
- package/benchmark/lifeops-bench-handler.js +63 -1
- package/benchmark/lifeops-fake-backend.d.ts +39 -0
- package/benchmark/lifeops-fake-backend.d.ts.map +1 -1
- package/benchmark/lifeops-fake-backend.js +993 -21
- package/benchmark/mock-plugin.d.ts.map +1 -1
- package/benchmark/mock-plugin.js +0 -24
- package/benchmark/plugin.d.ts +2 -1
- package/benchmark/plugin.d.ts.map +1 -1
- package/benchmark/plugin.js +989 -68
- package/benchmark/replay-capture.d.ts +2 -2
- package/benchmark/replay-capture.d.ts.map +1 -1
- package/benchmark/replay-capture.js +3 -3
- package/benchmark/server-utils.d.ts +162 -9
- package/benchmark/server-utils.d.ts.map +1 -1
- package/benchmark/server-utils.js +625 -62
- package/benchmark/server.d.ts.map +1 -1
- package/benchmark/server.js +1962 -118
- package/boot-profile.d.ts +3 -0
- package/boot-profile.d.ts.map +1 -0
- package/boot-profile.js +30 -0
- package/browser.d.ts +23 -1
- package/browser.d.ts.map +1 -1
- package/browser.js +20 -1
- package/cli/argv.js +1 -1
- package/cli/banner.js +1 -1
- package/cli/command-format.js +2 -2
- package/cli/doctor/checks.d.ts.map +1 -1
- package/cli/doctor/checks.js +6 -6
- package/cli/plugins-cli.d.ts.map +1 -1
- package/cli/plugins-cli.js +77 -32
- package/cli/profile.d.ts.map +1 -1
- package/cli/profile.js +5 -4
- package/cli/program/build-program.js +4 -4
- package/cli/program/command-registry.d.ts.map +1 -1
- package/cli/program/command-registry.js +13 -11
- package/cli/program/help.js +5 -5
- package/cli/program/preaction.js +5 -5
- package/cli/program/register.auth.d.ts.map +1 -1
- package/cli/program/register.auth.js +6 -12
- package/cli/program/register.capability-router.d.ts +29 -0
- package/cli/program/register.capability-router.d.ts.map +1 -0
- package/cli/program/register.capability-router.js +568 -0
- package/cli/program/register.config.js +1 -1
- package/cli/program/register.configure.d.ts.map +1 -1
- package/cli/program/register.configure.js +1 -1
- package/cli/program/register.dashboard.d.ts.map +1 -1
- package/cli/program/register.dashboard.js +6 -7
- package/cli/program/register.db.d.ts.map +1 -1
- package/cli/program/register.db.js +3 -4
- package/cli/program/register.doctor.js +7 -7
- package/cli/program/register.setup.d.ts.map +1 -1
- package/cli/program/register.setup.js +14 -10
- package/cli/program/register.start.d.ts.map +1 -1
- package/cli/program/register.start.js +5 -3
- package/cli/program/register.subclis.js +3 -3
- package/cli/program/register.update.d.ts +6 -0
- package/cli/program/register.update.d.ts.map +1 -1
- package/cli/program/register.update.js +58 -6
- package/cli/program.js +1 -1
- package/cli/run-main.js +4 -4
- package/config/app-config.d.ts +2 -0
- package/config/app-config.d.ts.map +1 -0
- package/config/app-config.js +1 -0
- package/connectors/capacitor-jsc.d.ts.map +1 -1
- package/connectors/capacitor-jsc.js +16 -10
- package/connectors/capacitor-quickjs.d.ts.map +1 -1
- package/connectors/capacitor-quickjs.js +18 -13
- package/connectors/capacitor-sqlite.d.ts.map +1 -1
- package/connectors/capacitor-sqlite.js +27 -12
- package/dispatch/approval-queue.d.ts +37 -0
- package/dispatch/approval-queue.d.ts.map +1 -0
- package/dispatch/approval-queue.js +25 -0
- package/dispatch/channel-registry.d.ts +30 -0
- package/dispatch/channel-registry.d.ts.map +1 -0
- package/dispatch/channel-registry.js +22 -0
- package/dispatch/connector-registry.d.ts +39 -0
- package/dispatch/connector-registry.d.ts.map +1 -0
- package/dispatch/connector-registry.js +24 -0
- package/dispatch/index.d.ts +14 -0
- package/dispatch/index.d.ts.map +1 -0
- package/dispatch/index.js +13 -0
- package/dispatch/send-policy.d.ts +36 -0
- package/dispatch/send-policy.d.ts.map +1 -0
- package/dispatch/send-policy.js +16 -0
- package/entry.js +28 -11
- package/first-run/first-run-config.d.ts +55 -0
- package/first-run/first-run-config.d.ts.map +1 -0
- package/first-run/first-run-config.js +178 -0
- package/first-run/runtime-target.d.ts +4 -0
- package/first-run/runtime-target.d.ts.map +1 -0
- package/first-run/runtime-target.js +13 -0
- package/index.d.ts +16 -3
- package/index.d.ts.map +1 -1
- package/index.js +57 -33
- package/package.json +159 -50
- package/packaging/debian/apt-repo-config/README.md +18 -0
- package/packaging/debian/apt-repo-config/conf/distributions +11 -0
- package/packaging/flatpak/README.md +26 -16
- package/packaging/flatpak/ai.elizaos.App.metainfo.xml +17 -12
- package/packaging/flatpak/ai.elizaos.App.store.yml +5 -5
- package/packaging/flatpak/ai.elizaos.App.yml +10 -24
- package/packaging/flatpak/elizaos-app-wrapper.store.sh +2 -2
- package/packaging/flatpak/generate-sources.sh +74 -0
- package/packaging/flatpak/node-sources.json +7930 -0
- package/packaging/inno/build-inno.ps1 +34 -9
- package/packaging/msix/AppxManifest.store.xml +1 -1
- package/packaging/msix/README.md +39 -19
- package/packaging/msix/build-msix.ps1 +44 -14
- package/packaging/snap/snapcraft.yaml +22 -21
- package/packaging/test-packaging.sh +2 -2
- package/permissions/types.d.ts +1 -1
- package/permissions/types.js +1 -1
- package/platform/elizaos-agent-browser-stub.d.ts +144 -0
- package/platform/elizaos-agent-browser-stub.d.ts.map +1 -0
- package/platform/elizaos-agent-browser-stub.js +158 -0
- package/platform/elizaos-plugin-elizacloud-browser-stub.d.ts +34 -0
- package/platform/elizaos-plugin-elizacloud-browser-stub.d.ts.map +1 -0
- package/platform/elizaos-plugin-elizacloud-browser-stub.js +51 -0
- package/platform/empty-node-module.d.ts +148 -0
- package/platform/empty-node-module.d.ts.map +1 -1
- package/platform/empty-node-module.js +140 -3
- package/platform/ios-runtime-backends.d.ts +83 -0
- package/platform/ios-runtime-backends.d.ts.map +1 -0
- package/platform/ios-runtime-backends.js +133 -0
- package/platform/ios-runtime-bridge.d.ts +15 -0
- package/platform/ios-runtime-bridge.d.ts.map +1 -0
- package/platform/ios-runtime-bridge.js +527 -0
- package/platform/native-library-policy.d.ts +23 -0
- package/platform/native-library-policy.d.ts.map +1 -0
- package/platform/native-library-policy.js +112 -0
- package/platform/native-plugin-entrypoints.d.ts +19 -0
- package/platform/native-plugin-entrypoints.d.ts.map +1 -0
- package/platform/native-plugin-entrypoints.js +29 -0
- package/platforms/android/README.md +68 -10
- package/platforms/android/app/build.gradle +268 -3
- package/platforms/android/app/capacitor.build.gradle +18 -1
- package/platforms/android/app/proguard-rules.pro +17 -2
- package/platforms/android/app/src/androidTest/java/ai/elizaos/app/ElizaOsInstrumentedTest.java +1 -1
- package/platforms/android/app/src/main/AndroidManifest.xml +334 -17
- package/platforms/android/app/src/main/assets/runners/eliza-tasks.js +177 -0
- package/platforms/android/app/src/main/elizavoice-jni/CMakeLists.txt +100 -0
- package/platforms/android/app/src/main/elizavoice-jni/elizavoice-jni.cpp +1349 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/AgentPlugin.java +111 -171
- package/platforms/android/app/src/main/java/ai/elizaos/app/AndroidVirtualizationBridge.java +284 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/BatteryOptimizationPlugin.java +95 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaAccessibilityService.java +55 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaAgentService.java +1198 -141
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaAndroidSystemBridge.java +83 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaAssistActivity.java +50 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaBootReceiver.java +90 -8
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaBrowserActivity.java +2 -2
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaCalendarActivity.java +1 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaCameraActivity.java +1 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaClockActivity.java +2 -2
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaContactsActivity.java +1 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaDialActivity.java +1 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaInCallService.java +1 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaMmsReceiver.java +1 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaNativeBridge.java +22 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaNotificationListenerService.java +45 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaQuickActionsWidgetProvider.java +68 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaShareActivity.java +132 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaSmsComposeActivity.java +1 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaSmsGatewayService.java +268 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaSmsReceiver.java +12 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaTasksWorker.java +194 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaVoiceCaptureService.java +198 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaVoiceNative.java +205 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaVoicePlugin.java +498 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaVoiceTileService.java +39 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaWorkScheduler.java +60 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/GatewayConnectionService.java +53 -19
- package/platforms/android/app/src/main/java/ai/elizaos/app/MainActivity.java +160 -33
- package/platforms/android/app/src/main/java/ai/elizaos/app/ResourceProbePlugin.java +169 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/VoiceCapturePlugin.java +119 -0
- package/platforms/android/app/src/main/res/drawable/eliza_widget_background.xml +10 -0
- package/platforms/android/app/src/main/res/drawable/eliza_widget_button_background.xml +13 -0
- package/platforms/android/app/src/main/res/drawable/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-land-hdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-land-mdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-land-xhdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-land-xxhdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-land-xxxhdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-port-hdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-port-mdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-port-xhdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-port-xxhdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-port-xxxhdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/layout/eliza_quick_actions_widget.xml +86 -0
- package/platforms/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml +2 -1
- package/platforms/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml +2 -1
- package/platforms/android/app/src/main/res/mipmap-hdpi/ic_launcher.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-hdpi/ic_launcher_monochrome.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-hdpi/ic_launcher_round.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-mdpi/ic_launcher.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-mdpi/ic_launcher_monochrome.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-mdpi/ic_launcher_round.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xhdpi/ic_launcher_monochrome.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_monochrome.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_monochrome.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png +0 -0
- package/platforms/android/app/src/main/res/values/android_app_actions.xml +48 -0
- package/platforms/android/app/src/main/res/values/colors.xml +8 -0
- package/platforms/android/app/src/main/res/values/ic_launcher_background.xml +2 -2
- package/platforms/android/app/src/main/res/values/strings.xml +2 -2
- package/platforms/android/app/src/main/res/values/styles.xml +25 -1
- package/platforms/android/app/src/main/res/xml/eliza_accessibility_service.xml +9 -0
- package/platforms/android/app/src/main/res/xml/eliza_quick_actions_widget.xml +13 -0
- package/platforms/android/app/src/main/res/xml/shortcuts.xml +121 -0
- package/platforms/android/build.gradle +2 -2
- package/platforms/android/capacitor-cordova-android-plugins/build.gradle +9 -3
- package/platforms/android/capacitor-cordova-android-plugins/cordova.variables.gradle +6 -2
- package/platforms/android/capacitor-cordova-android-plugins/src/main/AndroidManifest.xml +7 -2
- package/platforms/android/capacitor-cordova-android-plugins/src/main/java/.gitkeep +0 -1
- package/platforms/android/capacitor.settings.gradle +66 -16
- package/platforms/android/gradle.properties +1 -0
- package/platforms/android/update-manifest/generate-manifest.mjs +97 -0
- package/platforms/android/update-manifest/schema.json +26 -0
- package/platforms/apple-store-entitlements.reviewed.json +155 -0
- package/platforms/electrobun/.generated/brand-config.json +3 -2
- package/platforms/electrobun/LICENSE +21 -0
- package/platforms/electrobun/README.md +15 -1
- package/platforms/electrobun/assets/appIcon.icns +0 -0
- package/platforms/electrobun/assets/appIcon.ico +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_128x128.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_128x128@2x.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_16x16.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_16x16@2x.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_256x256.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_256x256@2x.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_32x32.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_32x32@2x.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_512x512.png +0 -0
- package/platforms/electrobun/assets/brand-config.json +6 -6
- package/platforms/electrobun/biome.json +9 -9
- package/platforms/electrobun/docs/capability-collapse-matrix.json +318 -0
- package/platforms/electrobun/docs/capability-collapse-matrix.md +129 -0
- package/platforms/electrobun/docs/capability-routing.md +86 -0
- package/platforms/electrobun/docs/convergence-audit.json +3505 -0
- package/platforms/electrobun/docs/convergence-audit.md +694 -0
- package/platforms/electrobun/docs/database-boot-policy.md +90 -0
- package/platforms/electrobun/docs/riscv64-port.md +175 -0
- package/platforms/electrobun/docs/startup-first-run-cleanup.md +18 -0
- package/platforms/electrobun/docs/trace-first-annotations.md +52 -0
- package/platforms/electrobun/docs/ui-boundary-audit.json +580 -0
- package/platforms/electrobun/docs/ui-boundary-audit.md +257 -0
- package/platforms/electrobun/electrobun.config.ts +592 -364
- package/platforms/electrobun/entitlements/JUSTIFICATIONS.md +141 -0
- package/platforms/electrobun/entitlements/README.md +34 -6
- package/platforms/electrobun/entitlements/mas-bun.entitlements +15 -0
- package/platforms/electrobun/entitlements/mas.entitlements +6 -4
- package/platforms/electrobun/native/macos/window-effects.mm +1522 -0
- package/platforms/electrobun/package.json +18 -12
- package/platforms/electrobun/remotes/fs/README.md +70 -0
- package/platforms/electrobun/remotes/fs/electrobun.config.ts +38 -0
- package/platforms/electrobun/remotes/fs/package.json +12 -0
- package/platforms/electrobun/remotes/fs/plugin.json +25 -0
- package/platforms/electrobun/remotes/fs/src/bun/errors.ts +57 -0
- package/platforms/electrobun/remotes/fs/src/bun/file-limits.ts +50 -0
- package/platforms/electrobun/remotes/fs/src/bun/fs-service.ts +389 -0
- package/platforms/electrobun/remotes/fs/src/bun/path-guard.ts +270 -0
- package/platforms/electrobun/remotes/fs/src/bun/protocol.ts +149 -0
- package/platforms/electrobun/remotes/fs/src/bun/worker.ts +174 -0
- package/platforms/electrobun/remotes/fs/src/dev/phase5-smoke.ts +171 -0
- package/platforms/electrobun/remotes/fs/src/web/index.html +8 -0
- package/platforms/electrobun/remotes/git/README.md +75 -0
- package/platforms/electrobun/remotes/git/electrobun.config.ts +44 -0
- package/platforms/electrobun/remotes/git/package.json +12 -0
- package/platforms/electrobun/remotes/git/plugin.json +31 -0
- package/platforms/electrobun/remotes/git/src/bun/errors.ts +69 -0
- package/platforms/electrobun/remotes/git/src/bun/git-command.ts +156 -0
- package/platforms/electrobun/remotes/git/src/bun/git-service.ts +446 -0
- package/platforms/electrobun/remotes/git/src/bun/operation-history.ts +124 -0
- package/platforms/electrobun/remotes/git/src/bun/protocol.ts +252 -0
- package/platforms/electrobun/remotes/git/src/bun/worker.ts +316 -0
- package/platforms/electrobun/remotes/git/src/dev/phase7-smoke.ts +141 -0
- package/platforms/electrobun/remotes/git/src/web/index.html +8 -0
- package/platforms/electrobun/remotes/local-model/README.md +138 -0
- package/platforms/electrobun/remotes/local-model/electrobun.config.ts +46 -0
- package/platforms/electrobun/remotes/local-model/package.json +12 -0
- package/platforms/electrobun/remotes/local-model/plugin.json +33 -0
- package/platforms/electrobun/remotes/local-model/src/bun/download-state.ts +115 -0
- package/platforms/electrobun/remotes/local-model/src/bun/eliza1-catalog.ts +425 -0
- package/platforms/electrobun/remotes/local-model/src/bun/errors.ts +74 -0
- package/platforms/electrobun/remotes/local-model/src/bun/hf-eliza1-client.ts +169 -0
- package/platforms/electrobun/remotes/local-model/src/bun/local-inference-api-client.ts +245 -0
- package/platforms/electrobun/remotes/local-model/src/bun/model-service.ts +490 -0
- package/platforms/electrobun/remotes/local-model/src/bun/protocol.ts +301 -0
- package/platforms/electrobun/remotes/local-model/src/bun/worker.ts +248 -0
- package/platforms/electrobun/remotes/local-model/src/dev/phase8-smoke.ts +117 -0
- package/platforms/electrobun/remotes/local-model/src/web/index.html +13 -0
- package/platforms/electrobun/remotes/pty/README.md +65 -0
- package/platforms/electrobun/remotes/pty/electrobun.config.ts +47 -0
- package/platforms/electrobun/remotes/pty/package.json +12 -0
- package/platforms/electrobun/remotes/pty/plugin.json +34 -0
- package/platforms/electrobun/remotes/pty/src/bun/errors.ts +57 -0
- package/platforms/electrobun/remotes/pty/src/bun/output-buffer.ts +127 -0
- package/platforms/electrobun/remotes/pty/src/bun/protocol.ts +192 -0
- package/platforms/electrobun/remotes/pty/src/bun/pty-service.ts +562 -0
- package/platforms/electrobun/remotes/pty/src/bun/worker.ts +218 -0
- package/platforms/electrobun/remotes/pty/src/dev/phase6-smoke.ts +127 -0
- package/platforms/electrobun/remotes/pty/src/web/index.html +8 -0
- package/platforms/electrobun/remotes/runtime/README.md +370 -0
- package/platforms/electrobun/remotes/runtime/electrobun.config.ts +48 -0
- package/platforms/electrobun/remotes/runtime/package.json +14 -0
- package/platforms/electrobun/remotes/runtime/plugin.json +30 -0
- package/platforms/electrobun/remotes/runtime/src/bun/api-client.ts +620 -0
- package/platforms/electrobun/remotes/runtime/src/bun/errors.ts +45 -0
- package/platforms/electrobun/remotes/runtime/src/bun/log-buffer.ts +33 -0
- package/platforms/electrobun/remotes/runtime/src/bun/protocol.ts +366 -0
- package/platforms/electrobun/remotes/runtime/src/bun/route-discovery.ts +419 -0
- package/platforms/electrobun/remotes/runtime/src/bun/runtime-manager.ts +423 -0
- package/platforms/electrobun/remotes/runtime/src/bun/sse-parser.ts +99 -0
- package/platforms/electrobun/remotes/runtime/src/bun/stream-manager.ts +887 -0
- package/platforms/electrobun/remotes/runtime/src/bun/worker.ts +1231 -0
- package/platforms/electrobun/remotes/runtime/src/dev/phase1-smoke.ts +34 -0
- package/platforms/electrobun/remotes/runtime/src/dev/phase2-smoke.ts +86 -0
- package/platforms/electrobun/remotes/runtime/src/dev/phase3-smoke.ts +141 -0
- package/platforms/electrobun/remotes/runtime/src/web/index.css +187 -0
- package/platforms/electrobun/remotes/runtime/src/web/index.html +76 -0
- package/platforms/electrobun/remotes/runtime/src/web/index.ts +192 -0
- package/platforms/electrobun/remotes/surface/README.md +201 -0
- package/platforms/electrobun/remotes/surface/electrobun.config.ts +38 -0
- package/platforms/electrobun/remotes/surface/package.json +12 -0
- package/platforms/electrobun/remotes/surface/plugin.json +28 -0
- package/platforms/electrobun/remotes/surface/src/bun/worker.ts +132 -0
- package/platforms/electrobun/remotes/surface/src/dev/phase4-smoke.ts +566 -0
- package/platforms/electrobun/remotes/surface/src/protocol/event-types.ts +84 -0
- package/platforms/electrobun/remotes/surface/src/protocol/runtime-client.ts +673 -0
- package/platforms/electrobun/remotes/surface/src/web/app.ts +595 -0
- package/platforms/electrobun/remotes/surface/src/web/index.css +460 -0
- package/platforms/electrobun/remotes/surface/src/web/index.html +466 -0
- package/platforms/electrobun/remotes/surface/src/web/index.ts +5 -0
- package/platforms/electrobun/remotes/surface/src/web/render.ts +455 -0
- package/platforms/electrobun/remotes/surface/src/web/state.ts +427 -0
- package/platforms/electrobun/scripts/build-macos-effects.sh +4 -0
- package/platforms/electrobun/scripts/ensure-build-folder.ts +28 -0
- package/platforms/electrobun/scripts/ensure-whisper-gguf.sh +55 -0
- package/platforms/electrobun/scripts/ensure-whisper-model.sh +22 -80
- package/platforms/electrobun/scripts/generate-convergence-audit.ts +1203 -0
- package/platforms/electrobun/scripts/local-adhoc-sign-macos.ts +159 -159
- package/platforms/electrobun/scripts/postwrap-diagnostics.ts +424 -339
- package/platforms/electrobun/scripts/postwrap-sign-runtime-macos.ts +302 -271
- package/platforms/electrobun/scripts/smoke-test-windows.ps1 +17 -16
- package/platforms/electrobun/scripts/smoke-test.sh +5 -7
- package/platforms/electrobun/scripts/sync-web-assets.mjs +13 -13
- package/platforms/electrobun/scripts/verify-rpc-handlers.ts +109 -110
- package/platforms/electrobun/scripts/verify-windows-installer-proof.ps1 +3 -8
- package/platforms/electrobun/src/__stubs__/bun-ffi.ts +31 -31
- package/platforms/electrobun/src/__stubs__/electrobun-bun.ts +1 -1
- package/platforms/electrobun/src/agent-ready-state.ts +8 -8
- package/platforms/electrobun/src/agent-reset-from-main.test.ts +162 -0
- package/platforms/electrobun/src/agent-reset-from-main.ts +62 -62
- package/platforms/electrobun/src/agent-status-rpc.test.ts +95 -0
- package/platforms/electrobun/src/agent-status-rpc.ts +156 -0
- package/platforms/electrobun/src/api-base.test.ts +247 -0
- package/platforms/electrobun/src/api-base.ts +202 -93
- package/platforms/electrobun/src/application-menu-action-registry.ts +9 -9
- package/platforms/electrobun/src/application-menu.ts +348 -348
- package/platforms/electrobun/src/background-notice.ts +36 -36
- package/platforms/electrobun/src/boot-progress.test.ts +188 -0
- package/platforms/electrobun/src/boot-progress.ts +111 -0
- package/platforms/electrobun/src/brand-config.test.ts +39 -0
- package/platforms/electrobun/src/brand-config.ts +141 -129
- package/platforms/electrobun/src/bridge/browser-tabs-renderer-registry.ts +28 -28
- package/platforms/electrobun/src/bridge/electrobun-boot-config.ts +42 -0
- package/platforms/electrobun/src/bridge/electrobun-crypto-ready.ts +120 -0
- package/platforms/electrobun/src/bridge/electrobun-direct-rpc.ts +342 -357
- package/platforms/electrobun/src/bridge/electrobun-stub.ts +13 -13
- package/platforms/electrobun/src/browser-workspace-bridge-server.ts +285 -243
- package/platforms/electrobun/src/cloud-auth-window.ts +136 -136
- package/platforms/electrobun/src/cloud-disconnect-from-main.ts +90 -90
- package/platforms/electrobun/src/config-and-auth-rpc.test.ts +256 -0
- package/platforms/electrobun/src/config-and-auth-rpc.ts +302 -0
- package/platforms/electrobun/src/conversations-and-character-rpc.test.ts +185 -0
- package/platforms/electrobun/src/conversations-and-character-rpc.ts +131 -0
- package/platforms/electrobun/src/dashboard-rpc.test.ts +200 -0
- package/platforms/electrobun/src/dashboard-rpc.ts +344 -0
- package/platforms/electrobun/src/database/database-lock.ts +141 -0
- package/platforms/electrobun/src/database/database-mode.ts +149 -0
- package/platforms/electrobun/src/database/database-recovery.ts +72 -0
- package/platforms/electrobun/src/database/database-snapshot.ts +190 -0
- package/platforms/electrobun/src/database/database.test.ts +196 -0
- package/platforms/electrobun/src/database/index.ts +5 -0
- package/platforms/electrobun/src/database/pglite-paths.ts +100 -0
- package/platforms/electrobun/src/desktop-deep-link-events.test.ts +30 -0
- package/platforms/electrobun/src/desktop-deep-link-events.ts +17 -0
- package/platforms/electrobun/src/desktop-http-request.test.ts +73 -73
- package/platforms/electrobun/src/desktop-http-request.ts +85 -85
- package/platforms/electrobun/src/desktop-pill-config.test.ts +27 -0
- package/platforms/electrobun/src/desktop-pill-config.ts +40 -0
- package/platforms/electrobun/src/desktop-test-bridge-server.ts +204 -204
- package/platforms/electrobun/src/desktop-tray-config.test.ts +87 -0
- package/platforms/electrobun/src/desktop-tray-config.ts +84 -0
- package/platforms/electrobun/src/devtools-layout.ts +41 -41
- package/platforms/electrobun/src/diagnostic-format.test.ts +71 -0
- package/platforms/electrobun/src/diagnostic-format.ts +75 -36
- package/platforms/electrobun/src/dynamic-view-rpc-schema.test.ts +37 -0
- package/platforms/electrobun/src/dynamic-views/README.md +44 -0
- package/platforms/electrobun/src/dynamic-views/demo/agent-run-trace.html +135 -0
- package/platforms/electrobun/src/dynamic-views/errors.ts +29 -0
- package/platforms/electrobun/src/dynamic-views/host.test.ts +353 -0
- package/platforms/electrobun/src/dynamic-views/host.ts +332 -0
- package/platforms/electrobun/src/dynamic-views/index.ts +57 -0
- package/platforms/electrobun/src/dynamic-views/kiosk-canvas.ts +89 -0
- package/platforms/electrobun/src/dynamic-views/registry.test.ts +139 -0
- package/platforms/electrobun/src/dynamic-views/registry.ts +196 -0
- package/platforms/electrobun/src/dynamic-views/session-manager.test.ts +355 -0
- package/platforms/electrobun/src/dynamic-views/session-manager.ts +348 -0
- package/platforms/electrobun/src/dynamic-views/types.ts +105 -0
- package/platforms/electrobun/src/electrobun-boot-config.test.ts +50 -0
- package/platforms/electrobun/src/electrobun-config.test.ts +62 -0
- package/platforms/electrobun/src/electrobun-crypto-ready.test.ts +65 -0
- package/platforms/electrobun/src/electrobun-window-options.ts +25 -0
- package/platforms/electrobun/src/extension-rpc.test.ts +88 -0
- package/platforms/electrobun/src/extension-rpc.ts +102 -0
- package/platforms/electrobun/src/fatal-shutdown.test.ts +10 -10
- package/platforms/electrobun/src/fatal-shutdown.ts +1 -1
- package/platforms/electrobun/src/first-party-remotes.test.ts +169 -0
- package/platforms/electrobun/src/first-party-remotes.ts +297 -0
- package/platforms/electrobun/src/first-run-rpc.test.ts +192 -0
- package/platforms/electrobun/src/first-run-rpc.ts +146 -0
- package/platforms/electrobun/src/floating-chat-window.ts +181 -181
- package/platforms/electrobun/src/inbox-rpc.test.ts +123 -0
- package/platforms/electrobun/src/inbox-rpc.ts +158 -0
- package/platforms/electrobun/src/index.ts +2555 -2096
- package/platforms/electrobun/src/kiosk-mode.ts +50 -0
- package/platforms/electrobun/src/launch/index.ts +4 -0
- package/platforms/electrobun/src/launch/launch-dynamic-view.ts +37 -0
- package/platforms/electrobun/src/launch/launch-orchestrator.test.ts +224 -0
- package/platforms/electrobun/src/launch/launch-orchestrator.ts +456 -0
- package/platforms/electrobun/src/launch/launch-store.test.ts +97 -0
- package/platforms/electrobun/src/launch/launch-store.ts +134 -0
- package/platforms/electrobun/src/launch/types.ts +103 -0
- package/platforms/electrobun/src/launch/views/launch-diagnostics.html +205 -0
- package/platforms/electrobun/src/lifecycle/agent-ready-publish.test.ts +50 -0
- package/platforms/electrobun/src/lifecycle/agent-ready-publish.ts +27 -0
- package/platforms/electrobun/src/lifecycle/api-base-owner.ts +42 -31
- package/platforms/electrobun/src/lifecycle/desktop-session-prime.ts +44 -44
- package/platforms/electrobun/src/logger.ts +14 -14
- package/platforms/electrobun/src/main-window-runtime.ts +83 -83
- package/platforms/electrobun/src/main-window-session.test.ts +109 -0
- package/platforms/electrobun/src/main-window-session.ts +87 -51
- package/platforms/electrobun/src/menu-reset-from-main.ts +158 -158
- package/platforms/electrobun/src/native/agent-env.test.ts +52 -0
- package/platforms/electrobun/src/native/agent-runtime-layout.test.ts +42 -0
- package/platforms/electrobun/src/native/agent-state-dir.test.ts +91 -0
- package/platforms/electrobun/src/native/agent.ts +2122 -1682
- package/platforms/electrobun/src/native/auth-bridge.test.ts +67 -0
- package/platforms/electrobun/src/native/auth-bridge.ts +464 -360
- package/platforms/electrobun/src/native/browser-workspace.ts +723 -471
- package/platforms/electrobun/src/native/camera.ts +50 -50
- package/platforms/electrobun/src/native/canvas.ts +444 -445
- package/platforms/electrobun/src/native/credentials.ts +673 -616
- package/platforms/electrobun/src/native/desktop-window.test.ts +300 -0
- package/platforms/electrobun/src/native/desktop.ts +2196 -2156
- package/platforms/electrobun/src/native/editor-bridge.ts +201 -201
- package/platforms/electrobun/src/native/file-watcher.ts +154 -154
- package/platforms/electrobun/src/native/gateway.ts +179 -180
- package/platforms/electrobun/src/native/gpu-window.ts +256 -256
- package/platforms/electrobun/src/native/index.ts +76 -74
- package/platforms/electrobun/src/native/location.test.ts +44 -0
- package/platforms/electrobun/src/native/location.ts +90 -80
- package/platforms/electrobun/src/native/loopback-port.ts +60 -60
- package/platforms/electrobun/src/native/mac-window-effects.ts +166 -104
- package/platforms/electrobun/src/native/music-player.ts +38 -38
- package/platforms/electrobun/src/native/permissions-shared.ts +249 -150
- package/platforms/electrobun/src/native/permissions.ts +301 -208
- package/platforms/electrobun/src/native/power-state.ts +129 -129
- package/platforms/electrobun/src/native/remote-plugin-host.test.ts +1394 -0
- package/platforms/electrobun/src/native/remote-plugin-host.ts +1531 -0
- package/platforms/electrobun/src/native/screencapture.ts +667 -573
- package/platforms/electrobun/src/native/steward.ts +207 -204
- package/platforms/electrobun/src/native/swabble.ts +68 -324
- package/platforms/electrobun/src/native/talkmode.ts +253 -422
- package/platforms/electrobun/src/native/webgpu-browser-support.test.ts +18 -0
- package/platforms/electrobun/src/native/webgpu-browser-support.ts +165 -147
- package/platforms/electrobun/src/native/whisper-env.test.ts +71 -0
- package/platforms/electrobun/src/native/whisper-env.ts +68 -0
- package/platforms/electrobun/src/native-onboarding.ts +270 -0
- package/platforms/electrobun/src/onboarding-overlay-window.ts +141 -0
- package/platforms/electrobun/src/persisted-deployment.ts +91 -0
- package/platforms/electrobun/src/pill-window.test.ts +91 -0
- package/platforms/electrobun/src/pill-window.ts +99 -0
- package/platforms/electrobun/src/preload-validation.ts +44 -44
- package/platforms/electrobun/src/preload.js +1 -1
- package/platforms/electrobun/src/print-electrobun-dev-settings-banner.ts +120 -120
- package/platforms/electrobun/src/renderer-api-proxy.test.ts +73 -0
- package/platforms/electrobun/src/renderer-api-proxy.ts +86 -0
- package/platforms/electrobun/src/renderer-static.test.ts +53 -0
- package/platforms/electrobun/src/renderer-static.ts +144 -57
- package/platforms/electrobun/src/rpc-handler-slices.ts +121 -0
- package/platforms/electrobun/src/rpc-handlers.test.ts +267 -0
- package/platforms/electrobun/src/rpc-handlers.ts +1306 -913
- package/platforms/electrobun/src/rpc-parse-utils.ts +57 -0
- package/platforms/electrobun/src/rpc-port-resolver.test.ts +45 -0
- package/platforms/electrobun/src/rpc-port-resolver.ts +31 -0
- package/platforms/electrobun/src/rpc-schema.ts +2556 -1619
- package/platforms/electrobun/src/runtime-layout.ts +105 -105
- package/platforms/electrobun/src/runtime-permissions.ts +95 -95
- package/platforms/electrobun/src/runtime-rpc.test.ts +126 -0
- package/platforms/electrobun/src/runtime-rpc.ts +237 -0
- package/platforms/electrobun/src/screenshot-dev-server.ts +87 -87
- package/platforms/electrobun/src/settings-mutations-rpc.test.ts +193 -0
- package/platforms/electrobun/src/settings-mutations-rpc.ts +220 -0
- package/platforms/electrobun/src/startup-trace.ts +274 -270
- package/platforms/electrobun/src/subscription-rpc.test.ts +89 -0
- package/platforms/electrobun/src/subscription-rpc.ts +192 -0
- package/platforms/electrobun/src/surface-windows.test.ts +355 -0
- package/platforms/electrobun/src/surface-windows.ts +410 -410
- package/platforms/electrobun/src/trace/README.md +73 -0
- package/platforms/electrobun/src/trace/errors.ts +21 -0
- package/platforms/electrobun/src/trace/index.ts +40 -0
- package/platforms/electrobun/src/trace/trace-dynamic-view.ts +40 -0
- package/platforms/electrobun/src/trace/trace-host-requests.ts +473 -0
- package/platforms/electrobun/src/trace/trace-service.test.ts +186 -0
- package/platforms/electrobun/src/trace/trace-service.ts +324 -0
- package/platforms/electrobun/src/trace/trace-store.test.ts +141 -0
- package/platforms/electrobun/src/trace/trace-store.ts +551 -0
- package/platforms/electrobun/src/trace/types.ts +250 -0
- package/platforms/electrobun/src/trace/views/agent-run-trace.html +311 -0
- package/platforms/electrobun/src/types/web-speech.d.ts +28 -28
- package/platforms/electrobun/src/types.ts +5 -5
- package/platforms/electrobun/src/update-availability.test.ts +72 -0
- package/platforms/electrobun/src/update-availability.ts +90 -0
- package/platforms/electrobun/src/update-rpc.test.ts +83 -0
- package/platforms/electrobun/src/update-rpc.ts +123 -0
- package/platforms/electrobun/src/voice/README.md +184 -0
- package/platforms/electrobun/src/voice/errors.ts +42 -0
- package/platforms/electrobun/src/voice/index.ts +78 -0
- package/platforms/electrobun/src/voice/types.ts +316 -0
- package/platforms/electrobun/src/voice/voice-host-requests.ts +259 -0
- package/platforms/electrobun/src/voice/voice-latency-budget.test.ts +66 -0
- package/platforms/electrobun/src/voice/voice-latency-budget.ts +243 -0
- package/platforms/electrobun/src/voice/voice-live-validation.test.ts +352 -0
- package/platforms/electrobun/src/voice/voice-live-validation.ts +838 -0
- package/platforms/electrobun/src/voice/voice-pipeline.ts +250 -0
- package/platforms/electrobun/src/voice/voice-playback-adapter.ts +31 -0
- package/platforms/electrobun/src/voice/voice-runtime-adapter.test.ts +213 -0
- package/platforms/electrobun/src/voice/voice-runtime-adapter.ts +686 -0
- package/platforms/electrobun/src/voice/voice-service.test.ts +561 -0
- package/platforms/electrobun/src/voice/voice-service.ts +1027 -0
- package/platforms/electrobun/src/voice/voice-stream-coordinator.test.ts +115 -0
- package/platforms/electrobun/src/voice/voice-stream-coordinator.ts +270 -0
- package/platforms/electrobun/src/voice/voice-trace.ts +97 -0
- package/platforms/electrobun/src/voice/voice-tts-chunker.test.ts +91 -0
- package/platforms/electrobun/src/voice/voice-tts-chunker.ts +194 -0
- package/platforms/electrobun/src/windows-cef-profile.ts +88 -88
- package/platforms/electrobun/tsconfig.json +73 -13
- package/platforms/electrobun/update-channels.json +22 -0
- package/platforms/electrobun/vitest.electrobun.config.ts +72 -42
- package/platforms/ios/App/App/App.entitlements +4 -0
- package/platforms/ios/App/App/AppDelegate.swift +80 -18
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ios-marketing-1024.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-20x20@1x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-20x20@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-29x29@1x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-29x29@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-40x40@1x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-40x40@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-76x76@1x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-76x76@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-83_5x83_5@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-20x20@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-20x20@3x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-29x29@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-29x29@3x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-40x40@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-40x40@3x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-60x60@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-60x60@3x.png +0 -0
- package/platforms/ios/App/App/Base.lproj/LaunchScreen.storyboard +1 -4
- package/platforms/ios/App/App/ComputerUseBridge.swift +589 -0
- package/platforms/ios/App/App/DeviceActivityMonitorExtension/DeviceActivityMonitorExtension.entitlements +12 -0
- package/platforms/ios/App/App/DeviceActivityMonitorExtension/DeviceActivityMonitorExtension.swift +34 -0
- package/platforms/ios/App/App/DeviceActivityMonitorExtension/Info.plist +29 -0
- package/platforms/ios/App/App/DeviceActivityReportExtension/DeviceActivityReportExtension.entitlements +12 -0
- package/platforms/ios/App/App/DeviceActivityReportExtension/DeviceActivityReportExtension.swift +53 -0
- package/platforms/ios/App/App/DeviceActivityReportExtension/Info.plist +27 -0
- package/platforms/ios/App/App/ElizaAppIntents.swift +183 -0
- package/platforms/ios/App/App/ElizaIntentPlugin.swift +342 -5
- package/platforms/ios/App/App/Info.plist +17 -1
- package/platforms/ios/App/App/runners/eliza-tasks.js +177 -0
- package/platforms/ios/App/App.xcodeproj/project.pbxproj +262 -6
- package/platforms/ios/App/BroadcastExtension/SampleHandler.swift +100 -0
- package/platforms/ios/App/Podfile +5 -0
- package/platforms/ios/App/Podfile.lock +83 -59
- package/register-runtime-hooks.js +11 -5
- package/registry/app-registry.d.ts +14 -0
- package/registry/app-registry.d.ts.map +1 -0
- package/registry/app-registry.js +29 -0
- package/registry/entries/apps/app-polymarket.json +31 -0
- package/registry/entries/apps/clawville.json +27 -0
- package/registry/entries/apps/companion.json +28 -0
- package/registry/entries/apps/database-viewer.json +27 -0
- package/registry/entries/apps/defense-of-the-agents.json +27 -0
- package/registry/entries/apps/documents.json +30 -0
- package/registry/entries/apps/feed.json +27 -0
- package/registry/entries/apps/hyperliquid.json +31 -0
- package/registry/entries/apps/log-viewer.json +27 -0
- package/registry/entries/apps/memory-viewer.json +27 -0
- package/registry/entries/apps/model-tester.json +31 -0
- package/registry/entries/apps/plugin-viewer.json +27 -0
- package/registry/entries/apps/relationship-viewer.json +27 -0
- package/registry/entries/apps/runtime-debugger.json +27 -0
- package/registry/entries/apps/shopify.json +31 -0
- package/registry/entries/apps/skills-viewer.json +27 -0
- package/registry/entries/apps/steward.json +31 -0
- package/registry/entries/apps/training.json +54 -0
- package/registry/entries/apps/trajectory-viewer.json +27 -0
- package/registry/entries/apps/vincent.json +31 -0
- package/registry/entries/connectors/bluebubbles.json +99 -0
- package/registry/entries/connectors/bluesky.json +173 -0
- package/registry/entries/connectors/discord.json +119 -0
- package/registry/entries/connectors/farcaster.json +174 -0
- package/registry/entries/connectors/feishu.json +79 -0
- package/registry/entries/connectors/google-chat.json +120 -0
- package/registry/entries/connectors/google.json +82 -0
- package/registry/entries/connectors/imessage.json +96 -0
- package/registry/entries/connectors/instagram.json +64 -0
- package/registry/entries/connectors/line.json +86 -0
- package/registry/entries/connectors/matrix.json +94 -0
- package/registry/entries/connectors/mattermost.json +110 -0
- package/registry/entries/connectors/msteams.json +104 -0
- package/registry/entries/connectors/nextcloud-talk.json +104 -0
- package/registry/entries/connectors/nostr.json +70 -0
- package/registry/entries/connectors/signal.json +81 -0
- package/registry/entries/connectors/slack.json +102 -0
- package/registry/entries/connectors/telegram.json +71 -0
- package/registry/entries/connectors/tlon.json +94 -0
- package/registry/entries/connectors/twitch.json +110 -0
- package/registry/entries/connectors/whatsapp.json +113 -0
- package/registry/entries/connectors/x.json +231 -0
- package/registry/entries/connectors/zalo.json +112 -0
- package/registry/entries/connectors/zalouser.json +122 -0
- package/registry/entries/plugins/agent-orchestrator.json +33 -0
- package/registry/entries/plugins/agent-skills.json +72 -0
- package/registry/entries/plugins/anthropic.json +73 -0
- package/registry/entries/plugins/app-control.json +23 -0
- package/registry/entries/plugins/auto-trader.json +203 -0
- package/registry/entries/plugins/background-runner.json +26 -0
- package/registry/entries/plugins/blooio.json +102 -0
- package/registry/entries/plugins/browser.json +75 -0
- package/registry/entries/plugins/cli.json +40 -0
- package/registry/entries/plugins/clipboard.json +44 -0
- package/registry/entries/plugins/coding-tools.json +71 -0
- package/registry/entries/plugins/commands.json +63 -0
- package/registry/entries/plugins/computeruse.json +74 -0
- package/registry/entries/plugins/copilot-proxy.json +93 -0
- package/registry/entries/plugins/directives.json +63 -0
- package/registry/entries/plugins/edge-tts.json +97 -0
- package/registry/entries/plugins/elevenlabs.json +169 -0
- package/registry/entries/plugins/elizacloud.json +208 -0
- package/registry/entries/plugins/evm.json +134 -0
- package/registry/entries/plugins/experience.json +34 -0
- package/registry/entries/plugins/facewear.json +131 -0
- package/registry/entries/plugins/form.json +26 -0
- package/registry/entries/plugins/github.json +93 -0
- package/registry/entries/plugins/gmail-watch.json +25 -0
- package/registry/entries/plugins/goals.json +77 -0
- package/registry/entries/plugins/google-genai.json +106 -0
- package/registry/entries/plugins/groq.json +93 -0
- package/registry/entries/plugins/hedera.json +48 -0
- package/registry/entries/plugins/inmemorydb.json +25 -0
- package/registry/entries/plugins/linear.json +51 -0
- package/registry/entries/plugins/local-inference.json +142 -0
- package/registry/entries/plugins/local-storage.json +36 -0
- package/registry/entries/plugins/localdb.json +25 -0
- package/registry/entries/plugins/mcp.json +44 -0
- package/registry/entries/plugins/memory.json +124 -0
- package/registry/entries/plugins/minecraft.json +79 -0
- package/registry/entries/plugins/moltbook.json +83 -0
- package/registry/entries/plugins/music.json +155 -0
- package/registry/entries/plugins/mysticism.json +48 -0
- package/registry/entries/plugins/nearai.json +82 -0
- package/registry/entries/plugins/ngrok.json +69 -0
- package/registry/entries/plugins/ollama.json +96 -0
- package/registry/entries/plugins/openai.json +189 -0
- package/registry/entries/plugins/openrouter.json +188 -0
- package/registry/entries/plugins/pdf.json +26 -0
- package/registry/entries/plugins/plugin-manager.json +23 -0
- package/registry/entries/plugins/prose.json +48 -0
- package/registry/entries/plugins/rlm.json +26 -0
- package/registry/entries/plugins/roblox.json +88 -0
- package/registry/entries/plugins/rss.json +64 -0
- package/registry/entries/plugins/s3-storage.json +91 -0
- package/registry/entries/plugins/scheduling.json +35 -0
- package/registry/entries/plugins/shell.json +94 -0
- package/registry/entries/plugins/social-alpha.json +72 -0
- package/registry/entries/plugins/tailscale.json +81 -0
- package/registry/entries/plugins/tee.json +53 -0
- package/registry/entries/plugins/todos.json +26 -0
- package/registry/entries/plugins/trajectory-logger.json +33 -0
- package/registry/entries/plugins/trust.json +39 -0
- package/registry/entries/plugins/tts.json +71 -0
- package/registry/entries/plugins/tunnel.json +45 -0
- package/registry/entries/plugins/twilio.json +168 -0
- package/registry/entries/plugins/vercel-ai-gateway.json +128 -0
- package/registry/entries/plugins/video.json +23 -0
- package/registry/entries/plugins/vision.json +43 -0
- package/registry/entries/plugins/webhooks.json +23 -0
- package/registry/entries/plugins/workflow.json +25 -0
- package/registry/entries/plugins/xai.json +75 -0
- package/registry/index.d.ts +2 -1
- package/registry/index.d.ts.map +1 -1
- package/registry/index.js +46 -12
- package/registry/loader.d.ts +2 -1
- package/registry/loader.d.ts.map +1 -1
- package/registry/loader.js +49 -2
- package/registry/schema.d.ts +244 -34
- package/registry/schema.d.ts.map +1 -1
- package/registry/schema.js +36 -0
- package/runtime/android-avf-microdroid-bridge.d.ts +29 -0
- package/runtime/android-avf-microdroid-bridge.d.ts.map +1 -0
- package/runtime/android-avf-microdroid-bridge.js +149 -0
- package/runtime/api-dev-settings-banner.d.ts.map +1 -1
- package/runtime/api-dev-settings-banner.js +5 -13
- package/runtime/app-core-runtime-hooks.d.ts +21 -0
- package/runtime/app-core-runtime-hooks.d.ts.map +1 -0
- package/runtime/app-core-runtime-hooks.js +10 -0
- package/runtime/autonomy-policy.d.ts +2 -0
- package/runtime/autonomy-policy.d.ts.map +1 -0
- package/runtime/autonomy-policy.js +4 -0
- package/runtime/desktop/AppWindowRenderer.d.ts +17 -0
- package/runtime/desktop/AppWindowRenderer.d.ts.map +1 -0
- package/runtime/desktop/AppWindowRenderer.js +360 -0
- package/runtime/desktop/DesktopSurfaceNavigationRuntime.d.ts +2 -0
- package/runtime/desktop/DesktopSurfaceNavigationRuntime.d.ts.map +1 -0
- package/runtime/desktop/DesktopSurfaceNavigationRuntime.js +41 -0
- package/runtime/desktop/DesktopTrayRuntime.d.ts +2 -0
- package/runtime/desktop/DesktopTrayRuntime.d.ts.map +1 -0
- package/runtime/desktop/DesktopTrayRuntime.js +174 -0
- package/runtime/desktop/DetachedShellRoot.d.ts +10 -0
- package/runtime/desktop/DetachedShellRoot.d.ts.map +1 -0
- package/runtime/desktop/DetachedShellRoot.js +111 -0
- package/runtime/desktop/index.d.ts +6 -0
- package/runtime/desktop/index.d.ts.map +1 -0
- package/runtime/desktop/index.js +5 -0
- package/runtime/desktop/tray-menu.d.ts +20 -0
- package/runtime/desktop/tray-menu.d.ts.map +1 -0
- package/runtime/desktop/tray-menu.js +143 -0
- package/runtime/dev-server.d.ts +1 -1
- package/runtime/dev-server.d.ts.map +1 -1
- package/runtime/dev-server.js +93 -17
- package/runtime/eliza.d.ts +75 -1
- package/runtime/eliza.d.ts.map +1 -1
- package/runtime/eliza.js +596 -122
- package/runtime/ensure-text-to-speech-handler.d.ts.map +1 -1
- package/runtime/ensure-text-to-speech-handler.js +10 -3
- package/runtime/mobile-safe-runtime.d.ts +181 -2
- package/runtime/mobile-safe-runtime.d.ts.map +1 -1
- package/runtime/mobile-safe-runtime.js +1019 -12
- package/runtime/mode/remote-forwarder.d.ts.map +1 -1
- package/runtime/mode/remote-forwarder.js +2 -2
- package/runtime/mode/route-mode-guard.d.ts +1 -2
- package/runtime/mode/route-mode-guard.d.ts.map +1 -1
- package/runtime/mode/route-mode-guard.js +4 -5
- package/runtime/mode/route-mode-matrix.d.ts.map +1 -1
- package/runtime/mode/route-mode-matrix.js +14 -1
- package/runtime/mode/runtime-mode.d.ts +1 -1
- package/runtime/mode/runtime-mode.js +1 -1
- package/runtime/runtime-bootstrap-policy.d.ts.map +1 -1
- package/runtime/runtime-bootstrap-policy.js +14 -2
- package/runtime/telegram-standalone-handler.d.ts.map +1 -1
- package/runtime/telegram-standalone-handler.js +10 -9
- package/runtime/tts-cache-wiring.d.ts +29 -0
- package/runtime/tts-cache-wiring.d.ts.map +1 -0
- package/runtime/tts-cache-wiring.js +114 -0
- package/runtime/voice-warmup.d.ts +81 -0
- package/runtime/voice-warmup.d.ts.map +1 -0
- package/runtime/voice-warmup.js +111 -0
- package/scripts/android-sms-gateway-template.test.mjs +1014 -0
- package/scripts/aosp/README.md +19 -15
- package/scripts/aosp/compile-libllama.mjs +1344 -248
- package/scripts/aosp/compile-shim.mjs +47 -18
- package/scripts/aosp/deploy-pixel.mjs +405 -0
- package/scripts/aosp/lib/load-variant-config.mjs +3 -3
- package/scripts/aosp/llama-cpp-patches/README.md +8 -8
- package/scripts/aosp/llama-cpp-patches/apply-patches.mjs +23 -6
- package/scripts/aosp/llama-cpp-patches/polarquant/README.md +37 -0
- package/scripts/aosp/llama-cpp-patches/qjl/README.md +37 -0
- package/scripts/aosp/seccomp-shim/sigsys-handler-arm64.c +169 -0
- package/scripts/aosp/seccomp-shim/sigsys-handler-riscv64.c +217 -0
- package/scripts/aosp/smoke-cuttlefish.mjs +34 -4
- package/scripts/aosp/stage-default-models.mjs +18 -18
- package/scripts/aosp/variant-config-schema.ts +2 -2
- package/scripts/assert-required-bundled-packages.test.ts +534 -0
- package/scripts/audit-apple-store-sandbox.mjs +146 -0
- package/scripts/audit-live-test-surface.mjs +5 -2
- package/scripts/build-capacitor-app.mjs +21 -0
- package/scripts/build-flatpak.mjs +5 -5
- package/scripts/build-helpers/arm64-simd.mjs +72 -0
- package/scripts/build-helpers/omnivoice-merged.mjs +87 -0
- package/scripts/build-helpers/verify-fused-symbols.mjs +567 -0
- package/scripts/build-image.sh +1 -1
- package/scripts/build-llama-cpp-mtp.mjs +487 -0
- package/scripts/build-native-plugins.mjs +230 -18
- package/scripts/build-patched-electrobun-cli.mjs +68 -10
- package/scripts/build-win.mjs +1 -1
- package/scripts/bun-riscv64/Dockerfile +418 -0
- package/scripts/bun-riscv64/README.md +316 -0
- package/scripts/bun-riscv64/build.sh +469 -0
- package/scripts/bun-riscv64/bun-patches/0001-config-add-riscv64-arch.patch +74 -0
- package/scripts/bun-riscv64/bun-patches/0002-flags-add-riscv64-march-mabi.patch +16 -0
- package/scripts/bun-riscv64/bun-patches/0003-zig-add-riscv64-target-triple-and-cpu.patch +26 -0
- package/scripts/bun-riscv64/bun-patches/0004-webkit-force-local-mode-on-riscv64.patch +33 -0
- package/scripts/bun-riscv64/bun-patches/0005-tinycc-disable-on-riscv64.patch +16 -0
- package/scripts/bun-riscv64/bun-patches/0006-build-add-riscv64-cli-validation.patch +15 -0
- package/scripts/bun-riscv64/bun-patches/0007-deps-per-dep-riscv64-checks.patch +24 -0
- package/scripts/bun-riscv64/bun-patches/0008-source-stabilize-riscv64-musl-build.patch +226 -0
- package/scripts/bun-riscv64/bun-patches/0009-disable-wasm-streaming-hooks-for-c-loop.patch +162 -0
- package/scripts/bun-riscv64/bun-patches/0010-disable-inspector-profiler-for-riscv64-c-loop.patch +80 -0
- package/scripts/bun-riscv64/bun-patches/0011-process-arch-add-riscv64.patch +23 -0
- package/scripts/bun-riscv64/bun-patches/0012-cpu-features-add-riscv64-fallback.patch +13 -0
- package/scripts/bun-riscv64/bun-patches/0013-disable-console-inspector-hooks-for-riscv64-c-loop.patch +43 -0
- package/scripts/bun-riscv64/bun-patches/0014-disable-custom-inspector-dispatchers-on-riscv64.patch +127 -0
- package/scripts/bun-riscv64/bun-patches/0015-disable-jsc-profiler-builtins-on-riscv64.patch +75 -0
- package/scripts/bun-riscv64/bun-patches/0016-node-vm-disable-jit-cached-data-on-riscv64-c-loop.patch +96 -0
- package/scripts/bun-riscv64/bun-patches/0017-disable-performance-domjit-signature-on-riscv64-c-loop.patch +34 -0
- package/scripts/bun-riscv64/bun-patches/0018-fix-serialized-script-identifier-big-endian-path.patch +19 -0
- package/scripts/bun-riscv64/bun-patches/0019-add-wtf-timer-fire-bridge-for-c-loop.patch +24 -0
- package/scripts/bun-riscv64/bun-patches/0020-run-riscv64-smoke-test-under-qemu.patch +13 -0
- package/scripts/bun-riscv64/bun-patches/0021-fix-riscv64-linux-open-flags.patch +25 -0
- package/scripts/bun-riscv64/bun-patches/0022-zlib-riscv64-generic-kernels.patch +25 -0
- package/scripts/bun-riscv64/bun-patches/README.md +127 -0
- package/scripts/bun-riscv64/bun-version.json +202 -0
- package/scripts/bun-riscv64/run-build.sh +162 -0
- package/scripts/bun-riscv64/rust-core/0001-riscv64-rust-core-port.patch +868 -0
- package/scripts/bun-riscv64/rust-core/0002-second-wave-riscv64-source-gaps.patch +130 -0
- package/scripts/bun-riscv64/rust-core/0003-third-wave-riscv64-crash-handler-gaps.patch +78 -0
- package/scripts/bun-riscv64/rust-core/0004-rust-target-cpu-riscv64.patch +39 -0
- package/scripts/bun-riscv64/rust-core/0005-fifth-wave-riscv64-source-gaps.patch +96 -0
- package/scripts/bun-riscv64/rust-core/0006-cpp-wasm-and-inspector-guards-riscv64.patch +91 -0
- package/scripts/bun-riscv64/rust-core/0007-bun-alloc-max-align-t-riscv64.patch +36 -0
- package/scripts/bun-riscv64/rust-core/0008-workspace-lints-warn-not-deny-riscv64.patch +75 -0
- package/scripts/bun-riscv64/rust-core/0009-zigglobalobject-wasm-streaming-guards-riscv64.patch +109 -0
- package/scripts/bun-riscv64/rust-core/0010-tcc-externs-stub-on-riscv64.patch +62 -0
- package/scripts/bun-riscv64/rust-core/0011-clippy-ptr-cast-lints-warn-riscv64.patch +61 -0
- package/scripts/bun-riscv64/rust-core/README.md +80 -0
- package/scripts/bun-riscv64/rust-core/webkit-patches/0003-disable-dfg-ftl-on-riscv64.patch +60 -0
- package/scripts/bun-riscv64/rust-core/webkit-patches/0004-riscv64-do-not-force-wasm-in-c-loop.patch +31 -0
- package/scripts/bun-riscv64/rust-core/webkit-patches/0005-domjit-effect-allow-no-dfg-c-loop.patch +40 -0
- package/scripts/bun-riscv64/rust-core/webkit-patches/0006-disable-usewasm-when-webassembly-compiled-out.patch +33 -0
- package/scripts/bun-riscv64/rust-core/webkit-patches/0007-restore-dropped-includes-and-llint-fwd-decl.patch +31 -0
- package/scripts/bun-riscv64/validate.sh +264 -0
- package/scripts/bun-riscv64/webkit-patches/0001-cherry-pick-llint-riscv64.recipe +155 -0
- package/scripts/bun-riscv64/webkit-patches/0002-cherry-pick-baseline-jit-riscv64.recipe +40 -0
- package/scripts/bun-riscv64/webkit-patches/0003-disable-dfg-ftl-on-riscv64.patch +60 -0
- package/scripts/bun-riscv64/webkit-patches/0004-riscv64-do-not-force-wasm-in-c-loop.patch +31 -0
- package/scripts/bun-riscv64/webkit-patches/0005-domjit-effect-allow-no-dfg-c-loop.patch +40 -0
- package/scripts/bun-riscv64/webkit-patches/0006-disable-usewasm-when-webassembly-compiled-out.patch +33 -0
- package/scripts/bun-riscv64/webkit-patches/0007-restore-dropped-includes-and-llint-fwd-decl.patch +72 -0
- package/scripts/bun-riscv64/webkit-patches/README.md +146 -0
- package/scripts/check-homepage-public-readiness.mjs +353 -0
- package/scripts/check-homepage-release-data.mjs +110 -0
- package/scripts/check-i18n.mjs +2 -1
- package/scripts/check-real-local-chat.ts +147 -0
- package/scripts/check-real-local-provisioning.ts +104 -0
- package/scripts/check-real-local-reset.ts +249 -0
- package/scripts/check-sms-gateway-completion-audit.mjs +428 -0
- package/scripts/check-sms-gateway-readiness.mjs +266 -0
- package/scripts/clean-repo.mjs +5 -5
- package/scripts/codesign-mas.mjs +222 -16
- package/scripts/collect-docker-runtime-deps.mjs +229 -0
- package/scripts/continue-sms-gateway-work.mjs +121 -0
- package/scripts/copy-runtime-node-modules.ts +903 -195
- package/scripts/deploy-cloud-api-production-gateway.mjs +52 -0
- package/scripts/desktop-build.mjs +655 -101
- package/scripts/dev-platform.mjs +346 -102
- package/scripts/dev-startup-smoke.mjs +248 -0
- package/scripts/dev-ui.mjs +418 -176
- package/scripts/disable-local-eliza-workspace.mjs +35 -0
- package/scripts/docker-ci-smoke.sh +298 -96
- package/scripts/docker-entrypoint.sh +62 -1
- package/scripts/docker-entrypoint.test.ts +283 -0
- package/scripts/ensure-avatars.mjs +2 -2
- package/scripts/ensure-electrobun-core.mjs +1 -1
- package/scripts/ensure-generated-core-proto-js.mjs +1 -1
- package/scripts/ensure-type-package-aliases.mjs +62 -5
- package/scripts/ensure-vision-deps.mjs +20 -1
- package/scripts/entry.ts +1 -1
- package/scripts/ffi-stub/Makefile +64 -0
- package/scripts/ffi-stub/README.md +391 -0
- package/scripts/ffi-stub/asr-ffi-smoke.ts +139 -0
- package/scripts/ffi-stub/ffi-stub.c +539 -0
- package/scripts/ffi-stub/ffi.h +538 -0
- package/scripts/ffi-stub/libelizainference_stub.so +0 -0
- package/scripts/ffi-stub/tts-stream-ffi-smoke.ts +349 -0
- package/scripts/generate-first-run-voicelines.mjs +194 -0
- package/scripts/generate-plugin-index.js +4 -3
- package/scripts/generate-static-asset-manifest.mjs +1 -1
- package/scripts/i18n-dynamic-keys.json +5 -5
- package/scripts/init-submodules.mjs +2 -2
- package/scripts/install-android-sms-gateway.md +177 -0
- package/scripts/install-android-sms-gateway.mjs +1088 -0
- package/scripts/ios-xcframework/README.md +74 -72
- package/scripts/ios-xcframework/build-xcframework.mjs +204 -43
- package/scripts/ios-xcframework/run-physical-device-smoke.mjs +1943 -0
- package/scripts/ios-xcframework/runtime-symbol-shim.c +450 -0
- package/scripts/kernel-patches/cpu-polar-kernels.mjs +441 -0
- package/scripts/kernel-patches/cpu-simd-kernels.mjs +253 -0
- package/scripts/kernel-patches/cpu-thread-parallelism.mjs +368 -0
- package/scripts/kernel-patches/cuda-kernels.mjs +117 -0
- package/scripts/kernel-patches/metal-kernels.mjs +1698 -109
- package/scripts/kernel-patches/server-omnivoice-route.mjs +718 -0
- package/scripts/kernel-patches/server-structured-output.mjs +279 -0
- package/scripts/kernel-patches/vulkan-dispatch-log.mjs +166 -0
- package/scripts/kernel-patches/vulkan-dispatch-log.test.mjs +50 -0
- package/scripts/kernel-patches/vulkan-dispatch-patches/01-vulkan-shaders-gen.patch +30 -16
- package/scripts/kernel-patches/vulkan-dispatch-patches/02-ggml-vulkan-pipelines.patch +75 -30
- package/scripts/kernel-patches/vulkan-kernels.mjs +800 -49
- package/scripts/lib/agent-source-watcher.mjs +174 -0
- package/scripts/lib/agent-source-watcher.test.mjs +184 -0
- package/scripts/lib/api-supervisor.mjs +78 -9
- package/scripts/lib/api-supervisor.test.mjs +121 -0
- package/scripts/lib/app-dir.mjs +2 -16
- package/scripts/lib/apple-entitlement-audit.mjs +655 -0
- package/scripts/lib/apple-entitlement-audit.test.mjs +144 -0
- package/scripts/lib/bun-version-guard.mjs +13 -13
- package/scripts/lib/capacitor-plugin-build-needed.mjs +4 -3
- package/scripts/lib/capacitor-plugin-names.mjs +30 -14
- package/scripts/lib/desktop-preflight.mjs +9 -5
- package/scripts/lib/desktop-startup-embedding-warmup-policy.mjs +51 -0
- package/scripts/lib/desktop-startup-embedding-warmup-policy.test.mjs +55 -0
- package/scripts/lib/duet-bridge.d.mts +63 -0
- package/scripts/lib/duet-bridge.mjs +193 -0
- package/scripts/lib/node-path-env.mjs +4 -2
- package/scripts/lib/orchestrator-desktop-dev-banner.mjs +12 -3
- package/scripts/lib/patch-bun-exports.mjs +90 -27
- package/scripts/lib/patch-bun-exports.test.mjs +79 -0
- package/scripts/lib/renderer-build-action.mjs +35 -0
- package/scripts/lib/renderer-build-action.test.mjs +70 -0
- package/scripts/lib/stage-android-agent.mjs +748 -99
- package/scripts/lib/sync-eliza-env-aliases.mjs +3 -25
- package/scripts/lib/ui-smoke-stub-decision.mjs +33 -0
- package/scripts/lib/ui-smoke-stub-decision.test.mjs +46 -0
- package/scripts/lib/vite-renderer-dist-stale.mjs +5 -0
- package/scripts/lib/voice-latency-report.mjs +154 -0
- package/scripts/lifeops-prompt-benchmark.ts +21 -12
- package/scripts/link-docker-local-app-packages.mjs +89 -36
- package/scripts/local-stt-bench.ts +192 -0
- package/scripts/maintain-cloud-api-production-gateway.mjs +54 -0
- package/scripts/mas-smoke.mjs +459 -0
- package/scripts/mas-smoke.test.mjs +220 -0
- package/scripts/mobile-auth-simulator-smoke.mjs +0 -1
- package/scripts/normalize-eliza-capture.ts +97 -0
- package/scripts/omnivoice-fuse/prepare.mjs +2543 -23
- package/scripts/pack-upstreams.mjs +65 -5
- package/scripts/package-electrobun-linux.mjs +303 -0
- package/scripts/patch-deps.mjs +5 -3
- package/scripts/patches/llama-mobile-kokoro-tts.patch +480 -0
- package/scripts/playwright-ui-live-stack.ts +194 -49
- package/scripts/playwright-ui-smoke-api-stub.mjs +3501 -109
- package/scripts/pre-review-local.mjs +2 -2
- package/scripts/prepare-ios-cocoapods.sh +41 -3
- package/scripts/release-check.ts +180 -84
- package/scripts/release-workflow-drift.test.ts +57 -0
- package/scripts/relink-workspace-packages-to-dist.mjs +21 -4
- package/scripts/rt.mjs +16 -1
- package/scripts/run-biome-check.mjs +1 -1
- package/scripts/run-coding-agent-e2e.mjs +3 -3
- package/scripts/run-eliza-app-core-script.mjs +34 -0
- package/scripts/run-local-plugin-live-smoke.mjs +71 -2
- package/scripts/run-mobile-build-android-app-actions.test.mjs +426 -0
- package/scripts/run-mobile-build.mjs +4757 -607
- package/scripts/run-node-runtime.mjs +184 -7
- package/scripts/run-node-runtime.test.mjs +167 -0
- package/scripts/run-node-tsx.mjs +80 -33
- package/scripts/run-node.mjs +41 -1
- package/scripts/run-production-build.mjs +34 -27
- package/scripts/run-release-check.mjs +19 -0
- package/scripts/run-release-contract-suite.mjs +107 -14
- package/scripts/run-ui-smoke-playwright-suite.mjs +0 -2
- package/scripts/runtime-package-manifest.ts +21 -3
- package/scripts/setup-upstreams.mjs +42 -1
- package/scripts/sms-gateway-status.mjs +194 -0
- package/scripts/stage-android-agent.test.mjs +97 -0
- package/scripts/stage-elizavoice-lib.mjs +203 -0
- package/scripts/startup-integration-script-drift.test.ts +82 -4
- package/scripts/streaming-pipeline-bench.ts +543 -0
- package/scripts/sync-homepage-porkbun-dns.mjs +262 -0
- package/scripts/test-sms-gateway-software.mjs +100 -0
- package/scripts/type-audit.mjs +1 -1
- package/scripts/validate-bluebubbles-outbound.mjs +293 -0
- package/scripts/validate-cdn-assets.mjs +15 -7
- package/scripts/validate-regression-matrix.mjs +109 -8
- package/scripts/verify-android-sms-gateway-e2e.mjs +362 -0
- package/scripts/verify-bluebubbles-gateway-e2e.mjs +191 -0
- package/scripts/verify-bluebubbles-inbound-readiness.mjs +88 -0
- package/scripts/verify-cloud-api-production-deploy.mjs +87 -0
- package/scripts/verify-cloud-sms-onboarding-flow.mjs +336 -0
- package/scripts/voice/freeze-voice.mjs +521 -0
- package/scripts/voice-attribution-smoke.ts +538 -0
- package/scripts/voice-create-profile.mjs +379 -0
- package/scripts/voice-duet.mjs +1355 -0
- package/scripts/voice-e2e-hardware.ts +871 -0
- package/scripts/voice-interactive.mjs +1750 -0
- package/scripts/voice-latency-report.mjs +96 -0
- package/scripts/voice-latency-report.test.ts +176 -0
- package/scripts/voice-preset/build-default-voice-preset.mjs +249 -0
- package/scripts/voice-preset/build-onboarding-voice.mjs +281 -0
- package/scripts/watch-sms-gateway-readiness.mjs +303 -0
- package/scripts/write-homepage-release-data.mjs +458 -26
- package/security/agent-vault-id.d.ts +1 -1
- package/security/agent-vault-id.js +1 -1
- package/security/hydrate-wallet-keys-from-platform-store.d.ts.map +1 -1
- package/security/hydrate-wallet-keys-from-platform-store.js +23 -14
- package/security/platform-secure-store-node.d.ts +2 -2
- package/security/platform-secure-store-node.js +3 -3
- package/security/wallet-os-store-actions.d.ts +0 -9
- package/security/wallet-os-store-actions.d.ts.map +1 -1
- package/security/wallet-os-store-actions.js +3 -10
- package/services/account-pool.d.ts +23 -14
- package/services/account-pool.d.ts.map +1 -1
- package/services/account-pool.js +86 -24
- package/services/account-usage.d.ts.map +1 -1
- package/services/account-usage.js +2 -5
- package/services/ambient-audio/consent.d.ts +9 -0
- package/services/ambient-audio/consent.d.ts.map +1 -0
- package/services/ambient-audio/consent.js +28 -0
- package/services/ambient-audio/index.d.ts +7 -0
- package/services/ambient-audio/index.d.ts.map +1 -0
- package/services/ambient-audio/index.js +4 -0
- package/services/ambient-audio/replay-buffer.d.ts +14 -0
- package/services/ambient-audio/replay-buffer.d.ts.map +1 -0
- package/services/ambient-audio/replay-buffer.js +66 -0
- package/services/ambient-audio/response-gate.d.ts +3 -0
- package/services/ambient-audio/response-gate.d.ts.map +1 -0
- package/services/ambient-audio/response-gate.js +33 -0
- package/services/ambient-audio/service.d.ts +22 -0
- package/services/ambient-audio/service.d.ts.map +1 -0
- package/services/ambient-audio/service.js +47 -0
- package/services/ambient-audio/types.d.ts +42 -0
- package/services/ambient-audio/types.d.ts.map +1 -0
- package/services/app-updates/update-policy.d.ts +64 -0
- package/services/app-updates/update-policy.d.ts.map +1 -0
- package/services/app-updates/update-policy.js +228 -0
- package/services/auth-store.d.ts +37 -1
- package/services/auth-store.d.ts.map +1 -1
- package/services/auth-store.js +59 -26
- package/services/cloud-jwks-store.d.ts +3 -3
- package/services/cloud-jwks-store.d.ts.map +1 -1
- package/services/cloud-jwks-store.js +5 -8
- package/services/coding-account-bridge.d.ts +71 -0
- package/services/coding-account-bridge.d.ts.map +1 -0
- package/services/coding-account-bridge.js +267 -0
- package/services/connector-target-catalog.d.ts +10 -3
- package/services/connector-target-catalog.d.ts.map +1 -1
- package/services/connector-target-catalog.js +7 -4
- package/services/credential-tunnel-service.d.ts +66 -0
- package/services/credential-tunnel-service.d.ts.map +1 -0
- package/services/credential-tunnel-service.js +227 -0
- package/services/github-credentials.d.ts +1 -1
- package/services/github-credentials.js +1 -1
- package/services/inference-abort.d.ts +47 -0
- package/services/inference-abort.d.ts.map +1 -0
- package/services/inference-abort.js +76 -0
- package/services/persistence.d.ts +2 -3
- package/services/persistence.d.ts.map +1 -1
- package/services/persistence.js +2 -3
- package/services/phrase-chunked-tts.d.ts +136 -0
- package/services/phrase-chunked-tts.d.ts.map +1 -0
- package/services/phrase-chunked-tts.js +208 -0
- package/services/sandbox-registry.d.ts +78 -0
- package/services/sandbox-registry.d.ts.map +1 -0
- package/services/sandbox-registry.js +323 -0
- package/services/secrets-manager-installer.d.ts +8 -1
- package/services/secrets-manager-installer.d.ts.map +1 -1
- package/services/secrets-manager-installer.js +27 -2
- package/services/sensitive-requests/cloud-link-adapter.d.ts +15 -0
- package/services/sensitive-requests/cloud-link-adapter.d.ts.map +1 -0
- package/services/sensitive-requests/cloud-link-adapter.js +73 -0
- package/services/sensitive-requests/index.d.ts +27 -0
- package/services/sensitive-requests/index.d.ts.map +1 -0
- package/services/sensitive-requests/index.js +51 -0
- package/services/sensitive-requests/instruct-dm-only-adapter.d.ts +14 -0
- package/services/sensitive-requests/instruct-dm-only-adapter.d.ts.map +1 -0
- package/services/sensitive-requests/instruct-dm-only-adapter.js +22 -0
- package/services/sensitive-requests/owner-app-inline-adapter.d.ts +3 -0
- package/services/sensitive-requests/owner-app-inline-adapter.d.ts.map +1 -0
- package/services/sensitive-requests/owner-app-inline-adapter.js +146 -0
- package/services/sensitive-requests/owner-app-oauth-adapter.d.ts +3 -0
- package/services/sensitive-requests/owner-app-oauth-adapter.d.ts.map +1 -0
- package/services/sensitive-requests/owner-app-oauth-adapter.js +156 -0
- package/services/sensitive-requests/public-link-adapter.d.ts +14 -0
- package/services/sensitive-requests/public-link-adapter.d.ts.map +1 -0
- package/services/sensitive-requests/public-link-adapter.js +86 -0
- package/services/sensitive-requests/tunnel-link-adapter.d.ts +17 -0
- package/services/sensitive-requests/tunnel-link-adapter.d.ts.map +1 -0
- package/services/sensitive-requests/tunnel-link-adapter.js +38 -0
- package/services/steward-credentials.d.ts +1 -1
- package/services/steward-credentials.d.ts.map +1 -1
- package/services/steward-credentials.js +10 -6
- package/services/steward-sidecar/health-check.d.ts.map +1 -1
- package/services/steward-sidecar/health-check.js +4 -3
- package/services/steward-sidecar/process-management.d.ts +1 -1
- package/services/steward-sidecar/process-management.d.ts.map +1 -1
- package/services/steward-sidecar/process-management.js +9 -3
- package/services/steward-sidecar/types.d.ts +1 -1
- package/services/steward-sidecar/types.d.ts.map +1 -1
- package/services/steward-sidecar/wallet-setup.d.ts.map +1 -1
- package/services/steward-sidecar/wallet-setup.js +8 -7
- package/services/steward-sidecar.d.ts +2 -2
- package/services/steward-sidecar.d.ts.map +1 -1
- package/services/steward-sidecar.js +27 -19
- package/services/task-host-capabilities.d.ts +60 -0
- package/services/task-host-capabilities.d.ts.map +1 -0
- package/services/task-host-capabilities.js +122 -0
- package/services/tool-call-cache/index.d.ts +2 -2
- package/services/tool-call-cache/index.d.ts.map +1 -1
- package/services/tool-call-cache/index.js +1 -1
- package/services/trigger-event-bridge.js +1 -1
- package/services/tunnel-to-mobile/index.d.ts +2 -0
- package/services/tunnel-to-mobile/index.d.ts.map +1 -0
- package/services/tunnel-to-mobile/index.js +1 -0
- package/services/tunnel-to-mobile/tunnel-to-mobile-client.d.ts +105 -0
- package/services/tunnel-to-mobile/tunnel-to-mobile-client.d.ts.map +1 -0
- package/services/tunnel-to-mobile/tunnel-to-mobile-client.js +190 -0
- package/services/vault-bootstrap.d.ts.map +1 -1
- package/services/vault-bootstrap.js +48 -21
- package/services/vault-mirror.d.ts +1 -1
- package/services/vault-mirror.d.ts.map +1 -1
- package/services/vault-mirror.js +29 -6
- package/services/voice-profiles/diarization-pipeline.d.ts +6 -0
- package/services/voice-profiles/diarization-pipeline.d.ts.map +1 -0
- package/services/voice-profiles/diarization-pipeline.js +20 -0
- package/services/voice-profiles/index.d.ts +12 -0
- package/services/voice-profiles/index.d.ts.map +1 -0
- package/services/voice-profiles/index.js +5 -0
- package/services/voice-profiles/nickname-evaluator.d.ts +14 -0
- package/services/voice-profiles/nickname-evaluator.d.ts.map +1 -0
- package/services/voice-profiles/nickname-evaluator.js +46 -0
- package/services/voice-profiles/owner-confidence.d.ts +10 -0
- package/services/voice-profiles/owner-confidence.d.ts.map +1 -0
- package/services/voice-profiles/owner-confidence.js +38 -0
- package/services/voice-profiles/private-challenge.d.ts +20 -0
- package/services/voice-profiles/private-challenge.d.ts.map +1 -0
- package/services/voice-profiles/private-challenge.js +44 -0
- package/services/voice-profiles/store.d.ts +21 -0
- package/services/voice-profiles/store.d.ts.map +1 -0
- package/services/voice-profiles/store.js +50 -0
- package/services/voice-profiles/types.d.ts +38 -0
- package/services/voice-profiles/types.d.ts.map +1 -0
- package/services/voice-profiles/types.js +1 -0
- package/styles/electrobun-mac-window-drag.css +4 -4
- package/test/helpers/__tests__/live-agent-test.smoke.test.ts +43 -70
- package/test/helpers/browser-mocks.ts +2 -2
- package/test/helpers/conditional-tests.ts +2 -2
- package/test/helpers/i18n.ts +1 -1
- package/test/helpers/live-agent-test.ts +537 -551
- package/test/helpers/live-provider.test.ts +4 -4
- package/test/helpers/live-provider.ts +41 -7
- package/test/helpers/live-runtime-server.ts +4 -4
- package/test/helpers/pglite-runtime.ts +1 -1
- package/test/helpers/real-runtime.ts +54 -15
- package/test/helpers/trajectory-harness.ts +11 -7
- package/test/scripts/start-eliza-live.ts +9 -0
- package/test/scripts/test-parallel.mjs +1 -1
- package/test/scripts/test-root-unit.mjs +6 -7
- package/ui-compat.d.ts +13 -2
- package/ui-compat.d.ts.map +1 -1
- package/ui-compat.js +19 -3
- package/api/auth-pairing-compat-routes.d.ts +0 -17
- package/api/auth-pairing-compat-routes.d.ts.map +0 -1
- package/api/auth-pairing-compat-routes.js +0 -301
- package/api/local-inference-compat-routes.d.ts +0 -16
- package/api/local-inference-compat-routes.d.ts.map +0 -1
- package/api/local-inference-compat-routes.js +0 -617
- package/api/onboarding-compat-routes.d.ts +0 -4
- package/api/onboarding-compat-routes.d.ts.map +0 -1
- package/api/onboarding-compat-routes.js +0 -207
- package/api/plugins-compat-routes.d.ts +0 -103
- package/api/plugins-compat-routes.d.ts.map +0 -1
- package/api/plugins-compat-routes.js +0 -1181
- package/api/server-onboarding-compat.d.ts +0 -31
- package/api/server-onboarding-compat.d.ts.map +0 -1
- package/api/server-onboarding-compat.js +0 -283
- package/benchmark/cua-routes.d.ts +0 -10
- package/benchmark/cua-routes.d.ts.map +0 -1
- package/benchmark/cua-routes.js +0 -179
- package/benchmark/mock-plugin-base.d.ts +0 -9
- package/benchmark/mock-plugin-base.d.ts.map +0 -1
- package/benchmark/mock-plugin-base.js +0 -325
- package/cli/parse-duration.d.ts +0 -5
- package/cli/parse-duration.d.ts.map +0 -1
- package/cli/parse-duration.js +0 -27
- package/patches/llama-cpp-capacitor@0.1.5.patch +0 -2387
- package/platform/agent-browser-stub.d.ts +0 -27
- package/platform/agent-browser-stub.d.ts.map +0 -1
- package/platform/agent-browser-stub.js +0 -16
- package/platforms/android/app/src/androidTest/java/com/getcapacitor/myapp/ExampleInstrumentedTest.java +0 -26
- package/platforms/android/app/src/main/res/drawable/ic_launcher_background.xml +0 -170
- package/platforms/android/app/src/main/res/drawable-v24/ic_launcher_foreground.xml +0 -34
- package/platforms/android/app/src/test/java/com/getcapacitor/myapp/ExampleUnitTest.java +0 -18
- package/platforms/electrobun/assets/appIcon.iconset/icon_512x512@2x.png +0 -0
- package/platforms/electrobun/assets/appIcon.png +0 -0
- package/platforms/electrobun/scripts/build-whisper-universal.sh +0 -137
- package/platforms/electrobun/scripts/build-whisper.sh +0 -95
- package/platforms/electrobun/src/libMacWindowEffects.dylib +0 -0
- package/platforms/electrobun/src/native/whisper.ts +0 -280
- package/platforms/ios/App/App/Assets.xcassets/Splash.imageset/splash-2732x2732-1.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/Splash.imageset/splash-2732x2732-2.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/Splash.imageset/splash-2732x2732.png +0 -0
- package/registry/generate-apps.d.ts +0 -2
- package/registry/generate-apps.d.ts.map +0 -1
- package/registry/generate-apps.js +0 -338
- package/registry/generate.d.ts +0 -2
- package/registry/generate.d.ts.map +0 -1
- package/registry/generate.js +0 -506
- package/runtime/embedding-manager-support.d.ts +0 -77
- package/runtime/embedding-manager-support.d.ts.map +0 -1
- package/runtime/embedding-manager-support.js +0 -309
- package/runtime/embedding-presets.d.ts +0 -5
- package/runtime/embedding-presets.d.ts.map +0 -1
- package/runtime/embedding-presets.js +0 -47
- package/runtime/embedding-warmup-policy.d.ts +0 -13
- package/runtime/embedding-warmup-policy.d.ts.map +0 -1
- package/runtime/embedding-warmup-policy.js +0 -33
- package/runtime/ensure-local-inference-handler.d.ts +0 -25
- package/runtime/ensure-local-inference-handler.d.ts.map +0 -1
- package/runtime/ensure-local-inference-handler.js +0 -389
- package/runtime/mobile-local-inference-gate.d.ts +0 -21
- package/runtime/mobile-local-inference-gate.d.ts.map +0 -1
- package/runtime/mobile-local-inference-gate.js +0 -24
- package/scripts/aosp/avd-test.mjs +0 -403
- package/scripts/aosp/boot-validate.mjs +0 -536
- package/scripts/aosp/build-aosp.mjs +0 -448
- package/scripts/aosp/build-bootanimation.mjs +0 -178
- package/scripts/aosp/capture-screens.mjs +0 -325
- package/scripts/aosp/e2e-validate.mjs +0 -225
- package/scripts/aosp/lint-init-rc.mjs +0 -258
- package/scripts/aosp/llama-shim/eliza_llama_shim.c +0 -276
- package/scripts/aosp/sim.mjs +0 -277
- package/scripts/aosp/sync-to-aosp.mjs +0 -134
- package/scripts/aosp/validate.mjs +0 -1273
- package/scripts/build-llama-cpp-dflash.mjs +0 -1866
- package/scripts/generate-onboarding-voicelines.mjs +0 -194
- package/scripts/generated/static-asset-manifest.json +0 -4
- package/scripts/normalize-parallax-capture.ts +0 -97
- package/scripts/omnivoice-fuse/Makefile +0 -44
- package/scripts/omnivoice-fuse/README.md +0 -266
- package/scripts/omnivoice-fuse/cmake-graft.mjs +0 -180
- package/scripts/omnivoice-fuse/ffi-stub.c +0 -222
- package/scripts/omnivoice-fuse/ffi.h +0 -158
- package/scripts/omnivoice-fuse/libelizainference_stub.dylib +0 -0
- package/scripts/omnivoice-fuse/verify-symbols.mjs +0 -138
- package/security/cloud-secret-store.d.ts +0 -34
- package/security/cloud-secret-store.d.ts.map +0 -1
- package/security/cloud-secret-store.js +0 -65
- package/security/export-guard.d.ts +0 -34
- package/security/export-guard.d.ts.map +0 -1
- package/security/export-guard.js +0 -127
- package/services/local-inference/__stress__/cache-stress-helpers.d.ts +0 -76
- package/services/local-inference/__stress__/cache-stress-helpers.d.ts.map +0 -1
- package/services/local-inference/__stress__/cache-stress-helpers.js +0 -238
- package/services/local-inference/active-model.d.ts +0 -180
- package/services/local-inference/active-model.d.ts.map +0 -1
- package/services/local-inference/active-model.js +0 -362
- package/services/local-inference/assignments.d.ts +0 -58
- package/services/local-inference/assignments.d.ts.map +0 -1
- package/services/local-inference/assignments.js +0 -179
- package/services/local-inference/backend.d.ts +0 -200
- package/services/local-inference/backend.d.ts.map +0 -1
- package/services/local-inference/backend.js +0 -242
- package/services/local-inference/bundled-models.d.ts +0 -34
- package/services/local-inference/bundled-models.d.ts.map +0 -1
- package/services/local-inference/bundled-models.js +0 -104
- package/services/local-inference/cache-bridge.d.ts +0 -184
- package/services/local-inference/cache-bridge.d.ts.map +0 -1
- package/services/local-inference/cache-bridge.js +0 -333
- package/services/local-inference/catalog.d.ts +0 -57
- package/services/local-inference/catalog.d.ts.map +0 -1
- package/services/local-inference/catalog.js +0 -262
- package/services/local-inference/conversation-registry.d.ts +0 -122
- package/services/local-inference/conversation-registry.d.ts.map +0 -1
- package/services/local-inference/conversation-registry.js +0 -182
- package/services/local-inference/device-bridge.d.ts +0 -139
- package/services/local-inference/device-bridge.d.ts.map +0 -1
- package/services/local-inference/device-bridge.js +0 -774
- package/services/local-inference/dflash-doctor.d.ts +0 -27
- package/services/local-inference/dflash-doctor.d.ts.map +0 -1
- package/services/local-inference/dflash-doctor.js +0 -149
- package/services/local-inference/dflash-server.d.ts +0 -248
- package/services/local-inference/dflash-server.d.ts.map +0 -1
- package/services/local-inference/dflash-server.js +0 -1076
- package/services/local-inference/downloader.d.ts +0 -48
- package/services/local-inference/downloader.d.ts.map +0 -1
- package/services/local-inference/downloader.js +0 -688
- package/services/local-inference/engine.d.ts +0 -282
- package/services/local-inference/engine.d.ts.map +0 -1
- package/services/local-inference/engine.js +0 -743
- package/services/local-inference/external-scanner.d.ts +0 -17
- package/services/local-inference/external-scanner.d.ts.map +0 -1
- package/services/local-inference/external-scanner.js +0 -261
- package/services/local-inference/handler-registry.d.ts +0 -72
- package/services/local-inference/handler-registry.d.ts.map +0 -1
- package/services/local-inference/handler-registry.js +0 -159
- package/services/local-inference/hardware.d.ts +0 -26
- package/services/local-inference/hardware.d.ts.map +0 -1
- package/services/local-inference/hardware.js +0 -139
- package/services/local-inference/hf-search.d.ts +0 -19
- package/services/local-inference/hf-search.d.ts.map +0 -1
- package/services/local-inference/hf-search.js +0 -169
- package/services/local-inference/index.d.ts +0 -10
- package/services/local-inference/index.d.ts.map +0 -1
- package/services/local-inference/index.js +0 -7
- package/services/local-inference/llama-server-metrics.d.ts +0 -108
- package/services/local-inference/llama-server-metrics.d.ts.map +0 -1
- package/services/local-inference/llama-server-metrics.js +0 -175
- package/services/local-inference/manifest/index.d.ts +0 -4
- package/services/local-inference/manifest/index.d.ts.map +0 -1
- package/services/local-inference/manifest/index.js +0 -5
- package/services/local-inference/manifest/schema.d.ts +0 -419
- package/services/local-inference/manifest/schema.d.ts.map +0 -1
- package/services/local-inference/manifest/schema.js +0 -227
- package/services/local-inference/manifest/types.d.ts +0 -23
- package/services/local-inference/manifest/types.d.ts.map +0 -1
- package/services/local-inference/manifest/types.js +0 -5
- package/services/local-inference/manifest/validator.d.ts +0 -43
- package/services/local-inference/manifest/validator.d.ts.map +0 -1
- package/services/local-inference/manifest/validator.js +0 -187
- package/services/local-inference/paths.d.ts +0 -8
- package/services/local-inference/paths.d.ts.map +0 -1
- package/services/local-inference/paths.js +0 -7
- package/services/local-inference/providers.d.ts +0 -61
- package/services/local-inference/providers.d.ts.map +0 -1
- package/services/local-inference/providers.js +0 -334
- package/services/local-inference/ram-budget.d.ts +0 -57
- package/services/local-inference/ram-budget.d.ts.map +0 -1
- package/services/local-inference/ram-budget.js +0 -107
- package/services/local-inference/readiness.d.ts +0 -9
- package/services/local-inference/readiness.d.ts.map +0 -1
- package/services/local-inference/readiness.js +0 -153
- package/services/local-inference/recommendation.d.ts +0 -62
- package/services/local-inference/recommendation.d.ts.map +0 -1
- package/services/local-inference/recommendation.js +0 -309
- package/services/local-inference/registry.d.ts +0 -35
- package/services/local-inference/registry.d.ts.map +0 -1
- package/services/local-inference/registry.js +0 -117
- package/services/local-inference/router-handler.d.ts +0 -51
- package/services/local-inference/router-handler.d.ts.map +0 -1
- package/services/local-inference/router-handler.js +0 -165
- package/services/local-inference/routing-policy.d.ts +0 -55
- package/services/local-inference/routing-policy.d.ts.map +0 -1
- package/services/local-inference/routing-policy.js +0 -195
- package/services/local-inference/routing-preferences.d.ts +0 -8
- package/services/local-inference/routing-preferences.d.ts.map +0 -1
- package/services/local-inference/routing-preferences.js +0 -7
- package/services/local-inference/service.d.ts +0 -88
- package/services/local-inference/service.d.ts.map +0 -1
- package/services/local-inference/service.js +0 -210
- package/services/local-inference/session-pool.d.ts +0 -72
- package/services/local-inference/session-pool.d.ts.map +0 -1
- package/services/local-inference/session-pool.js +0 -125
- package/services/local-inference/types.d.ts +0 -309
- package/services/local-inference/types.d.ts.map +0 -1
- package/services/local-inference/types.js +0 -23
- package/services/local-inference/verify.d.ts +0 -8
- package/services/local-inference/verify.d.ts.map +0 -1
- package/services/local-inference/verify.js +0 -7
- package/services/local-inference/voice/barge-in.d.ts +0 -15
- package/services/local-inference/voice/barge-in.d.ts.map +0 -1
- package/services/local-inference/voice/barge-in.js +0 -20
- package/services/local-inference/voice/engine-bridge.d.ts +0 -256
- package/services/local-inference/voice/engine-bridge.d.ts.map +0 -1
- package/services/local-inference/voice/engine-bridge.js +0 -398
- package/services/local-inference/voice/ffi-bindings.d.ts +0 -114
- package/services/local-inference/voice/ffi-bindings.d.ts.map +0 -1
- package/services/local-inference/voice/ffi-bindings.js +0 -281
- package/services/local-inference/voice/index.d.ts +0 -51
- package/services/local-inference/voice/index.d.ts.map +0 -1
- package/services/local-inference/voice/index.js +0 -50
- package/services/local-inference/voice/lifecycle.d.ts +0 -135
- package/services/local-inference/voice/lifecycle.d.ts.map +0 -1
- package/services/local-inference/voice/lifecycle.js +0 -189
- package/services/local-inference/voice/phoneme-tokenizer.d.ts +0 -58
- package/services/local-inference/voice/phoneme-tokenizer.d.ts.map +0 -1
- package/services/local-inference/voice/phoneme-tokenizer.js +0 -53
- package/services/local-inference/voice/phrase-cache.d.ts +0 -24
- package/services/local-inference/voice/phrase-cache.d.ts.map +0 -1
- package/services/local-inference/voice/phrase-cache.js +0 -32
- package/services/local-inference/voice/phrase-chunker.d.ts +0 -20
- package/services/local-inference/voice/phrase-chunker.d.ts.map +0 -1
- package/services/local-inference/voice/phrase-chunker.js +0 -85
- package/services/local-inference/voice/ring-buffer.d.ts +0 -40
- package/services/local-inference/voice/ring-buffer.d.ts.map +0 -1
- package/services/local-inference/voice/ring-buffer.js +0 -85
- package/services/local-inference/voice/rollback-queue.d.ts +0 -24
- package/services/local-inference/voice/rollback-queue.d.ts.map +0 -1
- package/services/local-inference/voice/rollback-queue.js +0 -49
- package/services/local-inference/voice/scheduler.d.ts +0 -47
- package/services/local-inference/voice/scheduler.d.ts.map +0 -1
- package/services/local-inference/voice/scheduler.js +0 -123
- package/services/local-inference/voice/shared-resources.d.ts +0 -119
- package/services/local-inference/voice/shared-resources.d.ts.map +0 -1
- package/services/local-inference/voice/shared-resources.js +0 -83
- package/services/local-inference/voice/speaker-preset-cache.d.ts +0 -28
- package/services/local-inference/voice/speaker-preset-cache.d.ts.map +0 -1
- package/services/local-inference/voice/speaker-preset-cache.js +0 -44
- package/services/local-inference/voice/types.d.ts +0 -80
- package/services/local-inference/voice/types.d.ts.map +0 -1
- package/services/local-inference/voice/voice-preset-format.d.ts +0 -56
- package/services/local-inference/voice/voice-preset-format.d.ts.map +0 -1
- package/services/local-inference/voice/voice-preset-format.js +0 -184
- package/services/plugin-installer.d.ts +0 -22
- package/services/plugin-installer.d.ts.map +0 -1
- package/services/plugin-installer.js +0 -41
- package/test/scripts/task-agent-live-smoke.ts +0 -1335
- /package/services/{local-inference/voice → ambient-audio}/types.js +0 -0
package/benchmark/server.js
CHANGED
|
@@ -10,17 +10,892 @@ import crypto from "node:crypto";
|
|
|
10
10
|
import http from "node:http";
|
|
11
11
|
import path from "node:path";
|
|
12
12
|
import { pathToFileURL } from "node:url";
|
|
13
|
-
import {
|
|
14
|
-
import { AgentRuntime, elizaLogger, stringToUuid, } from "@elizaos/core";
|
|
13
|
+
import { AgentRuntime, elizaLogger, ModelType, stringToUuid, } from "@elizaos/core";
|
|
15
14
|
import dotenv from "dotenv";
|
|
16
|
-
import {
|
|
17
|
-
import {
|
|
18
|
-
import {
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
15
|
+
import { CORE_PLUGINS } from "../../../agent/src/runtime/core-plugins.js";
|
|
16
|
+
import { createElizaPlugin } from "../../../agent/src/runtime/eliza-plugin.js";
|
|
17
|
+
import { autoWireCerebras } from "./cerebras-autowire.js";
|
|
18
|
+
import { LifeOpsBenchHandler, } from "./lifeops-bench-handler.js";
|
|
19
|
+
import { clearCapturedAction, createBenchmarkPlugin, getCapturedAction, getCapturedActions, setBenchmarkContext, } from "./plugin.js";
|
|
20
|
+
import { benchmarkTurnMetadata, capturedActionsToToolCalls, capturedActionToParams, coerceActions, coerceParams, composeBenchmarkPrompt, createSession, ensureBenchmarkSessionContext, extractBenchmarkName, extractRecord, extractTaskId, formatUnknownError, normalizeBenchmarkContext, normalizeBenchmarkModelUsage, resolveHost, resolvePort, sessionKey, summarizeBenchmarkTurnUsage, toPlugin, } from "./server-utils.js";
|
|
21
|
+
// The bench server can be spawned from any working directory (for example
// `ElizaServerManager` starts it with `cwd=packages/app-core`). A plain
// `dotenv.config({ path: cwd/.env })` then never sees the repo-root `.env`, so
// `CEREBRAS_API_KEY` arrives unset. Walk upward and load the first `.env` that
// can actually be read, so the server works regardless of where the parent
// process anchored cwd.
/**
 * Load the nearest readable `.env` at or above `startDir` into `process.env`.
 *
 * Checks `startDir` and up to seven ancestors, stopping early at the
 * filesystem root. Variables already present in the environment are kept
 * (`override: false`).
 *
 * @param {string} startDir - Directory to start searching from.
 * @returns {string | null} Path of the `.env` that was loaded, or `null` when
 *   none was found within the search depth.
 */
function loadEnvFromAncestors(startDir) {
    let current = path.resolve(startDir);
    for (let i = 0; i < 8; i += 1) {
        const candidate = path.join(current, ".env");
        // dotenv performs the read itself, which avoids a separate existence
        // check. A miss is reported through `error`; `parsed` alone is not a
        // reliable signal because some dotenv releases return an empty
        // `parsed` object even when the file could not be read.
        const result = dotenv.config({ path: candidate, override: false });
        if (!result.error) {
            return candidate;
        }
        const parent = path.dirname(current);
        if (parent === current) {
            // Reached the filesystem root.
            break;
        }
        current = parent;
    }
    return null;
}
const _loadedEnvPath = loadEnvFromAncestors(process.cwd());
if (_loadedEnvPath) {
    elizaLogger.debug(`[bench] Loaded env from ${_loadedEnvPath}`);
}
|
|
49
|
+
// Cerebras auto-wiring. See `./cerebras-autowire.ts` for the rationale and
|
|
50
|
+
// the rules under which `CEREBRAS_API_KEY` / `CEREBRAS_BASE_URL` /
|
|
51
|
+
// `CEREBRAS_MODEL` are promoted to OpenAI-compat env keys.
|
|
52
|
+
autoWireCerebras();
|
|
22
53
|
const BENCH_TOKEN = process.env.ELIZA_BENCH_TOKEN?.trim() || null;
const OPENROUTER_PLUGIN_MODULE = "@elizaos/plugin-openrouter";
// Retry tuning for the direct OpenAI-compatible HTTP path; each value can be
// overridden through the environment variable named in the call.
const OPENAI_COMPAT_MAX_ATTEMPTS = envPositiveInt("CEREBRAS_BENCH_MAX_ATTEMPTS", 4);
const OPENAI_COMPAT_RETRY_BASE_MS = envPositiveInt("CEREBRAS_BENCH_RETRY_BASE_MS", 4000);
const OPENAI_COMPAT_RETRY_MAX_MS = envPositiveInt("CEREBRAS_BENCH_RETRY_MAX_MS", 30000);
/**
 * Read a strictly positive integer from an environment variable.
 *
 * @param {string} name - Environment variable to read.
 * @param {number} fallback - Value returned when the variable is unset, empty,
 *   not parseable as a base-10 integer, or not greater than zero.
 * @returns {number} The parsed integer or `fallback`.
 */
function envPositiveInt(name, fallback) {
    const raw = process.env[name];
    if (!raw) {
        return fallback;
    }
    const value = Number.parseInt(raw, 10);
    if (!Number.isFinite(value) || value <= 0) {
        return fallback;
    }
    return value;
}
|
|
65
|
+
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>} Promise settled by the timer callback.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
|
|
68
|
+
/**
 * Decide whether an HTTP status from the OpenAI-compatible endpoint is worth
 * retrying: 408, 409, 429, or any status of 500 and above.
 *
 * @param {number} status - HTTP response status code.
 * @returns {boolean} `true` when the request should be retried.
 */
function isRetryableOpenAiCompatibleStatus(status) {
    if (status >= 500) {
        return true;
    }
    return [408, 409, 429].includes(status);
}
|
|
71
|
+
/**
 * Compute how long to wait before retrying a failed request.
 *
 * A `retry-after` header wins when present: a positive number is read as
 * seconds, otherwise the value is tried as a date. Without a usable header the
 * delay is exponential backoff from `OPENAI_COMPAT_RETRY_BASE_MS` plus up to
 * 249 ms of random jitter. Header-derived delays and the backoff term are both
 * capped at `OPENAI_COMPAT_RETRY_MAX_MS`.
 *
 * @param {{ headers: { get(name: string): string | null } }} response - Failed response.
 * @param {number} attempt - 1-based number of the attempt that just failed.
 * @returns {number} Delay in milliseconds.
 */
function openAiCompatibleRetryDelayMs(response, attempt) {
    const header = response.headers.get("retry-after");
    if (header) {
        const delaySeconds = Number.parseFloat(header);
        if (Number.isFinite(delaySeconds) && delaySeconds > 0) {
            const delayMs = Math.ceil(delaySeconds * 1000);
            return Math.min(delayMs, OPENAI_COMPAT_RETRY_MAX_MS);
        }
        const retryAt = Date.parse(header);
        if (Number.isFinite(retryAt)) {
            // A date in the past yields no wait at all.
            const remainingMs = Math.max(retryAt - Date.now(), 0);
            return Math.min(remainingMs, OPENAI_COMPAT_RETRY_MAX_MS);
        }
    }
    const exponent = Math.max(attempt - 1, 0);
    const backoffMs = Math.min(OPENAI_COMPAT_RETRY_BASE_MS * 2 ** exponent, OPENAI_COMPAT_RETRY_MAX_MS);
    const jitterMs = Math.floor(Math.random() * 250);
    return backoffMs + jitterMs;
}
|
|
85
|
+
/**
 * Translate the requested benchmark task agent into the environment variables
 * the agent orchestrator reads.
 *
 * The request comes from `BENCHMARK_TASK_AGENT`, falling back to
 * `ELIZA_ACP_DEFAULT_AGENT` and then `ELIZA_DEFAULT_AGENT_TYPE`. Known aliases
 * are mapped to an ACP adapter name; unknown names pass through lower-cased
 * with underscores turned into dashes. An explicit `BENCHMARK_TASK_AGENT`
 * overwrites the agent and selection-strategy variables; otherwise only unset
 * variables are filled in. Does nothing when no agent was requested.
 */
function normalizeBenchmarkTaskAgentEnv() {
    const env = process.env;
    const benchmarkRequested = env.BENCHMARK_TASK_AGENT?.trim();
    const requested = benchmarkRequested ||
        env.ELIZA_ACP_DEFAULT_AGENT?.trim() ||
        env.ELIZA_DEFAULT_AGENT_TYPE?.trim();
    if (!requested) {
        return;
    }
    const adapterByAlias = new Map([
        ["elizaos", "opencode"],
        ["eliza-os", "opencode"],
        ["pi-agent", "opencode"],
        ["pi agent", "opencode"],
        ["claude-code", "claude"],
        ["claude code", "claude"],
        ["openai", "codex"],
        ["openai-codex", "codex"],
        ["openai codex", "codex"],
        ["open-code", "opencode"],
        ["open code", "opencode"],
    ]);
    const normalized = requested.toLowerCase().replace(/_/g, "-");
    const acpAgent = adapterByAlias.get(normalized) ?? normalized;
    env.BENCHMARK_TASK_AGENT ??= requested;
    env.ELIZA_AGENT_ORCHESTRATOR ??= "1";
    env.ELIZA_AGENT_SELECTION_STRATEGY ??= "fixed";
    if (benchmarkRequested) {
        // An explicit benchmark request takes precedence over existing values.
        env.ELIZA_AGENT_SELECTION_STRATEGY = "fixed";
        env.ELIZA_ACP_DEFAULT_AGENT = acpAgent;
        env.ELIZA_DEFAULT_AGENT_TYPE = acpAgent;
    }
    else {
        env.ELIZA_ACP_DEFAULT_AGENT ??= acpAgent;
        env.ELIZA_DEFAULT_AGENT_TYPE ??= acpAgent;
    }
    elizaLogger.info(`[bench] Benchmark task-agent ${requested} mapped to ACP adapter ${acpAgent}`);
}
normalizeBenchmarkTaskAgentEnv();
|
|
122
|
+
/**
 * @param {string} benchmark - Benchmark name as supplied by the harness.
 * @returns {boolean} `true` for `loca_bench` / `loca-bench` (trimmed, case-insensitive).
 */
function isLocaBenchmarkName(benchmark) {
    return ["loca_bench", "loca-bench"].includes(benchmark.trim().toLowerCase());
}
|
|
126
|
+
/**
 * @param {string} benchmark - Benchmark name as supplied by the harness.
 * @returns {boolean} `true` for `bfcl` (trimmed, case-insensitive).
 */
function isBfclBenchmarkName(benchmark) {
    const normalized = benchmark.trim().toLowerCase();
    return normalized === "bfcl";
}
|
|
129
|
+
/**
 * @param {string} benchmark - Benchmark name as supplied by the harness.
 * @returns {boolean} `true` for `tau_bench` / `tau-bench` (trimmed, case-insensitive).
 */
function _isTauBenchmarkName(benchmark) {
    return ["tau_bench", "tau-bench"].includes(benchmark.trim().toLowerCase());
}
|
|
133
|
+
/**
 * @param {string} benchmark - Benchmark name as supplied by the harness.
 * @returns {boolean} `true` for `terminal-bench` / `terminal_bench` (trimmed, case-insensitive).
 */
function isTerminalBenchmarkName(benchmark) {
    return ["terminal-bench", "terminal_bench"].includes(benchmark.trim().toLowerCase());
}
|
|
137
|
+
/**
 * @param {string} benchmark - Benchmark name as supplied by the harness.
 * @returns {boolean} `true` for `swe-bench` / `swe_bench` (trimmed, case-insensitive).
 */
function isSweBenchmarkName(benchmark) {
    return ["swe-bench", "swe_bench"].includes(benchmark.trim().toLowerCase());
}
|
|
141
|
+
/**
 * @param {string} benchmark - Benchmark name as supplied by the harness.
 * @returns {boolean} `true` for `visualwebbench` / `visual-web-bench` (trimmed, case-insensitive).
 */
function isVisualWebBenchmarkName(benchmark) {
    return ["visualwebbench", "visual-web-bench"].includes(benchmark.trim().toLowerCase());
}
|
|
145
|
+
/**
 * @param {string} benchmark - Benchmark name as supplied by the harness.
 * @returns {boolean} `true` for `webshop` / `web-shop` (trimmed, case-insensitive).
 */
function isWebShopBenchmarkName(benchmark) {
    return ["webshop", "web-shop"].includes(benchmark.trim().toLowerCase());
}
|
|
149
|
+
/**
 * @param {string} benchmark - Benchmark name as supplied by the harness.
 * @returns {boolean} `true` for `osworld` / `os-world` (trimmed, case-insensitive).
 */
function isOsworldBenchmarkName(benchmark) {
    return ["osworld", "os-world"].includes(benchmark.trim().toLowerCase());
}
|
|
153
|
+
/**
 * @param {string} benchmark - Benchmark name as supplied by the harness.
 * @returns {boolean} `true` for `hermes_native_env` / `hermes-native-env` (trimmed, case-insensitive).
 */
function isHermesNativeEnvProxyName(benchmark) {
    return ["hermes_native_env", "hermes-native-env"].includes(benchmark.trim().toLowerCase());
}
|
|
157
|
+
/**
 * @param {string} benchmark - Benchmark name as supplied by the harness.
 * @returns {boolean} `true` for `woobench` / `woo-bench` (trimmed, case-insensitive).
 */
function isWooBenchName(benchmark) {
    return ["woobench", "woo-bench"].includes(benchmark.trim().toLowerCase());
}
|
|
161
|
+
/**
 * Check whether a benchmark belongs to the action-calling family, which covers
 * the action-calling, vending-bench and tau-bench names in both dash and
 * underscore spellings.
 *
 * @param {string} benchmark - Benchmark name as supplied by the harness.
 * @returns {boolean} `true` when the trimmed, lower-cased name is in the family.
 */
function isActionCallingBenchmarkName(benchmark) {
    const family = [
        "action-calling",
        "action_calling",
        "vending-bench",
        "vending_bench",
        "tau_bench",
        "tau-bench",
    ];
    return family.includes(benchmark.trim().toLowerCase());
}
|
|
170
|
+
/**
 * @param {string} benchmark - Benchmark name as supplied by the harness.
 * @returns {boolean} `true` for `vending-bench` / `vending_bench` (trimmed, case-insensitive).
 */
function isVendingBenchmarkName(benchmark) {
    return ["vending-bench", "vending_bench"].includes(benchmark.trim().toLowerCase());
}
|
|
174
|
+
// ---------------------------------------------------------------------------
|
|
175
|
+
// Boundary adapters: the benchmark harness and the message normalizers below
|
|
176
|
+
// build OpenAI chat-completion *wire* objects (snake_case `tool_calls` /
|
|
177
|
+
// `tool_call_id`, free-form tool defs) which the direct HTTP path
|
|
178
|
+
// (`callOpenAiCompatibleActionCalling`) forwards verbatim. `runtime.useModel`
|
|
179
|
+
// instead consumes the typed `@elizaos/core` contracts (`ChatMessage[]` /
|
|
180
|
+
// `ToolDefinition[]` / `ToolChoice`). These converters validate the loosely
|
|
181
|
+
// typed wire data at that boundary and return genuinely well-formed core
|
|
182
|
+
// objects, mapping snake_case wire keys onto the camelCase fields the runtime
|
|
183
|
+
// reads.
|
|
184
|
+
// ---------------------------------------------------------------------------
|
|
185
|
+
// Roles accepted on a chat message; anything else is coerced to "user".
const CHAT_MESSAGE_ROLES = new Set(["system", "developer", "user", "assistant", "tool"]);
/**
 * Narrow an arbitrary wire value to a known chat-message role.
 *
 * @param {unknown} value - Role as received on the wire.
 * @returns {string} `value` when it is one of the known roles, otherwise `"user"`.
 */
function asChatMessageRole(value) {
    if (typeof value !== "string") {
        return "user";
    }
    return CHAT_MESSAGE_ROLES.has(value) ? value : "user";
}
|
|
198
|
+
/**
 * Convert one wire-format tool call into a flat `{ id, name, arguments, type }`
 * object.
 *
 * The name and arguments are read from the top level first and from the nested
 * `function` object second. Arguments are kept as a string: string input is
 * passed through, anything else is JSON-serialized. A missing `id` falls back
 * to the tool name.
 *
 * @param {unknown} value - Candidate tool call.
 * @returns {{ id: string, name: string, arguments: string, type: "function" } | null}
 *   The converted call, or `null` when `value` is not a plain object or has no name.
 */
function wireToolCallToToolCall(value) {
    const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (!isRecord) {
        return null;
    }
    const nested = value.function && typeof value.function === "object"
        ? value.function
        : undefined;
    let name = "";
    if (typeof value.name === "string") {
        name = value.name;
    }
    else if (typeof nested?.name === "string") {
        name = nested.name;
    }
    if (!name) {
        return null;
    }
    const rawArguments = value.arguments ?? nested?.arguments ?? {};
    const serializedArguments = typeof rawArguments === "string" ? rawArguments : JSON.stringify(rawArguments);
    return {
        id: typeof value.id === "string" ? value.id : name,
        name,
        arguments: serializedArguments,
        type: "function",
    };
}
|
|
220
|
+
/**
 * Convert OpenAI-wire chat messages into `ChatMessage[]` for `useModel`,
 * mapping snake_case tool fields (`tool_calls`/`tool_call_id`) onto the
 * camelCase `ChatMessage` fields the runtime reads.
 *
 * Entries that are not plain objects (`null`, primitives, arrays) are skipped,
 * so malformed harness data neither throws nor turns into an empty message.
 * Non-string `content` becomes `null`; unknown roles are coerced by
 * `asChatMessageRole`.
 *
 * @param {Array<unknown>} wire - Messages in OpenAI wire shape.
 * @returns {Array<object>} One chat message per usable entry, in input order.
 */
function toChatMessages(wire) {
    const chatMessages = [];
    for (const message of wire) {
        // Same guard as the other message normalizers in this file.
        if (!message || typeof message !== "object" || Array.isArray(message)) {
            continue;
        }
        const role = asChatMessageRole(message.role);
        const content = typeof message.content === "string" ? message.content : null;
        const chatMessage = { role, content };
        if (typeof message.name === "string") {
            chatMessage.name = message.name;
        }
        const toolCallId = message.tool_call_id ?? message.toolCallId;
        if (typeof toolCallId === "string") {
            chatMessage.toolCallId = toolCallId;
        }
        const rawToolCalls = Array.isArray(message.tool_calls)
            ? message.tool_calls
            : Array.isArray(message.toolCalls)
                ? message.toolCalls
                : [];
        const toolCalls = rawToolCalls
            .map(wireToolCallToToolCall)
            .filter((call) => call !== null);
        // Only attach the field when at least one call survived conversion.
        if (toolCalls.length > 0) {
            chatMessage.toolCalls = toolCalls;
        }
        chatMessages.push(chatMessage);
    }
    return chatMessages;
}
|
|
248
|
+
/**
 * Convert harness-supplied tool definitions (OpenAI `{ type, function: {...} }`
 * or flat `{ name, ... }`) into `ToolDefinition[]`. Entries without a usable
 * name are dropped rather than fabricated; this includes `null`, `undefined`
 * and other non-object entries, which are skipped instead of throwing.
 *
 * Top-level `name` / `description` / `parameters` take precedence over the
 * values nested under `function`.
 *
 * @param {Array<unknown>} raw - Tool definitions as received from the harness.
 * @returns {Array<{ name: string, description?: string, parameters?: object }>}
 *   Well-formed definitions in input order.
 */
function toToolDefinitions(raw) {
    const tools = [];
    for (const entry of raw) {
        if (!entry || typeof entry !== "object") {
            continue;
        }
        const fn = entry.function && typeof entry.function === "object"
            ? entry.function
            : undefined;
        const name = typeof entry.name === "string"
            ? entry.name
            : typeof fn?.name === "string"
                ? fn.name
                : "";
        if (!name) {
            continue;
        }
        const description = typeof entry.description === "string"
            ? entry.description
            : typeof fn?.description === "string"
                ? fn.description
                : undefined;
        const rawParameters = entry.parameters ?? fn?.parameters;
        const parameters = rawParameters && typeof rawParameters === "object"
            ? rawParameters
            : undefined;
        // Optional fields are only set when present so the result carries no
        // explicit `undefined` properties.
        const tool = { name };
        if (description !== undefined) {
            tool.description = description;
        }
        if (parameters !== undefined) {
            tool.parameters = parameters;
        }
        tools.push(tool);
    }
    return tools;
}
|
|
284
|
+
/**
 * Narrow a benchmark-supplied tool-choice value to a `ToolChoice`.
 *
 * @param {unknown} value - Requested tool choice.
 * @returns {"none" | "auto" | "required"} `value` when it is one of the three
 *   known modes, otherwise `"required"`.
 */
function toToolChoice(value) {
    switch (value) {
        case "none":
        case "auto":
        case "required":
            return value;
        default:
            return "required";
    }
}
|
|
290
|
+
/**
 * Build the message list for a native action-calling turn.
 *
 * The history in `context.messages` is run through
 * `normalizeLocaNativeMessages`, and the first element of that result is then
 * replaced with the action-calling system prompt. When nothing but the system
 * prompt remains, `text` is appended as the user message.
 *
 * @param {string} text - Fallback user text for an empty history.
 * @param {{ messages?: unknown }} context - Benchmark context.
 * @returns {Array<object>} Messages in OpenAI wire shape.
 */
function normalizeActionCallingNativeMessages(text, context) {
    const history = normalizeLocaNativeMessages(Array.isArray(context.messages) ? context.messages : []);
    const systemMessage = {
        role: "system",
        content: "You are running an action-calling benchmark through the Eliza benchmark server. " +
            "Use native tool calls only. Do not serialize tool calls in prose, XML, markdown, or JSON text. " +
            "If the user asks for multiple operations, emit every required tool call.",
    };
    history[0] = systemMessage;
    if (history.length > 1) {
        return history;
    }
    return [systemMessage, { role: "user", content: text }];
}
|
|
309
|
+
/**
 * Build OpenAI-wire messages for an action-calling turn on the direct HTTP path.
 *
 * Plain-object entries of `context.messages` that carry a string `role` are
 * shallow-copied; everything else is dropped. The action-calling system prompt
 * replaces a leading system message or is inserted at the front, and `text` is
 * appended as a user message when the history has none.
 *
 * @param {string} text - Fallback user text.
 * @param {{ messages?: unknown }} context - Benchmark context.
 * @returns {Array<object>} Messages in OpenAI wire shape.
 */
function normalizeActionCallingOpenAiMessages(text, context) {
    const history = [];
    for (const entry of Array.isArray(context.messages) ? context.messages : []) {
        if (!entry || typeof entry !== "object" || Array.isArray(entry)) {
            continue;
        }
        if (typeof entry.role !== "string") {
            continue;
        }
        history.push({ ...entry });
    }
    const systemPrompt = {
        role: "system",
        content: "Use native function/tool calls for any requested operation. If several operations are required, call every required tool; after a tool result, continue with the remaining required tool calls. Do not serialize tool calls in text, XML, markdown, or JSON. Return assistant text only when no tool call is needed.",
    };
    if (history[0]?.role === "system") {
        history[0] = systemPrompt;
    }
    else {
        history.unshift(systemPrompt);
    }
    const hasUserTurn = history.some((entry) => entry.role === "user");
    if (!hasUserTurn) {
        history.push({ role: "user", content: text });
    }
    return history;
}
|
|
333
|
+
/**
 * Build OpenAI-wire messages for a WooBench turn.
 *
 * Plain-object entries of `context.messages` with a string `role` are
 * shallow-copied. The system message is `context.system_prompt` (trimmed, or a
 * default WooBench prompt when absent or blank) followed by fixed tool-calling
 * instructions; it replaces a leading system message or is inserted at the
 * front. `text` is appended as a user message when the history has none.
 *
 * @param {string} text - Fallback user text.
 * @param {{ messages?: unknown, system_prompt?: unknown }} context - Benchmark context.
 * @returns {Array<object>} Messages in OpenAI wire shape.
 */
function normalizeWooBenchNativeMessages(text, context) {
    const history = [];
    for (const entry of Array.isArray(context.messages) ? context.messages : []) {
        if (!entry || typeof entry !== "object" || Array.isArray(entry)) {
            continue;
        }
        if (typeof entry.role !== "string") {
            continue;
        }
        history.push({ ...entry });
    }
    let basePrompt = "You are running WooBench. Respond naturally, and use payment tools when charging or checking payment.";
    if (typeof context.system_prompt === "string" && context.system_prompt.trim()) {
        basePrompt = context.system_prompt.trim();
    }
    const systemMessage = {
        role: "system",
        content: basePrompt +
            "\n\nUse native tool calls for CREATE_APP_CHARGE and CHECK_PAYMENT. " +
            "When you charge or check payment, include a short conversational message in assistant text. " +
            "Do not serialize tool calls in JSON, XML, markdown, or prose.",
    };
    if (history[0]?.role === "system") {
        history[0] = systemMessage;
    }
    else {
        history.unshift(systemMessage);
    }
    const hasUserTurn = history.some((entry) => entry.role === "user");
    if (!hasUserTurn) {
        history.push({ role: "user", content: text });
    }
    return history;
}
|
|
363
|
+
/**
 * Resolve the endpoint, credentials and model for direct OpenAI-compatible
 * chat-completion calls from the environment.
 *
 * - provider: `BENCHMARK_MODEL_PROVIDER`, then `ELIZA_PROVIDER` (lower-cased).
 * - model: `BENCHMARK_MODEL_NAME`, `OPENAI_LARGE_MODEL`, `LARGE_MODEL`,
 *   `CEREBRAS_MODEL`, in that order.
 * - base URL: `OPENAI_BASE_URL`, `CEREBRAS_BASE_URL`, or the public Cerebras
 *   endpoint when the provider is `cerebras`.
 * - API key: for a Cerebras provider or host, `CEREBRAS_API_KEY` is preferred
 *   over `OPENAI_API_KEY`; otherwise only `OPENAI_API_KEY` is used.
 *
 * Cerebras is detected from the base URL's hostname, so ports and bare
 * `cerebras.ai` hosts are recognised while paths or query strings that merely
 * mention the domain are not.
 *
 * @returns {{ baseUrl: string, apiKey: string, model: string, provider: string } | null}
 *   The resolved configuration, or `null` when model, base URL or API key is missing.
 */
function resolveOpenAiCompatibleActionCallingConfig() {
    // Returns true when `url` points at cerebras.ai or one of its subdomains.
    const isCerebrasBaseUrl = (url) => {
        try {
            return /(^|\.)cerebras\.ai$/i.test(new URL(url).hostname);
        }
        catch {
            // Not an absolute URL (empty or scheme-less); fall back to the
            // textual host match so such values are classified as before.
            return /(^|\.)cerebras\.ai(\/|$)/i.test(url);
        }
    };
    const provider = (process.env.BENCHMARK_MODEL_PROVIDER ||
        process.env.ELIZA_PROVIDER ||
        "")
        .trim()
        .toLowerCase();
    const model = process.env.BENCHMARK_MODEL_NAME?.trim() ||
        process.env.OPENAI_LARGE_MODEL?.trim() ||
        process.env.LARGE_MODEL?.trim() ||
        process.env.CEREBRAS_MODEL?.trim() ||
        "";
    const baseUrl = process.env.OPENAI_BASE_URL?.trim() ||
        process.env.CEREBRAS_BASE_URL?.trim() ||
        (provider === "cerebras" ? "https://api.cerebras.ai/v1" : "");
    const baseUrlIsCerebras = isCerebrasBaseUrl(baseUrl);
    const apiKey = baseUrlIsCerebras || provider === "cerebras"
        ? process.env.CEREBRAS_API_KEY?.trim() ||
            process.env.OPENAI_API_KEY?.trim() ||
            ""
        : process.env.OPENAI_API_KEY?.trim() || "";
    if (!model || !baseUrl || !apiKey) {
        return null;
    }
    return {
        baseUrl,
        apiKey,
        model,
        provider: provider || (baseUrlIsCerebras ? "cerebras" : "openai"),
    };
}
|
|
392
|
+
/**
 * Derive the chat-completions endpoint from an API base URL.
 *
 * Trailing slashes are stripped; the `/chat/completions` suffix is appended
 * unless the base already ends with it.
 *
 * @param {string} baseUrl - API base URL or full endpoint URL.
 * @returns {string} URL ending in `/chat/completions`.
 */
function chatCompletionsUrl(baseUrl) {
    const suffix = "/chat/completions";
    const base = baseUrl.replace(/\/+$/, "");
    if (base.endsWith(suffix)) {
        return base;
    }
    return base + suffix;
}
|
|
398
|
+
/**
 * Return the first finite numeric value found under any of `keys`.
 *
 * Numbers are accepted as-is when finite; non-blank strings are accepted when
 * `Number()` converts them to a finite value. Keys are tried in order.
 *
 * @param {object | null | undefined} source - Record to read from.
 * @param {...string} keys - Candidate property names, highest priority first.
 * @returns {number | undefined} The first usable number, or `undefined`.
 */
function pickUsageNumber(source, ...keys) {
    if (!source) {
        return undefined;
    }
    for (const key of keys) {
        const candidate = source[key];
        switch (typeof candidate) {
            case "number":
                if (Number.isFinite(candidate)) {
                    return candidate;
                }
                break;
            case "string": {
                if (!candidate.trim()) {
                    break;
                }
                const converted = Number(candidate);
                if (Number.isFinite(converted)) {
                    return converted;
                }
                break;
            }
            default:
                break;
        }
    }
    return undefined;
}
|
|
413
|
+
/**
 * Normalize the `usage` object of an OpenAI-compatible chat-completions
 * response into the benchmark's model-usage record.
 *
 * Token counts are read from snake_case or camelCase keys. The total defaults
 * to prompt + completion tokens. Cache counters are looked up on the usage
 * object first, then under `prompt_tokens_details`, then under
 * `input_tokens_details`, and are only included when found.
 *
 * @param {unknown} usage - Raw `usage` value from the response payload.
 * @param {string} provider - Provider label copied into the result.
 * @returns {object | null} Normalized usage, or `null` when `usage` is not a plain object.
 */
function normalizeOpenAiCompatibleUsage(usage, provider) {
    const asRecord = (value) => value && typeof value === "object" && !Array.isArray(value) ? value : undefined;
    const record = asRecord(usage);
    if (!record) {
        return null;
    }
    const promptDetails = asRecord(record.prompt_tokens_details);
    const inputDetails = asRecord(record.input_tokens_details);
    const promptTokens = pickUsageNumber(record, "prompt_tokens", "input_tokens", "promptTokens") ?? 0;
    const completionTokens = pickUsageNumber(record, "completion_tokens", "output_tokens", "completionTokens") ?? 0;
    const totalTokens = pickUsageNumber(record, "total_tokens", "totalTokens") ??
        promptTokens + completionTokens;
    const cacheReadInputTokens = pickUsageNumber(record, "cache_read_input_tokens", "cached_tokens", "cachedInputTokens", "cacheReadInputTokens") ??
        pickUsageNumber(promptDetails, "cached_tokens", "cache_read_input_tokens") ??
        pickUsageNumber(inputDetails, "cached_tokens", "cache_read_input_tokens");
    const cacheCreationInputTokens = pickUsageNumber(record, "cache_creation_input_tokens", "cacheCreationInputTokens") ??
        pickUsageNumber(promptDetails, "cache_creation_input_tokens", "cacheCreationInputTokens") ??
        pickUsageNumber(inputDetails, "cache_creation_input_tokens", "cacheCreationInputTokens");
    const normalized = {
        modelType: ModelType.TEXT_LARGE,
        provider,
        source: "openai-compatible-chat-completions",
        promptTokens,
        completionTokens,
        totalTokens,
    };
    // Cache counters are optional: add them only when the provider reported them.
    if (cacheReadInputTokens !== undefined) {
        normalized.cachedTokens = cacheReadInputTokens;
        normalized.cacheReadInputTokens = cacheReadInputTokens;
    }
    if (cacheCreationInputTokens !== undefined) {
        normalized.cacheCreationInputTokens = cacheCreationInputTokens;
    }
    return normalized;
}
|
|
454
|
+
/**
 * Send an action-calling chat-completion request straight to the configured
 * OpenAI-compatible endpoint, retrying on retryable HTTP statuses.
 *
 * Messages and tools are forwarded in wire shape. `tool_choice` is only sent
 * when tools are present and defaults to `"required"`. Up to
 * `OPENAI_COMPAT_MAX_ATTEMPTS` requests are made, waiting between attempts as
 * computed by `openAiCompatibleRetryDelayMs`. Network-level `fetch` rejections
 * are not retried and propagate to the caller.
 *
 * @param {{ messages: Array<object>, tools: Array<object>, toolChoice?: string,
 *   maxTokens?: number, temperature?: number }} params - Request parameters.
 * @returns {Promise<{ text: string, toolCalls: unknown, usage: object | null } | null>}
 *   The first choice's text, normalized tool calls and usage, or `null` when
 *   no endpoint configuration can be resolved from the environment.
 * @throws {Error} When the final response is not OK; the message carries the
 *   status and the first 500 characters of the response body.
 */
async function callOpenAiCompatibleActionCalling(params) {
    const config = resolveOpenAiCompatibleActionCallingConfig();
    if (!config) {
        return null;
    }
    const requestPayload = {
        model: config.model,
        messages: params.messages,
        max_tokens: params.maxTokens,
        temperature: params.temperature,
    };
    if (params.tools.length > 0) {
        requestPayload.tools = params.tools;
        requestPayload.tool_choice = params.toolChoice || "required";
    }
    const requestBody = JSON.stringify(requestPayload);
    const url = chatCompletionsUrl(config.baseUrl);
    let response = null;
    for (let attempt = 1; attempt <= OPENAI_COMPAT_MAX_ATTEMPTS; attempt += 1) {
        response = await fetch(url, {
            method: "POST",
            headers: {
                Authorization: `Bearer ${config.apiKey}`,
                "Content-Type": "application/json",
            },
            body: requestBody,
        });
        if (response.ok ||
            !isRetryableOpenAiCompatibleStatus(response.status) ||
            attempt >= OPENAI_COMPAT_MAX_ATTEMPTS) {
            break;
        }
        const delayMs = openAiCompatibleRetryDelayMs(response, attempt);
        elizaLogger.warn(`[bench] OpenAI-compatible action-calling request failed (${response.status}); retrying in ${delayMs}ms (attempt ${attempt}/${OPENAI_COMPAT_MAX_ATTEMPTS})`);
        // This response is being discarded: cancel its body so the underlying
        // connection is released now instead of whenever GC gets to it.
        // Best-effort only; a failed cancel must not abort the retry.
        await response.body?.cancel().catch(() => { });
        await sleep(delayMs);
    }
    if (!response) {
        // Only reachable if the attempt limit is configured below 1.
        throw new Error("OpenAI-compatible action-calling request was not sent");
    }
    if (!response.ok) {
        const body = await response.text().catch(() => "");
        throw new Error(`OpenAI-compatible action-calling request failed (${response.status}): ${body.slice(0, 500)}`);
    }
    const payload = (await response.json());
    const choice = Array.isArray(payload.choices)
        ? payload.choices[0]
        : undefined;
    const message = choice?.message &&
        typeof choice.message === "object" &&
        !Array.isArray(choice.message)
        ? choice.message
        : {};
    return {
        text: typeof message.content === "string" ? message.content : "",
        toolCalls: normalizeLocaNativeToolCalls(message.tool_calls),
        usage: normalizeOpenAiCompatibleUsage(payload.usage, config.provider),
    };
}
|
|
515
|
+
/**
 * Send a single-prompt chat-completion request straight to the configured
 * OpenAI-compatible endpoint, retrying on retryable HTTP statuses.
 *
 * The prompt is sent as one user message. For the `cerebras` provider the
 * request also sets `reasoning_effort: "low"`. Up to
 * `OPENAI_COMPAT_MAX_ATTEMPTS` requests are made, waiting between attempts as
 * computed by `openAiCompatibleRetryDelayMs`. Network-level `fetch` rejections
 * are not retried and propagate to the caller.
 *
 * @param {{ prompt: string, maxTokens?: number, temperature?: number }} params - Request parameters.
 * @returns {Promise<{ text: string, usage: object | null } | null>} The first
 *   choice's text and normalized usage, or `null` when no endpoint
 *   configuration can be resolved from the environment.
 * @throws {Error} When the final response is not OK; the message carries the
 *   status and the first 500 characters of the response body.
 */
async function callOpenAiCompatibleText(params) {
    const config = resolveOpenAiCompatibleActionCallingConfig();
    if (!config) {
        return null;
    }
    const requestBody = JSON.stringify({
        model: config.model,
        messages: [{ role: "user", content: params.prompt }],
        max_tokens: params.maxTokens,
        temperature: params.temperature,
        ...(config.provider === "cerebras" ? { reasoning_effort: "low" } : {}),
    });
    const url = chatCompletionsUrl(config.baseUrl);
    let response = null;
    for (let attempt = 1; attempt <= OPENAI_COMPAT_MAX_ATTEMPTS; attempt += 1) {
        response = await fetch(url, {
            method: "POST",
            headers: {
                Authorization: `Bearer ${config.apiKey}`,
                "Content-Type": "application/json",
            },
            body: requestBody,
        });
        if (response.ok ||
            !isRetryableOpenAiCompatibleStatus(response.status) ||
            attempt >= OPENAI_COMPAT_MAX_ATTEMPTS) {
            break;
        }
        const delayMs = openAiCompatibleRetryDelayMs(response, attempt);
        elizaLogger.warn(`[bench] OpenAI-compatible text request failed (${response.status}); retrying in ${delayMs}ms (attempt ${attempt}/${OPENAI_COMPAT_MAX_ATTEMPTS})`);
        // This response is being discarded: cancel its body so the underlying
        // connection is released now instead of whenever GC gets to it.
        // Best-effort only; a failed cancel must not abort the retry.
        await response.body?.cancel().catch(() => { });
        await sleep(delayMs);
    }
    if (!response) {
        // Only reachable if the attempt limit is configured below 1.
        throw new Error("OpenAI-compatible text request was not sent");
    }
    if (!response.ok) {
        const body = await response.text().catch(() => "");
        throw new Error(`OpenAI-compatible text request failed (${response.status}): ${body.slice(0, 500)}`);
    }
    const payload = (await response.json());
    const choice = Array.isArray(payload.choices)
        ? payload.choices[0]
        : undefined;
    const message = choice?.message &&
        typeof choice.message === "object" &&
        !Array.isArray(choice.message)
        ? choice.message
        : {};
    return {
        text: typeof message.content === "string" ? message.content : "",
        usage: normalizeOpenAiCompatibleUsage(payload.usage, config.provider),
    };
}
|
|
566
|
+
/**
 * Build the two-message prompt for a BFCL turn: a fixed system prompt plus the
 * user question.
 *
 * @param {string} text - Fallback user text.
 * @param {{ question?: unknown }} context - Benchmark context; a non-blank
 *   string `question` (trimmed) takes precedence over `text`.
 * @returns {Array<{ role: string, content: string }>} System and user messages.
 */
function normalizeBfclNativeMessages(text, context) {
    let question = text;
    if (typeof context.question === "string" && context.question.trim()) {
        question = context.question.trim();
    }
    const systemMessage = {
        role: "system",
        content: "You are running BFCL through the Eliza benchmark server. Use native " +
            "tool calls only. If the query asks for multiple or parallel calls, " +
            "emit one tool call for each requested operation in the same assistant " +
            "turn. Preserve schema field names and defaults exactly.",
    };
    return [systemMessage, { role: "user", content: question }];
}
|
|
584
|
+
/**
 * Build the message list for a native TauBench turn.
 *
 * The history in `context.messages` is run through
 * `normalizeLocaNativeMessages`, and the first element of that result is then
 * replaced with the TauBench system prompt. When nothing but the system prompt
 * remains, `text` is appended as the user message.
 *
 * @param {string} text - Fallback user text for an empty history.
 * @param {{ messages?: unknown }} context - Benchmark context.
 * @returns {Array<object>} Messages in OpenAI wire shape.
 */
function _normalizeTauNativeMessages(text, context) {
    const history = normalizeLocaNativeMessages(Array.isArray(context.messages) ? context.messages : []);
    const systemMessage = {
        role: "system",
        content: "You are running TauBench through the Eliza benchmark server. Use " +
            "native tool calls for TauBench tools. Do not describe a tool call in " +
            "prose. Use ordinary assistant text only for required customer " +
            "confirmation or final task completion.",
    };
    history[0] = systemMessage;
    if (history.length > 1) {
        return history;
    }
    return [systemMessage, { role: "user", content: text }];
}
|
|
604
|
+
/**
 * Normalize a LOCA-bench conversation into OpenAI-wire messages.
 *
 * The result always starts with the LOCA-bench system prompt. Each plain-object
 * input entry is then appended according to its role:
 * - `assistant`: string content (else `""`) plus any tool calls that
 *   `normalizeLocaIncomingToolCall` accepts; their ids and names are remembered.
 * - `tool`: a tool result whose call id comes from `tool_call_id`,
 *   `toolCallId` or `id` (else `"tool-call"`), and whose name comes from
 *   `name`, `toolName`, the remembered name for that id, or `"tool"`.
 * - anything else: `system` stays `system`, every other role becomes `user`.
 * Non-string content of tool and user/system messages is JSON-serialized.
 *
 * @param {unknown} rawMessages - Conversation history; non-arrays count as empty.
 * @returns {Array<object>} Normalized messages, system prompt first.
 */
function normalizeLocaNativeMessages(rawMessages) {
    const firstString = (...candidates) => candidates.find((candidate) => typeof candidate === "string");
    const contentToText = (content) => typeof content === "string" ? content : JSON.stringify(content ?? "");
    // Tool-call id -> tool name, used to label later tool results.
    const toolNamesById = new Map();
    const normalized = [
        {
            role: "system",
            content: "You are running LOCA-bench through the Eliza benchmark server. " +
                "Use native tool calls, not progress text. If work remains, call " +
                "exactly one available filesystem or memory tool. Existing CSV rows " +
                "may be examples; derive final rows from source_data/local_db and " +
                "source_data/files, then write the requested CSV files.",
        },
    ];
    for (const message of Array.isArray(rawMessages) ? rawMessages : []) {
        if (!message || typeof message !== "object" || Array.isArray(message)) {
            continue;
        }
        const role = typeof message.role === "string" ? message.role : "user";
        switch (role) {
            case "assistant": {
                let rawToolCalls = [];
                if (Array.isArray(message.tool_calls)) {
                    rawToolCalls = message.tool_calls;
                }
                else if (Array.isArray(message.toolCalls)) {
                    rawToolCalls = message.toolCalls;
                }
                const toolCalls = rawToolCalls
                    .map((call) => normalizeLocaIncomingToolCall(call))
                    .filter(Boolean);
                for (const call of toolCalls) {
                    const id = typeof call.id === "string" ? call.id : "";
                    const fn = call.function && typeof call.function === "object"
                        ? call.function
                        : {};
                    const name = typeof fn.name === "string" ? fn.name : "";
                    if (id && name) {
                        toolNamesById.set(id, name);
                    }
                }
                const assistantMessage = {
                    role: "assistant",
                    content: typeof message.content === "string" ? message.content : "",
                };
                if (toolCalls.length > 0) {
                    assistantMessage.tool_calls = toolCalls;
                }
                normalized.push(assistantMessage);
                break;
            }
            case "tool": {
                const toolCallId = firstString(message.tool_call_id, message.toolCallId, message.id) ?? "tool-call";
                const toolName = firstString(message.name, message.toolName) ??
                    (toolNamesById.get(toolCallId) || "tool");
                normalized.push({
                    role: "tool",
                    tool_call_id: toolCallId,
                    name: toolName,
                    content: contentToText(message.content),
                });
                break;
            }
            default:
                normalized.push({
                    role: role === "system" ? "system" : "user",
                    content: contentToText(message.content),
                });
                break;
        }
    }
    return normalized;
}
|
|
679
|
+
/**
 * Normalize a tool-using conversation into OpenAI-wire messages without adding
 * any benchmark-specific system prompt.
 *
 * Each plain-object input entry is converted according to its role:
 * - `assistant`: string content (else `""`) plus any tool calls that
 *   `normalizeLocaIncomingToolCall` accepts; their ids and names are remembered.
 * - `tool`: a tool result whose call id comes from `tool_call_id`,
 *   `toolCallId` or `id` (else `"tool-call"`), and whose name comes from
 *   `name`, `toolName`, the remembered name for that id, or `"tool"`.
 * - anything else: `system` stays `system`, every other role becomes `user`.
 * When no message survives, a single user message with `fallbackText` is returned.
 *
 * @param {unknown} rawMessages - Conversation history; non-arrays count as empty.
 * @param {string} fallbackText - User text used when the history is empty.
 * @returns {Array<object>} Normalized messages; never empty.
 */
function normalizeGenericToolMessages(rawMessages, fallbackText) {
    const pickString = (...candidates) => candidates.find((candidate) => typeof candidate === "string");
    const asText = (content) => typeof content === "string" ? content : JSON.stringify(content ?? "");
    const entries = Array.isArray(rawMessages) ? rawMessages : [];
    // Tool-call id -> tool name, used to label later tool results.
    const toolNamesById = new Map();
    const normalized = [];
    for (const message of entries) {
        if (!message || typeof message !== "object" || Array.isArray(message)) {
            continue;
        }
        const role = typeof message.role === "string" ? message.role : "user";
        if (role === "assistant") {
            const incoming = Array.isArray(message.tool_calls)
                ? message.tool_calls
                : Array.isArray(message.toolCalls)
                    ? message.toolCalls
                    : [];
            const toolCalls = [];
            for (const rawCall of incoming) {
                const call = normalizeLocaIncomingToolCall(rawCall);
                if (!call) {
                    continue;
                }
                toolCalls.push(call);
                const id = typeof call.id === "string" ? call.id : "";
                const fn = call.function && typeof call.function === "object"
                    ? call.function
                    : {};
                const name = typeof fn.name === "string" ? fn.name : "";
                if (id && name) {
                    toolNamesById.set(id, name);
                }
            }
            const assistantMessage = {
                role: "assistant",
                content: typeof message.content === "string" ? message.content : "",
            };
            if (toolCalls.length > 0) {
                assistantMessage.tool_calls = toolCalls;
            }
            normalized.push(assistantMessage);
        }
        else if (role === "tool") {
            const toolCallId = pickString(message.tool_call_id, message.toolCallId, message.id) ?? "tool-call";
            const toolName = pickString(message.name, message.toolName) ??
                (toolNamesById.get(toolCallId) || "tool");
            normalized.push({
                role: "tool",
                tool_call_id: toolCallId,
                name: toolName,
                content: asText(message.content),
            });
        }
        else {
            normalized.push({
                role: role === "system" ? "system" : "user",
                content: asText(message.content),
            });
        }
    }
    if (normalized.length === 0) {
        normalized.push({ role: "user", content: fallbackText });
    }
    return normalized;
}
|
|
748
|
+
/**
 * Normalizes one incoming tool call into the `{ id, type, function }` shape.
 *
 * The tool name is taken from `function.name`, then `name`, then `toolName`;
 * the arguments from `function.arguments`, then `arguments`, then `input`.
 * Arguments that are not already a string are JSON-encoded.
 *
 * @param {unknown} raw - Candidate tool call.
 * @returns {object|null} The normalized call, or null when `raw` is not a
 *   plain object or no non-empty tool name can be found.
 */
function normalizeLocaIncomingToolCall(raw) {
    if (raw === null || typeof raw !== "object" || Array.isArray(raw)) {
        return null;
    }
    const firstString = (...candidates) => candidates.find((candidate) => typeof candidate === "string");
    const fn = raw.function !== null && typeof raw.function === "object"
        ? raw.function
        : {};
    const name = firstString(fn.name, raw.name, raw.toolName) ?? "";
    if (!name) {
        return null;
    }
    const args = fn.arguments ?? raw.arguments ?? raw.input ?? {};
    // A random hex suffix is generated only when the caller supplied no id.
    const id = firstString(raw.id, raw.toolCallId) ??
        `call_loca_${Math.random().toString(16).slice(2)}`;
    const serializedArgs = typeof args === "string" ? args : JSON.stringify(args);
    return {
        id,
        type: "function",
        function: {
            name,
            arguments: serializedArgs,
        },
    };
}
|
|
778
|
+
/**
 * Normalizes a list of tool calls into `{ id, type, function }` records.
 *
 * The tool name is taken from `toolName`, then `name`, then `function.name`;
 * the arguments from `input`, `args`, `arguments`, then `function.arguments`.
 * Entries that are not plain objects, or that have no non-empty name, are
 * dropped. Arguments that are not already a string are JSON-encoded.
 *
 * @param {unknown} rawToolCalls - Candidate tool-call list.
 * @returns {Array<object>} Normalized calls; empty when the input is not an array.
 */
function normalizeLocaNativeToolCalls(rawToolCalls) {
    if (!Array.isArray(rawToolCalls)) {
        return [];
    }
    const firstString = (...candidates) => candidates.find((candidate) => typeof candidate === "string");
    return rawToolCalls.reduce((accepted, entry) => {
        if (entry === null || typeof entry !== "object" || Array.isArray(entry)) {
            return accepted;
        }
        const fn = entry.function !== null && typeof entry.function === "object"
            ? entry.function
            : {};
        const name = firstString(entry.toolName, entry.name, fn.name) ?? "";
        if (!name) {
            return accepted;
        }
        const args = entry.input ?? entry.args ?? entry.arguments ?? fn.arguments ?? {};
        // The fallback id is numbered by how many calls were accepted so far.
        const id = firstString(entry.toolCallId, entry.id) ??
            `call_loca_native_${accepted.length}`;
        accepted.push({
            id,
            type: "function",
            function: {
                name,
                arguments: typeof args === "string" ? args : JSON.stringify(args),
            },
        });
        return accepted;
    }, []);
}
|
|
814
|
+
/**
 * Builds an action record from the first tool call in the list.
 *
 * The call's serialized arguments are JSON-decoded; text that does not parse
 * is preserved under a `_raw` key instead.
 *
 * @param {Array<object>} toolCalls - Tool calls exposing `function.name` and `function.arguments`.
 * @returns {{tool_name: string, arguments: *}|null} The action, or null when the list has no first entry.
 */
function firstLocaBenchmarkActionFromToolCalls(toolCalls) {
    const head = toolCalls[0];
    if (!head) {
        return null;
    }
    const { name, arguments: serialized } = head.function;
    const decode = () => {
        try {
            // An empty or missing argument string decodes to an empty object.
            return JSON.parse(serialized || "{}");
        }
        catch {
            return { _raw: serialized };
        }
    };
    return {
        tool_name: name,
        arguments: decode(),
    };
}
|
|
830
|
+
/**
 * Builds a WooBench action from the first tool call in the list.
 *
 * Only the first call is considered. Its function name is trimmed and
 * upper-cased and must be CREATE_APP_CHARGE or CHECK_PAYMENT. The decoded
 * arguments are merged into the result, but the validated `command` always
 * wins: a `command` key inside the arguments cannot replace it.
 *
 * @param {Array<object>} toolCalls - Tool calls exposing `function.name` and `function.arguments`.
 * @returns {object|null} `{ command, ...arguments }`, or null when there is no
 *   first call, it has no string name, or the name is not a supported command.
 */
function firstWooBenchActionFromToolCalls(toolCalls) {
    const first = toolCalls[0];
    if (!first) {
        return null;
    }
    const rawName = first.function?.name;
    if (typeof rawName !== "string") {
        return null;
    }
    const command = rawName.trim().toUpperCase();
    if (command !== "CREATE_APP_CHARGE" && command !== "CHECK_PAYMENT") {
        return null;
    }
    let args = {};
    try {
        args = JSON.parse(first.function.arguments || "{}");
    }
    catch {
        // Unparsable arguments are treated as "no arguments".
        args = {};
    }
    const payload = args && typeof args === "object" && !Array.isArray(args)
        ? args
        : {};
    // `command` is spread last so argument keys can never override the
    // whitelisted command derived from the tool name.
    return { ...payload, command };
}
|
|
850
|
+
/**
 * Converts every tool call into a `{ name, arguments }` record.
 *
 * Serialized arguments are JSON-decoded; text that does not parse is kept
 * under a `_raw` key. The same list is exposed both as `calls` and as
 * `arguments.calls`.
 *
 * @param {Array<object>} toolCalls - Tool calls exposing `function.name` and `function.arguments`.
 * @returns {{calls: Array<object>, arguments: {calls: Array<object>}}|null}
 *   The action, or null when the list is empty.
 */
function bfclBenchmarkActionFromToolCalls(toolCalls) {
    if (toolCalls.length === 0) {
        return null;
    }
    const decodeArguments = (serialized) => {
        try {
            // An empty or missing argument string decodes to an empty object.
            return JSON.parse(serialized || "{}");
        }
        catch {
            return { _raw: serialized };
        }
    };
    const calls = toolCalls.map(({ function: fn }) => ({
        name: fn.name,
        arguments: decodeArguments(fn.arguments),
    }));
    return { calls, arguments: { calls } };
}
|
|
871
|
+
/**
 * Finds the first WebShop command among the tool calls.
 *
 * Only calls named `webshop_action` or `benchmark_action` (case-insensitive)
 * are inspected. The command is the trimmed `command` string of the decoded
 * arguments, or the trimmed `action` string when `command` is not a string.
 * Calls that yield no non-empty command are passed over.
 *
 * @param {Iterable<object>} toolCalls - Tool calls exposing `function.name` and `function.arguments`.
 * @returns {{command: string}|null} The first command found, or null.
 */
function webshopBenchmarkActionFromToolCalls(toolCalls) {
    const acceptedNames = new Set(["webshop_action", "benchmark_action"]);
    for (const { function: fn } of toolCalls) {
        if (!acceptedNames.has(fn.name.toLowerCase())) {
            continue;
        }
        let decoded;
        try {
            decoded = JSON.parse(fn.arguments || "{}");
        }
        catch {
            decoded = { _raw: fn.arguments };
        }
        const isRecord = decoded !== null && typeof decoded === "object" && !Array.isArray(decoded);
        if (!isRecord) {
            continue;
        }
        let command = "";
        if (typeof decoded.command === "string") {
            command = decoded.command.trim();
        }
        else if (typeof decoded.action === "string") {
            command = decoded.action.trim();
        }
        if (command) {
            return { command };
        }
    }
    return null;
}
|
|
24
899
|
// ---------------------------------------------------------------------------
|
|
25
900
|
// Security: authentication + CORS
|
|
26
901
|
// ---------------------------------------------------------------------------
|
|
@@ -31,6 +906,127 @@ const MAX_BODY_BYTES = Number.isFinite(configuredMaxBodyBytes) && configuredMaxB
|
|
|
31
906
|
: DEFAULT_MAX_BODY_BYTES;
|
|
32
907
|
/** Allowed CORS origins — only localhost variants. */
|
|
33
908
|
const LOCALHOST_ORIGINS = new Set(["http://localhost", "https://localhost"]);
|
|
909
|
+
/**
 * Builds the context object handed to the LifeOps benchmark planner.
 *
 * Calendar events that are not cancelled are ordered by how close their start
 * is to the backend's current time, with ties broken by id, and capped at 80
 * entries. Events whose start cannot be parsed sort after every event with a
 * valid start. When the backend time itself cannot be parsed, all events are
 * ordered by id only. The last 12 tool-call results from earlier turns are
 * included as well.
 *
 * @param {object} backend - Object exposing `toDocument()`, `getNow()` and `getSeed()`.
 * @param {Array<object>} [previousTurns] - Earlier turns, each with `userText`,
 *   `assistantText` and a `toolCalls` list; a missing list is treated as empty.
 * @returns {{nowIso: string, today: string, seed: *, calendarEvents: Array<object>, previousToolResults: Array<object>}}
 */
function buildLifeOpsBenchmarkContext(backend, previousTurns) {
    const world = backend.toDocument();
    const nowIso = backend.getNow();
    const nowMs = Date.parse(nowIso);
    const hasClock = Number.isFinite(nowMs);
    // Distance is computed once per event rather than on every comparison.
    // An unparsable start maps to Infinity so the comparator never sees NaN,
    // which would make the ordering inconsistent and skip the id tie-break.
    const distanceFromNow = (event) => {
        if (!hasClock) {
            return 0;
        }
        const startMs = Date.parse(event.start);
        return Number.isFinite(startMs)
            ? Math.abs(startMs - nowMs)
            : Number.POSITIVE_INFINITY;
    };
    const calendarEvents = Object.values(world.stores.calendar_event)
        .filter((event) => event.status !== "cancelled")
        .map((event) => ({ event, distance: distanceFromNow(event) }))
        .sort((a, b) => {
            if (a.distance !== b.distance) {
                return a.distance < b.distance ? -1 : 1;
            }
            return a.event.id.localeCompare(b.event.id);
        })
        .slice(0, 80)
        .map(({ event }) => ({
            id: event.id,
            calendarId: event.calendar_id,
            title: event.title,
            start: event.start,
            end: event.end,
            status: event.status,
            source: event.source,
        }));
    const turns = Array.isArray(previousTurns) ? previousTurns : [];
    const previousToolResults = turns
        .flatMap((turn) => {
            const turnCalls = Array.isArray(turn?.toolCalls) ? turn.toolCalls : [];
            return turnCalls.map((call) => ({
                userText: turn.userText,
                assistantText: turn.assistantText,
                tool: call.name,
                arguments: call.arguments,
                ok: call.ok,
                result: call.result,
                error: call.error,
            }));
        })
        .slice(-12);
    return {
        nowIso,
        // First ten characters of the timestamp, i.e. the date part of an ISO string.
        today: nowIso.slice(0, 10),
        seed: backend.getSeed(),
        calendarEvents,
        previousToolResults,
    };
}
|
|
955
|
+
/**
 * Builds the two-message prompt (system + user) for a LifeOps tool-calling turn.
 *
 * The system message carries fixed instructions followed by the benchmark
 * context pretty-printed as JSON with a two-space indent. The user message
 * carries the caller's text unchanged.
 *
 * @param {{lifeopsContext: *, userText: string}} params
 * @returns {Array<{role: string, content: string}>} `[systemMessage, userMessage]`.
 */
function buildLifeOpsActionCallingMessages(params) {
    const { lifeopsContext, userText } = params;
    const serializedContext = JSON.stringify(lifeopsContext, null, 2);
    const instructionParts = [
        "You are running LifeOpsBench through the Eliza benchmark server. ",
        "Use native tool calls for calendar, mail, message, task, and related LifeOps operations. ",
        "For free/busy or availability questions, call CALENDAR with action and subaction exactly ",
        "check_availability and provide top-level startAt/endAt ISO timestamps; do not use search_events. ",
        "Do not serialize tool calls in text, XML, markdown, or JSON. ",
        "After a tool call, the benchmark backend will execute it and feed back the result on the next turn. ",
        "Return assistant text only when no tool call is needed.\n\n",
        `LifeOps benchmark context:\n${serializedContext}`,
    ];
    const systemMessage = { role: "system", content: instructionParts.join("") };
    const userMessage = { role: "user", content: userText };
    return [systemMessage, userMessage];
}
|
|
975
|
+
/**
 * Converts native tool calls into `{ id, name, arguments }` records.
 *
 * Serialized arguments are JSON-decoded. Anything that fails to parse, or
 * that decodes to something other than a plain object, becomes `{}`. A call
 * without a truthy id is given `call_<index>`.
 *
 * @param {Array<object>} toolCalls - Tool calls exposing `id`, `function.name` and `function.arguments`.
 * @returns {Array<{id: string, name: string, arguments: object}>}
 */
function lifeOpsToolCallsFromNativeToolCalls(toolCalls) {
    const toArgumentRecord = (serialized) => {
        let decoded;
        try {
            decoded = JSON.parse(serialized || "{}");
        }
        catch {
            return {};
        }
        const isRecord = Boolean(decoded) && typeof decoded === "object" && !Array.isArray(decoded);
        return isRecord ? decoded : {};
    };
    return toolCalls.map(({ id, function: fn }, position) => ({
        id: id || `call_${position}`,
        name: fn.name,
        arguments: toArgumentRecord(fn.arguments),
    }));
}
|
|
995
|
+
/**
 * Decides whether the tool calls of a follow-up turn should be dropped.
 *
 * Returns true only when all of the following hold:
 *  - the user text is blank and the response text is not;
 *  - there is at least one tool call, and every one targets SCHEDULED_TASKS
 *    or REMINDERS with action "create", "add", or no action at all;
 *  - the context's previous tool results include a successful call to a tool
 *    whose name starts with CALENDAR and whose subaction resolves to
 *    "check_availability".
 *
 * @param {{userText: string, responseText: string, toolCalls: Array<object>, lifeopsContext: object}} params
 * @returns {boolean}
 */
function shouldDropLifeOpsReadOnlyFollowupToolCalls(params) {
    const hasUserText = params.userText.trim() !== "";
    const hasResponseText = params.responseText.trim() !== "";
    if (hasUserText || !hasResponseText) {
        return false;
    }
    if (params.toolCalls.length === 0) {
        return false;
    }
    const reminderTools = new Set(["SCHEDULED_TASKS", "REMINDERS"]);
    const createActions = new Set(["create", "add", ""]);
    const isReminderCreate = (call) => {
        const toolName = call.name.trim().toUpperCase();
        const actionName = String(call.arguments.action ?? "").toLowerCase();
        return reminderTools.has(toolName) && createActions.has(actionName);
    };
    if (!params.toolCalls.every(isReminderCreate)) {
        return false;
    }
    const history = params.lifeopsContext.previousToolResults;
    if (!Array.isArray(history)) {
        return false;
    }
    const asObject = (value) => (value && typeof value === "object" ? value : {});
    const isSuccessfulAvailabilityCheck = (entry) => {
        if (!entry || typeof entry !== "object" || entry.ok !== true) {
            return false;
        }
        const toolName = String(entry.tool ?? "").toUpperCase();
        if (!toolName.startsWith("CALENDAR")) {
            return false;
        }
        const callArgs = asObject(entry.arguments);
        const callResult = asObject(entry.result);
        // The subaction is read from the call arguments first, then from the result.
        const subaction = String(callArgs.subaction ?? callArgs.action ?? callResult.subaction ?? "");
        return subaction.toLowerCase() === "check_availability";
    };
    return history.some(isSuccessfulAvailabilityCheck);
}
|
|
34
1030
|
function isAllowedOrigin(origin) {
|
|
35
1031
|
if (!origin)
|
|
36
1032
|
return false;
|
|
@@ -97,7 +1093,7 @@ function disableManualCompactionAction(runtime) {
|
|
|
97
1093
|
if (!Array.isArray(runtimeWithActions.actions)) {
|
|
98
1094
|
return;
|
|
99
1095
|
}
|
|
100
|
-
const compactSessionIndex = runtimeWithActions.actions.findIndex((action) => action
|
|
1096
|
+
const compactSessionIndex = runtimeWithActions.actions.findIndex((action) => action.name.toUpperCase() === "COMPACT_SESSION");
|
|
101
1097
|
if (compactSessionIndex === -1) {
|
|
102
1098
|
return;
|
|
103
1099
|
}
|
|
@@ -137,7 +1133,7 @@ async function collectSessionDiagnostics(runtime, session) {
|
|
|
137
1133
|
}),
|
|
138
1134
|
]);
|
|
139
1135
|
const compactionSummaries = allMessages
|
|
140
|
-
.filter((m) => m.content
|
|
1136
|
+
.filter((m) => m.content.source === "compaction")
|
|
141
1137
|
.sort((a, b) => (a.createdAt ?? 0) - (b.createdAt ?? 0));
|
|
142
1138
|
const latestCompactionSummary = compactionSummaries.at(-1) ?? null;
|
|
143
1139
|
const latestSummaryText = typeof latestCompactionSummary?.content?.text === "string"
|
|
@@ -146,10 +1142,10 @@ async function collectSessionDiagnostics(runtime, session) {
|
|
|
146
1142
|
const summaryPreview = latestSummaryText.slice(0, 400);
|
|
147
1143
|
const providerNames = runtime.providers.map((provider) => provider.name);
|
|
148
1144
|
const evaluatorNames = runtime.evaluators
|
|
149
|
-
?.map((evaluator) => evaluator
|
|
1145
|
+
?.map((evaluator) => evaluator.name ?? "")
|
|
150
1146
|
.filter((name) => name.length > 0) ?? [];
|
|
151
1147
|
const actionNames = runtime.actions
|
|
152
|
-
?.map((action) => action
|
|
1148
|
+
?.map((action) => action.name?.toUpperCase() ?? "")
|
|
153
1149
|
.filter((name) => name.length > 0) ?? [];
|
|
154
1150
|
return {
|
|
155
1151
|
benchmark: session.benchmark,
|
|
@@ -188,10 +1184,20 @@ async function collectSessionDiagnostics(runtime, session) {
|
|
|
188
1184
|
actions: actionNames,
|
|
189
1185
|
};
|
|
190
1186
|
}
|
|
191
|
-
// Proper robust server implementation
|
|
192
1187
|
export async function startBenchmarkServer() {
|
|
193
1188
|
const port = resolvePort();
|
|
194
1189
|
elizaLogger.info(`[bench] Initializing eliza benchmark runtime on port ${port}...`);
|
|
1190
|
+
// Force the v5 planner to require a structured tool call on every benchmark
|
|
1191
|
+
// turn (unless explicitly disabled). Without this, the planner often picks
|
|
1192
|
+
// `REPLY` and emits the answer as prose, which scores 0 against harnesses
|
|
1193
|
+
// like LifeOpsBench that judge on tool calls (`MESSAGE.triage`,
|
|
1194
|
+
// `CALENDAR.create_event`, etc.). The core gate in `services/message.ts`
|
|
1195
|
+
// (see `isBenchmarkForcingToolCall`) honors this env var ONLY for messages
|
|
1196
|
+
// whose `content.source === "benchmark"` or whose `content.metadata.benchmark`
|
|
1197
|
+
// is set, so a co-resident chat process is unaffected.
|
|
1198
|
+
if (process.env.ELIZA_BENCH_FORCE_TOOL_CALL === undefined) {
|
|
1199
|
+
process.env.ELIZA_BENCH_FORCE_TOOL_CALL = "1";
|
|
1200
|
+
}
|
|
195
1201
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
196
1202
|
// PLUGIN LOADING — Use full CORE_PLUGINS to test with realistic context
|
|
197
1203
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
@@ -208,13 +1214,45 @@ export async function startBenchmarkServer() {
|
|
|
208
1214
|
const skipPlugins = new Set([
|
|
209
1215
|
"@elizaos/plugin-elizacloud", // Requires elizaOS cloud auth, conflicts with local LLM
|
|
210
1216
|
]);
|
|
1217
|
+
const initialOpenAiBaseUrl = process.env.OPENAI_BASE_URL?.trim();
|
|
1218
|
+
const initialElizaProvider = process.env.ELIZA_PROVIDER?.trim().toLowerCase();
|
|
1219
|
+
const initialBenchProvider = process.env.BENCHMARK_MODEL_PROVIDER?.trim().toLowerCase();
|
|
1220
|
+
const initialCerebrasIntent = (!!initialOpenAiBaseUrl &&
|
|
1221
|
+
/(^|\.)cerebras\.ai(\/|$)/i.test(initialOpenAiBaseUrl)) ||
|
|
1222
|
+
initialElizaProvider === "cerebras" ||
|
|
1223
|
+
initialBenchProvider === "cerebras";
|
|
1224
|
+
// Local-inference stays enabled by default in benchmark mode so embedding,
|
|
1225
|
+
// memory, and retrieval behavior remain representative of the Eliza-1 stack.
|
|
1226
|
+
// A zero-vector stand-in is allowed only as an explicit diagnostic escape
|
|
1227
|
+
// hatch, and logs loudly because those runs are not release evidence.
|
|
1228
|
+
const skipEmbeddingPlugin = process.env.ELIZA_BENCH_ALLOW_STUB_EMBEDDING === "1" ||
|
|
1229
|
+
process.env.ELIZA_BENCH_SKIP_EMBEDDING === "1";
|
|
1230
|
+
if (skipEmbeddingPlugin) {
|
|
1231
|
+
skipPlugins.add("@elizaos/plugin-local-inference");
|
|
1232
|
+
}
|
|
1233
|
+
if (initialCerebrasIntent && !skipEmbeddingPlugin) {
|
|
1234
|
+
skipPlugins.add("@elizaos/plugin-local-inference");
|
|
1235
|
+
elizaLogger.info("[bench] Cerebras benchmark mode: using @elizaos/plugin-openai's deterministic local TEXT_EMBEDDING fallback instead of @elizaos/plugin-local-inference without an active backend.");
|
|
1236
|
+
}
|
|
211
1237
|
const skipCorePlugins = process.env.ELIZA_BENCH_SKIP_CORE_PLUGINS === "true";
|
|
212
|
-
const
|
|
1238
|
+
const corePluginsToLoadBase = skipCorePlugins
|
|
213
1239
|
? ["@elizaos/plugin-sql"]
|
|
214
1240
|
: CORE_PLUGINS;
|
|
1241
|
+
const shouldLoadTaskAgentPlugin = Boolean(process.env.BENCHMARK_TASK_AGENT?.trim() ||
|
|
1242
|
+
process.env.ELIZA_ACP_DEFAULT_AGENT?.trim() ||
|
|
1243
|
+
process.env.ELIZA_DEFAULT_AGENT_TYPE?.trim());
|
|
1244
|
+
const corePluginsToLoad = shouldLoadTaskAgentPlugin
|
|
1245
|
+
? Array.from(new Set([
|
|
1246
|
+
...corePluginsToLoadBase,
|
|
1247
|
+
"@elizaos/plugin-agent-orchestrator",
|
|
1248
|
+
]))
|
|
1249
|
+
: corePluginsToLoadBase;
|
|
215
1250
|
if (skipCorePlugins) {
|
|
216
1251
|
elizaLogger.info("[bench] Loading minimal core plugins for benchmark smoke run");
|
|
217
1252
|
}
|
|
1253
|
+
if (shouldLoadTaskAgentPlugin) {
|
|
1254
|
+
elizaLogger.info("[bench] Loading @elizaos/plugin-agent-orchestrator for benchmark task-agent routing");
|
|
1255
|
+
}
|
|
218
1256
|
// Load all CORE_PLUGINS by default; smoke runs can opt into the minimal
|
|
219
1257
|
// required set so credential-free bridge checks start quickly.
|
|
220
1258
|
for (const pluginName of corePluginsToLoad) {
|
|
@@ -274,8 +1312,29 @@ export async function startBenchmarkServer() {
|
|
|
274
1312
|
catch (error) {
|
|
275
1313
|
elizaLogger.error(`[bench] Failed to load benchmark plugin: ${formatUnknownError(error)}`);
|
|
276
1314
|
}
|
|
277
|
-
//
|
|
278
|
-
//
|
|
1315
|
+
// Register a zero-vector TEXT_EMBEDDING stand-in only when explicitly
|
|
1316
|
+
// requested. The runtime calls `useModel(TEXT_EMBEDDING, ...)` for every
|
|
1317
|
+
// persisted memory; without ANY handler, those calls throw and abort the
|
|
1318
|
+
// turn. This path is diagnostic-only because it does not measure real
|
|
1319
|
+
// Eliza-1 retrieval behavior.
|
|
1320
|
+
if (skipEmbeddingPlugin) {
|
|
1321
|
+
const EMBEDDING_DIMENSIONS = 1024;
|
|
1322
|
+
const benchEmbeddingPlugin = {
|
|
1323
|
+
name: "@elizaos/bench-stub-embedding",
|
|
1324
|
+
description: "Benchmark-mode zero-vector TEXT_EMBEDDING handler. Replaces " +
|
|
1325
|
+
"@elizaos/plugin-local-inference only when " +
|
|
1326
|
+
"ELIZA_BENCH_ALLOW_STUB_EMBEDDING=1 is set.",
|
|
1327
|
+
// Higher than local-embedding's `priority: 10` so we win even if a
|
|
1328
|
+
// CORE_PLUGINS race were to register a competing handler later.
|
|
1329
|
+
priority: 100,
|
|
1330
|
+
models: {
|
|
1331
|
+
TEXT_EMBEDDING: async () => new Array(EMBEDDING_DIMENSIONS).fill(0),
|
|
1332
|
+
},
|
|
1333
|
+
};
|
|
1334
|
+
plugins.push(toPlugin(benchEmbeddingPlugin, "bench-stub-embedding"));
|
|
1335
|
+
elizaLogger.warn(`[bench] Registered zero-vector TEXT_EMBEDDING stand-in (dim=${EMBEDDING_DIMENSIONS}, standIn=true); ` +
|
|
1336
|
+
"this run is not valid release evidence. Unset ELIZA_BENCH_ALLOW_STUB_EMBEDDING and ELIZA_BENCH_SKIP_EMBEDDING to use @elizaos/plugin-local-inference.");
|
|
1337
|
+
}
|
|
279
1338
|
// Load LLM provider plugins based on environment.
|
|
280
1339
|
//
|
|
281
1340
|
// Multi-plugin guard: when both Groq and another OpenAI-compatible
|
|
@@ -288,10 +1347,9 @@ export async function startBenchmarkServer() {
|
|
|
288
1347
|
// wrong on my end. Please try again."). Suppress Groq when the
|
|
289
1348
|
// explicit intent is a different provider.
|
|
290
1349
|
const groqApiKey = process.env.GROQ_API_KEY?.trim();
|
|
291
|
-
const
|
|
292
|
-
const
|
|
293
|
-
const
|
|
294
|
-
const _benchProvider = process.env.BENCHMARK_MODEL_PROVIDER?.trim().toLowerCase();
|
|
1350
|
+
const _cerebrasIntent = initialCerebrasIntent;
|
|
1351
|
+
const _explicitProvider = initialElizaProvider;
|
|
1352
|
+
const _benchProvider = initialBenchProvider;
|
|
295
1353
|
const _suppressGroqForOtherProvider = _cerebrasIntent ||
|
|
296
1354
|
(_explicitProvider !== undefined &&
|
|
297
1355
|
_explicitProvider !== "" &&
|
|
@@ -312,7 +1370,7 @@ export async function startBenchmarkServer() {
|
|
|
312
1370
|
}
|
|
313
1371
|
else if (groqApiKey && _suppressGroqForOtherProvider) {
|
|
314
1372
|
elizaLogger.info("[bench] Skipping @elizaos/plugin-groq: another provider is the explicit intent " +
|
|
315
|
-
`(cerebras=${_cerebrasIntent},
|
|
1373
|
+
`(cerebras=${_cerebrasIntent}, ELIZA_PROVIDER=${_explicitProvider ?? ""}, BENCHMARK_MODEL_PROVIDER=${_benchProvider ?? ""})`);
|
|
316
1374
|
}
|
|
317
1375
|
// Load the OpenAI plugin when either:
|
|
318
1376
|
// - OPENAI_API_KEY is set (and is not actually a Groq key, prefix `gsk_`), or
|
|
@@ -323,9 +1381,9 @@ export async function startBenchmarkServer() {
|
|
|
323
1381
|
const openAiApiKey = process.env.OPENAI_API_KEY?.trim();
|
|
324
1382
|
const openAiBaseURL = process.env.OPENAI_BASE_URL?.trim();
|
|
325
1383
|
const cerebrasApiKey = process.env.CEREBRAS_API_KEY?.trim();
|
|
326
|
-
const
|
|
1384
|
+
const elizaProvider = process.env.ELIZA_PROVIDER?.trim().toLowerCase();
|
|
327
1385
|
const baseUrlIsCerebras = !!openAiBaseURL && /(^|\.)cerebras\.ai(\/|$)/i.test(openAiBaseURL);
|
|
328
|
-
const providerIsCerebras =
|
|
1386
|
+
const providerIsCerebras = elizaProvider === "cerebras";
|
|
329
1387
|
const hasOpenAiCompatibleKey = (openAiApiKey && !openAiApiKey.startsWith("gsk_")) ||
|
|
330
1388
|
((baseUrlIsCerebras || providerIsCerebras) && !!cerebrasApiKey);
|
|
331
1389
|
if (hasOpenAiCompatibleKey) {
|
|
@@ -335,34 +1393,14 @@ export async function startBenchmarkServer() {
|
|
|
335
1393
|
try {
|
|
336
1394
|
const { default: openaiPlugin } = await import("@elizaos/plugin-openai");
|
|
337
1395
|
const openaiPluginResolved = toPlugin(openaiPlugin, "@elizaos/plugin-openai");
|
|
338
|
-
|
|
339
|
-
// TEXT_EMBEDDING handler will 404 against api.cerebras.ai and stall
|
|
340
|
-
// Stage 1 of the message pipeline before the planner picks an action.
|
|
341
|
-
// Strip TEXT_EMBEDDING when cerebras is the explicit intent so
|
|
342
|
-
// plugin-local-embedding (loaded via CORE_PLUGINS) wins for embeddings
|
|
343
|
-
// while the openai plugin still serves TEXT_LARGE / TEXT_SMALL.
|
|
344
|
-
let strippedEmbedding = false;
|
|
345
|
-
if ((baseUrlIsCerebras || providerIsCerebras) &&
|
|
346
|
-
openaiPluginResolved.models &&
|
|
347
|
-
"TEXT_EMBEDDING" in openaiPluginResolved.models) {
|
|
348
|
-
const filteredModels = { ...openaiPluginResolved.models };
|
|
349
|
-
delete filteredModels.TEXT_EMBEDDING;
|
|
350
|
-
plugins.push({
|
|
351
|
-
...openaiPluginResolved,
|
|
352
|
-
models: filteredModels,
|
|
353
|
-
});
|
|
354
|
-
strippedEmbedding = true;
|
|
355
|
-
}
|
|
356
|
-
else {
|
|
357
|
-
plugins.push(openaiPluginResolved);
|
|
358
|
-
}
|
|
1396
|
+
plugins.push(openaiPluginResolved);
|
|
359
1397
|
elizaLogger.info(`[bench] Loaded LLM plugin: @elizaos/plugin-openai (baseURL=${openAiBaseURL ?? "default"}, key=${openAiApiKey
|
|
360
1398
|
? "OPENAI_API_KEY"
|
|
361
1399
|
: cerebrasApiKey
|
|
362
1400
|
? "CEREBRAS_API_KEY"
|
|
363
|
-
: "none"}${
|
|
364
|
-
if (
|
|
365
|
-
elizaLogger.info("[bench] Cerebras detected:
|
|
1401
|
+
: "none"}${baseUrlIsCerebras || providerIsCerebras ? ", TEXT_EMBEDDING local fallback (cerebras)" : ""})`);
|
|
1402
|
+
if (baseUrlIsCerebras || providerIsCerebras) {
|
|
1403
|
+
elizaLogger.info("[bench] Cerebras detected: keeping openai plugin's deterministic local TEXT_EMBEDDING fallback because Cerebras does not expose /v1/embeddings.");
|
|
366
1404
|
}
|
|
367
1405
|
}
|
|
368
1406
|
catch (error) {
|
|
@@ -400,10 +1438,10 @@ export async function startBenchmarkServer() {
|
|
|
400
1438
|
elizaLogger.debug(`[bench] Anthropic plugin not available: ${formatUnknownError(error)}`);
|
|
401
1439
|
}
|
|
402
1440
|
}
|
|
403
|
-
// Load computer use plugin if enabled
|
|
404
|
-
if (process.env.
|
|
1441
|
+
// Load computer use plugin if enabled.
|
|
1442
|
+
if (process.env.COMPUTER_USE_ENABLED === "1") {
|
|
405
1443
|
try {
|
|
406
|
-
process.env.
|
|
1444
|
+
process.env.COMPUTER_USE_ENABLED ??= "1";
|
|
407
1445
|
process.env.COMPUTERUSE_MODE ??= "local";
|
|
408
1446
|
const localComputerusePath = "../../../../plugins/plugin-computeruse/src/index.ts";
|
|
409
1447
|
const computeruseModule = (await import(__rewriteRelativeImportExtension(localComputerusePath)));
|
|
@@ -419,13 +1457,15 @@ export async function startBenchmarkServer() {
|
|
|
419
1457
|
elizaLogger.debug(`[bench] Computer use plugin not available: ${formatUnknownError(error)}`);
|
|
420
1458
|
}
|
|
421
1459
|
}
|
|
422
|
-
|
|
423
|
-
|
|
1460
|
+
const mockBenchmarkEnabled = process.env.ELIZA_BENCH_MOCK === "true";
|
|
1461
|
+
// Load mock plugin for testing. Mock runs are diagnostic only and must not be
|
|
1462
|
+
// treated as release evidence.
|
|
1463
|
+
if (mockBenchmarkEnabled) {
|
|
424
1464
|
try {
|
|
425
1465
|
const mockLocation = "./mock-plugin.ts";
|
|
426
1466
|
const { mockPlugin } = await import(__rewriteRelativeImportExtension(mockLocation));
|
|
427
1467
|
plugins.push(toPlugin(mockPlugin, mockLocation));
|
|
428
|
-
elizaLogger.
|
|
1468
|
+
elizaLogger.warn("[bench] Loaded mock benchmark plugin (mock=true, standIn=true); this run is not valid release evidence.");
|
|
429
1469
|
}
|
|
430
1470
|
catch (error) {
|
|
431
1471
|
elizaLogger.error(`[bench] Failed to load mock benchmark plugin: ${formatUnknownError(error)}`);
|
|
@@ -467,6 +1507,7 @@ export async function startBenchmarkServer() {
|
|
|
467
1507
|
const envKeys = [
|
|
468
1508
|
"GROQ_API_KEY",
|
|
469
1509
|
"OPENAI_API_KEY",
|
|
1510
|
+
"CEREBRAS_API_KEY",
|
|
470
1511
|
"ANTHROPIC_API_KEY",
|
|
471
1512
|
"OPENROUTER_API_KEY",
|
|
472
1513
|
"GOOGLE_GENERATIVE_AI_API_KEY",
|
|
@@ -511,6 +1552,11 @@ export async function startBenchmarkServer() {
|
|
|
511
1552
|
"OPENAI_PLANNER_MODEL",
|
|
512
1553
|
"OPENAI_RESPONSE_HANDLER_MODEL",
|
|
513
1554
|
"OPENAI_SHOULD_RESPOND_MODEL",
|
|
1555
|
+
"CEREBRAS_MODEL",
|
|
1556
|
+
"BENCHMARK_TASK_AGENT",
|
|
1557
|
+
"ELIZA_ACP_DEFAULT_AGENT",
|
|
1558
|
+
"ELIZA_DEFAULT_AGENT_TYPE",
|
|
1559
|
+
"ELIZA_AGENT_SELECTION_STRATEGY",
|
|
514
1560
|
"OPENROUTER_SMALL_MODEL",
|
|
515
1561
|
"OPENROUTER_LARGE_MODEL",
|
|
516
1562
|
"OPENROUTER_NANO_MODEL",
|
|
@@ -550,6 +1596,29 @@ export async function startBenchmarkServer() {
|
|
|
550
1596
|
plugins,
|
|
551
1597
|
});
|
|
552
1598
|
await runtime.initialize();
|
|
1599
|
+
// Wire the local-inference loader subsystem the same way the main app boot
|
|
1600
|
+
// does (eliza/packages/app-core/src/runtime/eliza.ts). Without this, the
|
|
1601
|
+
// bench-server's @elizaos/plugin-local-inference Plugin.init() never
|
|
1602
|
+
// registers a `localInferenceLoader` service, so its TEXT_EMBEDDING handler
|
|
1603
|
+
// falls all the way through to the zero-vector path even when an Eliza-1
|
|
1604
|
+
// bundle is installed locally. Calling it here makes the bench-server use
|
|
1605
|
+
// the eliza-1 embedding model (text/eliza-1-0_8b-32k.gguf) when present,
|
|
1606
|
+
// and harmlessly skips handler upgrades when no backend is available —
|
|
1607
|
+
// matching the main app's behavior so benchmark runs reflect real
|
|
1608
|
+
// retrieval semantics.
|
|
1609
|
+
if (!skipEmbeddingPlugin) {
|
|
1610
|
+
try {
|
|
1611
|
+
const { ensureLocalInferenceHandler } = await import("@elizaos/plugin-local-inference/runtime");
|
|
1612
|
+
await ensureLocalInferenceHandler(runtime);
|
|
1613
|
+
elizaLogger.info("[bench] Wired @elizaos/plugin-local-inference loader (embedding + voice handlers)");
|
|
1614
|
+
}
|
|
1615
|
+
catch (err) {
|
|
1616
|
+
elizaLogger.warn(`[bench] Could not wire @elizaos/plugin-local-inference runtime: ${formatUnknownError(err)}`);
|
|
1617
|
+
}
|
|
1618
|
+
}
|
|
1619
|
+
else {
|
|
1620
|
+
elizaLogger.info("[bench] Skipping @elizaos/plugin-local-inference runtime wiring because benchmark embedding skip is enabled");
|
|
1621
|
+
}
|
|
553
1622
|
disableManualCompactionAction(runtime);
|
|
554
1623
|
const modelHandlers = runtime.models;
|
|
555
1624
|
const modelHandlerSummary = Object.fromEntries([...(modelHandlers?.entries() ?? [])].map(([modelType, handlers]) => [
|
|
@@ -576,25 +1645,10 @@ export async function startBenchmarkServer() {
|
|
|
576
1645
|
registerEvent("MODEL_USED", (payload) => {
|
|
577
1646
|
if (!activeUsageBuffer)
|
|
578
1647
|
return;
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
const promptTokens = typeof tokens.prompt === "number" ? tokens.prompt : 0;
|
|
584
|
-
const completionTokens = typeof tokens.completion === "number" ? tokens.completion : 0;
|
|
585
|
-
const totalTokens = typeof tokens.total === "number"
|
|
586
|
-
? tokens.total
|
|
587
|
-
: promptTokens + completionTokens;
|
|
588
|
-
const cachedTokens = typeof tokens.cached === "number" ? tokens.cached : undefined;
|
|
589
|
-
activeUsageBuffer.push({
|
|
590
|
-
modelType: typeof p.type === "string" ? p.type : "unknown",
|
|
591
|
-
provider: typeof p.provider === "string" ? p.provider : undefined,
|
|
592
|
-
source: typeof p.source === "string" ? p.source : undefined,
|
|
593
|
-
promptTokens,
|
|
594
|
-
completionTokens,
|
|
595
|
-
totalTokens,
|
|
596
|
-
...(cachedTokens !== undefined ? { cachedTokens } : {}),
|
|
597
|
-
});
|
|
1648
|
+
const normalizedUsage = normalizeBenchmarkModelUsage(payload);
|
|
1649
|
+
if (normalizedUsage) {
|
|
1650
|
+
activeUsageBuffer.push(normalizedUsage);
|
|
1651
|
+
}
|
|
598
1652
|
});
|
|
599
1653
|
elizaLogger.info("[bench] Registered MODEL_USED listener for trajectory usage capture");
|
|
600
1654
|
}
|
|
@@ -605,30 +1659,6 @@ export async function startBenchmarkServer() {
|
|
|
605
1659
|
catch (err) {
|
|
606
1660
|
elizaLogger.warn(`[bench] Could not register MODEL_USED listener: ${formatUnknownError(err)}`);
|
|
607
1661
|
}
|
|
608
|
-
const summarizeUsage = (calls) => {
|
|
609
|
-
let promptTokens = 0;
|
|
610
|
-
let completionTokens = 0;
|
|
611
|
-
let totalTokens = 0;
|
|
612
|
-
let cachedTokens = 0;
|
|
613
|
-
for (const call of calls) {
|
|
614
|
-
promptTokens += call.promptTokens;
|
|
615
|
-
completionTokens += call.completionTokens;
|
|
616
|
-
totalTokens += call.totalTokens;
|
|
617
|
-
if (typeof call.cachedTokens === "number") {
|
|
618
|
-
cachedTokens += call.cachedTokens;
|
|
619
|
-
}
|
|
620
|
-
}
|
|
621
|
-
const cacheHitRatio = promptTokens > 0 ? cachedTokens / promptTokens : 0;
|
|
622
|
-
return {
|
|
623
|
-
promptTokens,
|
|
624
|
-
completionTokens,
|
|
625
|
-
totalTokens,
|
|
626
|
-
cachedTokens,
|
|
627
|
-
cacheHitRatio,
|
|
628
|
-
callCount: calls.length,
|
|
629
|
-
calls,
|
|
630
|
-
};
|
|
631
|
-
};
|
|
632
1662
|
const roomToSession = new Map();
|
|
633
1663
|
const entityToSession = new Map();
|
|
634
1664
|
const trajectoriesBySession = new Map();
|
|
@@ -677,7 +1707,6 @@ export async function startBenchmarkServer() {
|
|
|
677
1707
|
};
|
|
678
1708
|
const sessions = new Map();
|
|
679
1709
|
let lastSessionKey = null;
|
|
680
|
-
// Session TTL eviction (R4)
|
|
681
1710
|
const SESSION_TTL_MS = 24 * 60 * 60 * 1000;
|
|
682
1711
|
const SESSION_SWEEP_INTERVAL_MS = 60_000;
|
|
683
1712
|
const sessionCreatedAt = new Map();
|
|
@@ -732,20 +1761,54 @@ export async function startBenchmarkServer() {
|
|
|
732
1761
|
// ────────────────────────────────────────────────────────────────────────
|
|
733
1762
|
const lifeopsBenchHandler = new LifeOpsBenchHandler({
|
|
734
1763
|
checkAuth: checkBenchAuth,
|
|
735
|
-
invokePlanner: async ({ taskId, userText, toolManifest, backend }) => {
|
|
1764
|
+
invokePlanner: async ({ taskId, userText, toolManifest, backend, previousTurns, }) => {
|
|
736
1765
|
const session = resolveSession(taskId, "lifeops_bench", true);
|
|
737
1766
|
if (!session)
|
|
738
1767
|
throw new Error("Failed to resolve lifeops_bench session");
|
|
739
1768
|
await ensureBenchmarkSessionContext(runtime, session);
|
|
1769
|
+
const lifeopsContext = buildLifeOpsBenchmarkContext(backend, previousTurns);
|
|
740
1770
|
const benchmarkContext = normalizeBenchmarkContext(session, {
|
|
741
1771
|
benchmark: "lifeops_bench",
|
|
742
1772
|
task_id: taskId,
|
|
743
1773
|
...(Array.isArray(toolManifest) ? { tools: toolManifest } : {}),
|
|
1774
|
+
lifeops: lifeopsContext,
|
|
744
1775
|
});
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
1776
|
+
if (Array.isArray(toolManifest) && toolManifest.length > 0) {
|
|
1777
|
+
const directUsageBuffer = [];
|
|
1778
|
+
activeUsageBuffer = directUsageBuffer;
|
|
1779
|
+
try {
|
|
1780
|
+
const directResult = await callOpenAiCompatibleActionCalling({
|
|
1781
|
+
messages: buildLifeOpsActionCallingMessages({
|
|
1782
|
+
userText,
|
|
1783
|
+
lifeopsContext,
|
|
1784
|
+
}),
|
|
1785
|
+
tools: toolManifest,
|
|
1786
|
+
toolChoice: "required",
|
|
1787
|
+
maxTokens: 1024,
|
|
1788
|
+
temperature: 0,
|
|
1789
|
+
});
|
|
1790
|
+
if (directResult) {
|
|
1791
|
+
if (directResult.usage) {
|
|
1792
|
+
directUsageBuffer.push(directResult.usage);
|
|
1793
|
+
}
|
|
1794
|
+
const toolCalls = lifeOpsToolCallsFromNativeToolCalls(directResult.toolCalls);
|
|
1795
|
+
if (toolCalls.length > 0) {
|
|
1796
|
+
const usage = summarizeBenchmarkTurnUsage(directUsageBuffer);
|
|
1797
|
+
return { text: directResult.text, toolCalls, usage };
|
|
1798
|
+
}
|
|
1799
|
+
}
|
|
1800
|
+
}
|
|
1801
|
+
finally {
|
|
1802
|
+
activeUsageBuffer = null;
|
|
1803
|
+
}
|
|
1804
|
+
}
|
|
1805
|
+
// The ELIZA_BENCHMARK provider already renders the full LifeOps clock,
|
|
1806
|
+
// world snapshot, tool manifest, and previous tool results. Duplicating
|
|
1807
|
+
// that JSON into the user message balloons Cerebras prompts and can leave
|
|
1808
|
+
// the TS bridge waiting on a huge outbound model call. Keep the message
|
|
1809
|
+
// itself to the user's benchmark instruction and let the provider carry
|
|
1810
|
+
// the structured context.
|
|
1811
|
+
const composedPrompt = userText.trim();
|
|
749
1812
|
const incomingMessage = {
|
|
750
1813
|
id: stringToUuid(`lifeops-msg:${Date.now()}:${Math.random()}`),
|
|
751
1814
|
content: {
|
|
@@ -789,34 +1852,67 @@ export async function startBenchmarkServer() {
|
|
|
789
1852
|
: callbackTexts.join("\n\n");
|
|
790
1853
|
const actions = coerceActions(result.responseContent?.actions);
|
|
791
1854
|
const params = coerceParams(result.responseContent?.params);
|
|
1855
|
+
const capturedAction = getCapturedAction();
|
|
792
1856
|
// Map captured Eliza actions into lifeops_bench tool calls.
|
|
793
1857
|
// Strategy: each action name in `actions` is treated as a tool name;
|
|
794
1858
|
// its arguments come from `params[actionName]` when present, otherwise
|
|
795
1859
|
// an empty object. This matches how OpenClaw/Hermes adapters expose
|
|
796
1860
|
// their tool-call traces. The fake-backend rejects unsupported names
|
|
797
1861
|
// with a clear error so scenario authors learn about gaps quickly.
|
|
798
|
-
const toolCalls =
|
|
1862
|
+
const toolCalls = [];
|
|
1863
|
+
// BENCHMARK_ACTION unwrap: when the planner picks BENCHMARK_ACTION, the
|
|
1864
|
+
// bench plugin captures the underlying tool name + arguments (tau-bench
|
|
1865
|
+
// shape: `{tool_name, arguments}`). Unwrap that capture into a real tool
|
|
1866
|
+
// call against the LifeOps fake backend instead of forwarding the
|
|
1867
|
+
// generic BENCHMARK_ACTION sentinel (which the fake backend rejects).
|
|
1868
|
+
if (capturedAction &&
|
|
1869
|
+
typeof capturedAction.toolName === "string" &&
|
|
1870
|
+
capturedAction.toolName.trim().length > 0) {
|
|
1871
|
+
toolCalls.push({
|
|
1872
|
+
id: "call_0",
|
|
1873
|
+
name: capturedAction.toolName,
|
|
1874
|
+
arguments: capturedAction.arguments &&
|
|
1875
|
+
typeof capturedAction.arguments === "object"
|
|
1876
|
+
? capturedAction.arguments
|
|
1877
|
+
: {},
|
|
1878
|
+
});
|
|
1879
|
+
}
|
|
1880
|
+
// Also pass through any directly-named actions (e.g. when the planner
|
|
1881
|
+
// emits MESSAGE/CALENDAR directly without the BENCHMARK_ACTION wrapper),
|
|
1882
|
+
// skipping the BENCHMARK_ACTION sentinel itself which has already been
|
|
1883
|
+
// unwrapped above. REPLY/RESPOND are terminal assistant messages, not
|
|
1884
|
+
// LifeOps backend tools; forwarding them as tool calls makes the Python
|
|
1885
|
+
// runner keep looping after a finished response.
|
|
1886
|
+
for (const name of actions) {
|
|
1887
|
+
if (name === "BENCHMARK_ACTION" ||
|
|
1888
|
+
name === "REPLY" ||
|
|
1889
|
+
name === "RESPOND")
|
|
1890
|
+
continue;
|
|
1891
|
+
if (capturedAction &&
|
|
1892
|
+
typeof capturedAction.toolName === "string" &&
|
|
1893
|
+
capturedAction.toolName === name)
|
|
1894
|
+
continue;
|
|
799
1895
|
const paramsForAction = params[name];
|
|
800
1896
|
const argumentsObj = paramsForAction &&
|
|
801
1897
|
typeof paramsForAction === "object" &&
|
|
802
1898
|
!Array.isArray(paramsForAction)
|
|
803
1899
|
? paramsForAction
|
|
804
1900
|
: {};
|
|
805
|
-
|
|
806
|
-
id: `call_${
|
|
1901
|
+
toolCalls.push({
|
|
1902
|
+
id: `call_${toolCalls.length}`,
|
|
807
1903
|
name,
|
|
808
1904
|
arguments: argumentsObj,
|
|
809
|
-
};
|
|
810
|
-
}
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
1905
|
+
});
|
|
1906
|
+
}
|
|
1907
|
+
if (shouldDropLifeOpsReadOnlyFollowupToolCalls({
|
|
1908
|
+
userText,
|
|
1909
|
+
responseText,
|
|
1910
|
+
lifeopsContext,
|
|
1911
|
+
toolCalls,
|
|
1912
|
+
})) {
|
|
1913
|
+
toolCalls.length = 0;
|
|
1914
|
+
}
|
|
1915
|
+
const usage = summarizeBenchmarkTurnUsage(turnUsageBuffer);
|
|
820
1916
|
return { text: responseText, toolCalls, usage };
|
|
821
1917
|
},
|
|
822
1918
|
});
|
|
@@ -844,6 +1940,10 @@ export async function startBenchmarkServer() {
|
|
|
844
1940
|
status: "ready",
|
|
845
1941
|
agent_name: runtime.character.name ?? "Eliza",
|
|
846
1942
|
plugins: plugins.length,
|
|
1943
|
+
standIn: skipEmbeddingPlugin || mockBenchmarkEnabled,
|
|
1944
|
+
mock: mockBenchmarkEnabled,
|
|
1945
|
+
stubEmbedding: skipEmbeddingPlugin,
|
|
1946
|
+
releaseEvidence: !(skipEmbeddingPlugin || mockBenchmarkEnabled),
|
|
847
1947
|
active_session: activeSession
|
|
848
1948
|
? {
|
|
849
1949
|
benchmark: activeSession.benchmark,
|
|
@@ -859,15 +1959,23 @@ export async function startBenchmarkServer() {
|
|
|
859
1959
|
return;
|
|
860
1960
|
let body = "";
|
|
861
1961
|
let bodyBytes = 0;
|
|
1962
|
+
let bodyTooLarge = false;
|
|
862
1963
|
req.on("data", (chunk) => {
|
|
863
1964
|
bodyBytes += chunk.length;
|
|
864
1965
|
if (bodyBytes > MAX_BODY_BYTES) {
|
|
1966
|
+
bodyTooLarge = true;
|
|
1967
|
+
res.writeHead(413, { "Content-Type": "application/json" });
|
|
1968
|
+
res.end(JSON.stringify({
|
|
1969
|
+
error: `Request body exceeded max size ${MAX_BODY_BYTES} bytes`,
|
|
1970
|
+
}));
|
|
865
1971
|
req.destroy();
|
|
866
1972
|
return;
|
|
867
1973
|
}
|
|
868
1974
|
body += chunk;
|
|
869
1975
|
});
|
|
870
1976
|
req.on("end", async () => {
|
|
1977
|
+
if (bodyTooLarge)
|
|
1978
|
+
return;
|
|
871
1979
|
try {
|
|
872
1980
|
const parsed = body.trim()
|
|
873
1981
|
? JSON.parse(body)
|
|
@@ -1000,15 +2108,23 @@ export async function startBenchmarkServer() {
|
|
|
1000
2108
|
return;
|
|
1001
2109
|
let body = "";
|
|
1002
2110
|
let bodyBytes = 0;
|
|
2111
|
+
let bodyTooLarge = false;
|
|
1003
2112
|
req.on("data", (chunk) => {
|
|
1004
2113
|
bodyBytes += chunk.length;
|
|
1005
2114
|
if (bodyBytes > MAX_BODY_BYTES) {
|
|
2115
|
+
bodyTooLarge = true;
|
|
2116
|
+
res.writeHead(413, { "Content-Type": "application/json" });
|
|
2117
|
+
res.end(JSON.stringify({
|
|
2118
|
+
error: `Request body exceeded max size ${MAX_BODY_BYTES} bytes`,
|
|
2119
|
+
}));
|
|
1006
2120
|
req.destroy();
|
|
1007
2121
|
return;
|
|
1008
2122
|
}
|
|
1009
2123
|
body += chunk;
|
|
1010
2124
|
});
|
|
1011
2125
|
req.on("end", async () => {
|
|
2126
|
+
if (bodyTooLarge)
|
|
2127
|
+
return;
|
|
1012
2128
|
try {
|
|
1013
2129
|
let parsed;
|
|
1014
2130
|
try {
|
|
@@ -1042,6 +2158,718 @@ export async function startBenchmarkServer() {
|
|
|
1042
2158
|
context: benchmarkContext,
|
|
1043
2159
|
image: parsed.image,
|
|
1044
2160
|
});
|
|
2161
|
+
if (isWooBenchName(session.benchmark)) {
|
|
2162
|
+
const messages = normalizeWooBenchNativeMessages(text, benchmarkContext);
|
|
2163
|
+
const tools = Array.isArray(benchmarkContext.tools)
|
|
2164
|
+
? benchmarkContext.tools
|
|
2165
|
+
: [];
|
|
2166
|
+
const maxTokens = typeof benchmarkContext.max_tokens === "number"
|
|
2167
|
+
? benchmarkContext.max_tokens
|
|
2168
|
+
: 2048;
|
|
2169
|
+
const temperature = typeof benchmarkContext.temperature === "number"
|
|
2170
|
+
? benchmarkContext.temperature
|
|
2171
|
+
: 0;
|
|
2172
|
+
const toolChoice = tools.length === 0
|
|
2173
|
+
? "none"
|
|
2174
|
+
: typeof benchmarkContext.tool_choice === "string"
|
|
2175
|
+
? benchmarkContext.tool_choice
|
|
2176
|
+
: "auto";
|
|
2177
|
+
const turnUsageBuffer = [];
|
|
2178
|
+
activeUsageBuffer = turnUsageBuffer;
|
|
2179
|
+
let nativeResult;
|
|
2180
|
+
try {
|
|
2181
|
+
const directResult = await callOpenAiCompatibleActionCalling({
|
|
2182
|
+
messages,
|
|
2183
|
+
tools,
|
|
2184
|
+
toolChoice,
|
|
2185
|
+
maxTokens,
|
|
2186
|
+
temperature,
|
|
2187
|
+
});
|
|
2188
|
+
if (directResult) {
|
|
2189
|
+
if (directResult.usage) {
|
|
2190
|
+
turnUsageBuffer.push(directResult.usage);
|
|
2191
|
+
}
|
|
2192
|
+
nativeResult = {
|
|
2193
|
+
text: directResult.text,
|
|
2194
|
+
toolCalls: directResult.toolCalls,
|
|
2195
|
+
};
|
|
2196
|
+
}
|
|
2197
|
+
else {
|
|
2198
|
+
const modelRequest = {
|
|
2199
|
+
messages,
|
|
2200
|
+
maxTokens,
|
|
2201
|
+
temperature,
|
|
2202
|
+
};
|
|
2203
|
+
if (tools.length > 0) {
|
|
2204
|
+
modelRequest.tools = tools;
|
|
2205
|
+
modelRequest.toolChoice = toolChoice;
|
|
2206
|
+
}
|
|
2207
|
+
nativeResult = await runtime.useModel(ModelType.TEXT_LARGE, modelRequest);
|
|
2208
|
+
}
|
|
2209
|
+
}
|
|
2210
|
+
finally {
|
|
2211
|
+
activeUsageBuffer = null;
|
|
2212
|
+
}
|
|
2213
|
+
const turnUsage = summarizeBenchmarkTurnUsage(turnUsageBuffer);
|
|
2214
|
+
const nativeRecord = nativeResult && typeof nativeResult === "object"
|
|
2215
|
+
? nativeResult
|
|
2216
|
+
: {};
|
|
2217
|
+
const toolCalls = normalizeLocaNativeToolCalls(nativeRecord.toolCalls);
|
|
2218
|
+
const responseText = typeof nativeRecord.text === "string"
|
|
2219
|
+
? nativeRecord.text
|
|
2220
|
+
: typeof nativeResult === "string"
|
|
2221
|
+
? nativeResult
|
|
2222
|
+
: "";
|
|
2223
|
+
const params = {};
|
|
2224
|
+
const benchmarkAction = firstWooBenchActionFromToolCalls(toolCalls);
|
|
2225
|
+
if (benchmarkAction) {
|
|
2226
|
+
params.BENCHMARK_ACTION = benchmarkAction;
|
|
2227
|
+
params.tool_calls = toolCalls;
|
|
2228
|
+
}
|
|
2229
|
+
const actions = benchmarkAction !== null
|
|
2230
|
+
? ["BENCHMARK_ACTION"]
|
|
2231
|
+
: responseText.trim()
|
|
2232
|
+
? ["REPLY"]
|
|
2233
|
+
: [];
|
|
2234
|
+
const finishedAt = Date.now();
|
|
2235
|
+
trajectory.push({
|
|
2236
|
+
step: trajectory.length + 1,
|
|
2237
|
+
startedAt,
|
|
2238
|
+
finishedAt,
|
|
2239
|
+
inputText: text,
|
|
2240
|
+
promptText: composedPrompt,
|
|
2241
|
+
context,
|
|
2242
|
+
thought: null,
|
|
2243
|
+
responseText,
|
|
2244
|
+
actions,
|
|
2245
|
+
params,
|
|
2246
|
+
usage: turnUsage,
|
|
2247
|
+
});
|
|
2248
|
+
trajectoriesBySession.set(key, trajectory);
|
|
2249
|
+
const metadata = benchmarkTurnMetadata({
|
|
2250
|
+
session,
|
|
2251
|
+
step: trajectory.length,
|
|
2252
|
+
context: benchmarkContext,
|
|
2253
|
+
});
|
|
2254
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2255
|
+
res.end(JSON.stringify({
|
|
2256
|
+
text: responseText,
|
|
2257
|
+
thought: null,
|
|
2258
|
+
actions,
|
|
2259
|
+
params,
|
|
2260
|
+
captured_actions: [],
|
|
2261
|
+
tool_calls: toolCalls,
|
|
2262
|
+
usage: turnUsage,
|
|
2263
|
+
metadata,
|
|
2264
|
+
benchmark: session.benchmark,
|
|
2265
|
+
task_id: session.taskId,
|
|
2266
|
+
room_id: session.roomId,
|
|
2267
|
+
trajectory_step: trajectory.length,
|
|
2268
|
+
}));
|
|
2269
|
+
return;
|
|
2270
|
+
}
|
|
2271
|
+
if (isActionCallingBenchmarkName(session.benchmark) &&
|
|
2272
|
+
Array.isArray(benchmarkContext.tools) &&
|
|
2273
|
+
benchmarkContext.tools.length > 0) {
|
|
2274
|
+
const nativeMessages = _isTauBenchmarkName(session.benchmark)
|
|
2275
|
+
? _normalizeTauNativeMessages(text, benchmarkContext)
|
|
2276
|
+
: isVendingBenchmarkName(session.benchmark)
|
|
2277
|
+
? normalizeLocaNativeMessages(benchmarkContext.messages)
|
|
2278
|
+
: normalizeActionCallingNativeMessages(text, benchmarkContext);
|
|
2279
|
+
const openAiMessages = _isTauBenchmarkName(session.benchmark)
|
|
2280
|
+
? nativeMessages
|
|
2281
|
+
: isVendingBenchmarkName(session.benchmark)
|
|
2282
|
+
? nativeMessages
|
|
2283
|
+
: normalizeActionCallingOpenAiMessages(text, benchmarkContext);
|
|
2284
|
+
const maxTokens = typeof benchmarkContext.max_tokens === "number"
|
|
2285
|
+
? benchmarkContext.max_tokens
|
|
2286
|
+
: 2048;
|
|
2287
|
+
const temperature = typeof benchmarkContext.temperature === "number"
|
|
2288
|
+
? benchmarkContext.temperature
|
|
2289
|
+
: 0;
|
|
2290
|
+
const toolChoice = typeof benchmarkContext.tool_choice === "string"
|
|
2291
|
+
? benchmarkContext.tool_choice
|
|
2292
|
+
: "required";
|
|
2293
|
+
const turnUsageBuffer = [];
|
|
2294
|
+
activeUsageBuffer = turnUsageBuffer;
|
|
2295
|
+
let nativeResult;
|
|
2296
|
+
try {
|
|
2297
|
+
const directResult = await callOpenAiCompatibleActionCalling({
|
|
2298
|
+
messages: openAiMessages,
|
|
2299
|
+
tools: benchmarkContext.tools,
|
|
2300
|
+
toolChoice,
|
|
2301
|
+
maxTokens,
|
|
2302
|
+
temperature,
|
|
2303
|
+
});
|
|
2304
|
+
if (directResult) {
|
|
2305
|
+
if (directResult.usage) {
|
|
2306
|
+
turnUsageBuffer.push(directResult.usage);
|
|
2307
|
+
}
|
|
2308
|
+
nativeResult = {
|
|
2309
|
+
text: directResult.text,
|
|
2310
|
+
toolCalls: directResult.toolCalls,
|
|
2311
|
+
};
|
|
2312
|
+
}
|
|
2313
|
+
else {
|
|
2314
|
+
nativeResult = await runtime.useModel(ModelType.TEXT_LARGE, {
|
|
2315
|
+
messages: toChatMessages(nativeMessages),
|
|
2316
|
+
tools: toToolDefinitions(benchmarkContext.tools),
|
|
2317
|
+
toolChoice: toToolChoice(toolChoice),
|
|
2318
|
+
maxTokens,
|
|
2319
|
+
temperature,
|
|
2320
|
+
});
|
|
2321
|
+
}
|
|
2322
|
+
}
|
|
2323
|
+
finally {
|
|
2324
|
+
activeUsageBuffer = null;
|
|
2325
|
+
}
|
|
2326
|
+
const turnUsage = summarizeBenchmarkTurnUsage(turnUsageBuffer);
|
|
2327
|
+
const nativeRecord = nativeResult && typeof nativeResult === "object"
|
|
2328
|
+
? nativeResult
|
|
2329
|
+
: {};
|
|
2330
|
+
const toolCalls = normalizeLocaNativeToolCalls(nativeRecord.toolCalls);
|
|
2331
|
+
const responseText = typeof nativeRecord.text === "string"
|
|
2332
|
+
? nativeRecord.text
|
|
2333
|
+
: typeof nativeResult === "string"
|
|
2334
|
+
? nativeResult
|
|
2335
|
+
: "";
|
|
2336
|
+
const params = {};
|
|
2337
|
+
const benchmarkAction = firstLocaBenchmarkActionFromToolCalls(toolCalls);
|
|
2338
|
+
if (benchmarkAction) {
|
|
2339
|
+
params.BENCHMARK_ACTION = benchmarkAction;
|
|
2340
|
+
params.tool_calls = toolCalls;
|
|
2341
|
+
}
|
|
2342
|
+
const actions = toolCalls.length > 0
|
|
2343
|
+
? ["BENCHMARK_ACTION"]
|
|
2344
|
+
: responseText.trim()
|
|
2345
|
+
? ["REPLY"]
|
|
2346
|
+
: [];
|
|
2347
|
+
const finishedAt = Date.now();
|
|
2348
|
+
trajectory.push({
|
|
2349
|
+
step: trajectory.length + 1,
|
|
2350
|
+
startedAt,
|
|
2351
|
+
finishedAt,
|
|
2352
|
+
inputText: text,
|
|
2353
|
+
promptText: composedPrompt,
|
|
2354
|
+
context,
|
|
2355
|
+
thought: null,
|
|
2356
|
+
responseText,
|
|
2357
|
+
actions,
|
|
2358
|
+
params,
|
|
2359
|
+
usage: turnUsage,
|
|
2360
|
+
});
|
|
2361
|
+
trajectoriesBySession.set(key, trajectory);
|
|
2362
|
+
const metadata = benchmarkTurnMetadata({
|
|
2363
|
+
session,
|
|
2364
|
+
step: trajectory.length,
|
|
2365
|
+
context: benchmarkContext,
|
|
2366
|
+
});
|
|
2367
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2368
|
+
res.end(JSON.stringify({
|
|
2369
|
+
text: responseText,
|
|
2370
|
+
thought: null,
|
|
2371
|
+
actions,
|
|
2372
|
+
params,
|
|
2373
|
+
captured_actions: [],
|
|
2374
|
+
tool_calls: toolCalls,
|
|
2375
|
+
usage: turnUsage,
|
|
2376
|
+
metadata,
|
|
2377
|
+
benchmark: session.benchmark,
|
|
2378
|
+
task_id: session.taskId,
|
|
2379
|
+
room_id: session.roomId,
|
|
2380
|
+
trajectory_step: trajectory.length,
|
|
2381
|
+
}));
|
|
2382
|
+
return;
|
|
2383
|
+
}
|
|
2384
|
+
if (isLocaBenchmarkName(session.benchmark) &&
|
|
2385
|
+
Array.isArray(benchmarkContext.tools) &&
|
|
2386
|
+
benchmarkContext.tools.length > 0) {
|
|
2387
|
+
const nativeMessages = normalizeLocaNativeMessages(benchmarkContext.messages);
|
|
2388
|
+
const maxTokens = typeof benchmarkContext.max_tokens === "number"
|
|
2389
|
+
? benchmarkContext.max_tokens
|
|
2390
|
+
: 2048;
|
|
2391
|
+
const temperature = typeof benchmarkContext.temperature === "number"
|
|
2392
|
+
? benchmarkContext.temperature
|
|
2393
|
+
: 0;
|
|
2394
|
+
const turnUsageBuffer = [];
|
|
2395
|
+
activeUsageBuffer = turnUsageBuffer;
|
|
2396
|
+
let nativeResult;
|
|
2397
|
+
try {
|
|
2398
|
+
const directResult = await callOpenAiCompatibleActionCalling({
|
|
2399
|
+
messages: nativeMessages,
|
|
2400
|
+
tools: benchmarkContext.tools,
|
|
2401
|
+
toolChoice: "required",
|
|
2402
|
+
maxTokens,
|
|
2403
|
+
temperature,
|
|
2404
|
+
});
|
|
2405
|
+
if (directResult) {
|
|
2406
|
+
if (directResult.usage) {
|
|
2407
|
+
turnUsageBuffer.push(directResult.usage);
|
|
2408
|
+
}
|
|
2409
|
+
nativeResult = {
|
|
2410
|
+
text: directResult.text,
|
|
2411
|
+
toolCalls: directResult.toolCalls,
|
|
2412
|
+
};
|
|
2413
|
+
}
|
|
2414
|
+
else {
|
|
2415
|
+
nativeResult = await runtime.useModel(ModelType.TEXT_LARGE, {
|
|
2416
|
+
messages: toChatMessages(nativeMessages),
|
|
2417
|
+
tools: toToolDefinitions(benchmarkContext.tools),
|
|
2418
|
+
toolChoice: "required",
|
|
2419
|
+
maxTokens,
|
|
2420
|
+
temperature,
|
|
2421
|
+
});
|
|
2422
|
+
}
|
|
2423
|
+
}
|
|
2424
|
+
finally {
|
|
2425
|
+
activeUsageBuffer = null;
|
|
2426
|
+
}
|
|
2427
|
+
const turnUsage = summarizeBenchmarkTurnUsage(turnUsageBuffer);
|
|
2428
|
+
const nativeRecord = nativeResult && typeof nativeResult === "object"
|
|
2429
|
+
? nativeResult
|
|
2430
|
+
: {};
|
|
2431
|
+
const toolCalls = normalizeLocaNativeToolCalls(nativeRecord.toolCalls);
|
|
2432
|
+
const responseText = typeof nativeRecord.text === "string"
|
|
2433
|
+
? nativeRecord.text
|
|
2434
|
+
: typeof nativeResult === "string"
|
|
2435
|
+
? nativeResult
|
|
2436
|
+
: "";
|
|
2437
|
+
const params = {};
|
|
2438
|
+
const benchmarkAction = firstLocaBenchmarkActionFromToolCalls(toolCalls);
|
|
2439
|
+
if (benchmarkAction) {
|
|
2440
|
+
params.BENCHMARK_ACTION = benchmarkAction;
|
|
2441
|
+
params.tool_calls = toolCalls;
|
|
2442
|
+
}
|
|
2443
|
+
const actions = toolCalls.length > 0
|
|
2444
|
+
? ["BENCHMARK_ACTION"]
|
|
2445
|
+
: responseText.trim()
|
|
2446
|
+
? ["REPLY"]
|
|
2447
|
+
: [];
|
|
2448
|
+
const finishedAt = Date.now();
|
|
2449
|
+
trajectory.push({
|
|
2450
|
+
step: trajectory.length + 1,
|
|
2451
|
+
startedAt,
|
|
2452
|
+
finishedAt,
|
|
2453
|
+
inputText: text,
|
|
2454
|
+
promptText: composedPrompt,
|
|
2455
|
+
context,
|
|
2456
|
+
thought: null,
|
|
2457
|
+
responseText,
|
|
2458
|
+
actions,
|
|
2459
|
+
params,
|
|
2460
|
+
usage: turnUsage,
|
|
2461
|
+
});
|
|
2462
|
+
trajectoriesBySession.set(key, trajectory);
|
|
2463
|
+
const metadata = benchmarkTurnMetadata({
|
|
2464
|
+
session,
|
|
2465
|
+
step: trajectory.length,
|
|
2466
|
+
context: benchmarkContext,
|
|
2467
|
+
});
|
|
2468
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2469
|
+
res.end(JSON.stringify({
|
|
2470
|
+
text: responseText,
|
|
2471
|
+
thought: null,
|
|
2472
|
+
actions,
|
|
2473
|
+
params,
|
|
2474
|
+
captured_actions: [],
|
|
2475
|
+
tool_calls: toolCalls,
|
|
2476
|
+
usage: turnUsage,
|
|
2477
|
+
metadata,
|
|
2478
|
+
benchmark: session.benchmark,
|
|
2479
|
+
task_id: session.taskId,
|
|
2480
|
+
room_id: session.roomId,
|
|
2481
|
+
trajectory_step: trajectory.length,
|
|
2482
|
+
}));
|
|
2483
|
+
return;
|
|
2484
|
+
}
|
|
2485
|
+
if (isBfclBenchmarkName(session.benchmark) &&
|
|
2486
|
+
Array.isArray(benchmarkContext.tools) &&
|
|
2487
|
+
benchmarkContext.tools.length > 0) {
|
|
2488
|
+
const messages = normalizeBfclNativeMessages(text, benchmarkContext);
|
|
2489
|
+
const toolChoice = benchmarkContext.is_relevant === false ? "none" : "required";
|
|
2490
|
+
const maxTokens = typeof benchmarkContext.max_tokens === "number"
|
|
2491
|
+
? benchmarkContext.max_tokens
|
|
2492
|
+
: 2048;
|
|
2493
|
+
const temperature = typeof benchmarkContext.temperature === "number"
|
|
2494
|
+
? benchmarkContext.temperature
|
|
2495
|
+
: 0;
|
|
2496
|
+
const turnUsageBuffer = [];
|
|
2497
|
+
activeUsageBuffer = turnUsageBuffer;
|
|
2498
|
+
let nativeResult;
|
|
2499
|
+
try {
|
|
2500
|
+
const directResult = await callOpenAiCompatibleActionCalling({
|
|
2501
|
+
messages,
|
|
2502
|
+
tools: benchmarkContext.tools,
|
|
2503
|
+
toolChoice,
|
|
2504
|
+
maxTokens,
|
|
2505
|
+
temperature,
|
|
2506
|
+
}).catch((err) => {
|
|
2507
|
+
elizaLogger.warn(`[bench] BFCL direct native tool call failed; falling back to runtime model path: ${formatUnknownError(err)}`);
|
|
2508
|
+
return null;
|
|
2509
|
+
});
|
|
2510
|
+
if (directResult) {
|
|
2511
|
+
if (directResult.usage) {
|
|
2512
|
+
turnUsageBuffer.push(directResult.usage);
|
|
2513
|
+
}
|
|
2514
|
+
nativeResult = {
|
|
2515
|
+
text: directResult.text,
|
|
2516
|
+
toolCalls: directResult.toolCalls,
|
|
2517
|
+
};
|
|
2518
|
+
}
|
|
2519
|
+
else {
|
|
2520
|
+
nativeResult = await runtime.useModel(ModelType.TEXT_LARGE, {
|
|
2521
|
+
messages: toChatMessages(messages),
|
|
2522
|
+
tools: toToolDefinitions(benchmarkContext.tools),
|
|
2523
|
+
toolChoice: toToolChoice(toolChoice),
|
|
2524
|
+
maxTokens,
|
|
2525
|
+
temperature,
|
|
2526
|
+
});
|
|
2527
|
+
}
|
|
2528
|
+
}
|
|
2529
|
+
finally {
|
|
2530
|
+
activeUsageBuffer = null;
|
|
2531
|
+
}
|
|
2532
|
+
const turnUsage = summarizeBenchmarkTurnUsage(turnUsageBuffer);
|
|
2533
|
+
const nativeRecord = nativeResult && typeof nativeResult === "object"
|
|
2534
|
+
? nativeResult
|
|
2535
|
+
: {};
|
|
2536
|
+
const toolCalls = normalizeLocaNativeToolCalls(nativeRecord.toolCalls);
|
|
2537
|
+
const responseText = typeof nativeRecord.text === "string"
|
|
2538
|
+
? nativeRecord.text
|
|
2539
|
+
: typeof nativeResult === "string"
|
|
2540
|
+
? nativeResult
|
|
2541
|
+
: "";
|
|
2542
|
+
const params = {};
|
|
2543
|
+
const benchmarkAction = bfclBenchmarkActionFromToolCalls(toolCalls);
|
|
2544
|
+
if (benchmarkAction) {
|
|
2545
|
+
params.BENCHMARK_ACTION = benchmarkAction;
|
|
2546
|
+
params.tool_calls = toolCalls;
|
|
2547
|
+
}
|
|
2548
|
+
const actions = toolCalls.length > 0
|
|
2549
|
+
? ["BENCHMARK_ACTION"]
|
|
2550
|
+
: responseText.trim()
|
|
2551
|
+
? ["REPLY"]
|
|
2552
|
+
: [];
|
|
2553
|
+
const finishedAt = Date.now();
|
|
2554
|
+
trajectory.push({
|
|
2555
|
+
step: trajectory.length + 1,
|
|
2556
|
+
startedAt,
|
|
2557
|
+
finishedAt,
|
|
2558
|
+
inputText: text,
|
|
2559
|
+
promptText: composedPrompt,
|
|
2560
|
+
context,
|
|
2561
|
+
thought: null,
|
|
2562
|
+
responseText,
|
|
2563
|
+
actions,
|
|
2564
|
+
params,
|
|
2565
|
+
usage: turnUsage,
|
|
2566
|
+
});
|
|
2567
|
+
trajectoriesBySession.set(key, trajectory);
|
|
2568
|
+
const metadata = benchmarkTurnMetadata({
|
|
2569
|
+
session,
|
|
2570
|
+
step: trajectory.length,
|
|
2571
|
+
context: benchmarkContext,
|
|
2572
|
+
});
|
|
2573
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2574
|
+
res.end(JSON.stringify({
|
|
2575
|
+
text: responseText,
|
|
2576
|
+
thought: null,
|
|
2577
|
+
actions,
|
|
2578
|
+
params,
|
|
2579
|
+
captured_actions: [],
|
|
2580
|
+
tool_calls: toolCalls,
|
|
2581
|
+
usage: turnUsage,
|
|
2582
|
+
metadata,
|
|
2583
|
+
benchmark: session.benchmark,
|
|
2584
|
+
task_id: session.taskId,
|
|
2585
|
+
room_id: session.roomId,
|
|
2586
|
+
trajectory_step: trajectory.length,
|
|
2587
|
+
}));
|
|
2588
|
+
return;
|
|
2589
|
+
}
|
|
2590
|
+
if (isWebShopBenchmarkName(session.benchmark) &&
|
|
2591
|
+
Array.isArray(benchmarkContext.tools) &&
|
|
2592
|
+
benchmarkContext.tools.length > 0) {
|
|
2593
|
+
const messages = [
|
|
2594
|
+
{
|
|
2595
|
+
role: "system",
|
|
2596
|
+
content: "You are running WebShop through the Eliza benchmark server. Use the webshop_action tool exactly once with command set to one valid command from the current available actions. Do not answer in prose.",
|
|
2597
|
+
},
|
|
2598
|
+
{ role: "user", content: text },
|
|
2599
|
+
];
|
|
2600
|
+
const turnUsageBuffer = [];
|
|
2601
|
+
activeUsageBuffer = turnUsageBuffer;
|
|
2602
|
+
let nativeResult;
|
|
2603
|
+
try {
|
|
2604
|
+
const directResult = await callOpenAiCompatibleActionCalling({
|
|
2605
|
+
messages,
|
|
2606
|
+
tools: benchmarkContext.tools,
|
|
2607
|
+
toolChoice: "required",
|
|
2608
|
+
maxTokens: 256,
|
|
2609
|
+
temperature: typeof benchmarkContext.temperature === "number"
|
|
2610
|
+
? benchmarkContext.temperature
|
|
2611
|
+
: 0,
|
|
2612
|
+
}).catch((err) => {
|
|
2613
|
+
elizaLogger.warn(`[bench] WebShop direct native tool call failed; falling back to runtime model path: ${formatUnknownError(err)}`);
|
|
2614
|
+
return null;
|
|
2615
|
+
});
|
|
2616
|
+
if (directResult) {
|
|
2617
|
+
if (directResult.usage) {
|
|
2618
|
+
turnUsageBuffer.push(directResult.usage);
|
|
2619
|
+
}
|
|
2620
|
+
nativeResult = {
|
|
2621
|
+
text: directResult.text,
|
|
2622
|
+
toolCalls: directResult.toolCalls,
|
|
2623
|
+
};
|
|
2624
|
+
}
|
|
2625
|
+
else {
|
|
2626
|
+
nativeResult = await runtime.useModel(ModelType.TEXT_LARGE, {
|
|
2627
|
+
messages: toChatMessages(messages),
|
|
2628
|
+
tools: toToolDefinitions(benchmarkContext.tools),
|
|
2629
|
+
toolChoice: "required",
|
|
2630
|
+
maxTokens: 256,
|
|
2631
|
+
temperature: typeof benchmarkContext.temperature === "number"
|
|
2632
|
+
? benchmarkContext.temperature
|
|
2633
|
+
: 0,
|
|
2634
|
+
});
|
|
2635
|
+
}
|
|
2636
|
+
}
|
|
2637
|
+
finally {
|
|
2638
|
+
activeUsageBuffer = null;
|
|
2639
|
+
}
|
|
2640
|
+
const turnUsage = summarizeBenchmarkTurnUsage(turnUsageBuffer);
|
|
2641
|
+
const nativeRecord = nativeResult && typeof nativeResult === "object"
|
|
2642
|
+
? nativeResult
|
|
2643
|
+
: {};
|
|
2644
|
+
const toolCalls = normalizeLocaNativeToolCalls(nativeRecord.toolCalls);
|
|
2645
|
+
const responseText = typeof nativeRecord.text === "string"
|
|
2646
|
+
? nativeRecord.text
|
|
2647
|
+
: typeof nativeResult === "string"
|
|
2648
|
+
? nativeResult
|
|
2649
|
+
: "";
|
|
2650
|
+
const params = {};
|
|
2651
|
+
const benchmarkAction = webshopBenchmarkActionFromToolCalls(toolCalls);
|
|
2652
|
+
if (benchmarkAction) {
|
|
2653
|
+
params.BENCHMARK_ACTION = benchmarkAction;
|
|
2654
|
+
params.tool_calls = toolCalls;
|
|
2655
|
+
}
|
|
2656
|
+
const actions = benchmarkAction !== null
|
|
2657
|
+
? ["BENCHMARK_ACTION"]
|
|
2658
|
+
: responseText.trim()
|
|
2659
|
+
? ["REPLY"]
|
|
2660
|
+
: [];
|
|
2661
|
+
const finishedAt = Date.now();
|
|
2662
|
+
trajectory.push({
|
|
2663
|
+
step: trajectory.length + 1,
|
|
2664
|
+
startedAt,
|
|
2665
|
+
finishedAt,
|
|
2666
|
+
inputText: text,
|
|
2667
|
+
promptText: composedPrompt,
|
|
2668
|
+
context,
|
|
2669
|
+
thought: null,
|
|
2670
|
+
responseText,
|
|
2671
|
+
actions,
|
|
2672
|
+
params,
|
|
2673
|
+
usage: turnUsage,
|
|
2674
|
+
});
|
|
2675
|
+
trajectoriesBySession.set(key, trajectory);
|
|
2676
|
+
const metadata = benchmarkTurnMetadata({
|
|
2677
|
+
session,
|
|
2678
|
+
step: trajectory.length,
|
|
2679
|
+
context: benchmarkContext,
|
|
2680
|
+
});
|
|
2681
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2682
|
+
res.end(JSON.stringify({
|
|
2683
|
+
text: responseText,
|
|
2684
|
+
thought: null,
|
|
2685
|
+
actions,
|
|
2686
|
+
params,
|
|
2687
|
+
captured_actions: [],
|
|
2688
|
+
tool_calls: toolCalls,
|
|
2689
|
+
usage: turnUsage,
|
|
2690
|
+
metadata,
|
|
2691
|
+
benchmark: session.benchmark,
|
|
2692
|
+
task_id: session.taskId,
|
|
2693
|
+
room_id: session.roomId,
|
|
2694
|
+
trajectory_step: trajectory.length,
|
|
2695
|
+
}));
|
|
2696
|
+
return;
|
|
2697
|
+
}
|
|
2698
|
+
if (isHermesNativeEnvProxyName(session.benchmark) &&
|
|
2699
|
+
Array.isArray(benchmarkContext.tools) &&
|
|
2700
|
+
benchmarkContext.tools.length > 0) {
|
|
2701
|
+
const messages = normalizeGenericToolMessages(benchmarkContext.messages, text);
|
|
2702
|
+
const maxTokens = typeof benchmarkContext.max_tokens === "number"
|
|
2703
|
+
? benchmarkContext.max_tokens
|
|
2704
|
+
: 4096;
|
|
2705
|
+
const temperature = typeof benchmarkContext.temperature === "number"
|
|
2706
|
+
? benchmarkContext.temperature
|
|
2707
|
+
: 0;
|
|
2708
|
+
const toolChoice = typeof benchmarkContext.tool_choice === "string"
|
|
2709
|
+
? benchmarkContext.tool_choice
|
|
2710
|
+
: "auto";
|
|
2711
|
+
const turnUsageBuffer = [];
|
|
2712
|
+
activeUsageBuffer = turnUsageBuffer;
|
|
2713
|
+
let nativeResult;
|
|
2714
|
+
try {
|
|
2715
|
+
const directResult = await callOpenAiCompatibleActionCalling({
|
|
2716
|
+
messages,
|
|
2717
|
+
tools: benchmarkContext.tools,
|
|
2718
|
+
toolChoice,
|
|
2719
|
+
maxTokens,
|
|
2720
|
+
temperature,
|
|
2721
|
+
});
|
|
2722
|
+
if (directResult) {
|
|
2723
|
+
if (directResult.usage) {
|
|
2724
|
+
turnUsageBuffer.push(directResult.usage);
|
|
2725
|
+
}
|
|
2726
|
+
nativeResult = {
|
|
2727
|
+
text: directResult.text,
|
|
2728
|
+
toolCalls: directResult.toolCalls,
|
|
2729
|
+
};
|
|
2730
|
+
}
|
|
2731
|
+
else {
|
|
2732
|
+
nativeResult = await runtime.useModel(ModelType.TEXT_LARGE, {
|
|
2733
|
+
messages: toChatMessages(messages),
|
|
2734
|
+
tools: toToolDefinitions(benchmarkContext.tools),
|
|
2735
|
+
toolChoice: toToolChoice(toolChoice),
|
|
2736
|
+
maxTokens,
|
|
2737
|
+
temperature,
|
|
2738
|
+
});
|
|
2739
|
+
}
|
|
2740
|
+
}
|
|
2741
|
+
finally {
|
|
2742
|
+
activeUsageBuffer = null;
|
|
2743
|
+
}
|
|
2744
|
+
const turnUsage = summarizeBenchmarkTurnUsage(turnUsageBuffer);
|
|
2745
|
+
const nativeRecord = nativeResult && typeof nativeResult === "object"
|
|
2746
|
+
? nativeResult
|
|
2747
|
+
: {};
|
|
2748
|
+
const toolCalls = normalizeLocaNativeToolCalls(nativeRecord.toolCalls);
|
|
2749
|
+
const responseText = typeof nativeRecord.text === "string"
|
|
2750
|
+
? nativeRecord.text
|
|
2751
|
+
: typeof nativeResult === "string"
|
|
2752
|
+
? nativeResult
|
|
2753
|
+
: "";
|
|
2754
|
+
const finishedAt = Date.now();
|
|
2755
|
+
trajectory.push({
|
|
2756
|
+
step: trajectory.length + 1,
|
|
2757
|
+
startedAt,
|
|
2758
|
+
finishedAt,
|
|
2759
|
+
inputText: text,
|
|
2760
|
+
promptText: composedPrompt,
|
|
2761
|
+
context,
|
|
2762
|
+
thought: null,
|
|
2763
|
+
responseText,
|
|
2764
|
+
actions: toolCalls.length > 0 ? ["BENCHMARK_ACTION"] : [],
|
|
2765
|
+
params: toolCalls.length > 0 ? { tool_calls: toolCalls } : {},
|
|
2766
|
+
usage: turnUsage,
|
|
2767
|
+
});
|
|
2768
|
+
trajectoriesBySession.set(key, trajectory);
|
|
2769
|
+
const metadata = benchmarkTurnMetadata({
|
|
2770
|
+
session,
|
|
2771
|
+
step: trajectory.length,
|
|
2772
|
+
context: benchmarkContext,
|
|
2773
|
+
});
|
|
2774
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2775
|
+
res.end(JSON.stringify({
|
|
2776
|
+
text: responseText,
|
|
2777
|
+
thought: null,
|
|
2778
|
+
actions: toolCalls.length > 0 ? ["BENCHMARK_ACTION"] : [],
|
|
2779
|
+
params: toolCalls.length > 0 ? { tool_calls: toolCalls } : {},
|
|
2780
|
+
captured_actions: [],
|
|
2781
|
+
tool_calls: toolCalls,
|
|
2782
|
+
usage: turnUsage,
|
|
2783
|
+
metadata,
|
|
2784
|
+
benchmark: session.benchmark,
|
|
2785
|
+
task_id: session.taskId,
|
|
2786
|
+
room_id: session.roomId,
|
|
2787
|
+
trajectory_step: trajectory.length,
|
|
2788
|
+
}));
|
|
2789
|
+
return;
|
|
2790
|
+
}
|
|
2791
|
+
if (isTerminalBenchmarkName(session.benchmark) ||
|
|
2792
|
+
isSweBenchmarkName(session.benchmark) ||
|
|
2793
|
+
isVisualWebBenchmarkName(session.benchmark) ||
|
|
2794
|
+
isOsworldBenchmarkName(session.benchmark)) {
|
|
2795
|
+
const maxTokens = typeof benchmarkContext.max_tokens === "number"
|
|
2796
|
+
? benchmarkContext.max_tokens
|
|
2797
|
+
: 4096;
|
|
2798
|
+
const temperature = typeof benchmarkContext.temperature === "number"
|
|
2799
|
+
? benchmarkContext.temperature
|
|
2800
|
+
: 0;
|
|
2801
|
+
const turnUsageBuffer = [];
|
|
2802
|
+
activeUsageBuffer = turnUsageBuffer;
|
|
2803
|
+
let nativeResult;
|
|
2804
|
+
try {
|
|
2805
|
+
const directResult = await callOpenAiCompatibleText({
|
|
2806
|
+
prompt: composedPrompt,
|
|
2807
|
+
maxTokens,
|
|
2808
|
+
temperature,
|
|
2809
|
+
});
|
|
2810
|
+
if (directResult) {
|
|
2811
|
+
if (directResult.usage) {
|
|
2812
|
+
turnUsageBuffer.push(directResult.usage);
|
|
2813
|
+
}
|
|
2814
|
+
nativeResult = directResult.text;
|
|
2815
|
+
}
|
|
2816
|
+
else {
|
|
2817
|
+
nativeResult = await runtime.useModel(ModelType.TEXT_LARGE, {
|
|
2818
|
+
prompt: composedPrompt,
|
|
2819
|
+
maxTokens,
|
|
2820
|
+
temperature,
|
|
2821
|
+
});
|
|
2822
|
+
}
|
|
2823
|
+
}
|
|
2824
|
+
finally {
|
|
2825
|
+
activeUsageBuffer = null;
|
|
2826
|
+
}
|
|
2827
|
+
const turnUsage = summarizeBenchmarkTurnUsage(turnUsageBuffer);
|
|
2828
|
+
const nativeRecord = nativeResult && typeof nativeResult === "object"
|
|
2829
|
+
? nativeResult
|
|
2830
|
+
: {};
|
|
2831
|
+
const responseText = typeof nativeRecord.text === "string"
|
|
2832
|
+
? nativeRecord.text
|
|
2833
|
+
: typeof nativeResult === "string"
|
|
2834
|
+
? nativeResult
|
|
2835
|
+
: "";
|
|
2836
|
+
const finishedAt = Date.now();
|
|
2837
|
+
trajectory.push({
|
|
2838
|
+
step: trajectory.length + 1,
|
|
2839
|
+
startedAt,
|
|
2840
|
+
finishedAt,
|
|
2841
|
+
inputText: text,
|
|
2842
|
+
promptText: composedPrompt,
|
|
2843
|
+
context,
|
|
2844
|
+
thought: null,
|
|
2845
|
+
responseText,
|
|
2846
|
+
actions: responseText.trim() ? ["REPLY"] : [],
|
|
2847
|
+
params: {},
|
|
2848
|
+
usage: turnUsage,
|
|
2849
|
+
});
|
|
2850
|
+
trajectoriesBySession.set(key, trajectory);
|
|
2851
|
+
const metadata = benchmarkTurnMetadata({
|
|
2852
|
+
session,
|
|
2853
|
+
step: trajectory.length,
|
|
2854
|
+
context: benchmarkContext,
|
|
2855
|
+
});
|
|
2856
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
2857
|
+
res.end(JSON.stringify({
|
|
2858
|
+
text: responseText,
|
|
2859
|
+
thought: null,
|
|
2860
|
+
actions: responseText.trim() ? ["REPLY"] : [],
|
|
2861
|
+
params: {},
|
|
2862
|
+
captured_actions: [],
|
|
2863
|
+
tool_calls: [],
|
|
2864
|
+
usage: turnUsage,
|
|
2865
|
+
metadata,
|
|
2866
|
+
benchmark: session.benchmark,
|
|
2867
|
+
task_id: session.taskId,
|
|
2868
|
+
room_id: session.roomId,
|
|
2869
|
+
trajectory_step: trajectory.length,
|
|
2870
|
+
}));
|
|
2871
|
+
return;
|
|
2872
|
+
}
|
|
1045
2873
|
const incomingMessage = {
|
|
1046
2874
|
id: stringToUuid(`benchmark-msg:${Date.now()}:${Math.random()}`),
|
|
1047
2875
|
content: {
|
|
@@ -1083,8 +2911,9 @@ export async function startBenchmarkServer() {
|
|
|
1083
2911
|
activeUsageBuffer = null;
|
|
1084
2912
|
}
|
|
1085
2913
|
})();
|
|
1086
|
-
const turnUsage =
|
|
2914
|
+
const turnUsage = summarizeBenchmarkTurnUsage(turnUsageBuffer);
|
|
1087
2915
|
const capturedAction = getCapturedAction();
|
|
2916
|
+
const capturedActions = getCapturedActions();
|
|
1088
2917
|
const responseText = typeof result.responseContent?.text === "string"
|
|
1089
2918
|
? result.responseContent.text
|
|
1090
2919
|
: callbackTexts.join("\n\n");
|
|
@@ -1101,6 +2930,12 @@ export async function startBenchmarkServer() {
|
|
|
1101
2930
|
const params = Object.keys(parsedParams).length > 0
|
|
1102
2931
|
? parsedParams
|
|
1103
2932
|
: capturedActionToParams(capturedAction);
|
|
2933
|
+
if (capturedActions.length > 1) {
|
|
2934
|
+
params.BENCHMARK_ACTIONS = capturedActions
|
|
2935
|
+
.map((action) => capturedActionToParams(action).BENCHMARK_ACTION)
|
|
2936
|
+
.filter(Boolean);
|
|
2937
|
+
}
|
|
2938
|
+
const toolCalls = capturedActionsToToolCalls(capturedActions);
|
|
1104
2939
|
const finishedAt = Date.now();
|
|
1105
2940
|
trajectory.push({
|
|
1106
2941
|
step: trajectory.length + 1,
|
|
@@ -1116,12 +2951,21 @@ export async function startBenchmarkServer() {
|
|
|
1116
2951
|
usage: turnUsage,
|
|
1117
2952
|
});
|
|
1118
2953
|
trajectoriesBySession.set(key, trajectory);
|
|
2954
|
+
const metadata = benchmarkTurnMetadata({
|
|
2955
|
+
session,
|
|
2956
|
+
step: trajectory.length,
|
|
2957
|
+
context: benchmarkContext,
|
|
2958
|
+
});
|
|
1119
2959
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
1120
2960
|
res.end(JSON.stringify({
|
|
1121
2961
|
text: responseText,
|
|
1122
2962
|
thought,
|
|
1123
2963
|
actions,
|
|
1124
2964
|
params,
|
|
2965
|
+
captured_actions: capturedActions,
|
|
2966
|
+
tool_calls: toolCalls,
|
|
2967
|
+
usage: turnUsage,
|
|
2968
|
+
metadata,
|
|
1125
2969
|
benchmark: session.benchmark,
|
|
1126
2970
|
task_id: session.taskId,
|
|
1127
2971
|
room_id: session.roomId,
|
|
@@ -1171,6 +3015,6 @@ export async function startBenchmarkServer() {
|
|
|
1171
3015
|
});
|
|
1172
3016
|
}
|
|
1173
3017
|
startBenchmarkServer().catch((err) => {
|
|
1174
|
-
|
|
3018
|
+
elizaLogger.error(`[bench] Failed to start benchmark server: ${formatUnknownError(err)}`);
|
|
1175
3019
|
process.exit(1);
|
|
1176
3020
|
});
|