@elizaos/app-core 2.0.0-beta.3 → 2.0.11-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agent-bridge.d.ts +27 -0
- package/agent-bridge.d.ts.map +1 -0
- package/agent-bridge.js +26 -0
- package/api/__tests__/sandbox-test-helpers.js +1 -1
- package/api/auth/audit.js +1 -1
- package/api/auth/auth-context.js +1 -1
- package/api/auth/bootstrap-token.js +2 -2
- package/api/auth/index.d.ts +9 -10
- package/api/auth/index.d.ts.map +1 -1
- package/api/auth/index.js +9 -10
- package/api/auth/passwords.js +2 -2
- package/api/auth/sensitive-rate-limit.d.ts +1 -4
- package/api/auth/sensitive-rate-limit.d.ts.map +1 -1
- package/api/auth/sensitive-rate-limit.js +6 -6
- package/api/auth/sessions.js +2 -2
- package/api/auth-bootstrap-routes.d.ts +6 -13
- package/api/auth-bootstrap-routes.d.ts.map +1 -1
- package/api/auth-bootstrap-routes.js +14 -27
- package/api/auth-pairing-routes.d.ts +17 -0
- package/api/auth-pairing-routes.d.ts.map +1 -0
- package/api/auth-pairing-routes.js +300 -0
- package/api/auth-session-routes.d.ts.map +1 -1
- package/api/auth-session-routes.js +36 -15
- package/api/auth.d.ts +12 -19
- package/api/auth.d.ts.map +1 -1
- package/api/auth.js +32 -27
- package/api/automations-compat-routes.d.ts.map +1 -1
- package/api/automations-compat-routes.js +5 -5
- package/api/background-tasks-routes.d.ts +4 -0
- package/api/background-tasks-routes.d.ts.map +1 -0
- package/api/background-tasks-routes.js +63 -0
- package/api/catalog-routes.js +3 -3
- package/api/cloud-pair-route.d.ts +26 -0
- package/api/cloud-pair-route.d.ts.map +1 -0
- package/api/cloud-pair-route.js +222 -0
- package/api/cloud-voice-routes.d.ts +52 -0
- package/api/cloud-voice-routes.d.ts.map +1 -0
- package/api/cloud-voice-routes.js +50 -0
- package/api/compat-route-shared.d.ts +2 -2
- package/api/compat-route-shared.d.ts.map +1 -1
- package/api/compat-route-shared.js +11 -7
- package/api/credential-resolver.d.ts +2 -2
- package/api/credential-resolver.d.ts.map +1 -1
- package/api/credential-resolver.js +8 -2
- package/api/database-rows-compat-routes.d.ts.map +1 -1
- package/api/database-rows-compat-routes.js +69 -31
- package/api/dev-boot-history.d.ts +26 -0
- package/api/dev-boot-history.d.ts.map +1 -0
- package/api/dev-boot-history.js +69 -0
- package/api/dev-compat-routes.d.ts +5 -0
- package/api/dev-compat-routes.d.ts.map +1 -1
- package/api/dev-compat-routes.js +127 -4
- package/api/dev-console-log.d.ts +2 -2
- package/api/dev-console-log.d.ts.map +1 -1
- package/api/dev-console-log.js +8 -5
- package/api/dev-route-catalog.d.ts +58 -0
- package/api/dev-route-catalog.d.ts.map +1 -0
- package/api/dev-route-catalog.js +447 -0
- package/api/dev-stack.d.ts.map +1 -1
- package/api/dev-stack.js +6 -9
- package/api/first-run-routes.d.ts +4 -0
- package/api/first-run-routes.d.ts.map +1 -0
- package/api/first-run-routes.js +208 -0
- package/api/first-run-tts-route.d.ts +19 -0
- package/api/first-run-tts-route.d.ts.map +1 -0
- package/api/first-run-tts-route.js +59 -0
- package/api/internal-routes.d.ts +23 -0
- package/api/internal-routes.d.ts.map +1 -0
- package/api/internal-routes.js +203 -0
- package/api/ios-local-agent-transport.d.ts +36 -0
- package/api/ios-local-agent-transport.d.ts.map +1 -0
- package/api/ios-local-agent-transport.js +566 -0
- package/api/onboarding-voice-lines.d.ts +23 -0
- package/api/onboarding-voice-lines.d.ts.map +1 -0
- package/api/onboarding-voice-lines.js +8 -0
- package/api/perf-instrument.d.ts +43 -0
- package/api/perf-instrument.d.ts.map +1 -0
- package/api/perf-instrument.js +113 -0
- package/api/response.d.ts.map +1 -1
- package/api/response.js +14 -14
- package/api/runtime-mode-routes.d.ts.map +1 -1
- package/api/runtime-mode-routes.js +2 -2
- package/api/secrets-inventory-routes.js +2 -2
- package/api/secrets-manager-routes.d.ts +1 -1
- package/api/secrets-manager-routes.d.ts.map +1 -1
- package/api/secrets-manager-routes.js +9 -10
- package/api/sensitive-request-routes.js +5 -5
- package/api/server-cors.d.ts.map +1 -1
- package/api/server-cors.js +13 -2
- package/api/server-first-run-helpers.d.ts +26 -0
- package/api/server-first-run-helpers.d.ts.map +1 -0
- package/api/server-first-run-helpers.js +271 -0
- package/api/server-security.js +1 -1
- package/api/server-startup.d.ts.map +1 -1
- package/api/server-startup.js +3 -4
- package/api/server-wallet-trade.js +1 -1
- package/api/server.d.ts +4 -4
- package/api/server.d.ts.map +1 -1
- package/api/server.js +222 -88
- package/api/setup-contract.d.ts +63 -0
- package/api/setup-contract.d.ts.map +1 -0
- package/api/setup-contract.js +39 -0
- package/api/training-benchmarks.d.ts +97 -0
- package/api/training-benchmarks.d.ts.map +1 -0
- package/api/training-benchmarks.js +307 -0
- package/api/workbench-compat-routes.js +2 -2
- package/benchmark/cerebras-autowire.d.ts +28 -0
- package/benchmark/cerebras-autowire.d.ts.map +1 -0
- package/benchmark/cerebras-autowire.js +62 -0
- package/benchmark/lifeops-bench-handler.d.ts +36 -0
- package/benchmark/lifeops-bench-handler.d.ts.map +1 -1
- package/benchmark/lifeops-bench-handler.js +63 -1
- package/benchmark/lifeops-fake-backend.d.ts +39 -0
- package/benchmark/lifeops-fake-backend.d.ts.map +1 -1
- package/benchmark/lifeops-fake-backend.js +993 -21
- package/benchmark/mock-plugin.d.ts.map +1 -1
- package/benchmark/mock-plugin.js +0 -24
- package/benchmark/plugin.d.ts +2 -1
- package/benchmark/plugin.d.ts.map +1 -1
- package/benchmark/plugin.js +989 -68
- package/benchmark/replay-capture.d.ts +2 -2
- package/benchmark/replay-capture.d.ts.map +1 -1
- package/benchmark/replay-capture.js +3 -3
- package/benchmark/server-utils.d.ts +162 -9
- package/benchmark/server-utils.d.ts.map +1 -1
- package/benchmark/server-utils.js +625 -62
- package/benchmark/server.d.ts.map +1 -1
- package/benchmark/server.js +1962 -118
- package/boot-profile.d.ts +3 -0
- package/boot-profile.d.ts.map +1 -0
- package/boot-profile.js +30 -0
- package/browser.d.ts +23 -1
- package/browser.d.ts.map +1 -1
- package/browser.js +20 -1
- package/cli/argv.js +1 -1
- package/cli/banner.js +1 -1
- package/cli/command-format.js +2 -2
- package/cli/doctor/checks.d.ts.map +1 -1
- package/cli/doctor/checks.js +6 -6
- package/cli/plugins-cli.d.ts.map +1 -1
- package/cli/plugins-cli.js +77 -32
- package/cli/profile.d.ts.map +1 -1
- package/cli/profile.js +5 -4
- package/cli/program/build-program.js +4 -4
- package/cli/program/command-registry.d.ts.map +1 -1
- package/cli/program/command-registry.js +13 -11
- package/cli/program/help.js +5 -5
- package/cli/program/preaction.js +5 -5
- package/cli/program/register.auth.d.ts.map +1 -1
- package/cli/program/register.auth.js +6 -12
- package/cli/program/register.capability-router.d.ts +29 -0
- package/cli/program/register.capability-router.d.ts.map +1 -0
- package/cli/program/register.capability-router.js +568 -0
- package/cli/program/register.config.js +1 -1
- package/cli/program/register.configure.d.ts.map +1 -1
- package/cli/program/register.configure.js +1 -1
- package/cli/program/register.dashboard.d.ts.map +1 -1
- package/cli/program/register.dashboard.js +6 -7
- package/cli/program/register.db.d.ts.map +1 -1
- package/cli/program/register.db.js +3 -4
- package/cli/program/register.doctor.js +7 -7
- package/cli/program/register.setup.d.ts.map +1 -1
- package/cli/program/register.setup.js +14 -10
- package/cli/program/register.start.d.ts.map +1 -1
- package/cli/program/register.start.js +5 -3
- package/cli/program/register.subclis.js +3 -3
- package/cli/program/register.update.d.ts +6 -0
- package/cli/program/register.update.d.ts.map +1 -1
- package/cli/program/register.update.js +58 -6
- package/cli/program.js +1 -1
- package/cli/run-main.js +4 -4
- package/config/app-config.d.ts +2 -0
- package/config/app-config.d.ts.map +1 -0
- package/config/app-config.js +1 -0
- package/connectors/capacitor-jsc.d.ts.map +1 -1
- package/connectors/capacitor-jsc.js +16 -10
- package/connectors/capacitor-quickjs.d.ts.map +1 -1
- package/connectors/capacitor-quickjs.js +18 -13
- package/connectors/capacitor-sqlite.d.ts.map +1 -1
- package/connectors/capacitor-sqlite.js +27 -12
- package/dispatch/approval-queue.d.ts +37 -0
- package/dispatch/approval-queue.d.ts.map +1 -0
- package/dispatch/approval-queue.js +25 -0
- package/dispatch/channel-registry.d.ts +30 -0
- package/dispatch/channel-registry.d.ts.map +1 -0
- package/dispatch/channel-registry.js +22 -0
- package/dispatch/connector-registry.d.ts +39 -0
- package/dispatch/connector-registry.d.ts.map +1 -0
- package/dispatch/connector-registry.js +24 -0
- package/dispatch/index.d.ts +14 -0
- package/dispatch/index.d.ts.map +1 -0
- package/dispatch/index.js +13 -0
- package/dispatch/send-policy.d.ts +36 -0
- package/dispatch/send-policy.d.ts.map +1 -0
- package/dispatch/send-policy.js +16 -0
- package/entry.js +28 -11
- package/first-run/first-run-config.d.ts +55 -0
- package/first-run/first-run-config.d.ts.map +1 -0
- package/first-run/first-run-config.js +178 -0
- package/first-run/runtime-target.d.ts +4 -0
- package/first-run/runtime-target.d.ts.map +1 -0
- package/first-run/runtime-target.js +13 -0
- package/index.d.ts +16 -3
- package/index.d.ts.map +1 -1
- package/index.js +57 -33
- package/package.json +159 -50
- package/packaging/debian/apt-repo-config/README.md +18 -0
- package/packaging/debian/apt-repo-config/conf/distributions +11 -0
- package/packaging/flatpak/README.md +26 -16
- package/packaging/flatpak/ai.elizaos.App.metainfo.xml +17 -12
- package/packaging/flatpak/ai.elizaos.App.store.yml +5 -5
- package/packaging/flatpak/ai.elizaos.App.yml +10 -24
- package/packaging/flatpak/elizaos-app-wrapper.store.sh +2 -2
- package/packaging/flatpak/generate-sources.sh +74 -0
- package/packaging/flatpak/node-sources.json +7930 -0
- package/packaging/inno/build-inno.ps1 +34 -9
- package/packaging/msix/AppxManifest.store.xml +1 -1
- package/packaging/msix/README.md +39 -19
- package/packaging/msix/build-msix.ps1 +44 -14
- package/packaging/snap/snapcraft.yaml +22 -21
- package/packaging/test-packaging.sh +2 -2
- package/permissions/types.d.ts +1 -1
- package/permissions/types.js +1 -1
- package/platform/elizaos-agent-browser-stub.d.ts +144 -0
- package/platform/elizaos-agent-browser-stub.d.ts.map +1 -0
- package/platform/elizaos-agent-browser-stub.js +158 -0
- package/platform/elizaos-plugin-elizacloud-browser-stub.d.ts +34 -0
- package/platform/elizaos-plugin-elizacloud-browser-stub.d.ts.map +1 -0
- package/platform/elizaos-plugin-elizacloud-browser-stub.js +51 -0
- package/platform/empty-node-module.d.ts +148 -0
- package/platform/empty-node-module.d.ts.map +1 -1
- package/platform/empty-node-module.js +140 -3
- package/platform/ios-runtime-backends.d.ts +83 -0
- package/platform/ios-runtime-backends.d.ts.map +1 -0
- package/platform/ios-runtime-backends.js +133 -0
- package/platform/ios-runtime-bridge.d.ts +15 -0
- package/platform/ios-runtime-bridge.d.ts.map +1 -0
- package/platform/ios-runtime-bridge.js +527 -0
- package/platform/native-library-policy.d.ts +23 -0
- package/platform/native-library-policy.d.ts.map +1 -0
- package/platform/native-library-policy.js +112 -0
- package/platform/native-plugin-entrypoints.d.ts +19 -0
- package/platform/native-plugin-entrypoints.d.ts.map +1 -0
- package/platform/native-plugin-entrypoints.js +29 -0
- package/platforms/android/README.md +68 -10
- package/platforms/android/app/build.gradle +268 -3
- package/platforms/android/app/capacitor.build.gradle +18 -1
- package/platforms/android/app/proguard-rules.pro +17 -2
- package/platforms/android/app/src/androidTest/java/ai/elizaos/app/ElizaOsInstrumentedTest.java +1 -1
- package/platforms/android/app/src/main/AndroidManifest.xml +334 -17
- package/platforms/android/app/src/main/assets/runners/eliza-tasks.js +177 -0
- package/platforms/android/app/src/main/elizavoice-jni/CMakeLists.txt +100 -0
- package/platforms/android/app/src/main/elizavoice-jni/elizavoice-jni.cpp +1349 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/AgentPlugin.java +111 -171
- package/platforms/android/app/src/main/java/ai/elizaos/app/AndroidVirtualizationBridge.java +284 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/BatteryOptimizationPlugin.java +95 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaAccessibilityService.java +55 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaAgentService.java +1198 -141
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaAndroidSystemBridge.java +83 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaAssistActivity.java +50 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaBootReceiver.java +90 -8
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaBrowserActivity.java +2 -2
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaCalendarActivity.java +1 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaCameraActivity.java +1 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaClockActivity.java +2 -2
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaContactsActivity.java +1 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaDialActivity.java +1 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaInCallService.java +1 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaMmsReceiver.java +1 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaNativeBridge.java +22 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaNotificationListenerService.java +45 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaQuickActionsWidgetProvider.java +68 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaShareActivity.java +132 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaSmsComposeActivity.java +1 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaSmsGatewayService.java +268 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaSmsReceiver.java +12 -1
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaTasksWorker.java +194 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaVoiceCaptureService.java +198 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaVoiceNative.java +205 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaVoicePlugin.java +498 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaVoiceTileService.java +39 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/ElizaWorkScheduler.java +60 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/GatewayConnectionService.java +53 -19
- package/platforms/android/app/src/main/java/ai/elizaos/app/MainActivity.java +160 -33
- package/platforms/android/app/src/main/java/ai/elizaos/app/ResourceProbePlugin.java +169 -0
- package/platforms/android/app/src/main/java/ai/elizaos/app/VoiceCapturePlugin.java +119 -0
- package/platforms/android/app/src/main/res/drawable/eliza_widget_background.xml +10 -0
- package/platforms/android/app/src/main/res/drawable/eliza_widget_button_background.xml +13 -0
- package/platforms/android/app/src/main/res/drawable/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-land-hdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-land-mdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-land-xhdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-land-xxhdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-land-xxxhdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-port-hdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-port-mdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-port-xhdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-port-xxhdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/drawable-port-xxxhdpi/splash.png +0 -0
- package/platforms/android/app/src/main/res/layout/eliza_quick_actions_widget.xml +86 -0
- package/platforms/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml +2 -1
- package/platforms/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml +2 -1
- package/platforms/android/app/src/main/res/mipmap-hdpi/ic_launcher.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-hdpi/ic_launcher_monochrome.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-hdpi/ic_launcher_round.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-mdpi/ic_launcher.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-mdpi/ic_launcher_monochrome.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-mdpi/ic_launcher_round.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xhdpi/ic_launcher_monochrome.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_monochrome.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_monochrome.png +0 -0
- package/platforms/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png +0 -0
- package/platforms/android/app/src/main/res/values/android_app_actions.xml +48 -0
- package/platforms/android/app/src/main/res/values/colors.xml +8 -0
- package/platforms/android/app/src/main/res/values/ic_launcher_background.xml +2 -2
- package/platforms/android/app/src/main/res/values/strings.xml +2 -2
- package/platforms/android/app/src/main/res/values/styles.xml +25 -1
- package/platforms/android/app/src/main/res/xml/eliza_accessibility_service.xml +9 -0
- package/platforms/android/app/src/main/res/xml/eliza_quick_actions_widget.xml +13 -0
- package/platforms/android/app/src/main/res/xml/shortcuts.xml +121 -0
- package/platforms/android/build.gradle +2 -2
- package/platforms/android/capacitor-cordova-android-plugins/build.gradle +9 -3
- package/platforms/android/capacitor-cordova-android-plugins/cordova.variables.gradle +6 -2
- package/platforms/android/capacitor-cordova-android-plugins/src/main/AndroidManifest.xml +7 -2
- package/platforms/android/capacitor-cordova-android-plugins/src/main/java/.gitkeep +0 -1
- package/platforms/android/capacitor.settings.gradle +66 -16
- package/platforms/android/gradle.properties +1 -0
- package/platforms/android/update-manifest/generate-manifest.mjs +97 -0
- package/platforms/android/update-manifest/schema.json +26 -0
- package/platforms/apple-store-entitlements.reviewed.json +155 -0
- package/platforms/electrobun/.generated/brand-config.json +3 -2
- package/platforms/electrobun/LICENSE +21 -0
- package/platforms/electrobun/README.md +15 -1
- package/platforms/electrobun/assets/appIcon.icns +0 -0
- package/platforms/electrobun/assets/appIcon.ico +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_128x128.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_128x128@2x.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_16x16.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_16x16@2x.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_256x256.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_256x256@2x.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_32x32.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_32x32@2x.png +0 -0
- package/platforms/electrobun/assets/appIcon.iconset/icon_512x512.png +0 -0
- package/platforms/electrobun/assets/brand-config.json +6 -6
- package/platforms/electrobun/biome.json +9 -9
- package/platforms/electrobun/docs/capability-collapse-matrix.json +318 -0
- package/platforms/electrobun/docs/capability-collapse-matrix.md +129 -0
- package/platforms/electrobun/docs/capability-routing.md +86 -0
- package/platforms/electrobun/docs/convergence-audit.json +3505 -0
- package/platforms/electrobun/docs/convergence-audit.md +694 -0
- package/platforms/electrobun/docs/database-boot-policy.md +90 -0
- package/platforms/electrobun/docs/riscv64-port.md +175 -0
- package/platforms/electrobun/docs/startup-first-run-cleanup.md +18 -0
- package/platforms/electrobun/docs/trace-first-annotations.md +52 -0
- package/platforms/electrobun/docs/ui-boundary-audit.json +580 -0
- package/platforms/electrobun/docs/ui-boundary-audit.md +257 -0
- package/platforms/electrobun/electrobun.config.ts +592 -364
- package/platforms/electrobun/entitlements/JUSTIFICATIONS.md +141 -0
- package/platforms/electrobun/entitlements/README.md +34 -6
- package/platforms/electrobun/entitlements/mas-bun.entitlements +15 -0
- package/platforms/electrobun/entitlements/mas.entitlements +6 -4
- package/platforms/electrobun/native/macos/window-effects.mm +1522 -0
- package/platforms/electrobun/package.json +18 -12
- package/platforms/electrobun/remotes/fs/README.md +70 -0
- package/platforms/electrobun/remotes/fs/electrobun.config.ts +38 -0
- package/platforms/electrobun/remotes/fs/package.json +12 -0
- package/platforms/electrobun/remotes/fs/plugin.json +25 -0
- package/platforms/electrobun/remotes/fs/src/bun/errors.ts +57 -0
- package/platforms/electrobun/remotes/fs/src/bun/file-limits.ts +50 -0
- package/platforms/electrobun/remotes/fs/src/bun/fs-service.ts +389 -0
- package/platforms/electrobun/remotes/fs/src/bun/path-guard.ts +270 -0
- package/platforms/electrobun/remotes/fs/src/bun/protocol.ts +149 -0
- package/platforms/electrobun/remotes/fs/src/bun/worker.ts +174 -0
- package/platforms/electrobun/remotes/fs/src/dev/phase5-smoke.ts +171 -0
- package/platforms/electrobun/remotes/fs/src/web/index.html +8 -0
- package/platforms/electrobun/remotes/git/README.md +75 -0
- package/platforms/electrobun/remotes/git/electrobun.config.ts +44 -0
- package/platforms/electrobun/remotes/git/package.json +12 -0
- package/platforms/electrobun/remotes/git/plugin.json +31 -0
- package/platforms/electrobun/remotes/git/src/bun/errors.ts +69 -0
- package/platforms/electrobun/remotes/git/src/bun/git-command.ts +156 -0
- package/platforms/electrobun/remotes/git/src/bun/git-service.ts +446 -0
- package/platforms/electrobun/remotes/git/src/bun/operation-history.ts +124 -0
- package/platforms/electrobun/remotes/git/src/bun/protocol.ts +252 -0
- package/platforms/electrobun/remotes/git/src/bun/worker.ts +316 -0
- package/platforms/electrobun/remotes/git/src/dev/phase7-smoke.ts +141 -0
- package/platforms/electrobun/remotes/git/src/web/index.html +8 -0
- package/platforms/electrobun/remotes/local-model/README.md +138 -0
- package/platforms/electrobun/remotes/local-model/electrobun.config.ts +46 -0
- package/platforms/electrobun/remotes/local-model/package.json +12 -0
- package/platforms/electrobun/remotes/local-model/plugin.json +33 -0
- package/platforms/electrobun/remotes/local-model/src/bun/download-state.ts +115 -0
- package/platforms/electrobun/remotes/local-model/src/bun/eliza1-catalog.ts +425 -0
- package/platforms/electrobun/remotes/local-model/src/bun/errors.ts +74 -0
- package/platforms/electrobun/remotes/local-model/src/bun/hf-eliza1-client.ts +169 -0
- package/platforms/electrobun/remotes/local-model/src/bun/local-inference-api-client.ts +245 -0
- package/platforms/electrobun/remotes/local-model/src/bun/model-service.ts +490 -0
- package/platforms/electrobun/remotes/local-model/src/bun/protocol.ts +301 -0
- package/platforms/electrobun/remotes/local-model/src/bun/worker.ts +248 -0
- package/platforms/electrobun/remotes/local-model/src/dev/phase8-smoke.ts +117 -0
- package/platforms/electrobun/remotes/local-model/src/web/index.html +13 -0
- package/platforms/electrobun/remotes/pty/README.md +65 -0
- package/platforms/electrobun/remotes/pty/electrobun.config.ts +47 -0
- package/platforms/electrobun/remotes/pty/package.json +12 -0
- package/platforms/electrobun/remotes/pty/plugin.json +34 -0
- package/platforms/electrobun/remotes/pty/src/bun/errors.ts +57 -0
- package/platforms/electrobun/remotes/pty/src/bun/output-buffer.ts +127 -0
- package/platforms/electrobun/remotes/pty/src/bun/protocol.ts +192 -0
- package/platforms/electrobun/remotes/pty/src/bun/pty-service.ts +562 -0
- package/platforms/electrobun/remotes/pty/src/bun/worker.ts +218 -0
- package/platforms/electrobun/remotes/pty/src/dev/phase6-smoke.ts +127 -0
- package/platforms/electrobun/remotes/pty/src/web/index.html +8 -0
- package/platforms/electrobun/remotes/runtime/README.md +370 -0
- package/platforms/electrobun/remotes/runtime/electrobun.config.ts +48 -0
- package/platforms/electrobun/remotes/runtime/package.json +14 -0
- package/platforms/electrobun/remotes/runtime/plugin.json +30 -0
- package/platforms/electrobun/remotes/runtime/src/bun/api-client.ts +620 -0
- package/platforms/electrobun/remotes/runtime/src/bun/errors.ts +45 -0
- package/platforms/electrobun/remotes/runtime/src/bun/log-buffer.ts +33 -0
- package/platforms/electrobun/remotes/runtime/src/bun/protocol.ts +366 -0
- package/platforms/electrobun/remotes/runtime/src/bun/route-discovery.ts +419 -0
- package/platforms/electrobun/remotes/runtime/src/bun/runtime-manager.ts +423 -0
- package/platforms/electrobun/remotes/runtime/src/bun/sse-parser.ts +99 -0
- package/platforms/electrobun/remotes/runtime/src/bun/stream-manager.ts +887 -0
- package/platforms/electrobun/remotes/runtime/src/bun/worker.ts +1231 -0
- package/platforms/electrobun/remotes/runtime/src/dev/phase1-smoke.ts +34 -0
- package/platforms/electrobun/remotes/runtime/src/dev/phase2-smoke.ts +86 -0
- package/platforms/electrobun/remotes/runtime/src/dev/phase3-smoke.ts +141 -0
- package/platforms/electrobun/remotes/runtime/src/web/index.css +187 -0
- package/platforms/electrobun/remotes/runtime/src/web/index.html +76 -0
- package/platforms/electrobun/remotes/runtime/src/web/index.ts +192 -0
- package/platforms/electrobun/remotes/surface/README.md +201 -0
- package/platforms/electrobun/remotes/surface/electrobun.config.ts +38 -0
- package/platforms/electrobun/remotes/surface/package.json +12 -0
- package/platforms/electrobun/remotes/surface/plugin.json +28 -0
- package/platforms/electrobun/remotes/surface/src/bun/worker.ts +132 -0
- package/platforms/electrobun/remotes/surface/src/dev/phase4-smoke.ts +566 -0
- package/platforms/electrobun/remotes/surface/src/protocol/event-types.ts +84 -0
- package/platforms/electrobun/remotes/surface/src/protocol/runtime-client.ts +673 -0
- package/platforms/electrobun/remotes/surface/src/web/app.ts +595 -0
- package/platforms/electrobun/remotes/surface/src/web/index.css +460 -0
- package/platforms/electrobun/remotes/surface/src/web/index.html +466 -0
- package/platforms/electrobun/remotes/surface/src/web/index.ts +5 -0
- package/platforms/electrobun/remotes/surface/src/web/render.ts +455 -0
- package/platforms/electrobun/remotes/surface/src/web/state.ts +427 -0
- package/platforms/electrobun/scripts/build-macos-effects.sh +4 -0
- package/platforms/electrobun/scripts/ensure-build-folder.ts +28 -0
- package/platforms/electrobun/scripts/ensure-whisper-gguf.sh +55 -0
- package/platforms/electrobun/scripts/ensure-whisper-model.sh +22 -80
- package/platforms/electrobun/scripts/generate-convergence-audit.ts +1203 -0
- package/platforms/electrobun/scripts/local-adhoc-sign-macos.ts +159 -159
- package/platforms/electrobun/scripts/postwrap-diagnostics.ts +424 -339
- package/platforms/electrobun/scripts/postwrap-sign-runtime-macos.ts +302 -271
- package/platforms/electrobun/scripts/smoke-test-windows.ps1 +17 -16
- package/platforms/electrobun/scripts/smoke-test.sh +5 -7
- package/platforms/electrobun/scripts/sync-web-assets.mjs +13 -13
- package/platforms/electrobun/scripts/verify-rpc-handlers.ts +109 -110
- package/platforms/electrobun/scripts/verify-windows-installer-proof.ps1 +3 -8
- package/platforms/electrobun/src/__stubs__/bun-ffi.ts +31 -31
- package/platforms/electrobun/src/__stubs__/electrobun-bun.ts +1 -1
- package/platforms/electrobun/src/agent-ready-state.ts +8 -8
- package/platforms/electrobun/src/agent-reset-from-main.test.ts +162 -0
- package/platforms/electrobun/src/agent-reset-from-main.ts +62 -62
- package/platforms/electrobun/src/agent-status-rpc.test.ts +95 -0
- package/platforms/electrobun/src/agent-status-rpc.ts +156 -0
- package/platforms/electrobun/src/api-base.test.ts +247 -0
- package/platforms/electrobun/src/api-base.ts +202 -93
- package/platforms/electrobun/src/application-menu-action-registry.ts +9 -9
- package/platforms/electrobun/src/application-menu.ts +348 -348
- package/platforms/electrobun/src/background-notice.ts +36 -36
- package/platforms/electrobun/src/boot-progress.test.ts +188 -0
- package/platforms/electrobun/src/boot-progress.ts +111 -0
- package/platforms/electrobun/src/brand-config.test.ts +39 -0
- package/platforms/electrobun/src/brand-config.ts +141 -129
- package/platforms/electrobun/src/bridge/browser-tabs-renderer-registry.ts +28 -28
- package/platforms/electrobun/src/bridge/electrobun-boot-config.ts +42 -0
- package/platforms/electrobun/src/bridge/electrobun-crypto-ready.ts +120 -0
- package/platforms/electrobun/src/bridge/electrobun-direct-rpc.ts +342 -357
- package/platforms/electrobun/src/bridge/electrobun-stub.ts +13 -13
- package/platforms/electrobun/src/browser-workspace-bridge-server.ts +285 -243
- package/platforms/electrobun/src/cloud-auth-window.ts +136 -136
- package/platforms/electrobun/src/cloud-disconnect-from-main.ts +90 -90
- package/platforms/electrobun/src/config-and-auth-rpc.test.ts +256 -0
- package/platforms/electrobun/src/config-and-auth-rpc.ts +302 -0
- package/platforms/electrobun/src/conversations-and-character-rpc.test.ts +185 -0
- package/platforms/electrobun/src/conversations-and-character-rpc.ts +131 -0
- package/platforms/electrobun/src/dashboard-rpc.test.ts +200 -0
- package/platforms/electrobun/src/dashboard-rpc.ts +344 -0
- package/platforms/electrobun/src/database/database-lock.ts +141 -0
- package/platforms/electrobun/src/database/database-mode.ts +149 -0
- package/platforms/electrobun/src/database/database-recovery.ts +72 -0
- package/platforms/electrobun/src/database/database-snapshot.ts +190 -0
- package/platforms/electrobun/src/database/database.test.ts +196 -0
- package/platforms/electrobun/src/database/index.ts +5 -0
- package/platforms/electrobun/src/database/pglite-paths.ts +100 -0
- package/platforms/electrobun/src/desktop-deep-link-events.test.ts +30 -0
- package/platforms/electrobun/src/desktop-deep-link-events.ts +17 -0
- package/platforms/electrobun/src/desktop-http-request.test.ts +73 -73
- package/platforms/electrobun/src/desktop-http-request.ts +85 -85
- package/platforms/electrobun/src/desktop-pill-config.test.ts +27 -0
- package/platforms/electrobun/src/desktop-pill-config.ts +40 -0
- package/platforms/electrobun/src/desktop-test-bridge-server.ts +204 -204
- package/platforms/electrobun/src/desktop-tray-config.test.ts +87 -0
- package/platforms/electrobun/src/desktop-tray-config.ts +84 -0
- package/platforms/electrobun/src/devtools-layout.ts +41 -41
- package/platforms/electrobun/src/diagnostic-format.test.ts +71 -0
- package/platforms/electrobun/src/diagnostic-format.ts +75 -36
- package/platforms/electrobun/src/dynamic-view-rpc-schema.test.ts +37 -0
- package/platforms/electrobun/src/dynamic-views/README.md +44 -0
- package/platforms/electrobun/src/dynamic-views/demo/agent-run-trace.html +135 -0
- package/platforms/electrobun/src/dynamic-views/errors.ts +29 -0
- package/platforms/electrobun/src/dynamic-views/host.test.ts +353 -0
- package/platforms/electrobun/src/dynamic-views/host.ts +332 -0
- package/platforms/electrobun/src/dynamic-views/index.ts +57 -0
- package/platforms/electrobun/src/dynamic-views/kiosk-canvas.ts +89 -0
- package/platforms/electrobun/src/dynamic-views/registry.test.ts +139 -0
- package/platforms/electrobun/src/dynamic-views/registry.ts +196 -0
- package/platforms/electrobun/src/dynamic-views/session-manager.test.ts +355 -0
- package/platforms/electrobun/src/dynamic-views/session-manager.ts +348 -0
- package/platforms/electrobun/src/dynamic-views/types.ts +105 -0
- package/platforms/electrobun/src/electrobun-boot-config.test.ts +50 -0
- package/platforms/electrobun/src/electrobun-config.test.ts +62 -0
- package/platforms/electrobun/src/electrobun-crypto-ready.test.ts +65 -0
- package/platforms/electrobun/src/electrobun-window-options.ts +25 -0
- package/platforms/electrobun/src/extension-rpc.test.ts +88 -0
- package/platforms/electrobun/src/extension-rpc.ts +102 -0
- package/platforms/electrobun/src/fatal-shutdown.test.ts +10 -10
- package/platforms/electrobun/src/fatal-shutdown.ts +1 -1
- package/platforms/electrobun/src/first-party-remotes.test.ts +169 -0
- package/platforms/electrobun/src/first-party-remotes.ts +297 -0
- package/platforms/electrobun/src/first-run-rpc.test.ts +192 -0
- package/platforms/electrobun/src/first-run-rpc.ts +146 -0
- package/platforms/electrobun/src/floating-chat-window.ts +181 -181
- package/platforms/electrobun/src/inbox-rpc.test.ts +123 -0
- package/platforms/electrobun/src/inbox-rpc.ts +158 -0
- package/platforms/electrobun/src/index.ts +2555 -2096
- package/platforms/electrobun/src/kiosk-mode.ts +50 -0
- package/platforms/electrobun/src/launch/index.ts +4 -0
- package/platforms/electrobun/src/launch/launch-dynamic-view.ts +37 -0
- package/platforms/electrobun/src/launch/launch-orchestrator.test.ts +224 -0
- package/platforms/electrobun/src/launch/launch-orchestrator.ts +456 -0
- package/platforms/electrobun/src/launch/launch-store.test.ts +97 -0
- package/platforms/electrobun/src/launch/launch-store.ts +134 -0
- package/platforms/electrobun/src/launch/types.ts +103 -0
- package/platforms/electrobun/src/launch/views/launch-diagnostics.html +205 -0
- package/platforms/electrobun/src/lifecycle/agent-ready-publish.test.ts +50 -0
- package/platforms/electrobun/src/lifecycle/agent-ready-publish.ts +27 -0
- package/platforms/electrobun/src/lifecycle/api-base-owner.ts +42 -31
- package/platforms/electrobun/src/lifecycle/desktop-session-prime.ts +44 -44
- package/platforms/electrobun/src/logger.ts +14 -14
- package/platforms/electrobun/src/main-window-runtime.ts +83 -83
- package/platforms/electrobun/src/main-window-session.test.ts +109 -0
- package/platforms/electrobun/src/main-window-session.ts +87 -51
- package/platforms/electrobun/src/menu-reset-from-main.ts +158 -158
- package/platforms/electrobun/src/native/agent-env.test.ts +52 -0
- package/platforms/electrobun/src/native/agent-runtime-layout.test.ts +42 -0
- package/platforms/electrobun/src/native/agent-state-dir.test.ts +91 -0
- package/platforms/electrobun/src/native/agent.ts +2122 -1682
- package/platforms/electrobun/src/native/auth-bridge.test.ts +67 -0
- package/platforms/electrobun/src/native/auth-bridge.ts +464 -360
- package/platforms/electrobun/src/native/browser-workspace.ts +723 -471
- package/platforms/electrobun/src/native/camera.ts +50 -50
- package/platforms/electrobun/src/native/canvas.ts +444 -445
- package/platforms/electrobun/src/native/credentials.ts +673 -616
- package/platforms/electrobun/src/native/desktop-window.test.ts +300 -0
- package/platforms/electrobun/src/native/desktop.ts +2196 -2156
- package/platforms/electrobun/src/native/editor-bridge.ts +201 -201
- package/platforms/electrobun/src/native/file-watcher.ts +154 -154
- package/platforms/electrobun/src/native/gateway.ts +179 -180
- package/platforms/electrobun/src/native/gpu-window.ts +256 -256
- package/platforms/electrobun/src/native/index.ts +76 -74
- package/platforms/electrobun/src/native/location.test.ts +44 -0
- package/platforms/electrobun/src/native/location.ts +90 -80
- package/platforms/electrobun/src/native/loopback-port.ts +60 -60
- package/platforms/electrobun/src/native/mac-window-effects.ts +166 -104
- package/platforms/electrobun/src/native/music-player.ts +38 -38
- package/platforms/electrobun/src/native/permissions-shared.ts +249 -150
- package/platforms/electrobun/src/native/permissions.ts +301 -208
- package/platforms/electrobun/src/native/power-state.ts +129 -129
- package/platforms/electrobun/src/native/remote-plugin-host.test.ts +1394 -0
- package/platforms/electrobun/src/native/remote-plugin-host.ts +1531 -0
- package/platforms/electrobun/src/native/screencapture.ts +667 -573
- package/platforms/electrobun/src/native/steward.ts +207 -204
- package/platforms/electrobun/src/native/swabble.ts +68 -324
- package/platforms/electrobun/src/native/talkmode.ts +253 -422
- package/platforms/electrobun/src/native/webgpu-browser-support.test.ts +18 -0
- package/platforms/electrobun/src/native/webgpu-browser-support.ts +165 -147
- package/platforms/electrobun/src/native/whisper-env.test.ts +71 -0
- package/platforms/electrobun/src/native/whisper-env.ts +68 -0
- package/platforms/electrobun/src/native-onboarding.ts +270 -0
- package/platforms/electrobun/src/onboarding-overlay-window.ts +141 -0
- package/platforms/electrobun/src/persisted-deployment.ts +91 -0
- package/platforms/electrobun/src/pill-window.test.ts +91 -0
- package/platforms/electrobun/src/pill-window.ts +99 -0
- package/platforms/electrobun/src/preload-validation.ts +44 -44
- package/platforms/electrobun/src/preload.js +1 -1
- package/platforms/electrobun/src/print-electrobun-dev-settings-banner.ts +120 -120
- package/platforms/electrobun/src/renderer-api-proxy.test.ts +73 -0
- package/platforms/electrobun/src/renderer-api-proxy.ts +86 -0
- package/platforms/electrobun/src/renderer-static.test.ts +53 -0
- package/platforms/electrobun/src/renderer-static.ts +144 -57
- package/platforms/electrobun/src/rpc-handler-slices.ts +121 -0
- package/platforms/electrobun/src/rpc-handlers.test.ts +267 -0
- package/platforms/electrobun/src/rpc-handlers.ts +1306 -913
- package/platforms/electrobun/src/rpc-parse-utils.ts +57 -0
- package/platforms/electrobun/src/rpc-port-resolver.test.ts +45 -0
- package/platforms/electrobun/src/rpc-port-resolver.ts +31 -0
- package/platforms/electrobun/src/rpc-schema.ts +2556 -1619
- package/platforms/electrobun/src/runtime-layout.ts +105 -105
- package/platforms/electrobun/src/runtime-permissions.ts +95 -95
- package/platforms/electrobun/src/runtime-rpc.test.ts +126 -0
- package/platforms/electrobun/src/runtime-rpc.ts +237 -0
- package/platforms/electrobun/src/screenshot-dev-server.ts +87 -87
- package/platforms/electrobun/src/settings-mutations-rpc.test.ts +193 -0
- package/platforms/electrobun/src/settings-mutations-rpc.ts +220 -0
- package/platforms/electrobun/src/startup-trace.ts +274 -270
- package/platforms/electrobun/src/subscription-rpc.test.ts +89 -0
- package/platforms/electrobun/src/subscription-rpc.ts +192 -0
- package/platforms/electrobun/src/surface-windows.test.ts +355 -0
- package/platforms/electrobun/src/surface-windows.ts +410 -410
- package/platforms/electrobun/src/trace/README.md +73 -0
- package/platforms/electrobun/src/trace/errors.ts +21 -0
- package/platforms/electrobun/src/trace/index.ts +40 -0
- package/platforms/electrobun/src/trace/trace-dynamic-view.ts +40 -0
- package/platforms/electrobun/src/trace/trace-host-requests.ts +473 -0
- package/platforms/electrobun/src/trace/trace-service.test.ts +186 -0
- package/platforms/electrobun/src/trace/trace-service.ts +324 -0
- package/platforms/electrobun/src/trace/trace-store.test.ts +141 -0
- package/platforms/electrobun/src/trace/trace-store.ts +551 -0
- package/platforms/electrobun/src/trace/types.ts +250 -0
- package/platforms/electrobun/src/trace/views/agent-run-trace.html +311 -0
- package/platforms/electrobun/src/types/web-speech.d.ts +28 -28
- package/platforms/electrobun/src/types.ts +5 -5
- package/platforms/electrobun/src/update-availability.test.ts +72 -0
- package/platforms/electrobun/src/update-availability.ts +90 -0
- package/platforms/electrobun/src/update-rpc.test.ts +83 -0
- package/platforms/electrobun/src/update-rpc.ts +123 -0
- package/platforms/electrobun/src/voice/README.md +184 -0
- package/platforms/electrobun/src/voice/errors.ts +42 -0
- package/platforms/electrobun/src/voice/index.ts +78 -0
- package/platforms/electrobun/src/voice/types.ts +316 -0
- package/platforms/electrobun/src/voice/voice-host-requests.ts +259 -0
- package/platforms/electrobun/src/voice/voice-latency-budget.test.ts +66 -0
- package/platforms/electrobun/src/voice/voice-latency-budget.ts +243 -0
- package/platforms/electrobun/src/voice/voice-live-validation.test.ts +352 -0
- package/platforms/electrobun/src/voice/voice-live-validation.ts +838 -0
- package/platforms/electrobun/src/voice/voice-pipeline.ts +250 -0
- package/platforms/electrobun/src/voice/voice-playback-adapter.ts +31 -0
- package/platforms/electrobun/src/voice/voice-runtime-adapter.test.ts +213 -0
- package/platforms/electrobun/src/voice/voice-runtime-adapter.ts +686 -0
- package/platforms/electrobun/src/voice/voice-service.test.ts +561 -0
- package/platforms/electrobun/src/voice/voice-service.ts +1027 -0
- package/platforms/electrobun/src/voice/voice-stream-coordinator.test.ts +115 -0
- package/platforms/electrobun/src/voice/voice-stream-coordinator.ts +270 -0
- package/platforms/electrobun/src/voice/voice-trace.ts +97 -0
- package/platforms/electrobun/src/voice/voice-tts-chunker.test.ts +91 -0
- package/platforms/electrobun/src/voice/voice-tts-chunker.ts +194 -0
- package/platforms/electrobun/src/windows-cef-profile.ts +88 -88
- package/platforms/electrobun/tsconfig.json +73 -13
- package/platforms/electrobun/update-channels.json +22 -0
- package/platforms/electrobun/vitest.electrobun.config.ts +72 -42
- package/platforms/ios/App/App/App.entitlements +4 -0
- package/platforms/ios/App/App/AppDelegate.swift +80 -18
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ios-marketing-1024.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-20x20@1x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-20x20@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-29x29@1x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-29x29@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-40x40@1x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-40x40@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-76x76@1x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-76x76@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-ipad-83_5x83_5@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-20x20@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-20x20@3x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-29x29@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-29x29@3x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-40x40@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-40x40@3x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-60x60@2x.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/AppIcon.appiconset/AppIcon-iphone-60x60@3x.png +0 -0
- package/platforms/ios/App/App/Base.lproj/LaunchScreen.storyboard +1 -4
- package/platforms/ios/App/App/ComputerUseBridge.swift +589 -0
- package/platforms/ios/App/App/DeviceActivityMonitorExtension/DeviceActivityMonitorExtension.entitlements +12 -0
- package/platforms/ios/App/App/DeviceActivityMonitorExtension/DeviceActivityMonitorExtension.swift +34 -0
- package/platforms/ios/App/App/DeviceActivityMonitorExtension/Info.plist +29 -0
- package/platforms/ios/App/App/DeviceActivityReportExtension/DeviceActivityReportExtension.entitlements +12 -0
- package/platforms/ios/App/App/DeviceActivityReportExtension/DeviceActivityReportExtension.swift +53 -0
- package/platforms/ios/App/App/DeviceActivityReportExtension/Info.plist +27 -0
- package/platforms/ios/App/App/ElizaAppIntents.swift +183 -0
- package/platforms/ios/App/App/ElizaIntentPlugin.swift +342 -5
- package/platforms/ios/App/App/Info.plist +17 -1
- package/platforms/ios/App/App/runners/eliza-tasks.js +177 -0
- package/platforms/ios/App/App.xcodeproj/project.pbxproj +262 -6
- package/platforms/ios/App/BroadcastExtension/SampleHandler.swift +100 -0
- package/platforms/ios/App/Podfile +5 -0
- package/platforms/ios/App/Podfile.lock +83 -59
- package/register-runtime-hooks.js +11 -5
- package/registry/app-registry.d.ts +14 -0
- package/registry/app-registry.d.ts.map +1 -0
- package/registry/app-registry.js +29 -0
- package/registry/entries/apps/app-polymarket.json +31 -0
- package/registry/entries/apps/clawville.json +27 -0
- package/registry/entries/apps/companion.json +28 -0
- package/registry/entries/apps/database-viewer.json +27 -0
- package/registry/entries/apps/defense-of-the-agents.json +27 -0
- package/registry/entries/apps/documents.json +30 -0
- package/registry/entries/apps/feed.json +27 -0
- package/registry/entries/apps/hyperliquid.json +31 -0
- package/registry/entries/apps/log-viewer.json +27 -0
- package/registry/entries/apps/memory-viewer.json +27 -0
- package/registry/entries/apps/model-tester.json +31 -0
- package/registry/entries/apps/plugin-viewer.json +27 -0
- package/registry/entries/apps/relationship-viewer.json +27 -0
- package/registry/entries/apps/runtime-debugger.json +27 -0
- package/registry/entries/apps/shopify.json +31 -0
- package/registry/entries/apps/skills-viewer.json +27 -0
- package/registry/entries/apps/steward.json +31 -0
- package/registry/entries/apps/training.json +54 -0
- package/registry/entries/apps/trajectory-viewer.json +27 -0
- package/registry/entries/apps/vincent.json +31 -0
- package/registry/entries/connectors/bluebubbles.json +99 -0
- package/registry/entries/connectors/bluesky.json +173 -0
- package/registry/entries/connectors/discord.json +119 -0
- package/registry/entries/connectors/farcaster.json +174 -0
- package/registry/entries/connectors/feishu.json +79 -0
- package/registry/entries/connectors/google-chat.json +120 -0
- package/registry/entries/connectors/google.json +82 -0
- package/registry/entries/connectors/imessage.json +96 -0
- package/registry/entries/connectors/instagram.json +64 -0
- package/registry/entries/connectors/line.json +86 -0
- package/registry/entries/connectors/matrix.json +94 -0
- package/registry/entries/connectors/mattermost.json +110 -0
- package/registry/entries/connectors/msteams.json +104 -0
- package/registry/entries/connectors/nextcloud-talk.json +104 -0
- package/registry/entries/connectors/nostr.json +70 -0
- package/registry/entries/connectors/signal.json +81 -0
- package/registry/entries/connectors/slack.json +102 -0
- package/registry/entries/connectors/telegram.json +71 -0
- package/registry/entries/connectors/tlon.json +94 -0
- package/registry/entries/connectors/twitch.json +110 -0
- package/registry/entries/connectors/whatsapp.json +113 -0
- package/registry/entries/connectors/x.json +231 -0
- package/registry/entries/connectors/zalo.json +112 -0
- package/registry/entries/connectors/zalouser.json +122 -0
- package/registry/entries/plugins/agent-orchestrator.json +33 -0
- package/registry/entries/plugins/agent-skills.json +72 -0
- package/registry/entries/plugins/anthropic.json +73 -0
- package/registry/entries/plugins/app-control.json +23 -0
- package/registry/entries/plugins/auto-trader.json +203 -0
- package/registry/entries/plugins/background-runner.json +26 -0
- package/registry/entries/plugins/blooio.json +102 -0
- package/registry/entries/plugins/browser.json +75 -0
- package/registry/entries/plugins/cli.json +40 -0
- package/registry/entries/plugins/clipboard.json +44 -0
- package/registry/entries/plugins/coding-tools.json +71 -0
- package/registry/entries/plugins/commands.json +63 -0
- package/registry/entries/plugins/computeruse.json +74 -0
- package/registry/entries/plugins/copilot-proxy.json +93 -0
- package/registry/entries/plugins/directives.json +63 -0
- package/registry/entries/plugins/edge-tts.json +97 -0
- package/registry/entries/plugins/elevenlabs.json +169 -0
- package/registry/entries/plugins/elizacloud.json +208 -0
- package/registry/entries/plugins/evm.json +134 -0
- package/registry/entries/plugins/experience.json +34 -0
- package/registry/entries/plugins/facewear.json +131 -0
- package/registry/entries/plugins/form.json +26 -0
- package/registry/entries/plugins/github.json +93 -0
- package/registry/entries/plugins/gmail-watch.json +25 -0
- package/registry/entries/plugins/goals.json +77 -0
- package/registry/entries/plugins/google-genai.json +106 -0
- package/registry/entries/plugins/groq.json +93 -0
- package/registry/entries/plugins/hedera.json +48 -0
- package/registry/entries/plugins/inmemorydb.json +25 -0
- package/registry/entries/plugins/linear.json +51 -0
- package/registry/entries/plugins/local-inference.json +142 -0
- package/registry/entries/plugins/local-storage.json +36 -0
- package/registry/entries/plugins/localdb.json +25 -0
- package/registry/entries/plugins/mcp.json +44 -0
- package/registry/entries/plugins/memory.json +124 -0
- package/registry/entries/plugins/minecraft.json +79 -0
- package/registry/entries/plugins/moltbook.json +83 -0
- package/registry/entries/plugins/music.json +155 -0
- package/registry/entries/plugins/mysticism.json +48 -0
- package/registry/entries/plugins/nearai.json +82 -0
- package/registry/entries/plugins/ngrok.json +69 -0
- package/registry/entries/plugins/ollama.json +96 -0
- package/registry/entries/plugins/openai.json +189 -0
- package/registry/entries/plugins/openrouter.json +188 -0
- package/registry/entries/plugins/pdf.json +26 -0
- package/registry/entries/plugins/plugin-manager.json +23 -0
- package/registry/entries/plugins/prose.json +48 -0
- package/registry/entries/plugins/rlm.json +26 -0
- package/registry/entries/plugins/roblox.json +88 -0
- package/registry/entries/plugins/rss.json +64 -0
- package/registry/entries/plugins/s3-storage.json +91 -0
- package/registry/entries/plugins/scheduling.json +35 -0
- package/registry/entries/plugins/shell.json +94 -0
- package/registry/entries/plugins/social-alpha.json +72 -0
- package/registry/entries/plugins/tailscale.json +81 -0
- package/registry/entries/plugins/tee.json +53 -0
- package/registry/entries/plugins/todos.json +26 -0
- package/registry/entries/plugins/trajectory-logger.json +33 -0
- package/registry/entries/plugins/trust.json +39 -0
- package/registry/entries/plugins/tts.json +71 -0
- package/registry/entries/plugins/tunnel.json +45 -0
- package/registry/entries/plugins/twilio.json +168 -0
- package/registry/entries/plugins/vercel-ai-gateway.json +128 -0
- package/registry/entries/plugins/video.json +23 -0
- package/registry/entries/plugins/vision.json +43 -0
- package/registry/entries/plugins/webhooks.json +23 -0
- package/registry/entries/plugins/workflow.json +25 -0
- package/registry/entries/plugins/xai.json +75 -0
- package/registry/index.d.ts +2 -1
- package/registry/index.d.ts.map +1 -1
- package/registry/index.js +46 -12
- package/registry/loader.d.ts +2 -1
- package/registry/loader.d.ts.map +1 -1
- package/registry/loader.js +49 -2
- package/registry/schema.d.ts +244 -34
- package/registry/schema.d.ts.map +1 -1
- package/registry/schema.js +36 -0
- package/runtime/android-avf-microdroid-bridge.d.ts +29 -0
- package/runtime/android-avf-microdroid-bridge.d.ts.map +1 -0
- package/runtime/android-avf-microdroid-bridge.js +149 -0
- package/runtime/api-dev-settings-banner.d.ts.map +1 -1
- package/runtime/api-dev-settings-banner.js +5 -13
- package/runtime/app-core-runtime-hooks.d.ts +21 -0
- package/runtime/app-core-runtime-hooks.d.ts.map +1 -0
- package/runtime/app-core-runtime-hooks.js +10 -0
- package/runtime/autonomy-policy.d.ts +2 -0
- package/runtime/autonomy-policy.d.ts.map +1 -0
- package/runtime/autonomy-policy.js +4 -0
- package/runtime/desktop/AppWindowRenderer.d.ts +17 -0
- package/runtime/desktop/AppWindowRenderer.d.ts.map +1 -0
- package/runtime/desktop/AppWindowRenderer.js +360 -0
- package/runtime/desktop/DesktopSurfaceNavigationRuntime.d.ts +2 -0
- package/runtime/desktop/DesktopSurfaceNavigationRuntime.d.ts.map +1 -0
- package/runtime/desktop/DesktopSurfaceNavigationRuntime.js +41 -0
- package/runtime/desktop/DesktopTrayRuntime.d.ts +2 -0
- package/runtime/desktop/DesktopTrayRuntime.d.ts.map +1 -0
- package/runtime/desktop/DesktopTrayRuntime.js +174 -0
- package/runtime/desktop/DetachedShellRoot.d.ts +10 -0
- package/runtime/desktop/DetachedShellRoot.d.ts.map +1 -0
- package/runtime/desktop/DetachedShellRoot.js +111 -0
- package/runtime/desktop/index.d.ts +6 -0
- package/runtime/desktop/index.d.ts.map +1 -0
- package/runtime/desktop/index.js +5 -0
- package/runtime/desktop/tray-menu.d.ts +20 -0
- package/runtime/desktop/tray-menu.d.ts.map +1 -0
- package/runtime/desktop/tray-menu.js +143 -0
- package/runtime/dev-server.d.ts +1 -1
- package/runtime/dev-server.d.ts.map +1 -1
- package/runtime/dev-server.js +93 -17
- package/runtime/eliza.d.ts +75 -1
- package/runtime/eliza.d.ts.map +1 -1
- package/runtime/eliza.js +596 -122
- package/runtime/ensure-text-to-speech-handler.d.ts.map +1 -1
- package/runtime/ensure-text-to-speech-handler.js +10 -3
- package/runtime/mobile-safe-runtime.d.ts +181 -2
- package/runtime/mobile-safe-runtime.d.ts.map +1 -1
- package/runtime/mobile-safe-runtime.js +1019 -12
- package/runtime/mode/remote-forwarder.d.ts.map +1 -1
- package/runtime/mode/remote-forwarder.js +2 -2
- package/runtime/mode/route-mode-guard.d.ts +1 -2
- package/runtime/mode/route-mode-guard.d.ts.map +1 -1
- package/runtime/mode/route-mode-guard.js +4 -5
- package/runtime/mode/route-mode-matrix.d.ts.map +1 -1
- package/runtime/mode/route-mode-matrix.js +14 -1
- package/runtime/mode/runtime-mode.d.ts +1 -1
- package/runtime/mode/runtime-mode.js +1 -1
- package/runtime/runtime-bootstrap-policy.d.ts.map +1 -1
- package/runtime/runtime-bootstrap-policy.js +14 -2
- package/runtime/telegram-standalone-handler.d.ts.map +1 -1
- package/runtime/telegram-standalone-handler.js +10 -9
- package/runtime/tts-cache-wiring.d.ts +29 -0
- package/runtime/tts-cache-wiring.d.ts.map +1 -0
- package/runtime/tts-cache-wiring.js +114 -0
- package/runtime/voice-warmup.d.ts +81 -0
- package/runtime/voice-warmup.d.ts.map +1 -0
- package/runtime/voice-warmup.js +111 -0
- package/scripts/android-sms-gateway-template.test.mjs +1014 -0
- package/scripts/aosp/README.md +19 -15
- package/scripts/aosp/compile-libllama.mjs +1344 -248
- package/scripts/aosp/compile-shim.mjs +47 -18
- package/scripts/aosp/deploy-pixel.mjs +405 -0
- package/scripts/aosp/lib/load-variant-config.mjs +3 -3
- package/scripts/aosp/llama-cpp-patches/README.md +8 -8
- package/scripts/aosp/llama-cpp-patches/apply-patches.mjs +23 -6
- package/scripts/aosp/llama-cpp-patches/polarquant/README.md +37 -0
- package/scripts/aosp/llama-cpp-patches/qjl/README.md +37 -0
- package/scripts/aosp/seccomp-shim/sigsys-handler-arm64.c +169 -0
- package/scripts/aosp/seccomp-shim/sigsys-handler-riscv64.c +217 -0
- package/scripts/aosp/smoke-cuttlefish.mjs +34 -4
- package/scripts/aosp/stage-default-models.mjs +18 -18
- package/scripts/aosp/variant-config-schema.ts +2 -2
- package/scripts/assert-required-bundled-packages.test.ts +534 -0
- package/scripts/audit-apple-store-sandbox.mjs +146 -0
- package/scripts/audit-live-test-surface.mjs +5 -2
- package/scripts/build-capacitor-app.mjs +21 -0
- package/scripts/build-flatpak.mjs +5 -5
- package/scripts/build-helpers/arm64-simd.mjs +72 -0
- package/scripts/build-helpers/omnivoice-merged.mjs +87 -0
- package/scripts/build-helpers/verify-fused-symbols.mjs +567 -0
- package/scripts/build-image.sh +1 -1
- package/scripts/build-llama-cpp-mtp.mjs +487 -0
- package/scripts/build-native-plugins.mjs +230 -18
- package/scripts/build-patched-electrobun-cli.mjs +68 -10
- package/scripts/build-win.mjs +1 -1
- package/scripts/bun-riscv64/Dockerfile +418 -0
- package/scripts/bun-riscv64/README.md +316 -0
- package/scripts/bun-riscv64/build.sh +469 -0
- package/scripts/bun-riscv64/bun-patches/0001-config-add-riscv64-arch.patch +74 -0
- package/scripts/bun-riscv64/bun-patches/0002-flags-add-riscv64-march-mabi.patch +16 -0
- package/scripts/bun-riscv64/bun-patches/0003-zig-add-riscv64-target-triple-and-cpu.patch +26 -0
- package/scripts/bun-riscv64/bun-patches/0004-webkit-force-local-mode-on-riscv64.patch +33 -0
- package/scripts/bun-riscv64/bun-patches/0005-tinycc-disable-on-riscv64.patch +16 -0
- package/scripts/bun-riscv64/bun-patches/0006-build-add-riscv64-cli-validation.patch +15 -0
- package/scripts/bun-riscv64/bun-patches/0007-deps-per-dep-riscv64-checks.patch +24 -0
- package/scripts/bun-riscv64/bun-patches/0008-source-stabilize-riscv64-musl-build.patch +226 -0
- package/scripts/bun-riscv64/bun-patches/0009-disable-wasm-streaming-hooks-for-c-loop.patch +162 -0
- package/scripts/bun-riscv64/bun-patches/0010-disable-inspector-profiler-for-riscv64-c-loop.patch +80 -0
- package/scripts/bun-riscv64/bun-patches/0011-process-arch-add-riscv64.patch +23 -0
- package/scripts/bun-riscv64/bun-patches/0012-cpu-features-add-riscv64-fallback.patch +13 -0
- package/scripts/bun-riscv64/bun-patches/0013-disable-console-inspector-hooks-for-riscv64-c-loop.patch +43 -0
- package/scripts/bun-riscv64/bun-patches/0014-disable-custom-inspector-dispatchers-on-riscv64.patch +127 -0
- package/scripts/bun-riscv64/bun-patches/0015-disable-jsc-profiler-builtins-on-riscv64.patch +75 -0
- package/scripts/bun-riscv64/bun-patches/0016-node-vm-disable-jit-cached-data-on-riscv64-c-loop.patch +96 -0
- package/scripts/bun-riscv64/bun-patches/0017-disable-performance-domjit-signature-on-riscv64-c-loop.patch +34 -0
- package/scripts/bun-riscv64/bun-patches/0018-fix-serialized-script-identifier-big-endian-path.patch +19 -0
- package/scripts/bun-riscv64/bun-patches/0019-add-wtf-timer-fire-bridge-for-c-loop.patch +24 -0
- package/scripts/bun-riscv64/bun-patches/0020-run-riscv64-smoke-test-under-qemu.patch +13 -0
- package/scripts/bun-riscv64/bun-patches/0021-fix-riscv64-linux-open-flags.patch +25 -0
- package/scripts/bun-riscv64/bun-patches/0022-zlib-riscv64-generic-kernels.patch +25 -0
- package/scripts/bun-riscv64/bun-patches/README.md +127 -0
- package/scripts/bun-riscv64/bun-version.json +202 -0
- package/scripts/bun-riscv64/run-build.sh +162 -0
- package/scripts/bun-riscv64/rust-core/0001-riscv64-rust-core-port.patch +868 -0
- package/scripts/bun-riscv64/rust-core/0002-second-wave-riscv64-source-gaps.patch +130 -0
- package/scripts/bun-riscv64/rust-core/0003-third-wave-riscv64-crash-handler-gaps.patch +78 -0
- package/scripts/bun-riscv64/rust-core/0004-rust-target-cpu-riscv64.patch +39 -0
- package/scripts/bun-riscv64/rust-core/0005-fifth-wave-riscv64-source-gaps.patch +96 -0
- package/scripts/bun-riscv64/rust-core/0006-cpp-wasm-and-inspector-guards-riscv64.patch +91 -0
- package/scripts/bun-riscv64/rust-core/0007-bun-alloc-max-align-t-riscv64.patch +36 -0
- package/scripts/bun-riscv64/rust-core/0008-workspace-lints-warn-not-deny-riscv64.patch +75 -0
- package/scripts/bun-riscv64/rust-core/0009-zigglobalobject-wasm-streaming-guards-riscv64.patch +109 -0
- package/scripts/bun-riscv64/rust-core/0010-tcc-externs-stub-on-riscv64.patch +62 -0
- package/scripts/bun-riscv64/rust-core/0011-clippy-ptr-cast-lints-warn-riscv64.patch +61 -0
- package/scripts/bun-riscv64/rust-core/README.md +80 -0
- package/scripts/bun-riscv64/rust-core/webkit-patches/0003-disable-dfg-ftl-on-riscv64.patch +60 -0
- package/scripts/bun-riscv64/rust-core/webkit-patches/0004-riscv64-do-not-force-wasm-in-c-loop.patch +31 -0
- package/scripts/bun-riscv64/rust-core/webkit-patches/0005-domjit-effect-allow-no-dfg-c-loop.patch +40 -0
- package/scripts/bun-riscv64/rust-core/webkit-patches/0006-disable-usewasm-when-webassembly-compiled-out.patch +33 -0
- package/scripts/bun-riscv64/rust-core/webkit-patches/0007-restore-dropped-includes-and-llint-fwd-decl.patch +31 -0
- package/scripts/bun-riscv64/validate.sh +264 -0
- package/scripts/bun-riscv64/webkit-patches/0001-cherry-pick-llint-riscv64.recipe +155 -0
- package/scripts/bun-riscv64/webkit-patches/0002-cherry-pick-baseline-jit-riscv64.recipe +40 -0
- package/scripts/bun-riscv64/webkit-patches/0003-disable-dfg-ftl-on-riscv64.patch +60 -0
- package/scripts/bun-riscv64/webkit-patches/0004-riscv64-do-not-force-wasm-in-c-loop.patch +31 -0
- package/scripts/bun-riscv64/webkit-patches/0005-domjit-effect-allow-no-dfg-c-loop.patch +40 -0
- package/scripts/bun-riscv64/webkit-patches/0006-disable-usewasm-when-webassembly-compiled-out.patch +33 -0
- package/scripts/bun-riscv64/webkit-patches/0007-restore-dropped-includes-and-llint-fwd-decl.patch +72 -0
- package/scripts/bun-riscv64/webkit-patches/README.md +146 -0
- package/scripts/check-homepage-public-readiness.mjs +353 -0
- package/scripts/check-homepage-release-data.mjs +110 -0
- package/scripts/check-i18n.mjs +2 -1
- package/scripts/check-real-local-chat.ts +147 -0
- package/scripts/check-real-local-provisioning.ts +104 -0
- package/scripts/check-real-local-reset.ts +249 -0
- package/scripts/check-sms-gateway-completion-audit.mjs +428 -0
- package/scripts/check-sms-gateway-readiness.mjs +266 -0
- package/scripts/clean-repo.mjs +5 -5
- package/scripts/codesign-mas.mjs +222 -16
- package/scripts/collect-docker-runtime-deps.mjs +229 -0
- package/scripts/continue-sms-gateway-work.mjs +121 -0
- package/scripts/copy-runtime-node-modules.ts +903 -195
- package/scripts/deploy-cloud-api-production-gateway.mjs +52 -0
- package/scripts/desktop-build.mjs +655 -101
- package/scripts/dev-platform.mjs +346 -102
- package/scripts/dev-startup-smoke.mjs +248 -0
- package/scripts/dev-ui.mjs +418 -176
- package/scripts/disable-local-eliza-workspace.mjs +35 -0
- package/scripts/docker-ci-smoke.sh +298 -96
- package/scripts/docker-entrypoint.sh +62 -1
- package/scripts/docker-entrypoint.test.ts +283 -0
- package/scripts/ensure-avatars.mjs +2 -2
- package/scripts/ensure-electrobun-core.mjs +1 -1
- package/scripts/ensure-generated-core-proto-js.mjs +1 -1
- package/scripts/ensure-type-package-aliases.mjs +62 -5
- package/scripts/ensure-vision-deps.mjs +20 -1
- package/scripts/entry.ts +1 -1
- package/scripts/ffi-stub/Makefile +64 -0
- package/scripts/ffi-stub/README.md +391 -0
- package/scripts/ffi-stub/asr-ffi-smoke.ts +139 -0
- package/scripts/ffi-stub/ffi-stub.c +539 -0
- package/scripts/ffi-stub/ffi.h +538 -0
- package/scripts/ffi-stub/libelizainference_stub.so +0 -0
- package/scripts/ffi-stub/tts-stream-ffi-smoke.ts +349 -0
- package/scripts/generate-first-run-voicelines.mjs +194 -0
- package/scripts/generate-plugin-index.js +4 -3
- package/scripts/generate-static-asset-manifest.mjs +1 -1
- package/scripts/i18n-dynamic-keys.json +5 -5
- package/scripts/init-submodules.mjs +2 -2
- package/scripts/install-android-sms-gateway.md +177 -0
- package/scripts/install-android-sms-gateway.mjs +1088 -0
- package/scripts/ios-xcframework/README.md +74 -72
- package/scripts/ios-xcframework/build-xcframework.mjs +204 -43
- package/scripts/ios-xcframework/run-physical-device-smoke.mjs +1943 -0
- package/scripts/ios-xcframework/runtime-symbol-shim.c +450 -0
- package/scripts/kernel-patches/cpu-polar-kernels.mjs +441 -0
- package/scripts/kernel-patches/cpu-simd-kernels.mjs +253 -0
- package/scripts/kernel-patches/cpu-thread-parallelism.mjs +368 -0
- package/scripts/kernel-patches/cuda-kernels.mjs +117 -0
- package/scripts/kernel-patches/metal-kernels.mjs +1698 -109
- package/scripts/kernel-patches/server-omnivoice-route.mjs +718 -0
- package/scripts/kernel-patches/server-structured-output.mjs +279 -0
- package/scripts/kernel-patches/vulkan-dispatch-log.mjs +166 -0
- package/scripts/kernel-patches/vulkan-dispatch-log.test.mjs +50 -0
- package/scripts/kernel-patches/vulkan-dispatch-patches/01-vulkan-shaders-gen.patch +30 -16
- package/scripts/kernel-patches/vulkan-dispatch-patches/02-ggml-vulkan-pipelines.patch +75 -30
- package/scripts/kernel-patches/vulkan-kernels.mjs +800 -49
- package/scripts/lib/agent-source-watcher.mjs +174 -0
- package/scripts/lib/agent-source-watcher.test.mjs +184 -0
- package/scripts/lib/api-supervisor.mjs +78 -9
- package/scripts/lib/api-supervisor.test.mjs +121 -0
- package/scripts/lib/app-dir.mjs +2 -16
- package/scripts/lib/apple-entitlement-audit.mjs +655 -0
- package/scripts/lib/apple-entitlement-audit.test.mjs +144 -0
- package/scripts/lib/bun-version-guard.mjs +13 -13
- package/scripts/lib/capacitor-plugin-build-needed.mjs +4 -3
- package/scripts/lib/capacitor-plugin-names.mjs +30 -14
- package/scripts/lib/desktop-preflight.mjs +9 -5
- package/scripts/lib/desktop-startup-embedding-warmup-policy.mjs +51 -0
- package/scripts/lib/desktop-startup-embedding-warmup-policy.test.mjs +55 -0
- package/scripts/lib/duet-bridge.d.mts +63 -0
- package/scripts/lib/duet-bridge.mjs +193 -0
- package/scripts/lib/node-path-env.mjs +4 -2
- package/scripts/lib/orchestrator-desktop-dev-banner.mjs +12 -3
- package/scripts/lib/patch-bun-exports.mjs +90 -27
- package/scripts/lib/patch-bun-exports.test.mjs +79 -0
- package/scripts/lib/renderer-build-action.mjs +35 -0
- package/scripts/lib/renderer-build-action.test.mjs +70 -0
- package/scripts/lib/stage-android-agent.mjs +748 -99
- package/scripts/lib/sync-eliza-env-aliases.mjs +3 -25
- package/scripts/lib/ui-smoke-stub-decision.mjs +33 -0
- package/scripts/lib/ui-smoke-stub-decision.test.mjs +46 -0
- package/scripts/lib/vite-renderer-dist-stale.mjs +5 -0
- package/scripts/lib/voice-latency-report.mjs +154 -0
- package/scripts/lifeops-prompt-benchmark.ts +21 -12
- package/scripts/link-docker-local-app-packages.mjs +89 -36
- package/scripts/local-stt-bench.ts +192 -0
- package/scripts/maintain-cloud-api-production-gateway.mjs +54 -0
- package/scripts/mas-smoke.mjs +459 -0
- package/scripts/mas-smoke.test.mjs +220 -0
- package/scripts/mobile-auth-simulator-smoke.mjs +0 -1
- package/scripts/normalize-eliza-capture.ts +97 -0
- package/scripts/omnivoice-fuse/prepare.mjs +2543 -23
- package/scripts/pack-upstreams.mjs +65 -5
- package/scripts/package-electrobun-linux.mjs +303 -0
- package/scripts/patch-deps.mjs +5 -3
- package/scripts/patches/llama-mobile-kokoro-tts.patch +480 -0
- package/scripts/playwright-ui-live-stack.ts +194 -49
- package/scripts/playwright-ui-smoke-api-stub.mjs +3501 -109
- package/scripts/pre-review-local.mjs +2 -2
- package/scripts/prepare-ios-cocoapods.sh +41 -3
- package/scripts/release-check.ts +180 -84
- package/scripts/release-workflow-drift.test.ts +57 -0
- package/scripts/relink-workspace-packages-to-dist.mjs +21 -4
- package/scripts/rt.mjs +16 -1
- package/scripts/run-biome-check.mjs +1 -1
- package/scripts/run-coding-agent-e2e.mjs +3 -3
- package/scripts/run-eliza-app-core-script.mjs +34 -0
- package/scripts/run-local-plugin-live-smoke.mjs +71 -2
- package/scripts/run-mobile-build-android-app-actions.test.mjs +426 -0
- package/scripts/run-mobile-build.mjs +4757 -607
- package/scripts/run-node-runtime.mjs +184 -7
- package/scripts/run-node-runtime.test.mjs +167 -0
- package/scripts/run-node-tsx.mjs +80 -33
- package/scripts/run-node.mjs +41 -1
- package/scripts/run-production-build.mjs +34 -27
- package/scripts/run-release-check.mjs +19 -0
- package/scripts/run-release-contract-suite.mjs +107 -14
- package/scripts/run-ui-smoke-playwright-suite.mjs +0 -2
- package/scripts/runtime-package-manifest.ts +21 -3
- package/scripts/setup-upstreams.mjs +42 -1
- package/scripts/sms-gateway-status.mjs +194 -0
- package/scripts/stage-android-agent.test.mjs +97 -0
- package/scripts/stage-elizavoice-lib.mjs +203 -0
- package/scripts/startup-integration-script-drift.test.ts +82 -4
- package/scripts/streaming-pipeline-bench.ts +543 -0
- package/scripts/sync-homepage-porkbun-dns.mjs +262 -0
- package/scripts/test-sms-gateway-software.mjs +100 -0
- package/scripts/type-audit.mjs +1 -1
- package/scripts/validate-bluebubbles-outbound.mjs +293 -0
- package/scripts/validate-cdn-assets.mjs +15 -7
- package/scripts/validate-regression-matrix.mjs +109 -8
- package/scripts/verify-android-sms-gateway-e2e.mjs +362 -0
- package/scripts/verify-bluebubbles-gateway-e2e.mjs +191 -0
- package/scripts/verify-bluebubbles-inbound-readiness.mjs +88 -0
- package/scripts/verify-cloud-api-production-deploy.mjs +87 -0
- package/scripts/verify-cloud-sms-onboarding-flow.mjs +336 -0
- package/scripts/voice/freeze-voice.mjs +521 -0
- package/scripts/voice-attribution-smoke.ts +538 -0
- package/scripts/voice-create-profile.mjs +379 -0
- package/scripts/voice-duet.mjs +1355 -0
- package/scripts/voice-e2e-hardware.ts +871 -0
- package/scripts/voice-interactive.mjs +1750 -0
- package/scripts/voice-latency-report.mjs +96 -0
- package/scripts/voice-latency-report.test.ts +176 -0
- package/scripts/voice-preset/build-default-voice-preset.mjs +249 -0
- package/scripts/voice-preset/build-onboarding-voice.mjs +281 -0
- package/scripts/watch-sms-gateway-readiness.mjs +303 -0
- package/scripts/write-homepage-release-data.mjs +458 -26
- package/security/agent-vault-id.d.ts +1 -1
- package/security/agent-vault-id.js +1 -1
- package/security/hydrate-wallet-keys-from-platform-store.d.ts.map +1 -1
- package/security/hydrate-wallet-keys-from-platform-store.js +23 -14
- package/security/platform-secure-store-node.d.ts +2 -2
- package/security/platform-secure-store-node.js +3 -3
- package/security/wallet-os-store-actions.d.ts +0 -9
- package/security/wallet-os-store-actions.d.ts.map +1 -1
- package/security/wallet-os-store-actions.js +3 -10
- package/services/account-pool.d.ts +23 -14
- package/services/account-pool.d.ts.map +1 -1
- package/services/account-pool.js +86 -24
- package/services/account-usage.d.ts.map +1 -1
- package/services/account-usage.js +2 -5
- package/services/ambient-audio/consent.d.ts +9 -0
- package/services/ambient-audio/consent.d.ts.map +1 -0
- package/services/ambient-audio/consent.js +28 -0
- package/services/ambient-audio/index.d.ts +7 -0
- package/services/ambient-audio/index.d.ts.map +1 -0
- package/services/ambient-audio/index.js +4 -0
- package/services/ambient-audio/replay-buffer.d.ts +14 -0
- package/services/ambient-audio/replay-buffer.d.ts.map +1 -0
- package/services/ambient-audio/replay-buffer.js +66 -0
- package/services/ambient-audio/response-gate.d.ts +3 -0
- package/services/ambient-audio/response-gate.d.ts.map +1 -0
- package/services/ambient-audio/response-gate.js +33 -0
- package/services/ambient-audio/service.d.ts +22 -0
- package/services/ambient-audio/service.d.ts.map +1 -0
- package/services/ambient-audio/service.js +47 -0
- package/services/ambient-audio/types.d.ts +42 -0
- package/services/ambient-audio/types.d.ts.map +1 -0
- package/services/app-updates/update-policy.d.ts +64 -0
- package/services/app-updates/update-policy.d.ts.map +1 -0
- package/services/app-updates/update-policy.js +228 -0
- package/services/auth-store.d.ts +37 -1
- package/services/auth-store.d.ts.map +1 -1
- package/services/auth-store.js +59 -26
- package/services/cloud-jwks-store.d.ts +3 -3
- package/services/cloud-jwks-store.d.ts.map +1 -1
- package/services/cloud-jwks-store.js +5 -8
- package/services/coding-account-bridge.d.ts +71 -0
- package/services/coding-account-bridge.d.ts.map +1 -0
- package/services/coding-account-bridge.js +267 -0
- package/services/connector-target-catalog.d.ts +10 -3
- package/services/connector-target-catalog.d.ts.map +1 -1
- package/services/connector-target-catalog.js +7 -4
- package/services/credential-tunnel-service.d.ts +66 -0
- package/services/credential-tunnel-service.d.ts.map +1 -0
- package/services/credential-tunnel-service.js +227 -0
- package/services/github-credentials.d.ts +1 -1
- package/services/github-credentials.js +1 -1
- package/services/inference-abort.d.ts +47 -0
- package/services/inference-abort.d.ts.map +1 -0
- package/services/inference-abort.js +76 -0
- package/services/persistence.d.ts +2 -3
- package/services/persistence.d.ts.map +1 -1
- package/services/persistence.js +2 -3
- package/services/phrase-chunked-tts.d.ts +136 -0
- package/services/phrase-chunked-tts.d.ts.map +1 -0
- package/services/phrase-chunked-tts.js +208 -0
- package/services/sandbox-registry.d.ts +78 -0
- package/services/sandbox-registry.d.ts.map +1 -0
- package/services/sandbox-registry.js +323 -0
- package/services/secrets-manager-installer.d.ts +8 -1
- package/services/secrets-manager-installer.d.ts.map +1 -1
- package/services/secrets-manager-installer.js +27 -2
- package/services/sensitive-requests/cloud-link-adapter.d.ts +15 -0
- package/services/sensitive-requests/cloud-link-adapter.d.ts.map +1 -0
- package/services/sensitive-requests/cloud-link-adapter.js +73 -0
- package/services/sensitive-requests/index.d.ts +27 -0
- package/services/sensitive-requests/index.d.ts.map +1 -0
- package/services/sensitive-requests/index.js +51 -0
- package/services/sensitive-requests/instruct-dm-only-adapter.d.ts +14 -0
- package/services/sensitive-requests/instruct-dm-only-adapter.d.ts.map +1 -0
- package/services/sensitive-requests/instruct-dm-only-adapter.js +22 -0
- package/services/sensitive-requests/owner-app-inline-adapter.d.ts +3 -0
- package/services/sensitive-requests/owner-app-inline-adapter.d.ts.map +1 -0
- package/services/sensitive-requests/owner-app-inline-adapter.js +146 -0
- package/services/sensitive-requests/owner-app-oauth-adapter.d.ts +3 -0
- package/services/sensitive-requests/owner-app-oauth-adapter.d.ts.map +1 -0
- package/services/sensitive-requests/owner-app-oauth-adapter.js +156 -0
- package/services/sensitive-requests/public-link-adapter.d.ts +14 -0
- package/services/sensitive-requests/public-link-adapter.d.ts.map +1 -0
- package/services/sensitive-requests/public-link-adapter.js +86 -0
- package/services/sensitive-requests/tunnel-link-adapter.d.ts +17 -0
- package/services/sensitive-requests/tunnel-link-adapter.d.ts.map +1 -0
- package/services/sensitive-requests/tunnel-link-adapter.js +38 -0
- package/services/steward-credentials.d.ts +1 -1
- package/services/steward-credentials.d.ts.map +1 -1
- package/services/steward-credentials.js +10 -6
- package/services/steward-sidecar/health-check.d.ts.map +1 -1
- package/services/steward-sidecar/health-check.js +4 -3
- package/services/steward-sidecar/process-management.d.ts +1 -1
- package/services/steward-sidecar/process-management.d.ts.map +1 -1
- package/services/steward-sidecar/process-management.js +9 -3
- package/services/steward-sidecar/types.d.ts +1 -1
- package/services/steward-sidecar/types.d.ts.map +1 -1
- package/services/steward-sidecar/wallet-setup.d.ts.map +1 -1
- package/services/steward-sidecar/wallet-setup.js +8 -7
- package/services/steward-sidecar.d.ts +2 -2
- package/services/steward-sidecar.d.ts.map +1 -1
- package/services/steward-sidecar.js +27 -19
- package/services/task-host-capabilities.d.ts +60 -0
- package/services/task-host-capabilities.d.ts.map +1 -0
- package/services/task-host-capabilities.js +122 -0
- package/services/tool-call-cache/index.d.ts +2 -2
- package/services/tool-call-cache/index.d.ts.map +1 -1
- package/services/tool-call-cache/index.js +1 -1
- package/services/trigger-event-bridge.js +1 -1
- package/services/tunnel-to-mobile/index.d.ts +2 -0
- package/services/tunnel-to-mobile/index.d.ts.map +1 -0
- package/services/tunnel-to-mobile/index.js +1 -0
- package/services/tunnel-to-mobile/tunnel-to-mobile-client.d.ts +105 -0
- package/services/tunnel-to-mobile/tunnel-to-mobile-client.d.ts.map +1 -0
- package/services/tunnel-to-mobile/tunnel-to-mobile-client.js +190 -0
- package/services/vault-bootstrap.d.ts.map +1 -1
- package/services/vault-bootstrap.js +48 -21
- package/services/vault-mirror.d.ts +1 -1
- package/services/vault-mirror.d.ts.map +1 -1
- package/services/vault-mirror.js +29 -6
- package/services/voice-profiles/diarization-pipeline.d.ts +6 -0
- package/services/voice-profiles/diarization-pipeline.d.ts.map +1 -0
- package/services/voice-profiles/diarization-pipeline.js +20 -0
- package/services/voice-profiles/index.d.ts +12 -0
- package/services/voice-profiles/index.d.ts.map +1 -0
- package/services/voice-profiles/index.js +5 -0
- package/services/voice-profiles/nickname-evaluator.d.ts +14 -0
- package/services/voice-profiles/nickname-evaluator.d.ts.map +1 -0
- package/services/voice-profiles/nickname-evaluator.js +46 -0
- package/services/voice-profiles/owner-confidence.d.ts +10 -0
- package/services/voice-profiles/owner-confidence.d.ts.map +1 -0
- package/services/voice-profiles/owner-confidence.js +38 -0
- package/services/voice-profiles/private-challenge.d.ts +20 -0
- package/services/voice-profiles/private-challenge.d.ts.map +1 -0
- package/services/voice-profiles/private-challenge.js +44 -0
- package/services/voice-profiles/store.d.ts +21 -0
- package/services/voice-profiles/store.d.ts.map +1 -0
- package/services/voice-profiles/store.js +50 -0
- package/services/voice-profiles/types.d.ts +38 -0
- package/services/voice-profiles/types.d.ts.map +1 -0
- package/services/voice-profiles/types.js +1 -0
- package/styles/electrobun-mac-window-drag.css +4 -4
- package/test/helpers/__tests__/live-agent-test.smoke.test.ts +43 -70
- package/test/helpers/browser-mocks.ts +2 -2
- package/test/helpers/conditional-tests.ts +2 -2
- package/test/helpers/i18n.ts +1 -1
- package/test/helpers/live-agent-test.ts +537 -551
- package/test/helpers/live-provider.test.ts +4 -4
- package/test/helpers/live-provider.ts +41 -7
- package/test/helpers/live-runtime-server.ts +4 -4
- package/test/helpers/pglite-runtime.ts +1 -1
- package/test/helpers/real-runtime.ts +54 -15
- package/test/helpers/trajectory-harness.ts +11 -7
- package/test/scripts/start-eliza-live.ts +9 -0
- package/test/scripts/test-parallel.mjs +1 -1
- package/test/scripts/test-root-unit.mjs +6 -7
- package/ui-compat.d.ts +13 -2
- package/ui-compat.d.ts.map +1 -1
- package/ui-compat.js +19 -3
- package/api/auth-pairing-compat-routes.d.ts +0 -17
- package/api/auth-pairing-compat-routes.d.ts.map +0 -1
- package/api/auth-pairing-compat-routes.js +0 -301
- package/api/local-inference-compat-routes.d.ts +0 -16
- package/api/local-inference-compat-routes.d.ts.map +0 -1
- package/api/local-inference-compat-routes.js +0 -617
- package/api/onboarding-compat-routes.d.ts +0 -4
- package/api/onboarding-compat-routes.d.ts.map +0 -1
- package/api/onboarding-compat-routes.js +0 -207
- package/api/plugins-compat-routes.d.ts +0 -103
- package/api/plugins-compat-routes.d.ts.map +0 -1
- package/api/plugins-compat-routes.js +0 -1181
- package/api/server-onboarding-compat.d.ts +0 -31
- package/api/server-onboarding-compat.d.ts.map +0 -1
- package/api/server-onboarding-compat.js +0 -283
- package/benchmark/cua-routes.d.ts +0 -10
- package/benchmark/cua-routes.d.ts.map +0 -1
- package/benchmark/cua-routes.js +0 -179
- package/benchmark/mock-plugin-base.d.ts +0 -9
- package/benchmark/mock-plugin-base.d.ts.map +0 -1
- package/benchmark/mock-plugin-base.js +0 -325
- package/cli/parse-duration.d.ts +0 -5
- package/cli/parse-duration.d.ts.map +0 -1
- package/cli/parse-duration.js +0 -27
- package/patches/llama-cpp-capacitor@0.1.5.patch +0 -2387
- package/platform/agent-browser-stub.d.ts +0 -27
- package/platform/agent-browser-stub.d.ts.map +0 -1
- package/platform/agent-browser-stub.js +0 -16
- package/platforms/android/app/src/androidTest/java/com/getcapacitor/myapp/ExampleInstrumentedTest.java +0 -26
- package/platforms/android/app/src/main/res/drawable/ic_launcher_background.xml +0 -170
- package/platforms/android/app/src/main/res/drawable-v24/ic_launcher_foreground.xml +0 -34
- package/platforms/android/app/src/test/java/com/getcapacitor/myapp/ExampleUnitTest.java +0 -18
- package/platforms/electrobun/assets/appIcon.iconset/icon_512x512@2x.png +0 -0
- package/platforms/electrobun/assets/appIcon.png +0 -0
- package/platforms/electrobun/scripts/build-whisper-universal.sh +0 -137
- package/platforms/electrobun/scripts/build-whisper.sh +0 -95
- package/platforms/electrobun/src/libMacWindowEffects.dylib +0 -0
- package/platforms/electrobun/src/native/whisper.ts +0 -280
- package/platforms/ios/App/App/Assets.xcassets/Splash.imageset/splash-2732x2732-1.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/Splash.imageset/splash-2732x2732-2.png +0 -0
- package/platforms/ios/App/App/Assets.xcassets/Splash.imageset/splash-2732x2732.png +0 -0
- package/registry/generate-apps.d.ts +0 -2
- package/registry/generate-apps.d.ts.map +0 -1
- package/registry/generate-apps.js +0 -338
- package/registry/generate.d.ts +0 -2
- package/registry/generate.d.ts.map +0 -1
- package/registry/generate.js +0 -506
- package/runtime/embedding-manager-support.d.ts +0 -77
- package/runtime/embedding-manager-support.d.ts.map +0 -1
- package/runtime/embedding-manager-support.js +0 -309
- package/runtime/embedding-presets.d.ts +0 -5
- package/runtime/embedding-presets.d.ts.map +0 -1
- package/runtime/embedding-presets.js +0 -47
- package/runtime/embedding-warmup-policy.d.ts +0 -13
- package/runtime/embedding-warmup-policy.d.ts.map +0 -1
- package/runtime/embedding-warmup-policy.js +0 -33
- package/runtime/ensure-local-inference-handler.d.ts +0 -25
- package/runtime/ensure-local-inference-handler.d.ts.map +0 -1
- package/runtime/ensure-local-inference-handler.js +0 -389
- package/runtime/mobile-local-inference-gate.d.ts +0 -21
- package/runtime/mobile-local-inference-gate.d.ts.map +0 -1
- package/runtime/mobile-local-inference-gate.js +0 -24
- package/scripts/aosp/avd-test.mjs +0 -403
- package/scripts/aosp/boot-validate.mjs +0 -536
- package/scripts/aosp/build-aosp.mjs +0 -448
- package/scripts/aosp/build-bootanimation.mjs +0 -178
- package/scripts/aosp/capture-screens.mjs +0 -325
- package/scripts/aosp/e2e-validate.mjs +0 -225
- package/scripts/aosp/lint-init-rc.mjs +0 -258
- package/scripts/aosp/llama-shim/eliza_llama_shim.c +0 -276
- package/scripts/aosp/sim.mjs +0 -277
- package/scripts/aosp/sync-to-aosp.mjs +0 -134
- package/scripts/aosp/validate.mjs +0 -1273
- package/scripts/build-llama-cpp-dflash.mjs +0 -1866
- package/scripts/generate-onboarding-voicelines.mjs +0 -194
- package/scripts/generated/static-asset-manifest.json +0 -4
- package/scripts/normalize-parallax-capture.ts +0 -97
- package/scripts/omnivoice-fuse/Makefile +0 -44
- package/scripts/omnivoice-fuse/README.md +0 -266
- package/scripts/omnivoice-fuse/cmake-graft.mjs +0 -180
- package/scripts/omnivoice-fuse/ffi-stub.c +0 -222
- package/scripts/omnivoice-fuse/ffi.h +0 -158
- package/scripts/omnivoice-fuse/libelizainference_stub.dylib +0 -0
- package/scripts/omnivoice-fuse/verify-symbols.mjs +0 -138
- package/security/cloud-secret-store.d.ts +0 -34
- package/security/cloud-secret-store.d.ts.map +0 -1
- package/security/cloud-secret-store.js +0 -65
- package/security/export-guard.d.ts +0 -34
- package/security/export-guard.d.ts.map +0 -1
- package/security/export-guard.js +0 -127
- package/services/local-inference/__stress__/cache-stress-helpers.d.ts +0 -76
- package/services/local-inference/__stress__/cache-stress-helpers.d.ts.map +0 -1
- package/services/local-inference/__stress__/cache-stress-helpers.js +0 -238
- package/services/local-inference/active-model.d.ts +0 -180
- package/services/local-inference/active-model.d.ts.map +0 -1
- package/services/local-inference/active-model.js +0 -362
- package/services/local-inference/assignments.d.ts +0 -58
- package/services/local-inference/assignments.d.ts.map +0 -1
- package/services/local-inference/assignments.js +0 -179
- package/services/local-inference/backend.d.ts +0 -200
- package/services/local-inference/backend.d.ts.map +0 -1
- package/services/local-inference/backend.js +0 -242
- package/services/local-inference/bundled-models.d.ts +0 -34
- package/services/local-inference/bundled-models.d.ts.map +0 -1
- package/services/local-inference/bundled-models.js +0 -104
- package/services/local-inference/cache-bridge.d.ts +0 -184
- package/services/local-inference/cache-bridge.d.ts.map +0 -1
- package/services/local-inference/cache-bridge.js +0 -333
- package/services/local-inference/catalog.d.ts +0 -57
- package/services/local-inference/catalog.d.ts.map +0 -1
- package/services/local-inference/catalog.js +0 -262
- package/services/local-inference/conversation-registry.d.ts +0 -122
- package/services/local-inference/conversation-registry.d.ts.map +0 -1
- package/services/local-inference/conversation-registry.js +0 -182
- package/services/local-inference/device-bridge.d.ts +0 -139
- package/services/local-inference/device-bridge.d.ts.map +0 -1
- package/services/local-inference/device-bridge.js +0 -774
- package/services/local-inference/dflash-doctor.d.ts +0 -27
- package/services/local-inference/dflash-doctor.d.ts.map +0 -1
- package/services/local-inference/dflash-doctor.js +0 -149
- package/services/local-inference/dflash-server.d.ts +0 -248
- package/services/local-inference/dflash-server.d.ts.map +0 -1
- package/services/local-inference/dflash-server.js +0 -1076
- package/services/local-inference/downloader.d.ts +0 -48
- package/services/local-inference/downloader.d.ts.map +0 -1
- package/services/local-inference/downloader.js +0 -688
- package/services/local-inference/engine.d.ts +0 -282
- package/services/local-inference/engine.d.ts.map +0 -1
- package/services/local-inference/engine.js +0 -743
- package/services/local-inference/external-scanner.d.ts +0 -17
- package/services/local-inference/external-scanner.d.ts.map +0 -1
- package/services/local-inference/external-scanner.js +0 -261
- package/services/local-inference/handler-registry.d.ts +0 -72
- package/services/local-inference/handler-registry.d.ts.map +0 -1
- package/services/local-inference/handler-registry.js +0 -159
- package/services/local-inference/hardware.d.ts +0 -26
- package/services/local-inference/hardware.d.ts.map +0 -1
- package/services/local-inference/hardware.js +0 -139
- package/services/local-inference/hf-search.d.ts +0 -19
- package/services/local-inference/hf-search.d.ts.map +0 -1
- package/services/local-inference/hf-search.js +0 -169
- package/services/local-inference/index.d.ts +0 -10
- package/services/local-inference/index.d.ts.map +0 -1
- package/services/local-inference/index.js +0 -7
- package/services/local-inference/llama-server-metrics.d.ts +0 -108
- package/services/local-inference/llama-server-metrics.d.ts.map +0 -1
- package/services/local-inference/llama-server-metrics.js +0 -175
- package/services/local-inference/manifest/index.d.ts +0 -4
- package/services/local-inference/manifest/index.d.ts.map +0 -1
- package/services/local-inference/manifest/index.js +0 -5
- package/services/local-inference/manifest/schema.d.ts +0 -419
- package/services/local-inference/manifest/schema.d.ts.map +0 -1
- package/services/local-inference/manifest/schema.js +0 -227
- package/services/local-inference/manifest/types.d.ts +0 -23
- package/services/local-inference/manifest/types.d.ts.map +0 -1
- package/services/local-inference/manifest/types.js +0 -5
- package/services/local-inference/manifest/validator.d.ts +0 -43
- package/services/local-inference/manifest/validator.d.ts.map +0 -1
- package/services/local-inference/manifest/validator.js +0 -187
- package/services/local-inference/paths.d.ts +0 -8
- package/services/local-inference/paths.d.ts.map +0 -1
- package/services/local-inference/paths.js +0 -7
- package/services/local-inference/providers.d.ts +0 -61
- package/services/local-inference/providers.d.ts.map +0 -1
- package/services/local-inference/providers.js +0 -334
- package/services/local-inference/ram-budget.d.ts +0 -57
- package/services/local-inference/ram-budget.d.ts.map +0 -1
- package/services/local-inference/ram-budget.js +0 -107
- package/services/local-inference/readiness.d.ts +0 -9
- package/services/local-inference/readiness.d.ts.map +0 -1
- package/services/local-inference/readiness.js +0 -153
- package/services/local-inference/recommendation.d.ts +0 -62
- package/services/local-inference/recommendation.d.ts.map +0 -1
- package/services/local-inference/recommendation.js +0 -309
- package/services/local-inference/registry.d.ts +0 -35
- package/services/local-inference/registry.d.ts.map +0 -1
- package/services/local-inference/registry.js +0 -117
- package/services/local-inference/router-handler.d.ts +0 -51
- package/services/local-inference/router-handler.d.ts.map +0 -1
- package/services/local-inference/router-handler.js +0 -165
- package/services/local-inference/routing-policy.d.ts +0 -55
- package/services/local-inference/routing-policy.d.ts.map +0 -1
- package/services/local-inference/routing-policy.js +0 -195
- package/services/local-inference/routing-preferences.d.ts +0 -8
- package/services/local-inference/routing-preferences.d.ts.map +0 -1
- package/services/local-inference/routing-preferences.js +0 -7
- package/services/local-inference/service.d.ts +0 -88
- package/services/local-inference/service.d.ts.map +0 -1
- package/services/local-inference/service.js +0 -210
- package/services/local-inference/session-pool.d.ts +0 -72
- package/services/local-inference/session-pool.d.ts.map +0 -1
- package/services/local-inference/session-pool.js +0 -125
- package/services/local-inference/types.d.ts +0 -309
- package/services/local-inference/types.d.ts.map +0 -1
- package/services/local-inference/types.js +0 -23
- package/services/local-inference/verify.d.ts +0 -8
- package/services/local-inference/verify.d.ts.map +0 -1
- package/services/local-inference/verify.js +0 -7
- package/services/local-inference/voice/barge-in.d.ts +0 -15
- package/services/local-inference/voice/barge-in.d.ts.map +0 -1
- package/services/local-inference/voice/barge-in.js +0 -20
- package/services/local-inference/voice/engine-bridge.d.ts +0 -256
- package/services/local-inference/voice/engine-bridge.d.ts.map +0 -1
- package/services/local-inference/voice/engine-bridge.js +0 -398
- package/services/local-inference/voice/ffi-bindings.d.ts +0 -114
- package/services/local-inference/voice/ffi-bindings.d.ts.map +0 -1
- package/services/local-inference/voice/ffi-bindings.js +0 -281
- package/services/local-inference/voice/index.d.ts +0 -51
- package/services/local-inference/voice/index.d.ts.map +0 -1
- package/services/local-inference/voice/index.js +0 -50
- package/services/local-inference/voice/lifecycle.d.ts +0 -135
- package/services/local-inference/voice/lifecycle.d.ts.map +0 -1
- package/services/local-inference/voice/lifecycle.js +0 -189
- package/services/local-inference/voice/phoneme-tokenizer.d.ts +0 -58
- package/services/local-inference/voice/phoneme-tokenizer.d.ts.map +0 -1
- package/services/local-inference/voice/phoneme-tokenizer.js +0 -53
- package/services/local-inference/voice/phrase-cache.d.ts +0 -24
- package/services/local-inference/voice/phrase-cache.d.ts.map +0 -1
- package/services/local-inference/voice/phrase-cache.js +0 -32
- package/services/local-inference/voice/phrase-chunker.d.ts +0 -20
- package/services/local-inference/voice/phrase-chunker.d.ts.map +0 -1
- package/services/local-inference/voice/phrase-chunker.js +0 -85
- package/services/local-inference/voice/ring-buffer.d.ts +0 -40
- package/services/local-inference/voice/ring-buffer.d.ts.map +0 -1
- package/services/local-inference/voice/ring-buffer.js +0 -85
- package/services/local-inference/voice/rollback-queue.d.ts +0 -24
- package/services/local-inference/voice/rollback-queue.d.ts.map +0 -1
- package/services/local-inference/voice/rollback-queue.js +0 -49
- package/services/local-inference/voice/scheduler.d.ts +0 -47
- package/services/local-inference/voice/scheduler.d.ts.map +0 -1
- package/services/local-inference/voice/scheduler.js +0 -123
- package/services/local-inference/voice/shared-resources.d.ts +0 -119
- package/services/local-inference/voice/shared-resources.d.ts.map +0 -1
- package/services/local-inference/voice/shared-resources.js +0 -83
- package/services/local-inference/voice/speaker-preset-cache.d.ts +0 -28
- package/services/local-inference/voice/speaker-preset-cache.d.ts.map +0 -1
- package/services/local-inference/voice/speaker-preset-cache.js +0 -44
- package/services/local-inference/voice/types.d.ts +0 -80
- package/services/local-inference/voice/types.d.ts.map +0 -1
- package/services/local-inference/voice/voice-preset-format.d.ts +0 -56
- package/services/local-inference/voice/voice-preset-format.d.ts.map +0 -1
- package/services/local-inference/voice/voice-preset-format.js +0 -184
- package/services/plugin-installer.d.ts +0 -22
- package/services/plugin-installer.d.ts.map +0 -1
- package/services/plugin-installer.js +0 -41
- package/test/scripts/task-agent-live-smoke.ts +0 -1335
- /package/services/{local-inference/voice → ambient-audio}/types.js +0 -0
|
@@ -1,43 +1,37 @@
|
|
|
1
|
-
// Real Metal kernel-shipment helpers — replace the
|
|
2
|
-
// log no-ops in build-llama-cpp-
|
|
1
|
+
// Real Metal kernel-shipment helpers — replace the fork's decorative
|
|
2
|
+
// log no-ops in build-llama-cpp-mtp.mjs.
|
|
3
3
|
//
|
|
4
4
|
// What this module does:
|
|
5
5
|
//
|
|
6
|
-
// 1. Copies the
|
|
6
|
+
// 1. Copies the required and optimization standalone Metal shaders from
|
|
7
7
|
// packages/inference/metal/ into the fork's tree at
|
|
8
|
-
// ggml/src/ggml-metal/
|
|
8
|
+
// ggml/src/ggml-metal/eliza-shipped/<kernel>.metal. The standalones are
|
|
9
9
|
// self-contained TUs (only #include <metal_stdlib>; their own structs,
|
|
10
10
|
// constants, kernel symbols), so they compile as independent .air files.
|
|
11
11
|
//
|
|
12
12
|
// 2. Patches ggml/src/ggml-metal/CMakeLists.txt so both Metal packaging
|
|
13
13
|
// branches build each standalone shader into its own .air via
|
|
14
14
|
// `xcrun metal -c` and merge all .air files (the original ggml-metal.air
|
|
15
|
-
// plus the five
|
|
15
|
+
// plus the five eliza .air files) into one default.metallib.
|
|
16
16
|
//
|
|
17
17
|
// The original CMake snippet pipes `xcrun metal | xcrun metallib`. We
|
|
18
18
|
// replace that with explicit per-source compilation + a final merge step,
|
|
19
|
-
// keyed by a `#
|
|
19
|
+
// keyed by a `# ELIZA-KERNEL-PATCH-V1` sentinel so the patch is idempotent.
|
|
20
20
|
//
|
|
21
21
|
// 3. Hard-throws on any error — missing files, missing anchor in
|
|
22
22
|
// CMakeLists.txt, fs failures. Per AGENTS.md §3, the build must exit
|
|
23
23
|
// non-zero rather than silently produce a kernel-missing artifact.
|
|
24
24
|
//
|
|
25
|
-
//
|
|
25
|
+
// Runtime dispatch status:
|
|
26
26
|
//
|
|
27
|
-
// *
|
|
28
|
-
//
|
|
29
|
-
//
|
|
30
|
-
//
|
|
31
|
-
// per `GGML_TYPE_*`. Once kernels are in the metallib, follow-up work
|
|
32
|
-
// can add the dispatch wiring and the kernels become reachable. Until
|
|
33
|
-
// then the kernels ship as live symbols inside default.metallib but are
|
|
34
|
-
// not yet selected by the runtime — the symbol-presence audit (`nm`,
|
|
35
|
-
// `strings default.metallib`) passes, the dispatch audit does not.
|
|
27
|
+
// * QJL, Turbo3, Turbo3_TCQ, and PolarQuant are wired through dedicated
|
|
28
|
+
// attention-score graph ops. The dispatch smoke links against the built
|
|
29
|
+
// fork and numerically verifies graph execution selects the shipped Metal
|
|
30
|
+
// kernels.
|
|
36
31
|
//
|
|
37
|
-
// *
|
|
38
|
-
//
|
|
39
|
-
//
|
|
40
|
-
// iOS load the same multi-TU kernel set as desktop.
|
|
32
|
+
// * Turbo4 is wired through the same dedicated attention-score graph op.
|
|
33
|
+
// The shipped shader consumes the fork's real GGML_TYPE_TBQ4_0 layout:
|
|
34
|
+
// four 32-wide block_tbq4_0 records per 128-element row (72 bytes).
|
|
41
35
|
|
|
42
36
|
import fs from "node:fs";
|
|
43
37
|
import path from "node:path";
|
|
@@ -46,8 +40,10 @@ import { fileURLToPath } from "node:url";
|
|
|
46
40
|
const __filename = fileURLToPath(import.meta.url);
|
|
47
41
|
const __dirname = path.dirname(__filename);
|
|
48
42
|
|
|
49
|
-
// packages/app-core/scripts/kernel-patches/ →
|
|
50
|
-
|
|
43
|
+
// packages/app-core/scripts/kernel-patches/ → plugin-local-inference/native/metal/
|
|
44
|
+
// Older workstreams staged these under packages/inference/metal; the current
|
|
45
|
+
// native plugin owns the verified standalone shader sources. The legacy directory is still preferred whenever it exists on disk.
|
|
46
|
+
const LEGACY_STANDALONE_METAL_DIR = path.resolve(
|
|
51
47
|
__dirname,
|
|
52
48
|
"..",
|
|
53
49
|
"..",
|
|
@@ -55,25 +51,124 @@ const STANDALONE_METAL_DIR = path.resolve(
|
|
|
55
51
|
"inference",
|
|
56
52
|
"metal",
|
|
57
53
|
);
|
|
54
|
+
const PLUGIN_STANDALONE_METAL_DIR = path.resolve(
|
|
55
|
+
__dirname,
|
|
56
|
+
"..",
|
|
57
|
+
"..",
|
|
58
|
+
"..",
|
|
59
|
+
"..",
|
|
60
|
+
"plugins",
|
|
61
|
+
"plugin-local-inference",
|
|
62
|
+
"native",
|
|
63
|
+
"metal",
|
|
64
|
+
);
|
|
65
|
+
const STANDALONE_METAL_DIR = fs.existsSync(LEGACY_STANDALONE_METAL_DIR)
|
|
66
|
+
? LEGACY_STANDALONE_METAL_DIR
|
|
67
|
+
: PLUGIN_STANDALONE_METAL_DIR;
|
|
68
|
+
|
|
69
|
+
// Reference C kernels (TCQ codebook source) — same restructure drift: older
|
|
70
|
+
// workstreams kept these under packages/inference/reference; the native plugin
|
|
71
|
+
// now owns them.
|
|
72
|
+
const LEGACY_STANDALONE_REFERENCE_DIR = path.resolve(
|
|
73
|
+
__dirname,
|
|
74
|
+
"..",
|
|
75
|
+
"..",
|
|
76
|
+
"..",
|
|
77
|
+
"inference",
|
|
78
|
+
"reference",
|
|
79
|
+
);
|
|
80
|
+
const PLUGIN_STANDALONE_REFERENCE_DIR = path.resolve(
|
|
81
|
+
__dirname,
|
|
82
|
+
"..",
|
|
83
|
+
"..",
|
|
84
|
+
"..",
|
|
85
|
+
"..",
|
|
86
|
+
"plugins",
|
|
87
|
+
"plugin-local-inference",
|
|
88
|
+
"native",
|
|
89
|
+
"reference",
|
|
90
|
+
);
|
|
91
|
+
export const STANDALONE_REFERENCE_DIR = fs.existsSync(
|
|
92
|
+
LEGACY_STANDALONE_REFERENCE_DIR,
|
|
93
|
+
)
|
|
94
|
+
? LEGACY_STANDALONE_REFERENCE_DIR
|
|
95
|
+
: PLUGIN_STANDALONE_REFERENCE_DIR;
|
|
58
96
|
|
|
59
97
|
// Map: standalone-shader-filename → in-fork relative path (under cacheDir).
|
|
60
98
|
// Each standalone is copied verbatim — its content is not edited. Per agent
|
|
61
|
-
// contract
|
|
99
|
+
// contract, verified shader math lives under STANDALONE_METAL_DIR (legacy packages/inference/metal/ or the native plugin's metal/ directory) and the
|
|
100
|
+
// fork copy is a generated shipping copy.
|
|
101
|
+
//
|
|
102
|
+
// Apple M4 Max verification on 2026-05-11:
|
|
103
|
+
// * metal-verify: turbo3, turbo4, turbo3_tcq, qjl, polar, polar+QJL,
|
|
104
|
+
// polar_preht, polar_preht+QJL all 8/8 PASS.
|
|
105
|
+
// * metal-verify-multiblock: TurboQuant/QJL multi-block variants PASS.
|
|
106
|
+
// * metal-verify-fused: fused_attn_qjl_tbq and fused_attn_qjl_polar PASS.
|
|
107
|
+
//
|
|
108
|
+
// Runtime dispatch is flipped only for the graph ops listed in
|
|
109
|
+
// METAL_RUNTIME_DISPATCH_GATES. The QJL+TBQ fused-attention kernel is wired
|
|
110
|
+
// through a dedicated graph op; the QJL+Polar fused variant remains shipped
|
|
111
|
+
// and standalone-verified only until it gets a separate graph contract.
|
|
62
112
|
export const METAL_KERNEL_FILES = [
|
|
63
113
|
"turbo3.metal",
|
|
64
114
|
"turbo4.metal",
|
|
65
115
|
"turbo3_tcq.metal",
|
|
66
116
|
"qjl.metal",
|
|
117
|
+
"qjl_set_rows.metal",
|
|
67
118
|
"polar.metal",
|
|
119
|
+
"polar_preht.metal",
|
|
120
|
+
"fused_attn_qjl_tbq.metal",
|
|
121
|
+
"fused_attn_qjl_polar.metal",
|
|
68
122
|
];
|
|
69
123
|
|
|
70
|
-
const
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
124
|
+
export const METAL_RUNTIME_DISPATCH_GATES = {
|
|
125
|
+
turbo3: {
|
|
126
|
+
status: "runtime-ready",
|
|
127
|
+
runtimeReady: true,
|
|
128
|
+
graphOp: "GGML_OP_ATTN_SCORE_TBQ",
|
|
129
|
+
smokeTarget: "dispatch-smoke",
|
|
130
|
+
},
|
|
131
|
+
turbo4: {
|
|
132
|
+
status: "runtime-ready",
|
|
133
|
+
runtimeReady: true,
|
|
134
|
+
graphOp: "GGML_OP_ATTN_SCORE_TBQ",
|
|
135
|
+
smokeTarget: "dispatch-smoke",
|
|
136
|
+
},
|
|
137
|
+
turbo3_tcq: {
|
|
138
|
+
status: "runtime-ready",
|
|
139
|
+
runtimeReady: true,
|
|
140
|
+
graphOp: "GGML_OP_ATTN_SCORE_TBQ",
|
|
141
|
+
smokeTarget: "dispatch-smoke",
|
|
142
|
+
},
|
|
143
|
+
qjl_full: {
|
|
144
|
+
status: "runtime-ready",
|
|
145
|
+
runtimeReady: true,
|
|
146
|
+
graphOp: "GGML_OP_ATTN_SCORE_QJL",
|
|
147
|
+
smokeTarget: "dispatch-smoke",
|
|
148
|
+
},
|
|
149
|
+
polarquant: {
|
|
150
|
+
status: "runtime-ready",
|
|
151
|
+
runtimeReady: true,
|
|
152
|
+
graphOp: "GGML_OP_ATTN_SCORE_POLAR",
|
|
153
|
+
smokeTarget: "dispatch-smoke",
|
|
154
|
+
},
|
|
155
|
+
fused_attn_qjl_tbq: {
|
|
156
|
+
status: "runtime-ready",
|
|
157
|
+
runtimeReady: true,
|
|
158
|
+
graphOp: "GGML_OP_FUSED_ATTN_QJL_TBQ",
|
|
159
|
+
smokeTarget: "dispatch-smoke",
|
|
160
|
+
},
|
|
161
|
+
};
|
|
162
|
+
|
|
163
|
+
const SENTINEL = "# ELIZA-KERNEL-PATCH-V1";
|
|
164
|
+
const SENTINEL_EMBED = "# ELIZA-KERNEL-EMBED-PATCH-V1";
|
|
165
|
+
const SENTINEL_EMBED_LOADER = "// ELIZA-EMBEDDED-METALLIB-LOADER-V1";
|
|
166
|
+
const SENTINEL_QJL_ATTN = "// ELIZA-QJL-ATTN-DISPATCH-V1";
|
|
167
|
+
const SENTINEL_QJL_SET_ROWS = "// ELIZA-QJL-SET-ROWS-V1";
|
|
168
|
+
const SENTINEL_TBQ_POLAR_ATTN = "// ELIZA-TBQ-POLAR-ATTN-DISPATCH-V1";
|
|
74
169
|
|
|
75
170
|
function inForkRelpath(name) {
|
|
76
|
-
return path.posix.join("ggml", "src", "ggml-metal", "
|
|
171
|
+
return path.posix.join("ggml", "src", "ggml-metal", "eliza-shipped", name);
|
|
77
172
|
}
|
|
78
173
|
|
|
79
174
|
// Verify all standalones exist and are non-empty before any fs writes — we
|
|
@@ -99,19 +194,23 @@ function assertStandalonesPresent() {
|
|
|
99
194
|
}
|
|
100
195
|
|
|
101
196
|
// Copy each standalone .metal into the fork at
|
|
102
|
-
// ggml/src/ggml-metal/
|
|
197
|
+
// ggml/src/ggml-metal/eliza-shipped/<name>.metal, overwriting any prior copy
|
|
103
198
|
// so the canonical source-of-truth is always the verified standalone.
|
|
104
199
|
//
|
|
105
|
-
// We deliberately overwrite the fork's stale ggml/src/ggml-metal/
|
|
106
|
-
// content if it exists, but we write into a sibling
|
|
107
|
-
// so the patch is self-contained and the original (un-wired)
|
|
200
|
+
// We deliberately do not overwrite the fork's stale ggml/src/ggml-metal/eliza-kernels/
|
|
201
|
+
// content if it exists, but we write into a sibling eliza-shipped/ directory
|
|
202
|
+
// so the patch is self-contained and the original (un-wired) eliza-kernels/
|
|
108
203
|
// drafts remain visible for diff-archaeology if a future agent wants them.
|
|
109
204
|
function copyStandalonesIntoFork(cacheDir, { dryRun }) {
|
|
110
|
-
const targetDir = path.join(
|
|
205
|
+
const targetDir = path.join(
|
|
206
|
+
cacheDir,
|
|
207
|
+
"ggml",
|
|
208
|
+
"src",
|
|
209
|
+
"ggml-metal",
|
|
210
|
+
"eliza-shipped",
|
|
211
|
+
);
|
|
111
212
|
if (dryRun) {
|
|
112
|
-
console.log(
|
|
113
|
-
`[metal-kernels] (dry-run) mkdir -p ${targetDir}`,
|
|
114
|
-
);
|
|
213
|
+
console.log(`[metal-kernels] (dry-run) mkdir -p ${targetDir}`);
|
|
115
214
|
} else {
|
|
116
215
|
fs.mkdirSync(targetDir, { recursive: true });
|
|
117
216
|
}
|
|
@@ -128,7 +227,7 @@ function copyStandalonesIntoFork(cacheDir, { dryRun }) {
|
|
|
128
227
|
// in-fork draft.
|
|
129
228
|
const stamped =
|
|
130
229
|
`// ${SENTINEL} — copied verbatim from packages/inference/metal/${name}\n` +
|
|
131
|
-
`// at build time by build-llama-cpp-
|
|
230
|
+
`// at build time by build-llama-cpp-mtp.mjs. Do not edit in place;\n` +
|
|
132
231
|
`// edit the standalone source and rerun the build.\n` +
|
|
133
232
|
text;
|
|
134
233
|
fs.writeFileSync(dst, stamped, "utf8");
|
|
@@ -159,17 +258,17 @@ function patchMetalCMakeLists(cacheDir, { dryRun }) {
|
|
|
159
258
|
let patched = original;
|
|
160
259
|
let changed = false;
|
|
161
260
|
|
|
162
|
-
const
|
|
261
|
+
const elizaAirLinesForSdk = (sdkExpr) =>
|
|
163
262
|
METAL_KERNEL_FILES.map((name) => {
|
|
164
263
|
const stem = name.replace(/\.metal$/, "");
|
|
165
|
-
return ` COMMAND xcrun -sdk ${sdkExpr} metal \${XC_FLAGS} -c \${CMAKE_CURRENT_SOURCE_DIR}/
|
|
264
|
+
return ` COMMAND xcrun -sdk ${sdkExpr} metal \${XC_FLAGS} -c \${CMAKE_CURRENT_SOURCE_DIR}/eliza-shipped/${name} -o \${CMAKE_CURRENT_BINARY_DIR}/${stem}.air`;
|
|
166
265
|
}).join("\n");
|
|
167
|
-
const
|
|
266
|
+
const elizaAirInputs = METAL_KERNEL_FILES.map((name) => {
|
|
168
267
|
const stem = name.replace(/\.metal$/, "");
|
|
169
268
|
return `\${CMAKE_CURRENT_BINARY_DIR}/${stem}.air`;
|
|
170
269
|
}).join(" ");
|
|
171
|
-
const
|
|
172
|
-
(name) => `\${CMAKE_CURRENT_SOURCE_DIR}/
|
|
270
|
+
const elizaDepends = METAL_KERNEL_FILES.map(
|
|
271
|
+
(name) => `\${CMAKE_CURRENT_SOURCE_DIR}/eliza-shipped/${name}`,
|
|
173
272
|
).join(" ");
|
|
174
273
|
|
|
175
274
|
if (!patched.includes(SENTINEL_EMBED)) {
|
|
@@ -180,7 +279,7 @@ function patchMetalCMakeLists(cacheDir, { dryRun }) {
|
|
|
180
279
|
embedStart === -1
|
|
181
280
|
? -1
|
|
182
281
|
: patched.indexOf(
|
|
183
|
-
|
|
282
|
+
'\n\n target_sources(ggml-metal PRIVATE "${METALLIB_EMBED_ASM}")',
|
|
184
283
|
embedStart,
|
|
185
284
|
);
|
|
186
285
|
if (embedStart === -1 || embedEnd === -1) {
|
|
@@ -189,11 +288,11 @@ function patchMetalCMakeLists(cacheDir, { dryRun }) {
|
|
|
189
288
|
`the fork's GGML_METAL_EMBED_LIBRARY branch changed shape and the patch must be revisited.`,
|
|
190
289
|
);
|
|
191
290
|
}
|
|
192
|
-
const embedAirLines =
|
|
291
|
+
const embedAirLines = elizaAirLinesForSdk("${METAL_SDK}");
|
|
193
292
|
const embedReplacement = ` # ${SENTINEL_EMBED}
|
|
194
293
|
# Build a compiled default.metallib for embedded-library targets (iOS).
|
|
195
294
|
# The upstream path embedded concatenated Metal source and JIT-compiled it
|
|
196
|
-
# at runtime. That cannot include the
|
|
295
|
+
# at runtime. That cannot include the eliza standalones because the source
|
|
197
296
|
# TUs intentionally redeclare block_* structs/constants that already exist
|
|
198
297
|
# in ggml-common.h. Compile each TU separately, merge into one metallib,
|
|
199
298
|
# and embed the binary metallib bytes instead.
|
|
@@ -217,31 +316,29 @@ function patchMetalCMakeLists(cacheDir, { dryRun }) {
|
|
|
217
316
|
|
|
218
317
|
add_custom_command(
|
|
219
318
|
OUTPUT "\${METALLIB_EMBED_ASM}"
|
|
220
|
-
COMMAND echo "Embedding Metal library (compiled metallib +
|
|
319
|
+
COMMAND echo "Embedding Metal library (compiled metallib + eliza-shipped kernels)"
|
|
221
320
|
COMMAND sed -e "/__embed_ggml-common.h__/r \${METALLIB_COMMON}" -e "/__embed_ggml-common.h__/d" < "\${METALLIB_SOURCE}" > "\${METALLIB_SOURCE_EMBED_TMP}"
|
|
222
321
|
COMMAND sed -e "/\\#include \\"ggml-metal-impl.h\\"/r \${METALLIB_IMPL}" -e "/\\#include \\"ggml-metal-impl.h\\"/d" < "\${METALLIB_SOURCE_EMBED_TMP}" > "\${METALLIB_SOURCE_EMBED}"
|
|
223
322
|
COMMAND xcrun -sdk \${METAL_SDK} metal \${XC_FLAGS} -DGGML_METAL_EMBED_LIBRARY=1 -c "\${METALLIB_SOURCE_EMBED}" -o "\${METALLIB_EMBED_AIR}"
|
|
224
323
|
${embedAirLines}
|
|
225
|
-
COMMAND xcrun -sdk \${METAL_SDK} metallib "\${METALLIB_EMBED_AIR}" ${
|
|
324
|
+
COMMAND xcrun -sdk \${METAL_SDK} metallib "\${METALLIB_EMBED_AIR}" ${elizaAirInputs} -o "\${METALLIB_EMBED_BINARY}"
|
|
226
325
|
COMMAND echo ".section __DATA,__ggml_metallib" > "\${METALLIB_EMBED_ASM}"
|
|
227
326
|
COMMAND echo ".globl _ggml_metallib_start" >> "\${METALLIB_EMBED_ASM}"
|
|
228
327
|
COMMAND echo "_ggml_metallib_start:" >> "\${METALLIB_EMBED_ASM}"
|
|
229
328
|
COMMAND echo .incbin "\\"\${METALLIB_EMBED_BINARY}\\"" >> "\${METALLIB_EMBED_ASM}"
|
|
230
329
|
COMMAND echo ".globl _ggml_metallib_end" >> "\${METALLIB_EMBED_ASM}"
|
|
231
330
|
COMMAND echo "_ggml_metallib_end:" >> "\${METALLIB_EMBED_ASM}"
|
|
232
|
-
DEPENDS ../ggml-common.h ggml-metal.metal ggml-metal-impl.h ${
|
|
331
|
+
DEPENDS ../ggml-common.h ggml-metal.metal ggml-metal-impl.h ${elizaDepends}
|
|
233
332
|
COMMENT "Generate assembly for embedded compiled Metal library"
|
|
234
333
|
VERBATIM
|
|
235
334
|
)`;
|
|
236
335
|
patched =
|
|
237
|
-
patched.slice(0, embedStart) +
|
|
238
|
-
embedReplacement +
|
|
239
|
-
patched.slice(embedEnd);
|
|
336
|
+
patched.slice(0, embedStart) + embedReplacement + patched.slice(embedEnd);
|
|
240
337
|
changed = true;
|
|
241
338
|
}
|
|
242
339
|
|
|
243
340
|
// The exact block we replace. This pipe pattern has been stable in the
|
|
244
|
-
//
|
|
341
|
+
// elizaOS/llama.cpp fork for the entire v0.4.x line; if the upstream
|
|
245
342
|
// ever rewrites it we want to fail loudly rather than silently no-op.
|
|
246
343
|
if (!patched.includes(SENTINEL)) {
|
|
247
344
|
const anchor = ` add_custom_command(
|
|
@@ -261,9 +358,9 @@ ${embedAirLines}
|
|
|
261
358
|
);
|
|
262
359
|
}
|
|
263
360
|
|
|
264
|
-
const
|
|
361
|
+
const elizaAirLines = elizaAirLinesForSdk("macosx");
|
|
265
362
|
const replacement = ` # ${SENTINEL}
|
|
266
|
-
# Build ggml-metal.metal AND each
|
|
363
|
+
# Build ggml-metal.metal AND each eliza standalone shader into its own
|
|
267
364
|
# .air file, then merge all .air files into a single default.metallib.
|
|
268
365
|
# The standalones are self-contained TUs (only #include <metal_stdlib>;
|
|
269
366
|
# define their own block_*, constants, kernel functions) so they do not
|
|
@@ -271,17 +368,46 @@ ${embedAirLines}
|
|
|
271
368
|
add_custom_command(
|
|
272
369
|
OUTPUT \${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
|
|
273
370
|
COMMAND xcrun -sdk macosx metal \${XC_FLAGS} -c \${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o \${CMAKE_CURRENT_BINARY_DIR}/ggml-metal.air
|
|
274
|
-
${
|
|
275
|
-
COMMAND xcrun -sdk macosx metallib \${CMAKE_CURRENT_BINARY_DIR}/ggml-metal.air ${
|
|
371
|
+
${elizaAirLines}
|
|
372
|
+
COMMAND xcrun -sdk macosx metallib \${CMAKE_CURRENT_BINARY_DIR}/ggml-metal.air ${elizaAirInputs} -o \${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
|
|
276
373
|
COMMAND rm -f \${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h
|
|
277
374
|
COMMAND rm -f \${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal
|
|
278
|
-
DEPENDS ggml-metal.metal \${METALLIB_COMMON} ${
|
|
279
|
-
COMMENT "Compiling Metal kernels (ggml-metal +
|
|
375
|
+
DEPENDS ggml-metal.metal \${METALLIB_COMMON} ${elizaDepends}
|
|
376
|
+
COMMENT "Compiling Metal kernels (ggml-metal + eliza-shipped: ${METAL_KERNEL_FILES.join(", ")})"
|
|
280
377
|
)`;
|
|
281
378
|
patched = patched.replace(anchor, replacement);
|
|
282
379
|
changed = true;
|
|
283
380
|
}
|
|
284
381
|
|
|
382
|
+
if (
|
|
383
|
+
patched.includes(SENTINEL) &&
|
|
384
|
+
patched.includes(SENTINEL_EMBED) &&
|
|
385
|
+
!patched.includes("qjl_set_rows.metal")
|
|
386
|
+
) {
|
|
387
|
+
patched = patched
|
|
388
|
+
.replaceAll(
|
|
389
|
+
"COMMAND xcrun -sdk ${METAL_SDK} metal ${XC_FLAGS} -c ${CMAKE_CURRENT_SOURCE_DIR}/eliza-shipped/qjl.metal -o ${CMAKE_CURRENT_BINARY_DIR}/qjl.air",
|
|
390
|
+
"COMMAND xcrun -sdk ${METAL_SDK} metal ${XC_FLAGS} -c ${CMAKE_CURRENT_SOURCE_DIR}/eliza-shipped/qjl.metal -o ${CMAKE_CURRENT_BINARY_DIR}/qjl.air\n COMMAND xcrun -sdk ${METAL_SDK} metal ${XC_FLAGS} -c ${CMAKE_CURRENT_SOURCE_DIR}/eliza-shipped/qjl_set_rows.metal -o ${CMAKE_CURRENT_BINARY_DIR}/qjl_set_rows.air",
|
|
391
|
+
)
|
|
392
|
+
.replaceAll(
|
|
393
|
+
"COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_CURRENT_SOURCE_DIR}/eliza-shipped/qjl.metal -o ${CMAKE_CURRENT_BINARY_DIR}/qjl.air",
|
|
394
|
+
"COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_CURRENT_SOURCE_DIR}/eliza-shipped/qjl.metal -o ${CMAKE_CURRENT_BINARY_DIR}/qjl.air\n COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_CURRENT_SOURCE_DIR}/eliza-shipped/qjl_set_rows.metal -o ${CMAKE_CURRENT_BINARY_DIR}/qjl_set_rows.air",
|
|
395
|
+
)
|
|
396
|
+
.replaceAll(
|
|
397
|
+
"${CMAKE_CURRENT_BINARY_DIR}/qjl.air ${CMAKE_CURRENT_BINARY_DIR}/polar.air",
|
|
398
|
+
"${CMAKE_CURRENT_BINARY_DIR}/qjl.air ${CMAKE_CURRENT_BINARY_DIR}/qjl_set_rows.air ${CMAKE_CURRENT_BINARY_DIR}/polar.air",
|
|
399
|
+
)
|
|
400
|
+
.replaceAll(
|
|
401
|
+
"${CMAKE_CURRENT_SOURCE_DIR}/eliza-shipped/qjl.metal ${CMAKE_CURRENT_SOURCE_DIR}/eliza-shipped/polar.metal",
|
|
402
|
+
"${CMAKE_CURRENT_SOURCE_DIR}/eliza-shipped/qjl.metal ${CMAKE_CURRENT_SOURCE_DIR}/eliza-shipped/qjl_set_rows.metal ${CMAKE_CURRENT_SOURCE_DIR}/eliza-shipped/polar.metal",
|
|
403
|
+
)
|
|
404
|
+
.replaceAll(
|
|
405
|
+
"qjl.metal, polar.metal",
|
|
406
|
+
"qjl.metal, qjl_set_rows.metal, polar.metal",
|
|
407
|
+
);
|
|
408
|
+
changed = true;
|
|
409
|
+
}
|
|
410
|
+
|
|
285
411
|
if (patched === original) {
|
|
286
412
|
return { changed: false, path: cmakePath };
|
|
287
413
|
}
|
|
@@ -336,7 +462,7 @@ function patchEmbeddedMetallibLoader(cacheDir, { dryRun }) {
|
|
|
336
462
|
// The build patch embeds compiled default.metallib bytes here, not
|
|
337
463
|
// Metal source. Loading with newLibraryWithData keeps iOS on the same
|
|
338
464
|
// multi-TU kernel set as desktop and avoids duplicate declarations
|
|
339
|
-
// between ggml-metal.metal and the
|
|
465
|
+
// between ggml-metal.metal and the eliza standalone shaders.
|
|
340
466
|
const NSUInteger metallib_len = (NSUInteger)(ggml_metallib_end - ggml_metallib_start);
|
|
341
467
|
dispatch_data_t metallib_data = dispatch_data_create(ggml_metallib_start, metallib_len, nil, DISPATCH_DATA_DESTRUCTOR_DEFAULT);
|
|
342
468
|
library = [device newLibraryWithData:metallib_data error:&error];
|
|
@@ -347,16 +473,24 @@ function patchEmbeddedMetallibLoader(cacheDir, { dryRun }) {
|
|
|
347
473
|
#else`;
|
|
348
474
|
const patched = original.replace(anchor, replacement);
|
|
349
475
|
if (patched === original) {
|
|
350
|
-
throw new Error(
|
|
476
|
+
throw new Error(
|
|
477
|
+
"[metal-kernels] embedded loader replace produced no change",
|
|
478
|
+
);
|
|
351
479
|
}
|
|
352
480
|
if (!dryRun) fs.writeFileSync(deviceMPath, patched, "utf8");
|
|
353
481
|
return { changed: !dryRun, path: deviceMPath };
|
|
354
482
|
}
|
|
355
483
|
|
|
356
|
-
const SENTINEL_DISPATCH = "//
|
|
484
|
+
const SENTINEL_DISPATCH = "// ELIZA-DISPATCH-V1";
|
|
357
485
|
|
|
358
486
|
function patchMetalQjlAttnHeader(cacheDir, { dryRun }) {
|
|
359
|
-
const headerPath = path.join(
|
|
487
|
+
const headerPath = path.join(
|
|
488
|
+
cacheDir,
|
|
489
|
+
"ggml",
|
|
490
|
+
"src",
|
|
491
|
+
"ggml-metal",
|
|
492
|
+
"ggml-metal-device.h",
|
|
493
|
+
);
|
|
360
494
|
const original = fs.readFileSync(headerPath, "utf8");
|
|
361
495
|
if (original.includes(SENTINEL_QJL_ATTN)) {
|
|
362
496
|
return { changed: false, path: headerPath };
|
|
@@ -386,10 +520,22 @@ struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_attn_scor
|
|
|
386
520
|
}
|
|
387
521
|
|
|
388
522
|
function patchMetalQjlAttnDeviceCpp(cacheDir, { dryRun }) {
|
|
389
|
-
const cppPath = path.join(
|
|
523
|
+
const cppPath = path.join(
|
|
524
|
+
cacheDir,
|
|
525
|
+
"ggml",
|
|
526
|
+
"src",
|
|
527
|
+
"ggml-metal",
|
|
528
|
+
"ggml-metal-device.cpp",
|
|
529
|
+
);
|
|
390
530
|
const original = fs.readFileSync(cppPath, "utf8");
|
|
391
531
|
if (original.includes(SENTINEL_QJL_ATTN)) {
|
|
392
|
-
|
|
532
|
+
const upgraded = original.replace(
|
|
533
|
+
'const char * name = "kernel_attn_score_qjl1_256";',
|
|
534
|
+
'const char * name = "kernel_attn_score_qjl1_256_multi";',
|
|
535
|
+
);
|
|
536
|
+
if (upgraded !== original && !dryRun)
|
|
537
|
+
fs.writeFileSync(cppPath, upgraded, "utf8");
|
|
538
|
+
return { changed: upgraded !== original && !dryRun, path: cppPath };
|
|
393
539
|
}
|
|
394
540
|
const anchor = `ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_bin(ggml_metal_library_t lib, const ggml_tensor * op, int32_t n_fuse) {`;
|
|
395
541
|
if (!original.includes(anchor)) {
|
|
@@ -399,7 +545,7 @@ function patchMetalQjlAttnDeviceCpp(cacheDir, { dryRun }) {
|
|
|
399
545
|
}
|
|
400
546
|
const helper = `${SENTINEL_QJL_ATTN}
|
|
401
547
|
ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_attn_score_qjl(ggml_metal_library_t lib) {
|
|
402
|
-
const char * name = "
|
|
548
|
+
const char * name = "kernel_attn_score_qjl1_256_multi";
|
|
403
549
|
ggml_metal_pipeline_with_params res = ggml_metal_library_get_pipeline(lib, name);
|
|
404
550
|
if (!res.pipeline) {
|
|
405
551
|
// Standalone shipped shader: it declares no Metal function constants,
|
|
@@ -424,7 +570,13 @@ ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_attn_score_qjl(g
|
|
|
424
570
|
}
|
|
425
571
|
|
|
426
572
|
function patchMetalQjlAttnOpsHeader(cacheDir, { dryRun }) {
|
|
427
|
-
const headerPath = path.join(
|
|
573
|
+
const headerPath = path.join(
|
|
574
|
+
cacheDir,
|
|
575
|
+
"ggml",
|
|
576
|
+
"src",
|
|
577
|
+
"ggml-metal",
|
|
578
|
+
"ggml-metal-ops.h",
|
|
579
|
+
);
|
|
428
580
|
const original = fs.readFileSync(headerPath, "utf8");
|
|
429
581
|
if (original.includes(SENTINEL_QJL_ATTN)) {
|
|
430
582
|
return { changed: false, path: headerPath };
|
|
@@ -444,10 +596,98 @@ int ggml_metal_op_attn_score_qjl (ggml_metal_op_t ctx, int idx);`;
|
|
|
444
596
|
}
|
|
445
597
|
|
|
446
598
|
function patchMetalQjlAttnOpsCpp(cacheDir, { dryRun }) {
|
|
447
|
-
const opsPath = path.join(
|
|
599
|
+
const opsPath = path.join(
|
|
600
|
+
cacheDir,
|
|
601
|
+
"ggml",
|
|
602
|
+
"src",
|
|
603
|
+
"ggml-metal",
|
|
604
|
+
"ggml-metal-ops.cpp",
|
|
605
|
+
);
|
|
448
606
|
const original = fs.readFileSync(opsPath, "utf8");
|
|
449
607
|
if (original.includes(SENTINEL_QJL_ATTN)) {
|
|
450
|
-
|
|
608
|
+
let upgraded = original;
|
|
609
|
+
if (!upgraded.includes("#include <cstdlib>")) {
|
|
610
|
+
upgraded = upgraded.replace(
|
|
611
|
+
`#include <cmath>
|
|
612
|
+
`,
|
|
613
|
+
`#include <cmath>
|
|
614
|
+
#include <cstdlib>
|
|
615
|
+
`,
|
|
616
|
+
);
|
|
617
|
+
}
|
|
618
|
+
upgraded = upgraded.replace(
|
|
619
|
+
`struct eliza_qjl_score_args {
|
|
620
|
+
uint32_t n_heads;
|
|
621
|
+
uint32_t n_kv_heads;
|
|
622
|
+
uint32_t n_tokens;
|
|
623
|
+
uint32_t proj_dim;
|
|
624
|
+
};`,
|
|
625
|
+
`struct eliza_qjl_score_args {
|
|
626
|
+
uint32_t n_heads;
|
|
627
|
+
uint32_t n_kv_heads;
|
|
628
|
+
uint32_t n_tokens;
|
|
629
|
+
uint32_t proj_dim;
|
|
630
|
+
uint32_t tokens_per_threadgroup;
|
|
631
|
+
};`,
|
|
632
|
+
);
|
|
633
|
+
if (!upgraded.includes("static inline uint32_t eliza_env_u32")) {
|
|
634
|
+
upgraded = upgraded.replace(
|
|
635
|
+
`static inline ggml_metal_buffer_id eliza_metal_buffer_offset(ggml_metal_buffer_id id, size_t extra) {
|
|
636
|
+
id.offs += extra;
|
|
637
|
+
return id;
|
|
638
|
+
}
|
|
639
|
+
`,
|
|
640
|
+
`static inline ggml_metal_buffer_id eliza_metal_buffer_offset(ggml_metal_buffer_id id, size_t extra) {
|
|
641
|
+
id.offs += extra;
|
|
642
|
+
return id;
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
static inline uint32_t eliza_env_u32(const char * name, uint32_t fallback, uint32_t min_value, uint32_t max_value) {
|
|
646
|
+
const char * raw = std::getenv(name);
|
|
647
|
+
if (raw == nullptr || raw[0] == '\\0') {
|
|
648
|
+
return fallback;
|
|
649
|
+
}
|
|
650
|
+
char * end = nullptr;
|
|
651
|
+
const unsigned long parsed = std::strtoul(raw, &end, 10);
|
|
652
|
+
if (end == raw || *end != '\\0' || parsed < min_value || parsed > max_value) {
|
|
653
|
+
GGML_LOG_WARN("%s: ignoring invalid %s=%s (expected %u..%u)\\n",
|
|
654
|
+
__func__, name, raw, min_value, max_value);
|
|
655
|
+
return fallback;
|
|
656
|
+
}
|
|
657
|
+
return (uint32_t) parsed;
|
|
658
|
+
}
|
|
659
|
+
`,
|
|
660
|
+
);
|
|
661
|
+
}
|
|
662
|
+
upgraded = upgraded.replace(
|
|
663
|
+
` /* n_tokens = */ n_tokens,
|
|
664
|
+
/* proj_dim = */ 256u,
|
|
665
|
+
};`,
|
|
666
|
+
` /* n_tokens = */ n_tokens,
|
|
667
|
+
/* proj_dim = */ 256u,
|
|
668
|
+
// M4 Max 2026-05-11 sweeps show N=4/8/16/32 trade median vs p99.
|
|
669
|
+
// Keep N=32 as the tail-latency-biased default until per-device
|
|
670
|
+
// autotuning can persist a device-specific table.
|
|
671
|
+
/* tokens_per_threadgroup = */ eliza_env_u32("ELIZA_METAL_QJL_TOKENS_PER_TG", 64u, 1u, 64u),
|
|
672
|
+
};`,
|
|
673
|
+
);
|
|
674
|
+
upgraded = upgraded.replace(
|
|
675
|
+
` ggml_metal_encoder_dispatch_threadgroups(enc, (int) n_heads, (int) n_tokens, 1, 32, 1, 1);`,
|
|
676
|
+
` const int token_groups = (int) ((n_tokens + args.tokens_per_threadgroup - 1u) / args.tokens_per_threadgroup);
|
|
677
|
+
ggml_metal_encoder_dispatch_threadgroups(enc, (int) n_heads, token_groups, 1, 32, 1, 1);`,
|
|
678
|
+
);
|
|
679
|
+
upgraded = upgraded.replace(
|
|
680
|
+
` GGML_ASSERT(q->ne[0] == 256);
|
|
681
|
+
GGML_ASSERT(pk->ne[0] == 128);`,
|
|
682
|
+
` GGML_ASSERT(q->ne[0] == 256);
|
|
683
|
+
GGML_ASSERT(pk->ne[0] == 128);
|
|
684
|
+
GGML_ASSERT(ggml_is_contiguous_rows(q));
|
|
685
|
+
GGML_ASSERT(ggml_is_contiguous_rows(pk));
|
|
686
|
+
GGML_ASSERT(ggml_is_contiguous_rows(op));`,
|
|
687
|
+
);
|
|
688
|
+
if (upgraded !== original && !dryRun)
|
|
689
|
+
fs.writeFileSync(opsPath, upgraded, "utf8");
|
|
690
|
+
return { changed: upgraded !== original && !dryRun, path: opsPath };
|
|
451
691
|
}
|
|
452
692
|
|
|
453
693
|
const funcAnchor = `static int ggml_metal_op_encode_impl(ggml_metal_op_t ctx, int idx) {`;
|
|
@@ -457,18 +697,34 @@ function patchMetalQjlAttnOpsCpp(cacheDir, { dryRun }) {
|
|
|
457
697
|
);
|
|
458
698
|
}
|
|
459
699
|
const opFunc = `${SENTINEL_QJL_ATTN}
|
|
460
|
-
struct
|
|
700
|
+
struct eliza_qjl_score_args {
|
|
461
701
|
uint32_t n_heads;
|
|
462
702
|
uint32_t n_kv_heads;
|
|
463
703
|
uint32_t n_tokens;
|
|
464
704
|
uint32_t proj_dim;
|
|
705
|
+
uint32_t tokens_per_threadgroup;
|
|
465
706
|
};
|
|
466
707
|
|
|
467
|
-
static inline ggml_metal_buffer_id
|
|
708
|
+
static inline ggml_metal_buffer_id eliza_metal_buffer_offset(ggml_metal_buffer_id id, size_t extra) {
|
|
468
709
|
id.offs += extra;
|
|
469
710
|
return id;
|
|
470
711
|
}
|
|
471
712
|
|
|
713
|
+
static inline uint32_t eliza_env_u32(const char * name, uint32_t fallback, uint32_t min_value, uint32_t max_value) {
|
|
714
|
+
const char * raw = std::getenv(name);
|
|
715
|
+
if (raw == nullptr || raw[0] == '\\0') {
|
|
716
|
+
return fallback;
|
|
717
|
+
}
|
|
718
|
+
char * end = nullptr;
|
|
719
|
+
const unsigned long parsed = std::strtoul(raw, &end, 10);
|
|
720
|
+
if (end == raw || *end != '\\0' || parsed < min_value || parsed > max_value) {
|
|
721
|
+
GGML_LOG_WARN("%s: ignoring invalid %s=%s (expected %u..%u)\\n",
|
|
722
|
+
__func__, name, raw, min_value, max_value);
|
|
723
|
+
return fallback;
|
|
724
|
+
}
|
|
725
|
+
return (uint32_t) parsed;
|
|
726
|
+
}
|
|
727
|
+
|
|
472
728
|
int ggml_metal_op_attn_score_qjl(ggml_metal_op_t ctx, int idx) {
|
|
473
729
|
ggml_tensor * op = ctx->node(idx);
|
|
474
730
|
|
|
@@ -485,6 +741,9 @@ int ggml_metal_op_attn_score_qjl(ggml_metal_op_t ctx, int idx) {
|
|
|
485
741
|
GGML_ASSERT(op->type == GGML_TYPE_F32);
|
|
486
742
|
GGML_ASSERT(q->ne[0] == 256);
|
|
487
743
|
GGML_ASSERT(pk->ne[0] == 128);
|
|
744
|
+
GGML_ASSERT(ggml_is_contiguous_rows(q));
|
|
745
|
+
GGML_ASSERT(ggml_is_contiguous_rows(pk));
|
|
746
|
+
GGML_ASSERT(ggml_is_contiguous_rows(op));
|
|
488
747
|
|
|
489
748
|
const uint32_t n_heads = (uint32_t) q->ne[1];
|
|
490
749
|
const uint32_t n_kv_heads = (uint32_t) ((const int32_t *) op->op_params)[0];
|
|
@@ -503,11 +762,15 @@ int ggml_metal_op_attn_score_qjl(ggml_metal_op_t ctx, int idx) {
|
|
|
503
762
|
GGML_ASSERT(pk->nb[1] == ggml_row_size(GGML_TYPE_QJL1_256, 128));
|
|
504
763
|
GGML_ASSERT(pk->nb[2] == (size_t) n_tokens * pk->nb[1]);
|
|
505
764
|
|
|
506
|
-
|
|
765
|
+
eliza_qjl_score_args args = {
|
|
507
766
|
/* n_heads = */ n_heads,
|
|
508
767
|
/* n_kv_heads = */ n_kv_heads,
|
|
509
768
|
/* n_tokens = */ n_tokens,
|
|
510
769
|
/* proj_dim = */ 256u,
|
|
770
|
+
// M4 Max 2026-05-11 sweeps show N=4/8/16/32 trade median vs p99.
|
|
771
|
+
// Keep N=32 as the tail-latency-biased default until per-device
|
|
772
|
+
// autotuning can persist a device-specific table.
|
|
773
|
+
/* tokens_per_threadgroup = */ eliza_env_u32("ELIZA_METAL_QJL_TOKENS_PER_TG", 64u, 1u, 64u),
|
|
511
774
|
};
|
|
512
775
|
|
|
513
776
|
auto pipeline = ggml_metal_library_get_pipeline_attn_score_qjl(lib);
|
|
@@ -524,18 +787,132 @@ int ggml_metal_op_attn_score_qjl(ggml_metal_op_t ctx, int idx) {
|
|
|
524
787
|
const size_t pk_i3 = (size_t) i3 * pk->nb[3];
|
|
525
788
|
const size_t dst_i3 = (size_t) i3 * op->nb[3];
|
|
526
789
|
for (int64_t ib = 0; ib < n_batch; ++ib) {
|
|
527
|
-
ggml_metal_encoder_set_buffer(enc,
|
|
528
|
-
ggml_metal_encoder_set_buffer(enc,
|
|
529
|
-
ggml_metal_encoder_set_buffer(enc,
|
|
530
|
-
|
|
790
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(q_base, q_i3 + (size_t) ib * q->nb[2]), 0);
|
|
791
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(pk_base, pk_i3), 1);
|
|
792
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(dst_base, dst_i3 + (size_t) ib * op->nb[2]), 2);
|
|
793
|
+
const int token_groups = (int) ((n_tokens + args.tokens_per_threadgroup - 1u) / args.tokens_per_threadgroup);
|
|
794
|
+
ggml_metal_encoder_dispatch_threadgroups(enc, (int) n_heads, token_groups, 1, 32, 1, 1);
|
|
531
795
|
}
|
|
532
796
|
}
|
|
533
797
|
|
|
534
798
|
return 1;
|
|
535
799
|
}
|
|
536
800
|
|
|
801
|
+
struct eliza_fused_attn_qjl_tbq_args {
|
|
802
|
+
uint32_t head_dim;
|
|
803
|
+
uint32_t proj_dim;
|
|
804
|
+
uint32_t n_heads;
|
|
805
|
+
uint32_t n_kv_heads;
|
|
806
|
+
uint32_t n_q_pos;
|
|
807
|
+
uint32_t n_kv;
|
|
808
|
+
uint32_t kv_tile;
|
|
809
|
+
uint32_t v_use_qjl;
|
|
810
|
+
float scale;
|
|
811
|
+
uint32_t causal;
|
|
812
|
+
uint32_t q_pos_base;
|
|
813
|
+
};
|
|
814
|
+
|
|
815
|
+
int ggml_metal_op_fused_attn_qjl_tbq(ggml_metal_op_t ctx, int idx) {
|
|
816
|
+
ggml_tensor * op = ctx->node(idx);
|
|
817
|
+
|
|
818
|
+
ggml_metal_library_t lib = ctx->lib;
|
|
819
|
+
ggml_metal_encoder_t enc = ctx->enc;
|
|
820
|
+
|
|
821
|
+
const ggml_tensor * q = op->src[0];
|
|
822
|
+
const ggml_tensor * pk = op->src[1];
|
|
823
|
+
const ggml_tensor * pv = op->src[2];
|
|
824
|
+
|
|
825
|
+
GGML_ASSERT(q != nullptr);
|
|
826
|
+
GGML_ASSERT(pk != nullptr);
|
|
827
|
+
GGML_ASSERT(pv != nullptr);
|
|
828
|
+
GGML_ASSERT(q->type == GGML_TYPE_F32);
|
|
829
|
+
GGML_ASSERT(pk->type == GGML_TYPE_QJL1_256);
|
|
830
|
+
GGML_ASSERT(pv->type == GGML_TYPE_TBQ3_0);
|
|
831
|
+
GGML_ASSERT(op->type == GGML_TYPE_F32);
|
|
832
|
+
GGML_ASSERT(q->ne[0] == 256);
|
|
833
|
+
GGML_ASSERT(pk->ne[0] == 128);
|
|
834
|
+
GGML_ASSERT(pv->ne[0] == 128);
|
|
835
|
+
GGML_ASSERT(op->ne[0] == 128);
|
|
836
|
+
GGML_ASSERT(ggml_is_contiguous_rows(q));
|
|
837
|
+
GGML_ASSERT(ggml_is_contiguous_rows(pk));
|
|
838
|
+
GGML_ASSERT(ggml_is_contiguous_rows(pv));
|
|
839
|
+
GGML_ASSERT(ggml_is_contiguous_rows(op));
|
|
840
|
+
|
|
841
|
+
const int32_t * params = (const int32_t *) op->op_params;
|
|
842
|
+
const uint32_t n_kv_heads = (uint32_t) params[0];
|
|
843
|
+
union { int32_t i; float f; } scale_bits;
|
|
844
|
+
scale_bits.i = params[1];
|
|
845
|
+
|
|
846
|
+
const uint32_t n_heads = (uint32_t) q->ne[1];
|
|
847
|
+
const uint32_t n_q_pos = (uint32_t) q->ne[2];
|
|
848
|
+
const uint32_t n_kv = (uint32_t) pk->ne[1];
|
|
849
|
+
const int64_t ne3 = q->ne[3];
|
|
850
|
+
|
|
851
|
+
GGML_ASSERT(n_kv_heads > 0);
|
|
852
|
+
GGML_ASSERT((n_heads % n_kv_heads) == 0);
|
|
853
|
+
GGML_ASSERT(pk->ne[2] == (int64_t) n_kv_heads);
|
|
854
|
+
GGML_ASSERT(pv->ne[1] == (int64_t) n_kv);
|
|
855
|
+
GGML_ASSERT(pv->ne[2] == (int64_t) n_kv_heads);
|
|
856
|
+
GGML_ASSERT(pk->ne[3] == ne3);
|
|
857
|
+
GGML_ASSERT(pv->ne[3] == ne3);
|
|
858
|
+
GGML_ASSERT(op->ne[1] == (int64_t) n_heads);
|
|
859
|
+
GGML_ASSERT(op->ne[2] == (int64_t) n_q_pos);
|
|
860
|
+
GGML_ASSERT(op->ne[3] == ne3);
|
|
861
|
+
GGML_ASSERT(q->nb[1] == (size_t) q->ne[0] * ggml_type_size(q->type));
|
|
862
|
+
GGML_ASSERT(q->nb[2] == (size_t) n_heads * q->nb[1]);
|
|
863
|
+
GGML_ASSERT(pk->nb[1] == ggml_row_size(GGML_TYPE_QJL1_256, 128));
|
|
864
|
+
GGML_ASSERT(pk->nb[2] == (size_t) n_kv * pk->nb[1]);
|
|
865
|
+
GGML_ASSERT(pv->nb[1] == ggml_row_size(GGML_TYPE_TBQ3_0, 128));
|
|
866
|
+
GGML_ASSERT(pv->nb[2] == (size_t) n_kv * pv->nb[1]);
|
|
867
|
+
GGML_ASSERT(op->nb[1] == (size_t) op->ne[0] * ggml_type_size(op->type));
|
|
868
|
+
GGML_ASSERT(op->nb[2] == (size_t) n_heads * op->nb[1]);
|
|
869
|
+
|
|
870
|
+
eliza_fused_attn_qjl_tbq_args args = {
|
|
871
|
+
/* head_dim = */ 128u,
|
|
872
|
+
/* proj_dim = */ 256u,
|
|
873
|
+
/* n_heads = */ n_heads,
|
|
874
|
+
/* n_kv_heads = */ n_kv_heads,
|
|
875
|
+
/* n_q_pos = */ n_q_pos,
|
|
876
|
+
/* n_kv = */ n_kv,
|
|
877
|
+
/* kv_tile = */ (uint32_t) params[3],
|
|
878
|
+
/* v_use_qjl = */ (uint32_t) params[2],
|
|
879
|
+
/* scale = */ scale_bits.f,
|
|
880
|
+
/* causal = */ (uint32_t) params[4],
|
|
881
|
+
/* q_pos_base = */ (uint32_t) params[5],
|
|
882
|
+
};
|
|
883
|
+
|
|
884
|
+
auto pipeline = ggml_metal_library_get_pipeline_fused_attn_qjl_tbq(lib);
|
|
885
|
+
|
|
886
|
+
const ggml_metal_buffer_id q_base = ggml_metal_get_buffer_id(q);
|
|
887
|
+
const ggml_metal_buffer_id pk_base = ggml_metal_get_buffer_id(pk);
|
|
888
|
+
const ggml_metal_buffer_id pv_base = ggml_metal_get_buffer_id(pv);
|
|
889
|
+
const ggml_metal_buffer_id dst_base = ggml_metal_get_buffer_id(op);
|
|
890
|
+
|
|
891
|
+
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
892
|
+
ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 4);
|
|
893
|
+
|
|
894
|
+
for (int64_t i3 = 0; i3 < ne3; ++i3) {
|
|
895
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(q_base, (size_t) i3 * q->nb[3]), 0);
|
|
896
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(pk_base, (size_t) i3 * pk->nb[3]), 1);
|
|
897
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(pv_base, (size_t) i3 * pv->nb[3]), 2);
|
|
898
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(dst_base, (size_t) i3 * op->nb[3]), 3);
|
|
899
|
+
ggml_metal_encoder_dispatch_threadgroups(enc, (int) n_heads, (int) n_q_pos, 1, 32, 1, 1);
|
|
900
|
+
}
|
|
901
|
+
|
|
902
|
+
return 1;
|
|
903
|
+
}
|
|
904
|
+
|
|
537
905
|
`;
|
|
538
906
|
let patched = original.replace(funcAnchor, opFunc + funcAnchor);
|
|
907
|
+
if (!patched.includes("#include <cstdlib>")) {
|
|
908
|
+
patched = patched.replace(
|
|
909
|
+
`#include <cmath>
|
|
910
|
+
`,
|
|
911
|
+
`#include <cmath>
|
|
912
|
+
#include <cstdlib>
|
|
913
|
+
`,
|
|
914
|
+
);
|
|
915
|
+
}
|
|
539
916
|
|
|
540
917
|
const switchAnchor = ` case GGML_OP_FLASH_ATTN_EXT:
|
|
541
918
|
{
|
|
@@ -557,10 +934,27 @@ int ggml_metal_op_attn_score_qjl(ggml_metal_op_t ctx, int idx) {
|
|
|
557
934
|
}
|
|
558
935
|
|
|
559
936
|
function patchMetalQjlAttnSupportsOp(cacheDir, { dryRun }) {
|
|
560
|
-
const deviceMPath = path.join(
|
|
937
|
+
const deviceMPath = path.join(
|
|
938
|
+
cacheDir,
|
|
939
|
+
"ggml",
|
|
940
|
+
"src",
|
|
941
|
+
"ggml-metal",
|
|
942
|
+
"ggml-metal-device.m",
|
|
943
|
+
);
|
|
561
944
|
const original = fs.readFileSync(deviceMPath, "utf8");
|
|
562
945
|
if (original.includes(SENTINEL_QJL_ATTN)) {
|
|
563
|
-
|
|
946
|
+
const upgraded = original.replace(
|
|
947
|
+
` op->src[0]->ne[0] == 256 &&
|
|
948
|
+
op->src[1]->ne[0] == 128;`,
|
|
949
|
+
` op->src[0]->ne[0] == 256 &&
|
|
950
|
+
op->src[1]->ne[0] == 128 &&
|
|
951
|
+
ggml_is_contiguous_rows(op) &&
|
|
952
|
+
ggml_is_contiguous_rows(op->src[0]) &&
|
|
953
|
+
ggml_is_contiguous_rows(op->src[1]);`,
|
|
954
|
+
);
|
|
955
|
+
if (upgraded !== original && !dryRun)
|
|
956
|
+
fs.writeFileSync(deviceMPath, upgraded, "utf8");
|
|
957
|
+
return { changed: upgraded !== original && !dryRun, path: deviceMPath };
|
|
564
958
|
}
|
|
565
959
|
const anchor = ` case GGML_OP_FLASH_ATTN_EXT:
|
|
566
960
|
// for new head sizes, add checks here`;
|
|
@@ -578,7 +972,10 @@ function patchMetalQjlAttnSupportsOp(cacheDir, { dryRun }) {
|
|
|
578
972
|
op->src[0]->type == GGML_TYPE_F32 &&
|
|
579
973
|
op->src[1]->type == GGML_TYPE_QJL1_256 &&
|
|
580
974
|
op->src[0]->ne[0] == 256 &&
|
|
581
|
-
op->src[1]->ne[0] == 128
|
|
975
|
+
op->src[1]->ne[0] == 128 &&
|
|
976
|
+
ggml_is_contiguous_rows(op) &&
|
|
977
|
+
ggml_is_contiguous_rows(op->src[0]) &&
|
|
978
|
+
ggml_is_contiguous_rows(op->src[1]);
|
|
582
979
|
${anchor}`;
|
|
583
980
|
const patched = original.replace(anchor, insert);
|
|
584
981
|
if (!dryRun) fs.writeFileSync(deviceMPath, patched, "utf8");
|
|
@@ -594,45 +991,1237 @@ function patchMetalQjlAttnDispatch(cacheDir, { dryRun }) {
|
|
|
594
991
|
return { header, deviceCpp, opsHeader, opsCpp, supportsOp };
|
|
595
992
|
}
|
|
596
993
|
|
|
597
|
-
|
|
598
|
-
const
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
994
|
+
/**
 * Pulls the TCQ codebook initializer out of the standalone reference kernels.
 *
 * Reads `turbo_kernels.c` from STANDALONE_REFERENCE_DIR and captures the text
 * between the braces of the `ELIZA_TURBO3_TCQ_CODEBOOK[512]` definition.
 *
 * @returns {string} The initializer body with surrounding whitespace trimmed.
 * @throws {Error} When the codebook definition is not found in the file.
 */
function readTcqCodebookLiteral() {
  const referencePath = path.join(STANDALONE_REFERENCE_DIR, "turbo_kernels.c");
  const codebookPattern =
    /const float ELIZA_TURBO3_TCQ_CODEBOOK\[512\]\s*=\s*\{([\s\S]*?)\};/;
  const found = codebookPattern.exec(fs.readFileSync(referencePath, "utf8"));
  if (found === null) {
    throw new Error(
      `[metal-tbq-polar-attn] could not extract TCQ codebook from ${referencePath}`,
    );
  }
  const [, initializer] = found;
  return initializer.trim();
}
|
|
1007
|
+
|
|
1008
|
+
/**
 * Registers the TurboQuant / PolarQuant attention-score ops in the cached
 * ggml sources (`ggml/include/ggml.h` and `ggml/src/ggml.c`).
 *
 * ggml.h gains two enum entries and three public declarations; ggml.c gains
 * the matching op names/symbols, a TBQ3_TCQ type-traits entry (if absent),
 * the bumped GGML_OP_COUNT static_asserts and the constructor
 * implementations. The fused-attention constructor's `params[2]` array is
 * widened to `params[6]` on every run where the old form is still present.
 *
 * Both files are fully computed and validated before anything is written, so
 * a missing anchor in ggml.c can no longer leave ggml.h patched on its own.
 *
 * @param {string} cacheDir - Root of the cached source checkout.
 * @param {{ dryRun: boolean }} options - When `dryRun` is true nothing is written.
 * @returns {{ changed: boolean, headerPath: string, cPath: string }}
 *   `changed` is true only when at least one file was actually rewritten.
 * @throws {Error} When a required anchor is missing or the op-name tables
 *   were not extended.
 */
export function patchGgmlTbqPolarAttnOps(cacheDir, { dryRun }) {
  const headerPath = path.join(cacheDir, "ggml", "include", "ggml.h");
  const cPath = path.join(cacheDir, "ggml", "src", "ggml.c");

  // ---- ggml.h: enum entries + public declarations -------------------------
  const headerOriginal = fs.readFileSync(headerPath, "utf8");
  let header = headerOriginal;
  if (!header.includes(SENTINEL_TBQ_POLAR_ATTN)) {
    const enumAnchor = ` GGML_OP_ATTN_SCORE_QJL, // QJL 1-bit packed-K attention score (CPU-only)
GGML_OP_FUSED_ATTN_QJL_TBQ, // fused QJL-K + TBQ-V attention (CPU-only)`;
    if (!header.includes(enumAnchor)) {
      throw new Error(
        `[metal-tbq-polar-attn] ggml.h op enum anchor not found at ${headerPath}`,
      );
    }
    header = header.replace(
      enumAnchor,
      ` GGML_OP_ATTN_SCORE_QJL, // QJL 1-bit packed-K attention score
GGML_OP_ATTN_SCORE_TBQ, // ${SENTINEL_TBQ_POLAR_ATTN} TurboQuant packed-K attention score
GGML_OP_ATTN_SCORE_POLAR, // PolarQuant packed-K attention score
GGML_OP_FUSED_ATTN_QJL_TBQ, // fused QJL-K + TBQ-V attention (CPU-only)`,
    );
    const declAnchor = ` GGML_API struct ggml_tensor * ggml_attn_score_qjl(
struct ggml_context * ctx,
struct ggml_tensor * q,
struct ggml_tensor * packed_k,
int n_kv_heads);`;
    if (!header.includes(declAnchor)) {
      throw new Error(
        `[metal-tbq-polar-attn] ggml.h QJL declaration anchor not found at ${headerPath}`,
      );
    }
    header = header.replace(
      declAnchor,
      `${declAnchor}

// ${SENTINEL_TBQ_POLAR_ATTN}
// TurboQuant packed-K attention score.
// q: F32 [128, n_heads, n_batch, ne3]
// packed_k: TBQ3_0/TBQ4_0/TBQ3_TCQ [128, n_kv_tokens, n_kv_heads, ne3]
// output: F32 [n_kv_tokens, n_heads, n_batch, ne3]
GGML_API struct ggml_tensor * ggml_attn_score_tbq(
struct ggml_context * ctx,
struct ggml_tensor * q,
struct ggml_tensor * packed_k,
int n_kv_heads);

// PolarQuant packed-K attention score.
// q: F32 [128, n_heads, n_batch, ne3]
// packed_k: Q4_POLAR [128, n_kv_tokens, n_kv_heads, ne3]
// use_qjl mirrors the PolarQuant GGUF residual flag.
// output: F32 [n_kv_tokens, n_heads, n_batch, ne3]
GGML_API struct ggml_tensor * ggml_attn_score_polar(
struct ggml_context * ctx,
struct ggml_tensor * q,
struct ggml_tensor * packed_k,
int n_kv_heads,
bool use_qjl);

// PolarQuant packed-K attention score with pre-Hadamarded query.
// q_preht MUST contain H*q for each query head, where H is the same
// unnormalised 128-point Walsh-Hadamard transform used by the PolarQuant
// decoder. This is faster than ggml_attn_score_polar() because the backend
// can use dot(H*x, q) == dot(x, H*q) and avoid one Hadamard per K row.
GGML_API struct ggml_tensor * ggml_attn_score_polar_preht(
struct ggml_context * ctx,
struct ggml_tensor * q_preht,
struct ggml_tensor * packed_k,
int n_kv_heads,
bool use_qjl);`,
    );
  }

  // ---- ggml.c: op params ABI, name tables, type traits, constructors ------
  const cOriginal = fs.readFileSync(cPath, "utf8");
  let c = cOriginal;
  // Runs regardless of the sentinel so an already-patched tree still gets the
  // widened op_params array; a no-op once the old form is gone.
  c = c.replace(
    ` int32_t params[2];
params[0] = n_kv_heads;
union { float f; int32_t i; } scale_bits;
scale_bits.f = sm_scale;
params[1] = scale_bits.i;
ggml_set_op_params(result, params, sizeof(params));`,
    ` int32_t params[6] = { 0 };
params[0] = n_kv_heads;
union { float f; int32_t i; } scale_bits;
scale_bits.f = sm_scale;
params[1] = scale_bits.i;
// Reserved for backend fused dispatch ABI: [2] v_use_qjl, [3] kv_tile,
// [4] causal, [5] q_pos_base. The public constructor preserves the
// existing non-causal CPU semantics by default.
params[5] = n_kv_tokens >= q->ne[2] ? (int32_t) (n_kv_tokens - q->ne[2]) : 0;
ggml_set_op_params(result, params, sizeof(params));`,
  );
  if (!c.includes(SENTINEL_TBQ_POLAR_ATTN)) {
    c = c.replace(
      ` "ATTN_SCORE_QJL",
"FUSED_ATTN_QJL_TBQ",`,
      ` "ATTN_SCORE_QJL",
"ATTN_SCORE_TBQ",
"ATTN_SCORE_POLAR",
"FUSED_ATTN_QJL_TBQ",`,
    );
    c = c.replace(
      ` "attn_score_qjl(q, packed_k)",
"fused_attn_qjl_tbq(q, packed_k, packed_v)",`,
      ` "attn_score_qjl(q, packed_k)",
"attn_score_tbq(q, packed_k)",
"attn_score_polar(q, packed_k)",
"fused_attn_qjl_tbq(q, packed_k, packed_v)",`,
    );
    // Checked here, before the constructors are spliced in: the constructor
    // text itself contains ATTN_SCORE_TBQ / attn_score_polar, which would make
    // a later substring check pass even if the tables were never extended.
    if (
      !c.includes('"ATTN_SCORE_TBQ"') ||
      !c.includes('"attn_score_polar(q, packed_k)"')
    ) {
      throw new Error(
        `[metal-tbq-polar-attn] ggml.c patch did not add expected op names at ${cPath}`,
      );
    }
    if (!c.includes(` [GGML_TYPE_TBQ3_TCQ] = {`)) {
      c = c.replace(
        ` [GGML_TYPE_QJL1_256] = {`,
        ` [GGML_TYPE_TBQ3_TCQ] = {
.type_name = "tbq3_tcq",
.blck_size = QK_TBQ3_TCQ,
.type_size = sizeof(block_tbq3_tcq),
.is_quantized = true,
},
[GGML_TYPE_QJL1_256] = {`,
      );
    }
    c = c.replaceAll(
      `static_assert(GGML_OP_COUNT == 97, "GGML_OP_COUNT != 97");`,
      `static_assert(GGML_OP_COUNT == 99, "GGML_OP_COUNT != 99");`,
    );
    const implAnchor = `// ggml_fused_attn_qjl_tbq
//`;
    if (!c.includes(implAnchor)) {
      throw new Error(
        `[metal-tbq-polar-attn] ggml.c implementation anchor not found at ${cPath}`,
      );
    }
    const impl = `// ${SENTINEL_TBQ_POLAR_ATTN}
// ggml_attn_score_tbq
//
struct ggml_tensor * ggml_attn_score_tbq(
struct ggml_context * ctx,
struct ggml_tensor * q,
struct ggml_tensor * packed_k,
int n_kv_heads) {
GGML_ASSERT(q != NULL);
GGML_ASSERT(packed_k != NULL);
GGML_ASSERT(q->type == GGML_TYPE_F32);
GGML_ASSERT(packed_k->type == GGML_TYPE_TBQ3_0 ||
packed_k->type == GGML_TYPE_TBQ4_0 ||
packed_k->type == GGML_TYPE_TBQ3_TCQ);
GGML_ASSERT(q->ne[0] == 128);
GGML_ASSERT(packed_k->ne[0] == 128);

const int64_t n_heads = q->ne[1];
const int64_t n_kv_tokens = packed_k->ne[1];

GGML_ASSERT(n_kv_heads > 0);
GGML_ASSERT((n_heads % n_kv_heads) == 0);
GGML_ASSERT(packed_k->ne[2] == (int64_t) n_kv_heads);
GGML_ASSERT(packed_k->ne[3] == q->ne[3]);

const int64_t ne[4] = { n_kv_tokens, n_heads, q->ne[2], q->ne[3] };
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

int32_t params[1] = { n_kv_heads };
ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_ATTN_SCORE_TBQ;
result->src[0] = q;
result->src[1] = packed_k;

return result;
}

// ggml_attn_score_polar_impl
//
static struct ggml_tensor * ggml_attn_score_polar_impl(
struct ggml_context * ctx,
struct ggml_tensor * q,
struct ggml_tensor * packed_k,
int n_kv_heads,
bool use_qjl,
bool q_preht) {
GGML_ASSERT(q != NULL);
GGML_ASSERT(packed_k != NULL);
GGML_ASSERT(q->type == GGML_TYPE_F32);
GGML_ASSERT(packed_k->type == GGML_TYPE_Q4_POLAR);
GGML_ASSERT(q->ne[0] == 128);
GGML_ASSERT(packed_k->ne[0] == 128);

const int64_t n_heads = q->ne[1];
const int64_t n_kv_tokens = packed_k->ne[1];

GGML_ASSERT(n_kv_heads > 0);
GGML_ASSERT((n_heads % n_kv_heads) == 0);
GGML_ASSERT(packed_k->ne[2] == (int64_t) n_kv_heads);
GGML_ASSERT(packed_k->ne[3] == q->ne[3]);

const int64_t ne[4] = { n_kv_tokens, n_heads, q->ne[2], q->ne[3] };
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

int32_t params[3] = { n_kv_heads, use_qjl ? 1 : 0, q_preht ? 1 : 0 };
ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_ATTN_SCORE_POLAR;
result->src[0] = q;
result->src[1] = packed_k;

return result;
}

// ggml_attn_score_polar
//
struct ggml_tensor * ggml_attn_score_polar(
struct ggml_context * ctx,
struct ggml_tensor * q,
struct ggml_tensor * packed_k,
int n_kv_heads,
bool use_qjl) {
return ggml_attn_score_polar_impl(ctx, q, packed_k, n_kv_heads, use_qjl, false);
}

// ggml_attn_score_polar_preht
//
struct ggml_tensor * ggml_attn_score_polar_preht(
struct ggml_context * ctx,
struct ggml_tensor * q_preht,
struct ggml_tensor * packed_k,
int n_kv_heads,
bool use_qjl) {
return ggml_attn_score_polar_impl(ctx, q_preht, packed_k, n_kv_heads, use_qjl, true);
}

`;
    c = c.replace(implAnchor, impl + implAnchor);
  }

  // ---- commit: only reached when both files validated ---------------------
  const headerChanged = header !== headerOriginal;
  const cChanged = c !== cOriginal;
  if (!dryRun) {
    if (headerChanged) fs.writeFileSync(headerPath, header, "utf8");
    if (cChanged) fs.writeFileSync(cPath, c, "utf8");
  }
  return { changed: (headerChanged || cChanged) && !dryRun, headerPath, cPath };
}
|
|
1255
|
+
|
|
1256
|
+
/**
 * Declares the TBQ / Polar / fused-attention pipeline getters in
 * `ggml/src/ggml-metal/ggml-metal-device.h`.
 *
 * Two paths:
 *  - header already carries SENTINEL_TBQ_POLAR_ATTN: only the fused-attention
 *    getter is added (after the polar_preht declaration) if it is missing;
 *  - otherwise the full declaration set is inserted after the QJL getter.
 *
 * Unlike before, the upgrade path now fails loudly when its anchor is absent
 * instead of returning `changed: false` with the declaration still missing.
 *
 * @param {string} cacheDir - Root of the cached source checkout.
 * @param {{ dryRun: boolean }} options - When `dryRun` is true nothing is written.
 * @returns {{ changed: boolean, path: string }} Whether the header was rewritten.
 * @throws {Error} When the anchor required by the selected path is not found.
 */
function patchMetalTbqPolarDeviceHeader(cacheDir, { dryRun }) {
  const headerPath = path.join(
    cacheDir,
    "ggml",
    "src",
    "ggml-metal",
    "ggml-metal-device.h",
  );
  const prehtDecl = `struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_attn_score_polar_preht(
ggml_metal_library_t lib);`;
  const fusedDecl = `struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_fused_attn_qjl_tbq(
ggml_metal_library_t lib);`;
  const original = fs.readFileSync(headerPath, "utf8");

  if (original.includes(SENTINEL_TBQ_POLAR_ATTN)) {
    // Upgrade of an earlier TBQ/Polar patch: only the fused getter can be missing.
    if (
      original.includes("ggml_metal_library_get_pipeline_fused_attn_qjl_tbq")
    ) {
      return { changed: false, path: headerPath };
    }
    if (!original.includes(prehtDecl)) {
      throw new Error(
        `[metal-tbq-polar-attn] device.h polar_preht pipeline anchor not found at ${headerPath}`,
      );
    }
    const upgraded = original.replace(
      prehtDecl,
      `${prehtDecl}

${fusedDecl}`,
    );
    if (!dryRun) fs.writeFileSync(headerPath, upgraded, "utf8");
    return { changed: !dryRun, path: headerPath };
  }

  const anchor = `${SENTINEL_QJL_ATTN}
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_attn_score_qjl(
ggml_metal_library_t lib);`;
  if (!original.includes(anchor)) {
    throw new Error(
      `[metal-tbq-polar-attn] device.h QJL pipeline anchor not found at ${headerPath}`,
    );
  }
  const patched = original.replace(
    anchor,
    `${anchor}

${SENTINEL_TBQ_POLAR_ATTN}
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_attn_score_tbq(
ggml_metal_library_t lib,
enum ggml_type type);

struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_attn_score_polar(
ggml_metal_library_t lib);

${prehtDecl}

${fusedDecl}`,
  );
  if (!dryRun) fs.writeFileSync(headerPath, patched, "utf8");
  return { changed: !dryRun, path: headerPath };
}
|
|
1310
|
+
|
|
1311
|
+
/**
 * Adds the TBQ / Polar / fused-attention pipeline getters to
 * `ggml/src/ggml-metal/ggml-metal-device.cpp`, inserting them immediately
 * before `ggml_metal_library_get_pipeline_bin`.
 *
 * Two paths:
 *  - file already carries SENTINEL_TBQ_POLAR_ATTN: only the fused-attention
 *    getter is added if it is missing;
 *  - otherwise the sentinel, the TBQ kernel-name helper and all four getters
 *    are inserted.
 *
 * The four getters differ only in name, parameter list and kernel-name
 * expression, so their C++ text is produced from one template. The upgrade
 * path now throws when the insertion anchor is absent rather than returning
 * `changed: false` with the getter still missing.
 *
 * @param {string} cacheDir - Root of the cached source checkout.
 * @param {{ dryRun: boolean }} options - When `dryRun` is true nothing is written.
 * @returns {{ changed: boolean, path: string }} Whether the file was rewritten.
 * @throws {Error} When the `get_pipeline_bin` anchor is not found.
 */
function patchMetalTbqPolarDeviceCpp(cacheDir, { dryRun }) {
  const cppPath = path.join(
    cacheDir,
    "ggml",
    "src",
    "ggml-metal",
    "ggml-metal-device.cpp",
  );
  const anchor = `ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_bin(ggml_metal_library_t lib, const ggml_tensor * op, int32_t n_fuse) {`;

  // C++ source of one pipeline getter, followed by a blank line. `opName` is
  // both the function-name suffix and the prefix used in the log/abort text.
  const pipelineGetter = (opName, params, kernelNameExpr) =>
    `ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_${opName}(${params}) {
const char * name = ${kernelNameExpr};
ggml_metal_pipeline_with_params res = ggml_metal_library_get_pipeline(lib, name);
if (!res.pipeline) {
res = ggml_metal_library_compile_pipeline(lib, name, name, nullptr);
}
if (!res.pipeline) {
GGML_LOG_ERROR("${opName}: kernel '%s' missing from default.metallib\\n", name);
GGML_ABORT("${opName}: pipeline compile failed");
}
res.nr0 = 1;
res.nr1 = 1;
res.nsg = 1;
res.smem = 0;
return res;
}

`;
  const fusedGetter = pipelineGetter(
    "fused_attn_qjl_tbq",
    "ggml_metal_library_t lib",
    `"kernel_fused_attn_qjl_tbq3_f32"`,
  );

  const original = fs.readFileSync(cppPath, "utf8");

  if (original.includes(SENTINEL_TBQ_POLAR_ATTN)) {
    // Upgrade of an earlier TBQ/Polar patch: only the fused getter can be missing.
    if (
      original.includes("ggml_metal_library_get_pipeline_fused_attn_qjl_tbq")
    ) {
      return { changed: false, path: cppPath };
    }
    if (!original.includes(anchor)) {
      throw new Error(
        `[metal-tbq-polar-attn] device.cpp pipeline anchor not found at ${cppPath}`,
      );
    }
    const upgraded = original.replace(anchor, fusedGetter + anchor);
    if (!dryRun) fs.writeFileSync(cppPath, upgraded, "utf8");
    return { changed: !dryRun, path: cppPath };
  }

  if (!original.includes(anchor)) {
    throw new Error(
      `[metal-tbq-polar-attn] device.cpp pipeline anchor not found at ${cppPath}`,
    );
  }
  const kernelNameHelper = `static const char * eliza_metal_tbq_kernel_name(ggml_type type) {
switch (type) {
case GGML_TYPE_TBQ3_0: return "kernel_turbo3_dot_multi";
case GGML_TYPE_TBQ4_0: return "kernel_turbo4_dot_multi";
case GGML_TYPE_TBQ3_TCQ: return "kernel_turbo3_tcq_dot_multi";
default: GGML_ABORT("unsupported TurboQuant attention score type");
}
}

`;
  const helper = [
    `${SENTINEL_TBQ_POLAR_ATTN}\n`,
    kernelNameHelper,
    pipelineGetter(
      "attn_score_tbq",
      "ggml_metal_library_t lib, ggml_type type",
      "eliza_metal_tbq_kernel_name(type)",
    ),
    pipelineGetter(
      "attn_score_polar",
      "ggml_metal_library_t lib",
      `"kernel_mul_mv_q4_polar_f32"`,
    ),
    pipelineGetter(
      "attn_score_polar_preht",
      "ggml_metal_library_t lib",
      `"kernel_attn_score_q4_polar_preht_f32"`,
    ),
    fusedGetter,
  ].join("");
  const patched = original.replace(anchor, helper + anchor);
  if (!dryRun) fs.writeFileSync(cppPath, patched, "utf8");
  return { changed: !dryRun, path: cppPath };
}
|
|
1438
|
+
|
|
1439
|
+
/**
 * Declares the TBQ / Polar / fused-attention op encoders in
 * `ggml/src/ggml-metal/ggml-metal-ops.h`.
 *
 * Two paths:
 *  - header already carries SENTINEL_TBQ_POLAR_ATTN: only the fused-attention
 *    declaration is added (after the polar one) if it is missing;
 *  - otherwise all three declarations are inserted after the QJL declaration.
 *
 * The upgrade path now throws when its anchor is absent instead of returning
 * `changed: false` with the declaration still missing.
 *
 * @param {string} cacheDir - Root of the cached source checkout.
 * @param {{ dryRun: boolean }} options - When `dryRun` is true nothing is written.
 * @returns {{ changed: boolean, path: string }} Whether the header was rewritten.
 * @throws {Error} When the anchor required by the selected path is not found.
 */
function patchMetalTbqPolarOpsHeader(cacheDir, { dryRun }) {
  const headerPath = path.join(
    cacheDir,
    "ggml",
    "src",
    "ggml-metal",
    "ggml-metal-ops.h",
  );
  const polarDecl = `int ggml_metal_op_attn_score_polar(ggml_metal_op_t ctx, int idx);`;
  const fusedDecl = `int ggml_metal_op_fused_attn_qjl_tbq(ggml_metal_op_t ctx, int idx);`;
  const original = fs.readFileSync(headerPath, "utf8");

  if (original.includes(SENTINEL_TBQ_POLAR_ATTN)) {
    // Upgrade of an earlier TBQ/Polar patch: only the fused declaration can be missing.
    if (original.includes("ggml_metal_op_fused_attn_qjl_tbq")) {
      return { changed: false, path: headerPath };
    }
    if (!original.includes(polarDecl)) {
      throw new Error(
        `[metal-tbq-polar-attn] ops.h polar declaration anchor not found at ${headerPath}`,
      );
    }
    const upgraded = original.replace(
      polarDecl,
      `${polarDecl}
${fusedDecl}`,
    );
    if (!dryRun) fs.writeFileSync(headerPath, upgraded, "utf8");
    return { changed: !dryRun, path: headerPath };
  }

  const anchor = `${SENTINEL_QJL_ATTN}
int ggml_metal_op_attn_score_qjl (ggml_metal_op_t ctx, int idx);`;
  if (!original.includes(anchor)) {
    throw new Error(
      `[metal-tbq-polar-attn] ops.h QJL declaration anchor not found at ${headerPath}`,
    );
  }
  const patched = original.replace(
    anchor,
    `${anchor}
${SENTINEL_TBQ_POLAR_ATTN}
int ggml_metal_op_attn_score_tbq (ggml_metal_op_t ctx, int idx);
${polarDecl}
${fusedDecl}`,
  );
  if (!dryRun) fs.writeFileSync(headerPath, patched, "utf8");
  return { changed: !dryRun, path: headerPath };
}
|
|
1477
|
+
|
|
1478
|
+
function patchMetalTbqPolarOpsCpp(cacheDir, { dryRun }) {
|
|
1479
|
+
const opsPath = path.join(
|
|
1480
|
+
cacheDir,
|
|
1481
|
+
"ggml",
|
|
1482
|
+
"src",
|
|
1483
|
+
"ggml-metal",
|
|
1484
|
+
"ggml-metal-ops.cpp",
|
|
1485
|
+
);
|
|
1486
|
+
const original = fs.readFileSync(opsPath, "utf8");
|
|
1487
|
+
if (original.includes(SENTINEL_TBQ_POLAR_ATTN)) {
|
|
1488
|
+
let patched = original.replace(
|
|
1489
|
+
"case GGML_TYPE_TBQ4_0: return 1u;",
|
|
1490
|
+
"case GGML_TYPE_TBQ4_0: return 4u;",
|
|
1491
|
+
);
|
|
1492
|
+
if (!patched.includes("eliza_tbq_blocks_per_threadgroup")) {
|
|
1493
|
+
patched = patched.replace(
|
|
1494
|
+
`static inline uint32_t eliza_tbq_blocks_per_row(ggml_type type) {
|
|
1495
|
+
switch (type) {
|
|
1496
|
+
case GGML_TYPE_TBQ3_0: return 4u;
|
|
1497
|
+
case GGML_TYPE_TBQ4_0: return 4u;
|
|
1498
|
+
case GGML_TYPE_TBQ3_TCQ: return 1u;
|
|
1499
|
+
default: GGML_ABORT("unsupported TurboQuant attention score type");
|
|
1500
|
+
}
|
|
1501
|
+
}
|
|
1502
|
+
`,
|
|
1503
|
+
`static inline uint32_t eliza_tbq_blocks_per_row(ggml_type type) {
|
|
1504
|
+
switch (type) {
|
|
1505
|
+
case GGML_TYPE_TBQ3_0: return 4u;
|
|
1506
|
+
case GGML_TYPE_TBQ4_0: return 4u;
|
|
1507
|
+
case GGML_TYPE_TBQ3_TCQ: return 1u;
|
|
1508
|
+
default: GGML_ABORT("unsupported TurboQuant attention score type");
|
|
1509
|
+
}
|
|
1510
|
+
}
|
|
1511
|
+
|
|
1512
|
+
static inline uint32_t eliza_tbq_blocks_per_threadgroup(ggml_type type) {
|
|
1513
|
+
// M4 Max multiblock/autotune bench best medians (2026-05-12):
|
|
1514
|
+
// TBQ3=16, TBQ4=8, TBQ3_TCQ=32. Voice-mode policy can still force N=1
|
|
1515
|
+
// at a higher scheduler layer when barge-in latency dominates.
|
|
1516
|
+
switch (type) {
|
|
1517
|
+
case GGML_TYPE_TBQ3_0: return eliza_env_u32("ELIZA_METAL_TBQ3_BLOCKS_PER_TG", 16u, 1u, 64u);
|
|
1518
|
+
case GGML_TYPE_TBQ4_0: return eliza_env_u32("ELIZA_METAL_TBQ4_BLOCKS_PER_TG", 8u, 1u, 64u);
|
|
1519
|
+
case GGML_TYPE_TBQ3_TCQ: return eliza_env_u32("ELIZA_METAL_TBQ3_TCQ_BLOCKS_PER_TG", 32u, 1u, 64u);
|
|
1520
|
+
default: GGML_ABORT("unsupported TurboQuant attention score type");
|
|
1521
|
+
}
|
|
1522
|
+
}
|
|
1523
|
+
`,
|
|
1524
|
+
);
|
|
1525
|
+
}
|
|
1526
|
+
patched = patched.replace(
|
|
1527
|
+
"/* blocks_per_threadgroup = */ 8u,",
|
|
1528
|
+
"/* blocks_per_threadgroup = */ eliza_tbq_blocks_per_threadgroup(ktype),",
|
|
1529
|
+
);
|
|
1530
|
+
if (!patched.includes("ggml_metal_op_fused_attn_qjl_tbq")) {
|
|
1531
|
+
const funcAnchor = `static int ggml_metal_op_encode_impl(ggml_metal_op_t ctx, int idx) {`;
|
|
1532
|
+
patched = patched.replace(
|
|
1533
|
+
funcAnchor,
|
|
1534
|
+
`int ggml_metal_op_fused_attn_qjl_tbq(ggml_metal_op_t ctx, int idx) {
|
|
1535
|
+
ggml_tensor * op = ctx->node(idx);
|
|
1536
|
+
|
|
1537
|
+
ggml_metal_library_t lib = ctx->lib;
|
|
1538
|
+
ggml_metal_encoder_t enc = ctx->enc;
|
|
1539
|
+
|
|
1540
|
+
const ggml_tensor * q = op->src[0];
|
|
1541
|
+
const ggml_tensor * pk = op->src[1];
|
|
1542
|
+
const ggml_tensor * pv = op->src[2];
|
|
1543
|
+
|
|
1544
|
+
GGML_ASSERT(q != nullptr);
|
|
1545
|
+
GGML_ASSERT(pk != nullptr);
|
|
1546
|
+
GGML_ASSERT(pv != nullptr);
|
|
1547
|
+
GGML_ASSERT(q->type == GGML_TYPE_F32);
|
|
1548
|
+
GGML_ASSERT(pk->type == GGML_TYPE_QJL1_256);
|
|
1549
|
+
GGML_ASSERT(pv->type == GGML_TYPE_TBQ3_0);
|
|
1550
|
+
GGML_ASSERT(op->type == GGML_TYPE_F32);
|
|
1551
|
+
GGML_ASSERT(q->ne[0] == 256);
|
|
1552
|
+
GGML_ASSERT(pk->ne[0] == 128);
|
|
1553
|
+
GGML_ASSERT(pv->ne[0] == 128);
|
|
1554
|
+
GGML_ASSERT(op->ne[0] == 128);
|
|
1555
|
+
GGML_ASSERT(ggml_is_contiguous_rows(q));
|
|
1556
|
+
GGML_ASSERT(ggml_is_contiguous_rows(pk));
|
|
1557
|
+
GGML_ASSERT(ggml_is_contiguous_rows(pv));
|
|
1558
|
+
GGML_ASSERT(ggml_is_contiguous_rows(op));
|
|
1559
|
+
|
|
1560
|
+
const int32_t * params = (const int32_t *) op->op_params;
|
|
1561
|
+
const uint32_t n_kv_heads = (uint32_t) params[0];
|
|
1562
|
+
union { int32_t i; float f; } scale_bits;
|
|
1563
|
+
scale_bits.i = params[1];
|
|
1564
|
+
|
|
1565
|
+
const uint32_t n_heads = (uint32_t) q->ne[1];
|
|
1566
|
+
const uint32_t n_q_pos = (uint32_t) q->ne[2];
|
|
1567
|
+
const uint32_t n_kv = (uint32_t) pk->ne[1];
|
|
1568
|
+
const int64_t ne3 = q->ne[3];
|
|
1569
|
+
|
|
1570
|
+
GGML_ASSERT(n_kv_heads > 0);
|
|
1571
|
+
GGML_ASSERT((n_heads % n_kv_heads) == 0);
|
|
1572
|
+
GGML_ASSERT(pk->ne[2] == (int64_t) n_kv_heads);
|
|
1573
|
+
GGML_ASSERT(pv->ne[1] == (int64_t) n_kv);
|
|
1574
|
+
GGML_ASSERT(pv->ne[2] == (int64_t) n_kv_heads);
|
|
1575
|
+
GGML_ASSERT(pk->ne[3] == ne3);
|
|
1576
|
+
GGML_ASSERT(pv->ne[3] == ne3);
|
|
1577
|
+
GGML_ASSERT(op->ne[1] == (int64_t) n_heads);
|
|
1578
|
+
GGML_ASSERT(op->ne[2] == (int64_t) n_q_pos);
|
|
1579
|
+
GGML_ASSERT(op->ne[3] == ne3);
|
|
1580
|
+
GGML_ASSERT(q->nb[1] == (size_t) q->ne[0] * ggml_type_size(q->type));
|
|
1581
|
+
GGML_ASSERT(q->nb[2] == (size_t) n_heads * q->nb[1]);
|
|
1582
|
+
GGML_ASSERT(pk->nb[1] == ggml_row_size(GGML_TYPE_QJL1_256, 128));
|
|
1583
|
+
GGML_ASSERT(pk->nb[2] == (size_t) n_kv * pk->nb[1]);
|
|
1584
|
+
GGML_ASSERT(pv->nb[1] == ggml_row_size(GGML_TYPE_TBQ3_0, 128));
|
|
1585
|
+
GGML_ASSERT(pv->nb[2] == (size_t) n_kv * pv->nb[1]);
|
|
1586
|
+
GGML_ASSERT(op->nb[1] == (size_t) op->ne[0] * ggml_type_size(op->type));
|
|
1587
|
+
GGML_ASSERT(op->nb[2] == (size_t) n_heads * op->nb[1]);
|
|
1588
|
+
|
|
1589
|
+
eliza_fused_attn_qjl_tbq_args args = {
|
|
1590
|
+
/* head_dim = */ 128u,
|
|
1591
|
+
/* proj_dim = */ 256u,
|
|
1592
|
+
/* n_heads = */ n_heads,
|
|
1593
|
+
/* n_kv_heads = */ n_kv_heads,
|
|
1594
|
+
/* n_q_pos = */ n_q_pos,
|
|
1595
|
+
/* n_kv = */ n_kv,
|
|
1596
|
+
/* kv_tile = */ (uint32_t) params[3],
|
|
1597
|
+
/* v_use_qjl = */ (uint32_t) params[2],
|
|
1598
|
+
/* scale = */ scale_bits.f,
|
|
1599
|
+
/* causal = */ (uint32_t) params[4],
|
|
1600
|
+
/* q_pos_base = */ (uint32_t) params[5],
|
|
1601
|
+
};
|
|
1602
|
+
|
|
1603
|
+
auto pipeline = ggml_metal_library_get_pipeline_fused_attn_qjl_tbq(lib);
|
|
1604
|
+
|
|
1605
|
+
const ggml_metal_buffer_id q_base = ggml_metal_get_buffer_id(q);
|
|
1606
|
+
const ggml_metal_buffer_id pk_base = ggml_metal_get_buffer_id(pk);
|
|
1607
|
+
const ggml_metal_buffer_id pv_base = ggml_metal_get_buffer_id(pv);
|
|
1608
|
+
const ggml_metal_buffer_id dst_base = ggml_metal_get_buffer_id(op);
|
|
1609
|
+
|
|
1610
|
+
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
1611
|
+
ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 4);
|
|
1612
|
+
|
|
1613
|
+
for (int64_t i3 = 0; i3 < ne3; ++i3) {
|
|
1614
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(q_base, (size_t) i3 * q->nb[3]), 0);
|
|
1615
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(pk_base, (size_t) i3 * pk->nb[3]), 1);
|
|
1616
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(pv_base, (size_t) i3 * pv->nb[3]), 2);
|
|
1617
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(dst_base, (size_t) i3 * op->nb[3]), 3);
|
|
1618
|
+
ggml_metal_encoder_dispatch_threadgroups(enc, (int) n_heads, (int) n_q_pos, 1, 32, 1, 1);
|
|
1619
|
+
}
|
|
1620
|
+
|
|
1621
|
+
return 1;
|
|
1622
|
+
}
|
|
1623
|
+
|
|
1624
|
+
${funcAnchor}`,
|
|
1625
|
+
);
|
|
1626
|
+
}
|
|
1627
|
+
if (!patched.includes("case GGML_OP_FUSED_ATTN_QJL_TBQ:")) {
|
|
1628
|
+
patched = patched.replace(
|
|
1629
|
+
` case GGML_OP_ATTN_SCORE_POLAR:
|
|
1630
|
+
{
|
|
1631
|
+
n_fuse = ggml_metal_op_attn_score_polar(ctx, idx);
|
|
1632
|
+
} break;`,
|
|
1633
|
+
` case GGML_OP_ATTN_SCORE_POLAR:
|
|
1634
|
+
{
|
|
1635
|
+
n_fuse = ggml_metal_op_attn_score_polar(ctx, idx);
|
|
1636
|
+
} break;
|
|
1637
|
+
case GGML_OP_FUSED_ATTN_QJL_TBQ:
|
|
1638
|
+
{
|
|
1639
|
+
n_fuse = ggml_metal_op_fused_attn_qjl_tbq(ctx, idx);
|
|
1640
|
+
} break;`,
|
|
1641
|
+
);
|
|
1642
|
+
}
|
|
1643
|
+
if (patched !== original && !dryRun)
|
|
1644
|
+
fs.writeFileSync(opsPath, patched, "utf8");
|
|
1645
|
+
return { changed: patched !== original && !dryRun, path: opsPath };
|
|
1646
|
+
}
|
|
1647
|
+
const tcqCodebook = readTcqCodebookLiteral();
|
|
1648
|
+
const funcAnchor = `static int ggml_metal_op_encode_impl(ggml_metal_op_t ctx, int idx) {`;
|
|
1649
|
+
if (!original.includes(funcAnchor)) {
|
|
1650
|
+
throw new Error(
|
|
1651
|
+
`[metal-tbq-polar-attn] ops.cpp encode anchor not found at ${opsPath}`,
|
|
1652
|
+
);
|
|
1653
|
+
}
|
|
1654
|
+
const opFuncs = `${SENTINEL_TBQ_POLAR_ATTN}
|
|
1655
|
+
struct eliza_tbq_score_args {
|
|
1656
|
+
uint32_t head_dim;
|
|
1657
|
+
uint32_t n_kv;
|
|
1658
|
+
uint32_t kv_stride_blocks;
|
|
1659
|
+
uint32_t q_head;
|
|
1660
|
+
uint32_t head_offset_bytes;
|
|
1661
|
+
uint32_t blocks_per_threadgroup;
|
|
1662
|
+
};
|
|
1663
|
+
|
|
1664
|
+
struct eliza_polar_score_args {
|
|
1665
|
+
uint32_t n_rows;
|
|
1666
|
+
uint32_t head_dim;
|
|
1667
|
+
uint32_t use_qjl;
|
|
1668
|
+
};
|
|
1669
|
+
|
|
1670
|
+
struct eliza_polar_preht_score_args {
|
|
1671
|
+
uint32_t head_dim;
|
|
1672
|
+
uint32_t n_kv;
|
|
1673
|
+
uint32_t kv_stride_blocks;
|
|
1674
|
+
uint32_t q_head;
|
|
1675
|
+
uint32_t head_offset_bytes;
|
|
1676
|
+
uint32_t use_qjl;
|
|
1677
|
+
};
|
|
1678
|
+
|
|
1679
|
+
static const float k_eliza_tbq3_tcq_codebook[512] = {
|
|
1680
|
+
${tcqCodebook}
|
|
1681
|
+
};
|
|
1682
|
+
|
|
1683
|
+
static inline uint32_t eliza_tbq_blocks_per_row(ggml_type type) {
|
|
1684
|
+
switch (type) {
|
|
1685
|
+
case GGML_TYPE_TBQ3_0: return 4u;
|
|
1686
|
+
case GGML_TYPE_TBQ4_0: return 4u;
|
|
1687
|
+
case GGML_TYPE_TBQ3_TCQ: return 1u;
|
|
1688
|
+
default: GGML_ABORT("unsupported TurboQuant attention score type");
|
|
1689
|
+
}
|
|
1690
|
+
}
|
|
1691
|
+
|
|
1692
|
+
static inline uint32_t eliza_tbq_blocks_per_threadgroup(ggml_type type) {
|
|
1693
|
+
// M4 Max multiblock/autotune bench best medians (2026-05-12):
|
|
1694
|
+
// TBQ3=16, TBQ4=8, TBQ3_TCQ=32. Voice-mode policy can still force N=1
|
|
1695
|
+
// at a higher scheduler layer when barge-in latency dominates.
|
|
1696
|
+
switch (type) {
|
|
1697
|
+
case GGML_TYPE_TBQ3_0: return eliza_env_u32("ELIZA_METAL_TBQ3_BLOCKS_PER_TG", 16u, 1u, 64u);
|
|
1698
|
+
case GGML_TYPE_TBQ4_0: return eliza_env_u32("ELIZA_METAL_TBQ4_BLOCKS_PER_TG", 8u, 1u, 64u);
|
|
1699
|
+
case GGML_TYPE_TBQ3_TCQ: return eliza_env_u32("ELIZA_METAL_TBQ3_TCQ_BLOCKS_PER_TG", 32u, 1u, 64u);
|
|
1700
|
+
default: GGML_ABORT("unsupported TurboQuant attention score type");
|
|
1701
|
+
}
|
|
1702
|
+
}
|
|
1703
|
+
|
|
1704
|
+
int ggml_metal_op_attn_score_tbq(ggml_metal_op_t ctx, int idx) {
|
|
1705
|
+
ggml_tensor * op = ctx->node(idx);
|
|
1706
|
+
|
|
1707
|
+
ggml_metal_library_t lib = ctx->lib;
|
|
1708
|
+
ggml_metal_encoder_t enc = ctx->enc;
|
|
1709
|
+
|
|
1710
|
+
const ggml_tensor * q = op->src[0];
|
|
1711
|
+
const ggml_tensor * pk = op->src[1];
|
|
1712
|
+
const ggml_type ktype = pk->type;
|
|
1713
|
+
|
|
1714
|
+
GGML_ASSERT(q != nullptr);
|
|
1715
|
+
GGML_ASSERT(pk != nullptr);
|
|
1716
|
+
GGML_ASSERT(q->type == GGML_TYPE_F32);
|
|
1717
|
+
GGML_ASSERT(ktype == GGML_TYPE_TBQ3_0 || ktype == GGML_TYPE_TBQ4_0 || ktype == GGML_TYPE_TBQ3_TCQ);
|
|
1718
|
+
GGML_ASSERT(op->type == GGML_TYPE_F32);
|
|
1719
|
+
GGML_ASSERT(q->ne[0] == 128);
|
|
1720
|
+
GGML_ASSERT(pk->ne[0] == 128);
|
|
1721
|
+
GGML_ASSERT(ggml_is_contiguous_rows(q));
|
|
1722
|
+
GGML_ASSERT(ggml_is_contiguous_rows(pk));
|
|
1723
|
+
GGML_ASSERT(ggml_is_contiguous_rows(op));
|
|
1724
|
+
|
|
1725
|
+
const uint32_t n_heads = (uint32_t) q->ne[1];
|
|
1726
|
+
const uint32_t n_kv_heads = (uint32_t) ((const int32_t *) op->op_params)[0];
|
|
1727
|
+
const uint32_t n_tokens = (uint32_t) pk->ne[1];
|
|
1728
|
+
const int64_t n_batch = q->ne[2];
|
|
1729
|
+
const int64_t ne3 = q->ne[3];
|
|
1730
|
+
|
|
1731
|
+
GGML_ASSERT(n_kv_heads > 0);
|
|
1732
|
+
GGML_ASSERT((n_heads % n_kv_heads) == 0);
|
|
1733
|
+
GGML_ASSERT(pk->ne[2] == (int64_t) n_kv_heads);
|
|
1734
|
+
GGML_ASSERT(pk->ne[3] == ne3);
|
|
1735
|
+
GGML_ASSERT(op->ne[0] == (int64_t) n_tokens);
|
|
1736
|
+
GGML_ASSERT(op->ne[1] == (int64_t) n_heads);
|
|
1737
|
+
GGML_ASSERT(op->ne[2] == n_batch);
|
|
1738
|
+
GGML_ASSERT(op->ne[3] == ne3);
|
|
1739
|
+
GGML_ASSERT(pk->nb[1] == ggml_row_size(ktype, 128));
|
|
1740
|
+
GGML_ASSERT(pk->nb[2] == (size_t) n_tokens * pk->nb[1]);
|
|
1741
|
+
|
|
1742
|
+
eliza_tbq_score_args args = {
|
|
1743
|
+
/* head_dim = */ 128u,
|
|
1744
|
+
/* n_kv = */ n_tokens,
|
|
1745
|
+
/* kv_stride_blocks = */ eliza_tbq_blocks_per_row(ktype),
|
|
1746
|
+
/* q_head = */ 0u,
|
|
1747
|
+
/* head_offset_bytes = */ 0u,
|
|
1748
|
+
/* blocks_per_threadgroup = */ eliza_tbq_blocks_per_threadgroup(ktype),
|
|
1749
|
+
};
|
|
1750
|
+
|
|
1751
|
+
auto pipeline = ggml_metal_library_get_pipeline_attn_score_tbq(lib, ktype);
|
|
1752
|
+
|
|
1753
|
+
const ggml_metal_buffer_id q_base = ggml_metal_get_buffer_id(q);
|
|
1754
|
+
const ggml_metal_buffer_id pk_base = ggml_metal_get_buffer_id(pk);
|
|
1755
|
+
const ggml_metal_buffer_id dst_base = ggml_metal_get_buffer_id(op);
|
|
1756
|
+
const uint32_t gqa = n_heads / n_kv_heads;
|
|
1757
|
+
|
|
1758
|
+
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
1759
|
+
if (ktype == GGML_TYPE_TBQ3_TCQ) {
|
|
1760
|
+
ggml_metal_encoder_set_bytes(enc, (void *) k_eliza_tbq3_tcq_codebook, sizeof(k_eliza_tbq3_tcq_codebook), 3);
|
|
1761
|
+
ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 4);
|
|
1762
|
+
} else {
|
|
1763
|
+
ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 3);
|
|
1764
|
+
}
|
|
1765
|
+
|
|
1766
|
+
const int token_groups = (int) ((n_tokens + args.blocks_per_threadgroup - 1u) / args.blocks_per_threadgroup);
|
|
1767
|
+
for (int64_t i3 = 0; i3 < ne3; ++i3) {
|
|
1768
|
+
const size_t q_i3 = (size_t) i3 * q->nb[3];
|
|
1769
|
+
const size_t pk_i3 = (size_t) i3 * pk->nb[3];
|
|
1770
|
+
const size_t dst_i3 = (size_t) i3 * op->nb[3];
|
|
1771
|
+
for (int64_t ib = 0; ib < n_batch; ++ib) {
|
|
1772
|
+
for (uint32_t h = 0; h < n_heads; ++h) {
|
|
1773
|
+
const uint32_t h_k = h / gqa;
|
|
1774
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(q_base, q_i3 + (size_t) ib * q->nb[2] + (size_t) h * q->nb[1]), 0);
|
|
1775
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(pk_base, pk_i3 + (size_t) h_k * pk->nb[2]), 1);
|
|
1776
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(dst_base, dst_i3 + (size_t) ib * op->nb[2] + (size_t) h * op->nb[1]), 2);
|
|
1777
|
+
ggml_metal_encoder_dispatch_threadgroups(enc, token_groups, 1, 1, 32, 1, 1);
|
|
1778
|
+
}
|
|
1779
|
+
}
|
|
1780
|
+
}
|
|
1781
|
+
|
|
1782
|
+
return 1;
|
|
1783
|
+
}
|
|
1784
|
+
|
|
1785
|
+
int ggml_metal_op_attn_score_polar(ggml_metal_op_t ctx, int idx) {
|
|
1786
|
+
ggml_tensor * op = ctx->node(idx);
|
|
1787
|
+
|
|
1788
|
+
ggml_metal_library_t lib = ctx->lib;
|
|
1789
|
+
ggml_metal_encoder_t enc = ctx->enc;
|
|
1790
|
+
|
|
1791
|
+
const ggml_tensor * q = op->src[0];
|
|
1792
|
+
const ggml_tensor * pk = op->src[1];
|
|
1793
|
+
|
|
1794
|
+
GGML_ASSERT(q != nullptr);
|
|
1795
|
+
GGML_ASSERT(pk != nullptr);
|
|
1796
|
+
GGML_ASSERT(q->type == GGML_TYPE_F32);
|
|
1797
|
+
GGML_ASSERT(pk->type == GGML_TYPE_Q4_POLAR);
|
|
1798
|
+
GGML_ASSERT(op->type == GGML_TYPE_F32);
|
|
1799
|
+
GGML_ASSERT(q->ne[0] == 128);
|
|
1800
|
+
GGML_ASSERT(pk->ne[0] == 128);
|
|
1801
|
+
GGML_ASSERT(ggml_is_contiguous_rows(q));
|
|
1802
|
+
GGML_ASSERT(ggml_is_contiguous_rows(pk));
|
|
1803
|
+
GGML_ASSERT(ggml_is_contiguous_rows(op));
|
|
1804
|
+
|
|
1805
|
+
const int32_t * params = (const int32_t *) op->op_params;
|
|
1806
|
+
const uint32_t n_heads = (uint32_t) q->ne[1];
|
|
1807
|
+
const uint32_t n_kv_heads = (uint32_t) params[0];
|
|
1808
|
+
const uint32_t n_tokens = (uint32_t) pk->ne[1];
|
|
1809
|
+
const uint32_t use_qjl = (uint32_t) (params[1] != 0);
|
|
1810
|
+
const uint32_t q_preht = (uint32_t) (params[2] != 0);
|
|
1811
|
+
const int64_t n_batch = q->ne[2];
|
|
1812
|
+
const int64_t ne3 = q->ne[3];
|
|
1813
|
+
|
|
1814
|
+
GGML_ASSERT(n_kv_heads > 0);
|
|
1815
|
+
GGML_ASSERT((n_heads % n_kv_heads) == 0);
|
|
1816
|
+
GGML_ASSERT(pk->ne[2] == (int64_t) n_kv_heads);
|
|
1817
|
+
GGML_ASSERT(pk->ne[3] == ne3);
|
|
1818
|
+
GGML_ASSERT(op->ne[0] == (int64_t) n_tokens);
|
|
1819
|
+
GGML_ASSERT(op->ne[1] == (int64_t) n_heads);
|
|
1820
|
+
GGML_ASSERT(op->ne[2] == n_batch);
|
|
1821
|
+
GGML_ASSERT(op->ne[3] == ne3);
|
|
1822
|
+
GGML_ASSERT(pk->nb[1] == ggml_row_size(GGML_TYPE_Q4_POLAR, 128));
|
|
1823
|
+
GGML_ASSERT(pk->nb[2] == (size_t) n_tokens * pk->nb[1]);
|
|
1824
|
+
|
|
1825
|
+
const ggml_metal_buffer_id q_base = ggml_metal_get_buffer_id(q);
|
|
1826
|
+
const ggml_metal_buffer_id pk_base = ggml_metal_get_buffer_id(pk);
|
|
1827
|
+
const ggml_metal_buffer_id dst_base = ggml_metal_get_buffer_id(op);
|
|
1828
|
+
const uint32_t gqa = n_heads / n_kv_heads;
|
|
1829
|
+
|
|
1830
|
+
if (q_preht != 0u) {
|
|
1831
|
+
auto pipeline = ggml_metal_library_get_pipeline_attn_score_polar_preht(lib);
|
|
1832
|
+
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
1833
|
+
|
|
1834
|
+
for (int64_t i3 = 0; i3 < ne3; ++i3) {
|
|
1835
|
+
const size_t q_i3 = (size_t) i3 * q->nb[3];
|
|
1836
|
+
const size_t pk_i3 = (size_t) i3 * pk->nb[3];
|
|
1837
|
+
const size_t dst_i3 = (size_t) i3 * op->nb[3];
|
|
1838
|
+
for (int64_t ib = 0; ib < n_batch; ++ib) {
|
|
1839
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(q_base, q_i3 + (size_t) ib * q->nb[2]), 0);
|
|
1840
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(pk_base, pk_i3), 1);
|
|
1841
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(dst_base, dst_i3 + (size_t) ib * op->nb[2]), 2);
|
|
1842
|
+
for (uint32_t h = 0; h < n_heads; ++h) {
|
|
1843
|
+
const uint32_t h_k = h / gqa;
|
|
1844
|
+
eliza_polar_preht_score_args args = {
|
|
1845
|
+
/* head_dim = */ 128u,
|
|
1846
|
+
/* n_kv = */ n_tokens,
|
|
1847
|
+
/* kv_stride_blocks = */ 1u,
|
|
1848
|
+
/* q_head = */ h,
|
|
1849
|
+
/* head_offset_bytes = */ (uint32_t) ((size_t) h_k * pk->nb[2]),
|
|
1850
|
+
/* use_qjl = */ use_qjl,
|
|
1851
|
+
};
|
|
1852
|
+
ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 3);
|
|
1853
|
+
ggml_metal_encoder_dispatch_threadgroups(enc, (int) n_tokens, 1, 1, 32, 1, 1);
|
|
1854
|
+
}
|
|
1855
|
+
}
|
|
1856
|
+
}
|
|
1857
|
+
} else {
|
|
1858
|
+
eliza_polar_score_args args = {
|
|
1859
|
+
/* n_rows = */ n_tokens,
|
|
1860
|
+
/* head_dim = */ 128u,
|
|
1861
|
+
/* use_qjl = */ use_qjl,
|
|
1862
|
+
};
|
|
1863
|
+
|
|
1864
|
+
auto pipeline = ggml_metal_library_get_pipeline_attn_score_polar(lib);
|
|
1865
|
+
|
|
1866
|
+
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
1867
|
+
ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 3);
|
|
1868
|
+
|
|
1869
|
+
for (int64_t i3 = 0; i3 < ne3; ++i3) {
|
|
1870
|
+
const size_t q_i3 = (size_t) i3 * q->nb[3];
|
|
1871
|
+
const size_t pk_i3 = (size_t) i3 * pk->nb[3];
|
|
1872
|
+
const size_t dst_i3 = (size_t) i3 * op->nb[3];
|
|
1873
|
+
for (int64_t ib = 0; ib < n_batch; ++ib) {
|
|
1874
|
+
for (uint32_t h = 0; h < n_heads; ++h) {
|
|
1875
|
+
const uint32_t h_k = h / gqa;
|
|
1876
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(pk_base, pk_i3 + (size_t) h_k * pk->nb[2]), 0);
|
|
1877
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(q_base, q_i3 + (size_t) ib * q->nb[2] + (size_t) h * q->nb[1]), 1);
|
|
1878
|
+
ggml_metal_encoder_set_buffer(enc, eliza_metal_buffer_offset(dst_base, dst_i3 + (size_t) ib * op->nb[2] + (size_t) h * op->nb[1]), 2);
|
|
1879
|
+
ggml_metal_encoder_dispatch_threadgroups(enc, (int) n_tokens, 1, 1, 32, 1, 1);
|
|
1880
|
+
}
|
|
1881
|
+
}
|
|
1882
|
+
}
|
|
1883
|
+
}
|
|
1884
|
+
|
|
1885
|
+
return 1;
|
|
1886
|
+
}
|
|
1887
|
+
|
|
1888
|
+
`;
|
|
1889
|
+
let patched = original.replace(funcAnchor, opFuncs + funcAnchor);
|
|
1890
|
+
const switchAnchor = ` case GGML_OP_ATTN_SCORE_QJL:
|
|
1891
|
+
{
|
|
1892
|
+
n_fuse = ggml_metal_op_attn_score_qjl(ctx, idx);
|
|
1893
|
+
} break;`;
|
|
1894
|
+
if (!patched.includes(switchAnchor)) {
|
|
1895
|
+
throw new Error(
|
|
1896
|
+
`[metal-tbq-polar-attn] ops.cpp QJL switch anchor not found at ${opsPath}`,
|
|
1897
|
+
);
|
|
1898
|
+
}
|
|
1899
|
+
patched = patched.replace(
|
|
1900
|
+
switchAnchor,
|
|
1901
|
+
`${switchAnchor}
|
|
1902
|
+
case GGML_OP_ATTN_SCORE_TBQ:
|
|
1903
|
+
{
|
|
1904
|
+
n_fuse = ggml_metal_op_attn_score_tbq(ctx, idx);
|
|
1905
|
+
} break;
|
|
1906
|
+
case GGML_OP_ATTN_SCORE_POLAR:
|
|
1907
|
+
{
|
|
1908
|
+
n_fuse = ggml_metal_op_attn_score_polar(ctx, idx);
|
|
1909
|
+
} break;
|
|
1910
|
+
case GGML_OP_FUSED_ATTN_QJL_TBQ:
|
|
1911
|
+
{
|
|
1912
|
+
n_fuse = ggml_metal_op_fused_attn_qjl_tbq(ctx, idx);
|
|
1913
|
+
} break;`,
|
|
1914
|
+
);
|
|
1915
|
+
if (!dryRun) fs.writeFileSync(opsPath, patched, "utf8");
|
|
1916
|
+
return { changed: !dryRun, path: opsPath };
|
|
1917
|
+
}
|
|
1918
|
+
|
|
1919
|
+
/**
 * Patches `ggml/src/ggml-metal/ggml-metal-device.m` (under `cacheDir`) so it
 * contains `case` entries for GGML_OP_ATTN_SCORE_TBQ, GGML_OP_ATTN_SCORE_POLAR
 * and GGML_OP_FUSED_ATTN_QJL_TBQ. Judging by the error message below, the
 * target is presumably the Metal backend's supports_op switch (not visible here).
 *
 * Two paths:
 *  - The file already contains SENTINEL_TBQ_POLAR_ATTN: top it up in place
 *    (add TBQ4_0 to the TBQ type check, add the fused case if it is missing)
 *    and write only when the text actually changed.
 *  - Otherwise: require the QJL case followed by its SENTINEL_QJL_ATTN comment
 *    line and insert all three cases in front of it.
 *
 * @param {string} cacheDir Root directory that contains `ggml/src/ggml-metal`.
 * @param {{ dryRun: boolean }} options When `dryRun` is true nothing is written.
 * @returns {{ changed: boolean, path: string }} `changed` is always false in dry-run mode.
 * @throws {Error} When the QJL anchor is missing on the fresh path. The file
 *   read is not guarded, so a missing file propagates the fs error as well.
 */
function patchMetalTbqPolarSupportsOp(cacheDir, { dryRun }) {
|
|
1920
|
+
const deviceMPath = path.join(
|
|
1921
|
+
cacheDir,
|
|
1922
|
+
"ggml",
|
|
1923
|
+
"src",
|
|
1924
|
+
"ggml-metal",
|
|
1925
|
+
"ggml-metal-device.m",
|
|
1926
|
+
);
|
|
1927
|
+
const original = fs.readFileSync(deviceMPath, "utf8");
|
|
1928
|
+
// Upgrade path: the TBQ/Polar sentinel is already in the file (presumably
// written by an earlier revision of this patch), so only fill in what is missing.
if (original.includes(SENTINEL_TBQ_POLAR_ATTN)) {
|
|
1929
|
+
// Widen the TBQ3_0 / TBQ3_TCQ type check to also accept TBQ4_0. This is a
// no-op when that exact two-type text is not present.
let patched = original.replace(
|
|
1930
|
+
`(op->src[1]->type == GGML_TYPE_TBQ3_0 ||
|
|
1931
|
+
op->src[1]->type == GGML_TYPE_TBQ3_TCQ) &&`,
|
|
1932
|
+
`(op->src[1]->type == GGML_TYPE_TBQ3_0 ||
|
|
1933
|
+
op->src[1]->type == GGML_TYPE_TBQ4_0 ||
|
|
1934
|
+
op->src[1]->type == GGML_TYPE_TBQ3_TCQ) &&`,
|
|
1935
|
+
);
|
|
1936
|
+
// Add the fused QJL+TBQ case ahead of the QJL case. A string pattern only
// replaces the first match, so only the first QJL case label is affected.
if (!patched.includes("case GGML_OP_FUSED_ATTN_QJL_TBQ:")) {
|
|
1937
|
+
patched = patched.replace(
|
|
1938
|
+
` case GGML_OP_ATTN_SCORE_QJL:`,
|
|
1939
|
+
` case GGML_OP_FUSED_ATTN_QJL_TBQ:
|
|
1940
|
+
{
|
|
1941
|
+
const int32_t * params = (const int32_t *) op->op_params;
|
|
1942
|
+
const int64_t n_kv_heads = params[0];
|
|
1943
|
+
return has_simdgroup_reduction &&
|
|
1944
|
+
op->type == GGML_TYPE_F32 &&
|
|
1945
|
+
op->src[0] != NULL &&
|
|
1946
|
+
op->src[1] != NULL &&
|
|
1947
|
+
op->src[2] != NULL &&
|
|
1948
|
+
op->src[0]->type == GGML_TYPE_F32 &&
|
|
1949
|
+
op->src[1]->type == GGML_TYPE_QJL1_256 &&
|
|
1950
|
+
op->src[2]->type == GGML_TYPE_TBQ3_0 &&
|
|
1951
|
+
op->src[0]->ne[0] == 256 &&
|
|
1952
|
+
op->src[1]->ne[0] == 128 &&
|
|
1953
|
+
op->src[2]->ne[0] == 128 &&
|
|
1954
|
+
op->ne[0] == 128 &&
|
|
1955
|
+
n_kv_heads > 0 &&
|
|
1956
|
+
(op->src[0]->ne[1] % n_kv_heads) == 0 &&
|
|
1957
|
+
op->src[1]->ne[1] == op->src[2]->ne[1] &&
|
|
1958
|
+
op->src[1]->ne[2] == n_kv_heads &&
|
|
1959
|
+
op->src[2]->ne[2] == n_kv_heads &&
|
|
1960
|
+
op->src[1]->ne[3] == op->src[0]->ne[3] &&
|
|
1961
|
+
op->src[2]->ne[3] == op->src[0]->ne[3] &&
|
|
1962
|
+
op->ne[1] == op->src[0]->ne[1] &&
|
|
1963
|
+
op->ne[2] == op->src[0]->ne[2] &&
|
|
1964
|
+
op->ne[3] == op->src[0]->ne[3] &&
|
|
1965
|
+
ggml_is_contiguous_rows(op) &&
|
|
1966
|
+
ggml_is_contiguous_rows(op->src[0]) &&
|
|
1967
|
+
ggml_is_contiguous_rows(op->src[1]) &&
|
|
1968
|
+
ggml_is_contiguous_rows(op->src[2]);
|
|
1969
|
+
}
|
|
1970
|
+
case GGML_OP_ATTN_SCORE_QJL:`,
|
|
1971
|
+
);
|
|
1972
|
+
}
|
|
1973
|
+
// Only touch the file when the upgrade actually changed its text.
if (patched !== original && !dryRun)
|
|
1974
|
+
fs.writeFileSync(deviceMPath, patched, "utf8");
|
|
1975
|
+
return { changed: patched !== original && !dryRun, path: deviceMPath };
|
|
1976
|
+
}
|
|
1977
|
+
// Fresh path: anchor on the QJL case label together with the SENTINEL_QJL_ATTN
// comment line that follows it.
const anchor = ` case GGML_OP_ATTN_SCORE_QJL:
|
|
1978
|
+
// ${SENTINEL_QJL_ATTN}`;
|
|
1979
|
+
if (!original.includes(anchor)) {
|
|
1980
|
+
throw new Error(
|
|
1981
|
+
`[metal-tbq-polar-attn] supports_op QJL anchor not found at ${deviceMPath}`,
|
|
1982
|
+
);
|
|
1983
|
+
}
|
|
1984
|
+
// Replacement text: the three new cases followed by the anchor itself, so the
// existing QJL case is kept and the new cases land in front of it.
const insert = ` case GGML_OP_ATTN_SCORE_TBQ:
|
|
1985
|
+
// ${SENTINEL_TBQ_POLAR_ATTN}
|
|
1986
|
+
return has_simdgroup_reduction &&
|
|
1987
|
+
op->type == GGML_TYPE_F32 &&
|
|
1988
|
+
op->src[0] != NULL &&
|
|
1989
|
+
op->src[1] != NULL &&
|
|
1990
|
+
op->src[0]->type == GGML_TYPE_F32 &&
|
|
1991
|
+
(op->src[1]->type == GGML_TYPE_TBQ3_0 ||
|
|
1992
|
+
op->src[1]->type == GGML_TYPE_TBQ4_0 ||
|
|
1993
|
+
op->src[1]->type == GGML_TYPE_TBQ3_TCQ) &&
|
|
1994
|
+
op->src[0]->ne[0] == 128 &&
|
|
1995
|
+
op->src[1]->ne[0] == 128 &&
|
|
1996
|
+
ggml_is_contiguous_rows(op) &&
|
|
1997
|
+
ggml_is_contiguous_rows(op->src[0]) &&
|
|
1998
|
+
ggml_is_contiguous_rows(op->src[1]);
|
|
1999
|
+
case GGML_OP_ATTN_SCORE_POLAR:
|
|
2000
|
+
return has_simdgroup_reduction &&
|
|
2001
|
+
op->type == GGML_TYPE_F32 &&
|
|
2002
|
+
op->src[0] != NULL &&
|
|
2003
|
+
op->src[1] != NULL &&
|
|
2004
|
+
op->src[0]->type == GGML_TYPE_F32 &&
|
|
2005
|
+
op->src[1]->type == GGML_TYPE_Q4_POLAR &&
|
|
2006
|
+
op->src[0]->ne[0] == 128 &&
|
|
2007
|
+
op->src[1]->ne[0] == 128 &&
|
|
2008
|
+
ggml_is_contiguous_rows(op) &&
|
|
2009
|
+
ggml_is_contiguous_rows(op->src[0]) &&
|
|
2010
|
+
ggml_is_contiguous_rows(op->src[1]);
|
|
2011
|
+
case GGML_OP_FUSED_ATTN_QJL_TBQ:
|
|
2012
|
+
{
|
|
2013
|
+
const int32_t * params = (const int32_t *) op->op_params;
|
|
2014
|
+
const int64_t n_kv_heads = params[0];
|
|
2015
|
+
return has_simdgroup_reduction &&
|
|
2016
|
+
op->type == GGML_TYPE_F32 &&
|
|
2017
|
+
op->src[0] != NULL &&
|
|
2018
|
+
op->src[1] != NULL &&
|
|
2019
|
+
op->src[2] != NULL &&
|
|
2020
|
+
op->src[0]->type == GGML_TYPE_F32 &&
|
|
2021
|
+
op->src[1]->type == GGML_TYPE_QJL1_256 &&
|
|
2022
|
+
op->src[2]->type == GGML_TYPE_TBQ3_0 &&
|
|
2023
|
+
op->src[0]->ne[0] == 256 &&
|
|
2024
|
+
op->src[1]->ne[0] == 128 &&
|
|
2025
|
+
op->src[2]->ne[0] == 128 &&
|
|
2026
|
+
op->ne[0] == 128 &&
|
|
2027
|
+
n_kv_heads > 0 &&
|
|
2028
|
+
(op->src[0]->ne[1] % n_kv_heads) == 0 &&
|
|
2029
|
+
op->src[1]->ne[1] == op->src[2]->ne[1] &&
|
|
2030
|
+
op->src[1]->ne[2] == n_kv_heads &&
|
|
2031
|
+
op->src[2]->ne[2] == n_kv_heads &&
|
|
2032
|
+
op->src[1]->ne[3] == op->src[0]->ne[3] &&
|
|
2033
|
+
op->src[2]->ne[3] == op->src[0]->ne[3] &&
|
|
2034
|
+
op->ne[1] == op->src[0]->ne[1] &&
|
|
2035
|
+
op->ne[2] == op->src[0]->ne[2] &&
|
|
2036
|
+
op->ne[3] == op->src[0]->ne[3] &&
|
|
2037
|
+
ggml_is_contiguous_rows(op) &&
|
|
2038
|
+
ggml_is_contiguous_rows(op->src[0]) &&
|
|
2039
|
+
ggml_is_contiguous_rows(op->src[1]) &&
|
|
2040
|
+
ggml_is_contiguous_rows(op->src[2]);
|
|
2041
|
+
}
|
|
2042
|
+
${anchor}`;
|
|
2043
|
+
const patched = original.replace(anchor, insert);
|
|
2044
|
+
if (!dryRun) fs.writeFileSync(deviceMPath, patched, "utf8");
|
|
2045
|
+
// The anchor was verified above, so the replace did change the text; `changed`
// therefore only depends on whether the result was written.
return { changed: !dryRun, path: deviceMPath };
|
|
2046
|
+
}
|
|
2047
|
+
|
|
2048
|
+
/**
 * Runs every TBQ/Polar attention patch step in a fixed order and collects the
 * per-step results under the step's name.
 *
 * @param {string} cacheDir Checkout/cache directory handed to every step.
 * @param {{ dryRun: boolean }} options Forwarded to every step unchanged.
 * @returns {object} Results keyed `ggmlOps`, `deviceHeader`, `deviceCpp`,
 *   `opsHeader`, `opsCpp`, `supportsOp` (in that order).
 */
function patchMetalTbqPolarAttnDispatch(cacheDir, { dryRun }) {
  // Order matters: the steps are applied exactly in this sequence, and a
  // throwing step aborts the remaining ones.
  const steps = [
    ["ggmlOps", patchGgmlTbqPolarAttnOps],
    ["deviceHeader", patchMetalTbqPolarDeviceHeader],
    ["deviceCpp", patchMetalTbqPolarDeviceCpp],
    ["opsHeader", patchMetalTbqPolarOpsHeader],
    ["opsCpp", patchMetalTbqPolarOpsCpp],
    ["supportsOp", patchMetalTbqPolarSupportsOp],
  ];

  const results = {};
  for (const [label, applyPatch] of steps) {
    // Each step gets its own fresh options object.
    results[label] = applyPatch(cacheDir, { dryRun });
  }
  return results;
}
|
|
2057
|
+
|
|
2058
|
+
/**
 * Adds `case GGML_TYPE_QJL1_256:` next to the existing `case GGML_TYPE_IQ4_NL:`
 * / `return true;` pair in `ggml/src/ggml-metal/ggml-metal-device.m`.
 * Per the error message this is presumably the SET_ROWS branch of supports_op;
 * the surrounding C code is not visible here.
 *
 * Idempotent: a file that already contains SENTINEL_QJL_SET_ROWS is left alone.
 *
 * @param {string} cacheDir Root directory that contains `ggml/src/ggml-metal`.
 * @param {{ dryRun: boolean }} options When `dryRun` is true nothing is written.
 * @returns {{ changed: boolean, path: string }}
 * @throws {Error} When the IQ4_NL anchor text is not found. The file read is
 *   not guarded, so a missing file propagates the fs error as well.
 */
function patchMetalQjlSetRowsSupportsOp(cacheDir, { dryRun }) {
|
|
2059
|
+
const deviceMPath = path.join(
|
|
2060
|
+
cacheDir,
|
|
2061
|
+
"ggml",
|
|
2062
|
+
"src",
|
|
2063
|
+
"ggml-metal",
|
|
2064
|
+
"ggml-metal-device.m",
|
|
2065
|
+
);
|
|
2066
|
+
const original = fs.readFileSync(deviceMPath, "utf8");
|
|
2067
|
+
// Already patched: report "unchanged" without touching the file.
if (original.includes(SENTINEL_QJL_SET_ROWS)) {
|
|
2068
|
+
return { changed: false, path: deviceMPath };
|
|
2069
|
+
}
|
|
2070
|
+
// The anchor spans two lines, so it must match the target file exactly,
// including the line break and indentation.
const anchor = ` case GGML_TYPE_IQ4_NL:
|
|
2071
|
+
return true;`;
|
|
2072
|
+
if (!original.includes(anchor)) {
|
|
2073
|
+
throw new Error(
|
|
2074
|
+
`[metal-qjl-set-rows] supports_op SET_ROWS anchor not found at ${deviceMPath}`,
|
|
2075
|
+
);
|
|
2076
|
+
}
|
|
2077
|
+
// Re-emit the anchor with the QJL1_256 case (and the sentinel comment that
// makes the next run a no-op) slotted in before `return true;`.
const patched = original.replace(
|
|
2078
|
+
anchor,
|
|
2079
|
+
` case GGML_TYPE_IQ4_NL:
|
|
2080
|
+
case GGML_TYPE_QJL1_256:
|
|
2081
|
+
// ${SENTINEL_QJL_SET_ROWS}
|
|
2082
|
+
return true;`,
|
|
2083
|
+
);
|
|
2084
|
+
if (!dryRun) fs.writeFileSync(deviceMPath, patched, "utf8");
|
|
2085
|
+
return { changed: !dryRun, path: deviceMPath };
|
|
2086
|
+
}
|
|
2087
|
+
|
|
2088
|
+
/**
 * Patches `ggml/src/ggml-metal/ggml-metal-ops.cpp` so the code right after the
 * `nk0` computation gets an early-return branch for `GGML_TYPE_QJL1_256`
 * destinations. Per the error message the target is presumably the op_set_rows
 * encoder; the surrounding C++ is not visible here.
 *
 * The inserted branch sets the pipeline, passes a `ggml_metal_kargs_set_rows`
 * struct at index 0, binds `op->src[0]`, `op->src[1]` and `op` at indices 1-3,
 * dispatches with `(ne01, ne02, ne03, 32, 1, 1)` and returns 1.
 *
 * Idempotent: a file that already contains SENTINEL_QJL_SET_ROWS is left alone.
 *
 * @param {string} cacheDir Root directory that contains `ggml/src/ggml-metal`.
 * @param {{ dryRun: boolean }} options When `dryRun` is true nothing is written.
 * @returns {{ changed: boolean, path: string }}
 * @throws {Error} When the `nk0` / `nth` anchor text is not found. The file
 *   read is not guarded, so a missing file propagates the fs error as well.
 */
function patchMetalQjlSetRowsOps(cacheDir, { dryRun }) {
|
|
2089
|
+
const opsPath = path.join(
|
|
2090
|
+
cacheDir,
|
|
2091
|
+
"ggml",
|
|
2092
|
+
"src",
|
|
2093
|
+
"ggml-metal",
|
|
2094
|
+
"ggml-metal-ops.cpp",
|
|
2095
|
+
);
|
|
2096
|
+
const original = fs.readFileSync(opsPath, "utf8");
|
|
2097
|
+
// Already patched: report "unchanged" without touching the file.
if (original.includes(SENTINEL_QJL_SET_ROWS)) {
|
|
2098
|
+
return { changed: false, path: opsPath };
|
|
2099
|
+
}
|
|
2100
|
+
// Anchor: the `nk0` declaration, a blank line, then the `nth` declaration.
// It must match the target file exactly, whitespace included.
const anchor = ` const int32_t nk0 = ne0/ggml_blck_size(op->type);
|
|
2101
|
+
|
|
2102
|
+
int nth = 32; // SIMD width`;
|
|
2103
|
+
if (!original.includes(anchor)) {
|
|
2104
|
+
throw new Error(
|
|
2105
|
+
`[metal-qjl-set-rows] op_set_rows anchor not found at ${opsPath}`,
|
|
2106
|
+
);
|
|
2107
|
+
}
|
|
2108
|
+
// Replacement: the same two declarations with the QJL1_256 branch between
// them. The branch uses `enc` and `pipeline`, which are not declared in the
// inserted text and so must already exist at that point in the target file
// (NOTE(review): confirm against the upstream function).
const insert = ` const int32_t nk0 = ne0/ggml_blck_size(op->type);
|
|
2109
|
+
|
|
2110
|
+
if (op->type == GGML_TYPE_QJL1_256) {
|
|
2111
|
+
// ${SENTINEL_QJL_SET_ROWS}
|
|
2112
|
+
ggml_metal_kargs_set_rows args = {
|
|
2113
|
+
/*.nk0 =*/ nk0,
|
|
2114
|
+
/*.ne01 =*/ ne01,
|
|
2115
|
+
/*.nb01 =*/ nb01,
|
|
2116
|
+
/*.nb02 =*/ nb02,
|
|
2117
|
+
/*.nb03 =*/ nb03,
|
|
2118
|
+
/*.ne11 =*/ ne11,
|
|
2119
|
+
/*.ne12 =*/ ne12,
|
|
2120
|
+
/*.nb10 =*/ nb10,
|
|
2121
|
+
/*.nb11 =*/ nb11,
|
|
2122
|
+
/*.nb12 =*/ nb12,
|
|
2123
|
+
/*.nb1 =*/ nb1,
|
|
2124
|
+
/*.nb2 =*/ nb2,
|
|
2125
|
+
/*.nb3 =*/ nb3,
|
|
2126
|
+
};
|
|
2127
|
+
|
|
2128
|
+
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
|
2129
|
+
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
|
2130
|
+
ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op->src[0]), 1);
|
|
2131
|
+
ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op->src[1]), 2);
|
|
2132
|
+
ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op), 3);
|
|
2133
|
+
|
|
2134
|
+
ggml_metal_encoder_dispatch_threadgroups(enc, ne01, ne02, ne03, 32, 1, 1);
|
|
2135
|
+
|
|
2136
|
+
return 1;
|
|
2137
|
+
}
|
|
2138
|
+
|
|
2139
|
+
int nth = 32; // SIMD width`;
|
|
2140
|
+
const patched = original.replace(anchor, insert);
|
|
2141
|
+
if (!dryRun) fs.writeFileSync(opsPath, patched, "utf8");
|
|
2142
|
+
return { changed: !dryRun, path: opsPath };
|
|
2143
|
+
}
|
|
2144
|
+
|
|
2145
|
+
/**
 * Applies both halves of the QJL SET_ROWS patch: first the supports-op half,
 * then the ops half.
 *
 * @param {string} cacheDir Checkout/cache directory handed to both patchers.
 * @param {{ dryRun: boolean }} options Forwarded to both patchers.
 * @returns {{ supportsOp: object, ops: object }} The two patchers' results.
 */
function patchMetalQjlSetRows(cacheDir, { dryRun }) {
  const supportsOp = patchMetalQjlSetRowsSupportsOp(cacheDir, { dryRun });
  const ops = patchMetalQjlSetRowsOps(cacheDir, { dryRun });
  return { supportsOp, ops };
}
|
|
2151
|
+
|
|
2152
|
+
/**
 * Wires the dedicated Metal dispatch patches (QJL attention, TBQ/Polar
 * attention, QJL SET_ROWS) into the checkout at `cacheDir` and reports what
 * each sub-patch did.
 *
 * Before patching, three Metal sources are scanned for SENTINEL_DISPATCH. If
 * any of them carries it, a real run throws and a dry run only warns.
 *
 * In a dry run the TBQ/Polar patch is skipped, and reported as deferred, unless
 * the QJL sentinel is already present in all three files it is checked in.
 *
 * @param {string} cacheDir Root directory that contains `ggml/src/ggml-metal`.
 * @param {{ dryRun?: boolean }} [options] `dryRun` defaults to false.
 * @returns {{ status: string, unsafePatchPresent: string[], qjlAttn: object,
 *   tbqPolarAttn: object, qjlSetRows: object }}
 * @throws {Error} When a file with SENTINEL_DISPATCH is found outside dry-run
 *   mode, or when any sub-patch throws.
 */
export function patchMetalDispatch(cacheDir, { dryRun = false } = {}) {
  const metalSource = (name) =>
    path.join(cacheDir, "ggml", "src", "ggml-metal", name);
  const dryTag = dryRun ? "(dry-run) " : "";

  // Collect the files that still carry the SENTINEL_DISPATCH marker. A file
  // that cannot be read is treated as not carrying it.
  const patchedFiles = [];
  for (const name of [
    "ggml-metal-device.h",
    "ggml-metal-device.cpp",
    "ggml-metal-ops.cpp",
  ]) {
    const file = metalSource(name);
    let carriesMarker;
    try {
      carriesMarker = fs.readFileSync(file, "utf8").includes(SENTINEL_DISPATCH);
    } catch {
      carriesMarker = false;
    }
    if (carriesMarker) {
      patchedFiles.push(file);
    }
  }

  const message = [
    "[metal-dispatch] NOT wiring generic Metal GGML dispatch for eliza ",
    "QJL/Polar/TBQ kernels. The standalone kernels use bespoke attention/",
    "projection contracts that do not match generic MUL_MAT/GET_ROWS. ",
    "Dedicated graph ops are required for runtime-ready bits.",
  ].join("");

  if (patchedFiles.length === 0) {
    console.log(`${dryTag}${message}`);
  } else {
    const detail = [
      `${message} Found an older unsafe ELIZA-DISPATCH-V1 patch in:\n`,
      ` ${patchedFiles.join("\n ")}\n`,
      "Use a clean eliza-llama-cpp checkout/cache before producing artifacts.",
    ].join("");
    if (!dryRun) {
      throw new Error(detail);
    }
    console.warn(detail);
  }

  const qjlAttn = patchMetalQjlAttnDispatch(cacheDir, { dryRun });

  // Checked after the QJL patch has run, so in a real run its sentinel is
  // normally present here. Stops at the first file that lacks it.
  let qjlAnchorsAlreadyPresent = true;
  for (const name of [
    "ggml-metal-device.h",
    "ggml-metal-ops.h",
    "ggml-metal-ops.cpp",
  ]) {
    const file = metalSource(name);
    const hasSentinel =
      fs.existsSync(file) &&
      fs.readFileSync(file, "utf8").includes(SENTINEL_QJL_ATTN);
    if (!hasSentinel) {
      qjlAnchorsAlreadyPresent = false;
      break;
    }
  }

  let tbqPolarAttn;
  if (dryRun && !qjlAnchorsAlreadyPresent) {
    // Skipped in this dry run because the QJL sentinel is not in all three files.
    tbqPolarAttn = { deferredUntilQjlPatchWrites: true };
  } else {
    tbqPolarAttn = patchMetalTbqPolarAttnDispatch(cacheDir, { dryRun });
  }
  const qjlSetRows = patchMetalQjlSetRows(cacheDir, { dryRun });

  console.log(
    `[metal-dispatch] ${dryTag}wired dedicated GGML_OP_ATTN_SCORE_QJL dispatch via kernel_attn_score_qjl1_256_multi`,
  );

  const deferredNote = tbqPolarAttn.deferredUntilQjlPatchWrites
    ? " (deferred in dry-run until QJL patch writes anchors)"
    : "";
  console.log(
    `[metal-dispatch] ${dryTag}wired dedicated GGML_OP_ATTN_SCORE_TBQ / GGML_OP_ATTN_SCORE_POLAR dispatch via shipped TurboQuant and PolarQuant kernels${deferredNote}`,
  );

  const gateSummary = [];
  for (const [key, gate] of Object.entries(METAL_RUNTIME_DISPATCH_GATES)) {
    const label = gate.runtimeReady ? "runtime-ready" : gate.status;
    gateSummary.push(`${key}=${label}`);
  }
  console.log(
    `[metal-dispatch] ${dryTag}runtime-ready gates: ${gateSummary.join(", ")}`,
  );

  return {
    status: "attn-score-qjl-tbq-polar",
    unsafePatchPresent: patchedFiles,
    qjlAttn,
    tbqPolarAttn,
    qjlSetRows,
  };
}
|
|
634
2223
|
|
|
635
|
-
// Public entry point used by build-llama-cpp-
|
|
2224
|
+
// Public entry point used by build-llama-cpp-mtp.mjs.
|
|
636
2225
|
// Throws on any failure. Idempotent across runs.
|
|
637
2226
|
export function patchMetalKernels(cacheDir, { dryRun = false } = {}) {
|
|
638
2227
|
if (!cacheDir || !fs.existsSync(cacheDir)) {
|