@octomil/browser 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +168 -36
- package/dist/_generated/accelerator_type.d.ts +8 -0
- package/dist/_generated/accelerator_type.d.ts.map +1 -0
- package/dist/_generated/accelerator_type.js +10 -0
- package/dist/_generated/accelerator_type.js.map +1 -0
- package/dist/_generated/activation_policy.d.ts +7 -0
- package/dist/_generated/activation_policy.d.ts.map +1 -0
- package/dist/_generated/activation_policy.js +9 -0
- package/dist/_generated/activation_policy.js.map +1 -0
- package/dist/_generated/adapter_activation_state.d.ts +13 -0
- package/dist/_generated/adapter_activation_state.d.ts.map +1 -0
- package/dist/_generated/adapter_activation_state.js +15 -0
- package/dist/_generated/adapter_activation_state.js.map +1 -0
- package/dist/_generated/artifact_format.d.ts +11 -0
- package/dist/_generated/artifact_format.d.ts.map +1 -0
- package/dist/_generated/artifact_format.js +13 -0
- package/dist/_generated/artifact_format.js.map +1 -0
- package/dist/_generated/artifact_resource_kind.d.ts +16 -0
- package/dist/_generated/artifact_resource_kind.d.ts.map +1 -0
- package/dist/_generated/artifact_resource_kind.js +18 -0
- package/dist/_generated/artifact_resource_kind.js.map +1 -0
- package/dist/_generated/artifact_status.d.ts +21 -0
- package/dist/_generated/artifact_status.d.ts.map +1 -0
- package/dist/_generated/artifact_status.js +23 -0
- package/dist/_generated/artifact_status.js.map +1 -0
- package/dist/_generated/auth_method.d.ts +10 -0
- package/dist/_generated/auth_method.d.ts.map +1 -0
- package/dist/_generated/auth_method.js +12 -0
- package/dist/_generated/auth_method.js.map +1 -0
- package/dist/_generated/auth_type.d.ts +7 -0
- package/dist/_generated/auth_type.d.ts.map +1 -0
- package/dist/_generated/auth_type.js +9 -0
- package/dist/_generated/auth_type.js.map +1 -0
- package/dist/_generated/billing_interval.d.ts +5 -0
- package/dist/_generated/billing_interval.d.ts.map +1 -0
- package/dist/_generated/billing_interval.js +7 -0
- package/dist/_generated/billing_interval.js.map +1 -0
- package/dist/_generated/billing_plan.d.ts +47 -0
- package/dist/_generated/billing_plan.d.ts.map +1 -0
- package/dist/_generated/billing_plan.js +41 -0
- package/dist/_generated/billing_plan.js.map +1 -0
- package/dist/_generated/cloud_provider.d.ts +12 -0
- package/dist/_generated/cloud_provider.d.ts.map +1 -0
- package/dist/_generated/cloud_provider.js +14 -0
- package/dist/_generated/cloud_provider.js.map +1 -0
- package/dist/_generated/compatibility_level.d.ts +7 -0
- package/dist/_generated/compatibility_level.d.ts.map +1 -0
- package/dist/_generated/compatibility_level.js +9 -0
- package/dist/_generated/compatibility_level.js.map +1 -0
- package/dist/_generated/content_part_type.d.ts +7 -0
- package/dist/_generated/content_part_type.d.ts.map +1 -0
- package/dist/_generated/content_part_type.js +9 -0
- package/dist/_generated/content_part_type.js.map +1 -0
- package/dist/_generated/delivery_mode.d.ts +6 -0
- package/dist/_generated/delivery_mode.d.ts.map +1 -0
- package/dist/_generated/delivery_mode.js +8 -0
- package/dist/_generated/delivery_mode.js.map +1 -0
- package/dist/_generated/device_class.d.ts +7 -0
- package/dist/_generated/device_class.d.ts.map +1 -0
- package/dist/_generated/device_class.js +9 -0
- package/dist/_generated/device_class.js.map +1 -0
- package/dist/_generated/device_connectivity_status.d.ts +8 -0
- package/dist/_generated/device_connectivity_status.d.ts.map +1 -0
- package/dist/_generated/device_connectivity_status.js +10 -0
- package/dist/_generated/device_connectivity_status.js.map +1 -0
- package/dist/_generated/device_model_status.d.ts +14 -0
- package/dist/_generated/device_model_status.d.ts.map +1 -0
- package/dist/_generated/device_model_status.js +16 -0
- package/dist/_generated/device_model_status.js.map +1 -0
- package/dist/_generated/device_platform.d.ts +9 -0
- package/dist/_generated/device_platform.d.ts.map +1 -0
- package/dist/_generated/device_platform.js +11 -0
- package/dist/_generated/device_platform.js.map +1 -0
- package/dist/_generated/email_provider.d.ts +8 -0
- package/dist/_generated/email_provider.d.ts.map +1 -0
- package/dist/_generated/email_provider.js +10 -0
- package/dist/_generated/email_provider.js.map +1 -0
- package/dist/_generated/email_template.d.ts +11 -0
- package/dist/_generated/email_template.d.ts.map +1 -0
- package/dist/_generated/email_template.js +13 -0
- package/dist/_generated/email_template.js.map +1 -0
- package/dist/_generated/error_code.d.ts +52 -0
- package/dist/_generated/error_code.d.ts.map +1 -0
- package/dist/_generated/error_code.js +85 -0
- package/dist/_generated/error_code.js.map +1 -0
- package/dist/_generated/federated_participation_state.d.ts +25 -0
- package/dist/_generated/federated_participation_state.d.ts.map +1 -0
- package/dist/_generated/federated_participation_state.js +27 -0
- package/dist/_generated/federated_participation_state.js.map +1 -0
- package/dist/_generated/federated_round_state.d.ts +17 -0
- package/dist/_generated/federated_round_state.d.ts.map +1 -0
- package/dist/_generated/federated_round_state.js +19 -0
- package/dist/_generated/federated_round_state.js.map +1 -0
- package/dist/_generated/finish_reason.d.ts +7 -0
- package/dist/_generated/finish_reason.d.ts.map +1 -0
- package/dist/_generated/finish_reason.js +9 -0
- package/dist/_generated/finish_reason.js.map +1 -0
- package/dist/_generated/index.d.ts +17 -0
- package/dist/_generated/index.d.ts.map +1 -0
- package/dist/_generated/index.js +17 -0
- package/dist/_generated/index.js.map +1 -0
- package/dist/_generated/input_modality.d.ts +7 -0
- package/dist/_generated/input_modality.d.ts.map +1 -0
- package/dist/_generated/input_modality.js +9 -0
- package/dist/_generated/input_modality.js.map +1 -0
- package/dist/_generated/message_role.d.ts +7 -0
- package/dist/_generated/message_role.d.ts.map +1 -0
- package/dist/_generated/message_role.js +9 -0
- package/dist/_generated/message_role.js.map +1 -0
- package/dist/_generated/metric_views.d.ts +18 -0
- package/dist/_generated/metric_views.d.ts.map +1 -0
- package/dist/_generated/metric_views.js +22 -0
- package/dist/_generated/metric_views.js.map +1 -0
- package/dist/_generated/modality.d.ts +7 -0
- package/dist/_generated/modality.d.ts.map +1 -0
- package/dist/_generated/modality.js +9 -0
- package/dist/_generated/modality.js.map +1 -0
- package/dist/_generated/model_capability.d.ts +10 -0
- package/dist/_generated/model_capability.d.ts.map +1 -0
- package/dist/_generated/model_capability.js +12 -0
- package/dist/_generated/model_capability.js.map +1 -0
- package/dist/_generated/model_feature.d.ts +7 -0
- package/dist/_generated/model_feature.d.ts.map +1 -0
- package/dist/_generated/model_feature.js +9 -0
- package/dist/_generated/model_feature.js.map +1 -0
- package/dist/_generated/model_lifecycle.d.ts +7 -0
- package/dist/_generated/model_lifecycle.d.ts.map +1 -0
- package/dist/_generated/model_lifecycle.js +9 -0
- package/dist/_generated/model_lifecycle.js.map +1 -0
- package/dist/_generated/model_source_format.d.ts +9 -0
- package/dist/_generated/model_source_format.d.ts.map +1 -0
- package/dist/_generated/model_source_format.js +11 -0
- package/dist/_generated/model_source_format.js.map +1 -0
- package/dist/_generated/model_status.d.ts +7 -0
- package/dist/_generated/model_status.d.ts.map +1 -0
- package/dist/_generated/model_status.js +9 -0
- package/dist/_generated/model_status.js.map +1 -0
- package/dist/_generated/network_type.d.ts +7 -0
- package/dist/_generated/network_type.d.ts.map +1 -0
- package/dist/_generated/network_type.js +9 -0
- package/dist/_generated/network_type.js.map +1 -0
- package/dist/_generated/oauth_provider.d.ts +8 -0
- package/dist/_generated/oauth_provider.d.ts.map +1 -0
- package/dist/_generated/oauth_provider.js +10 -0
- package/dist/_generated/oauth_provider.js.map +1 -0
- package/dist/_generated/operation_state.d.ts +9 -0
- package/dist/_generated/operation_state.d.ts.map +1 -0
- package/dist/_generated/operation_state.js +11 -0
- package/dist/_generated/operation_state.js.map +1 -0
- package/dist/_generated/otlp_resource_attributes.d.ts +16 -0
- package/dist/_generated/otlp_resource_attributes.d.ts.map +1 -0
- package/dist/_generated/otlp_resource_attributes.js +17 -0
- package/dist/_generated/otlp_resource_attributes.js.map +1 -0
- package/dist/_generated/principal_type.d.ts +7 -0
- package/dist/_generated/principal_type.d.ts.map +1 -0
- package/dist/_generated/principal_type.js +9 -0
- package/dist/_generated/principal_type.js.map +1 -0
- package/dist/_generated/query_routing_tier.d.ts +6 -0
- package/dist/_generated/query_routing_tier.d.ts.map +1 -0
- package/dist/_generated/query_routing_tier.js +8 -0
- package/dist/_generated/query_routing_tier.js.map +1 -0
- package/dist/_generated/routing_policy.d.ts +7 -0
- package/dist/_generated/routing_policy.d.ts.map +1 -0
- package/dist/_generated/routing_policy.js +9 -0
- package/dist/_generated/routing_policy.js.map +1 -0
- package/dist/_generated/runtime_executor.d.ts +18 -0
- package/dist/_generated/runtime_executor.d.ts.map +1 -0
- package/dist/_generated/runtime_executor.js +20 -0
- package/dist/_generated/runtime_executor.js.map +1 -0
- package/dist/_generated/scope.d.ts +16 -0
- package/dist/_generated/scope.d.ts.map +1 -0
- package/dist/_generated/scope.js +18 -0
- package/dist/_generated/scope.js.map +1 -0
- package/dist/_generated/span_attributes.d.ts +25 -0
- package/dist/_generated/span_attributes.d.ts.map +1 -0
- package/dist/_generated/span_attributes.js +34 -0
- package/dist/_generated/span_attributes.js.map +1 -0
- package/dist/_generated/span_event_attributes.d.ts +23 -0
- package/dist/_generated/span_event_attributes.d.ts.map +1 -0
- package/dist/_generated/span_event_attributes.js +36 -0
- package/dist/_generated/span_event_attributes.js.map +1 -0
- package/dist/_generated/span_event_names.d.ts +42 -0
- package/dist/_generated/span_event_names.d.ts.map +1 -0
- package/dist/_generated/span_event_names.js +82 -0
- package/dist/_generated/span_event_names.js.map +1 -0
- package/dist/_generated/span_names.d.ts +17 -0
- package/dist/_generated/span_names.d.ts.map +1 -0
- package/dist/_generated/span_names.js +18 -0
- package/dist/_generated/span_names.js.map +1 -0
- package/dist/_generated/span_status_mapping.d.ts +8 -0
- package/dist/_generated/span_status_mapping.d.ts.map +1 -0
- package/dist/_generated/span_status_mapping.js +17 -0
- package/dist/_generated/span_status_mapping.js.map +1 -0
- package/dist/_generated/subscription_status.d.ts +11 -0
- package/dist/_generated/subscription_status.d.ts.map +1 -0
- package/dist/_generated/subscription_status.js +13 -0
- package/dist/_generated/subscription_status.js.map +1 -0
- package/dist/_generated/support_tier.d.ts +7 -0
- package/dist/_generated/support_tier.d.ts.map +1 -0
- package/dist/_generated/support_tier.js +9 -0
- package/dist/_generated/support_tier.js.map +1 -0
- package/dist/_generated/telemetry_class.d.ts +6 -0
- package/dist/_generated/telemetry_class.d.ts.map +1 -0
- package/dist/_generated/telemetry_class.js +8 -0
- package/dist/_generated/telemetry_class.js.map +1 -0
- package/dist/_generated/telemetry_events.d.ts +10 -0
- package/dist/_generated/telemetry_events.d.ts.map +1 -0
- package/dist/_generated/telemetry_events.js +18 -0
- package/dist/_generated/telemetry_events.js.map +1 -0
- package/dist/_generated/thermal_state.d.ts +7 -0
- package/dist/_generated/thermal_state.d.ts.map +1 -0
- package/dist/_generated/thermal_state.js +9 -0
- package/dist/_generated/thermal_state.js.map +1 -0
- package/dist/_generated/tool_call_tier.d.ts +7 -0
- package/dist/_generated/tool_call_tier.d.ts.map +1 -0
- package/dist/_generated/tool_call_tier.js +9 -0
- package/dist/_generated/tool_call_tier.js.map +1 -0
- package/dist/_generated/training_job_state.d.ts +23 -0
- package/dist/_generated/training_job_state.d.ts.map +1 -0
- package/dist/_generated/training_job_state.js +25 -0
- package/dist/_generated/training_job_state.js.map +1 -0
- package/dist/_generated/work_class.d.ts +6 -0
- package/dist/_generated/work_class.d.ts.map +1 -0
- package/dist/_generated/work_class.js +8 -0
- package/dist/_generated/work_class.js.map +1 -0
- package/dist/artifacts.d.ts +16 -0
- package/dist/artifacts.d.ts.map +1 -0
- package/dist/artifacts.js +16 -0
- package/dist/artifacts.js.map +1 -0
- package/dist/audio/audio-transcriptions.d.ts +31 -0
- package/dist/audio/audio-transcriptions.d.ts.map +1 -0
- package/dist/audio/audio-transcriptions.js +73 -0
- package/dist/audio/audio-transcriptions.js.map +1 -0
- package/dist/audio/index.d.ts +5 -0
- package/dist/audio/index.d.ts.map +1 -0
- package/dist/audio/index.js +3 -0
- package/dist/audio/index.js.map +1 -0
- package/dist/audio/octomil-audio.d.ts +9 -0
- package/dist/audio/octomil-audio.d.ts.map +1 -0
- package/dist/audio/octomil-audio.js +11 -0
- package/dist/audio/octomil-audio.js.map +1 -0
- package/dist/audio/transcription-types.d.ts +17 -0
- package/dist/audio/transcription-types.d.ts.map +1 -0
- package/dist/audio/transcription-types.js +6 -0
- package/dist/audio/transcription-types.js.map +1 -0
- package/dist/cache.js +1 -1
- package/dist/cache.js.map +1 -1
- package/dist/capabilities.d.ts +26 -0
- package/dist/capabilities.d.ts.map +1 -0
- package/dist/capabilities.js +68 -0
- package/dist/capabilities.js.map +1 -0
- package/dist/chat.d.ts +109 -0
- package/dist/chat.d.ts.map +1 -0
- package/dist/chat.js +258 -0
- package/dist/chat.js.map +1 -0
- package/dist/configure.d.ts +18 -0
- package/dist/configure.d.ts.map +1 -0
- package/dist/configure.js +136 -0
- package/dist/configure.js.map +1 -0
- package/dist/control.d.ts +171 -0
- package/dist/control.d.ts.map +1 -0
- package/dist/control.js +317 -0
- package/dist/control.js.map +1 -0
- package/dist/device-auth.d.ts +1 -1
- package/dist/device-auth.d.ts.map +1 -1
- package/dist/device-auth.js +7 -7
- package/dist/device-auth.js.map +1 -1
- package/dist/device-context.d.ts +48 -0
- package/dist/device-context.d.ts.map +1 -0
- package/dist/device-context.js +92 -0
- package/dist/device-context.js.map +1 -0
- package/dist/devices.d.ts +17 -0
- package/dist/devices.d.ts.map +1 -0
- package/dist/devices.js +16 -0
- package/dist/devices.js.map +1 -0
- package/dist/embeddings.d.ts +19 -0
- package/dist/embeddings.d.ts.map +1 -0
- package/dist/embeddings.js +54 -0
- package/dist/embeddings.js.map +1 -0
- package/dist/experiments.d.ts +4 -3
- package/dist/experiments.d.ts.map +1 -1
- package/dist/experiments.js +5 -10
- package/dist/experiments.js.map +1 -1
- package/dist/federated-analytics.d.ts +65 -0
- package/dist/federated-analytics.d.ts.map +1 -0
- package/dist/federated-analytics.js +128 -0
- package/dist/federated-analytics.js.map +1 -0
- package/dist/federated.d.ts +10 -3
- package/dist/federated.d.ts.map +1 -1
- package/dist/federated.js +68 -15
- package/dist/federated.js.map +1 -1
- package/dist/gradient-cache.d.ts +21 -0
- package/dist/gradient-cache.d.ts.map +1 -0
- package/dist/gradient-cache.js +70 -0
- package/dist/gradient-cache.js.map +1 -0
- package/dist/index.cjs +43601 -709
- package/dist/index.cjs.map +4 -4
- package/dist/index.d.ts +64 -13
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +55 -13
- package/dist/index.js.map +1 -1
- package/dist/install-id.d.ts +32 -0
- package/dist/install-id.d.ts.map +1 -0
- package/dist/install-id.js +61 -0
- package/dist/install-id.js.map +1 -0
- package/dist/{model-loader.d.ts → model-manager.d.ts} +4 -4
- package/dist/model-manager.d.ts.map +1 -0
- package/dist/{model-loader.js → model-manager.js} +18 -14
- package/dist/model-manager.js.map +1 -0
- package/dist/models.d.ts +74 -0
- package/dist/models.d.ts.map +1 -0
- package/dist/models.js +113 -0
- package/dist/models.js.map +1 -0
- package/dist/monitoring-config.d.ts +8 -0
- package/dist/monitoring-config.d.ts.map +1 -0
- package/dist/monitoring-config.js +5 -0
- package/dist/monitoring-config.js.map +1 -0
- package/dist/monitoring.d.ts +10 -0
- package/dist/monitoring.d.ts.map +1 -0
- package/dist/monitoring.js +19 -0
- package/dist/monitoring.js.map +1 -0
- package/dist/octomil.d.ts +151 -16
- package/dist/octomil.d.ts.map +1 -1
- package/dist/octomil.js +456 -125
- package/dist/octomil.js.map +1 -1
- package/dist/octomil.min.js +47 -2844
- package/dist/octomil.min.js.map +4 -4
- package/dist/responses-runtime.d.ts +13 -0
- package/dist/responses-runtime.d.ts.map +1 -0
- package/dist/responses-runtime.js +2 -0
- package/dist/responses-runtime.js.map +1 -0
- package/dist/responses-tools.d.ts +18 -0
- package/dist/responses-tools.d.ts.map +1 -0
- package/dist/responses-tools.js +71 -0
- package/dist/responses-tools.js.map +1 -0
- package/dist/responses.d.ts +142 -0
- package/dist/responses.d.ts.map +1 -0
- package/dist/responses.js +549 -0
- package/dist/responses.js.map +1 -0
- package/dist/routing.d.ts +49 -0
- package/dist/routing.d.ts.map +1 -0
- package/dist/routing.js +221 -0
- package/dist/routing.js.map +1 -0
- package/dist/runtime/core/model-runtime.d.ts +19 -0
- package/dist/runtime/core/model-runtime.d.ts.map +1 -0
- package/dist/runtime/core/model-runtime.js +9 -0
- package/dist/runtime/core/model-runtime.js.map +1 -0
- package/dist/{inference.d.ts → runtime/engines/onnx-web/engine.d.ts} +7 -3
- package/dist/runtime/engines/onnx-web/engine.d.ts.map +1 -0
- package/dist/{inference.js → runtime/engines/onnx-web/engine.js} +21 -7
- package/dist/runtime/engines/onnx-web/engine.js.map +1 -0
- package/dist/runtime/engines/registry/engine-plugin.d.ts +11 -0
- package/dist/runtime/engines/registry/engine-plugin.d.ts.map +1 -0
- package/dist/runtime/engines/registry/engine-plugin.js +2 -0
- package/dist/runtime/engines/registry/engine-plugin.js.map +1 -0
- package/dist/runtime/engines/registry/engine-registry.d.ts +35 -0
- package/dist/runtime/engines/registry/engine-registry.d.ts.map +1 -0
- package/dist/runtime/engines/registry/engine-registry.js +74 -0
- package/dist/runtime/engines/registry/engine-registry.js.map +1 -0
- package/dist/runtime/index.d.ts +4 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +2 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/secure-aggregation.d.ts.map +1 -1
- package/dist/secure-aggregation.js +3 -2
- package/dist/secure-aggregation.js.map +1 -1
- package/dist/server-api.d.ts +17 -0
- package/dist/server-api.d.ts.map +1 -0
- package/dist/server-api.js +56 -0
- package/dist/server-api.js.map +1 -0
- package/dist/settings.d.ts +20 -0
- package/dist/settings.d.ts.map +1 -0
- package/dist/settings.js +49 -0
- package/dist/settings.js.map +1 -0
- package/dist/silent-auth-config.d.ts +33 -0
- package/dist/silent-auth-config.d.ts.map +1 -0
- package/dist/silent-auth-config.js +29 -0
- package/dist/silent-auth-config.js.map +1 -0
- package/dist/streaming.d.ts +4 -3
- package/dist/streaming.d.ts.map +1 -1
- package/dist/streaming.js +15 -25
- package/dist/streaming.js.map +1 -1
- package/dist/sync-manager.d.ts +103 -0
- package/dist/sync-manager.d.ts.map +1 -0
- package/dist/sync-manager.js +314 -0
- package/dist/sync-manager.js.map +1 -0
- package/dist/telemetry.d.ts +99 -4
- package/dist/telemetry.d.ts.map +1 -1
- package/dist/telemetry.js +251 -13
- package/dist/telemetry.js.map +1 -1
- package/dist/text/octomil-text.d.ts +18 -0
- package/dist/text/octomil-text.d.ts.map +1 -0
- package/dist/text/octomil-text.js +20 -0
- package/dist/text/octomil-text.js.map +1 -0
- package/dist/training.d.ts +15 -0
- package/dist/training.d.ts.map +1 -0
- package/dist/training.js +35 -0
- package/dist/training.js.map +1 -0
- package/dist/transformers-local-runtime.d.ts +17 -0
- package/dist/transformers-local-runtime.d.ts.map +1 -0
- package/dist/transformers-local-runtime.js +356 -0
- package/dist/transformers-local-runtime.js.map +1 -0
- package/dist/types.d.ts +353 -25
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +146 -0
- package/dist/types.js.map +1 -1
- package/package.json +7 -6
- package/dist/inference.d.ts.map +0 -1
- package/dist/inference.js.map +0 -1
- package/dist/model-loader.d.ts.map +0 -1
- package/dist/model-loader.js.map +0 -1
- package/dist/rollouts.d.ts +0 -43
- package/dist/rollouts.d.ts.map +0 -1
- package/dist/rollouts.js +0 -114
- package/dist/rollouts.js.map +0 -1
package/dist/octomil.js
CHANGED
|
@@ -1,55 +1,106 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @octomil/browser — Main SDK entry point
|
|
3
3
|
*
|
|
4
|
-
* The `
|
|
4
|
+
* The `OctomilClient` class is the primary public interface. It orchestrates
|
|
5
5
|
* model loading, caching, inference, and optional telemetry.
|
|
6
6
|
*
|
|
7
7
|
* @example
|
|
8
8
|
* ```ts
|
|
9
|
-
* import {
|
|
9
|
+
* import { OctomilClient } from '@octomil/browser';
|
|
10
10
|
*
|
|
11
|
-
* const ml = new
|
|
12
|
-
* model: 'https://models.octomil.
|
|
11
|
+
* const ml = new OctomilClient({
|
|
12
|
+
* model: 'https://models.octomil.com/sentiment-v1.onnx',
|
|
13
13
|
* backend: 'webgpu',
|
|
14
14
|
* });
|
|
15
15
|
*
|
|
16
16
|
* await ml.load();
|
|
17
17
|
* const result = await ml.predict({ raw: inputData, dims: [1, 3, 224, 224] });
|
|
18
18
|
* console.log(result.label, result.score);
|
|
19
|
-
* ml.
|
|
19
|
+
* ml.close();
|
|
20
20
|
* ```
|
|
21
21
|
*/
|
|
22
|
+
import { OctomilAudio } from "./audio/octomil-audio.js";
|
|
23
|
+
import { CapabilitiesClient } from "./capabilities.js";
|
|
22
24
|
import { createModelCache } from "./cache.js";
|
|
23
|
-
import {
|
|
24
|
-
import {
|
|
25
|
+
import { ChatClient } from "./chat.js";
|
|
26
|
+
import { getDeviceContext } from "./configure.js";
|
|
27
|
+
import { ControlClient } from "./control.js";
|
|
28
|
+
import { embed as embedFn } from "./embeddings.js";
|
|
29
|
+
import { InferenceEngine } from "./runtime/engines/onnx-web/engine.js";
|
|
30
|
+
import { ModelManager } from "./model-manager.js";
|
|
31
|
+
import { ModelsClient } from "./models.js";
|
|
32
|
+
import { ResponsesClient } from "./responses.js";
|
|
33
|
+
import { RoutingClient, detectDeviceCapabilities } from "./routing.js";
|
|
25
34
|
import { TelemetryReporter } from "./telemetry.js";
|
|
26
|
-
import {
|
|
35
|
+
import { OctomilText } from "./text/octomil-text.js";
|
|
27
36
|
import { OctomilError } from "./types.js";
|
|
28
37
|
// ---------------------------------------------------------------------------
|
|
29
|
-
//
|
|
38
|
+
// OctomilClient
|
|
30
39
|
// ---------------------------------------------------------------------------
|
|
31
|
-
export class
|
|
40
|
+
export class OctomilClient {
|
|
32
41
|
options;
|
|
33
42
|
cache;
|
|
34
43
|
loader;
|
|
35
44
|
engine;
|
|
45
|
+
inferenceEngine;
|
|
46
|
+
routingClient = null;
|
|
47
|
+
deviceContext;
|
|
36
48
|
telemetry = null;
|
|
49
|
+
deviceCaps = null;
|
|
50
|
+
_responses = null;
|
|
51
|
+
_chat = null;
|
|
52
|
+
_control = null;
|
|
53
|
+
_capabilities = null;
|
|
54
|
+
_models = null;
|
|
55
|
+
_audio = null;
|
|
56
|
+
_text = null;
|
|
37
57
|
loaded = false;
|
|
38
|
-
|
|
58
|
+
closed = false;
|
|
59
|
+
_warmedUp = false;
|
|
39
60
|
constructor(options) {
|
|
61
|
+
// Extract serverUrl and apiKey from the auth config
|
|
62
|
+
const auth = options.auth;
|
|
63
|
+
const serverUrl = auth?.serverUrl;
|
|
64
|
+
const apiKey = auth?.type === "org_api_key"
|
|
65
|
+
? auth.apiKey
|
|
66
|
+
: auth?.type === "device_token"
|
|
67
|
+
? auth.bootstrapToken
|
|
68
|
+
: undefined;
|
|
69
|
+
const orgId = auth?.type === "org_api_key" ? auth.orgId : getDeviceContext()?.orgId ?? undefined;
|
|
70
|
+
this.deviceContext = getDeviceContext();
|
|
40
71
|
this.options = {
|
|
41
72
|
telemetry: false,
|
|
42
73
|
cacheStrategy: "cache-api",
|
|
43
74
|
...options,
|
|
75
|
+
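// Map auth fields into legacy locations for internal consumers. Listed after the spread, so they override any same-named keys from `options` (even when undefined).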
|
|
76
|
+
serverUrl,
|
|
77
|
+
apiKey,
|
|
44
78
|
};
|
|
45
79
|
this.cache = createModelCache(this.options.cacheStrategy);
|
|
46
|
-
this.loader = new
|
|
47
|
-
|
|
80
|
+
this.loader = new ModelManager(this.options, this.cache);
|
|
81
|
+
const defaultEngine = options.runtime ? null : new InferenceEngine();
|
|
82
|
+
this.engine = options.runtime ?? defaultEngine;
|
|
83
|
+
this.inferenceEngine = defaultEngine;
|
|
84
|
+
// Routing is opt-in: only enabled when serverUrl + apiKey + routing are set.
|
|
85
|
+
if (serverUrl && apiKey && this.options.routing) {
|
|
86
|
+
this.routingClient = new RoutingClient({
|
|
87
|
+
serverUrl,
|
|
88
|
+
apiKey,
|
|
89
|
+
cacheTtlMs: this.options.routing.cacheTtlMs,
|
|
90
|
+
prefer: this.options.routing.prefer,
|
|
91
|
+
});
|
|
92
|
+
}
|
|
48
93
|
if (this.options.telemetry) {
|
|
49
94
|
this.telemetry = new TelemetryReporter({
|
|
50
95
|
url: this.options.telemetryUrl,
|
|
51
|
-
apiKey
|
|
96
|
+
apiKey,
|
|
97
|
+
authHeadersProvider: () => this.deviceContext?.authHeaders() ?? null,
|
|
98
|
+
orgId,
|
|
99
|
+
deviceId: this.deviceContext?.installationId,
|
|
52
100
|
});
|
|
101
|
+
if (this.deviceContext) {
|
|
102
|
+
this.telemetry.updateResource(this.deviceContext.telemetryResource());
|
|
103
|
+
}
|
|
53
104
|
}
|
|
54
105
|
}
|
|
55
106
|
// -----------------------------------------------------------------------
|
|
@@ -60,38 +111,56 @@ export class Octomil {
|
|
|
60
111
|
* inference session. Must be called before `predict()` or `chat()`.
|
|
61
112
|
*/
|
|
62
113
|
async load() {
|
|
63
|
-
this.
|
|
114
|
+
this.ensureNotClosed();
|
|
64
115
|
const start = performance.now();
|
|
65
|
-
const wasCached = await this.loader.isCached();
|
|
66
116
|
const modelData = await this.loader.load();
|
|
67
117
|
await this.engine.createSession(modelData, this.options.backend);
|
|
68
118
|
this.loaded = true;
|
|
69
119
|
const durationMs = performance.now() - start;
|
|
70
|
-
this.
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
120
|
+
this.telemetry?.reportDeployStarted(this.options.model, "latest");
|
|
121
|
+
this.telemetry?.reportDeployCompleted(this.options.model, "latest", durationMs);
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Explicitly warm up the ONNX runtime by running a minimal dummy inference.
|
|
125
|
+
*
|
|
126
|
+
* This pre-allocates internal buffers, compiles GPU shaders, and triggers
|
|
127
|
+
* any lazy initialisation that would otherwise happen on the first real
|
|
128
|
+
* `predict()` call. Useful for latency-sensitive applications that want
|
|
129
|
+
* predictable first-inference timing.
|
|
130
|
+
*
|
|
131
|
+
* Idempotent: calling `warmup()` after it has already completed is a no-op.
|
|
132
|
+
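* Requires `load()` to have been called first. When a custom `options.runtime` is supplied, no dummy inference is run and the call only sets the warmed-up flag.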
|
|
133
|
+
*/
|
|
134
|
+
async warmup() {
|
|
135
|
+
this.ensureReady();
|
|
136
|
+
if (this._warmedUp)
|
|
137
|
+
return;
|
|
138
|
+
// Build a minimal input tensor (1-element Float32) for the first input.
|
|
139
|
+
// The goal is to trigger ONNX runtime buffer allocation, not produce
|
|
140
|
+
// meaningful output.
|
|
141
|
+
const inputName = this.inferenceEngine
|
|
142
|
+
? this.inferenceEngine.inputNames[0]
|
|
143
|
+
: undefined;
|
|
144
|
+
if (inputName) {
|
|
145
|
+
const dummyTensors = {
|
|
146
|
+
[inputName]: {
|
|
147
|
+
data: new Float32Array([0]),
|
|
148
|
+
dims: [1, 1],
|
|
149
|
+
},
|
|
150
|
+
};
|
|
151
|
+
try {
|
|
152
|
+
await this.engine.run(dummyTensors);
|
|
153
|
+
}
|
|
154
|
+
catch {
|
|
155
|
+
// Warmup failures are non-fatal. The runtime may reject the dummy
|
|
156
|
+
// shape, but the internal buffers will still have been allocated.
|
|
157
|
+
}
|
|
94
158
|
}
|
|
159
|
+
this._warmedUp = true;
|
|
160
|
+
}
|
|
161
|
+
/** Whether `warmup()` has run to completion. A rejected dummy inference is swallowed, so this does not guarantee the dummy run succeeded. */
|
|
162
|
+
get isWarmedUp() {
|
|
163
|
+
return this._warmedUp;
|
|
95
164
|
}
|
|
96
165
|
// -----------------------------------------------------------------------
|
|
97
166
|
// Inference
|
|
@@ -104,14 +173,19 @@ export class Octomil {
|
|
|
104
173
|
*/
|
|
105
174
|
async predict(input) {
|
|
106
175
|
this.ensureReady();
|
|
176
|
+
// Attempt cloud routing if configured.
|
|
177
|
+
if (this.routingClient) {
|
|
178
|
+
const cloudResult = await this.tryCloudInference(input);
|
|
179
|
+
if (cloudResult)
|
|
180
|
+
return cloudResult;
|
|
181
|
+
}
|
|
182
|
+
// Local inference (default path).
|
|
107
183
|
const tensors = this.prepareTensors(input);
|
|
184
|
+
this.telemetry?.reportInferenceStarted(this.options.model, { target: "device" });
|
|
108
185
|
const result = await this.engine.run(tensors);
|
|
109
|
-
this.
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
durationMs: result.latencyMs,
|
|
113
|
-
metadata: { backend: this.engine.activeBackend },
|
|
114
|
-
timestamp: Date.now(),
|
|
186
|
+
this.telemetry?.reportInferenceCompleted(this.options.model, result.latencyMs, {
|
|
187
|
+
backend: this.inferenceEngine?.activeBackend ?? "unknown",
|
|
188
|
+
target: "device",
|
|
115
189
|
});
|
|
116
190
|
return result;
|
|
117
191
|
}
|
|
@@ -130,95 +204,158 @@ export class Octomil {
|
|
|
130
204
|
results.push(result);
|
|
131
205
|
}
|
|
132
206
|
const totalMs = performance.now() - start;
|
|
133
|
-
this.
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
durationMs: totalMs,
|
|
137
|
-
metadata: {
|
|
138
|
-
backend: this.engine.activeBackend,
|
|
139
|
-
batchSize: inputs.length,
|
|
140
|
-
},
|
|
141
|
-
timestamp: Date.now(),
|
|
207
|
+
this.telemetry?.reportInferenceCompleted(this.options.model, totalMs, {
|
|
208
|
+
backend: this.inferenceEngine?.activeBackend ?? "unknown",
|
|
209
|
+
batchSize: inputs.length,
|
|
142
210
|
});
|
|
143
211
|
return results;
|
|
144
212
|
}
|
|
145
213
|
/**
|
|
146
214
|
* OpenAI-compatible chat completion.
|
|
147
|
-
*
|
|
148
|
-
*
|
|
215
|
+
*
|
|
216
|
+
* @deprecated Use `client.chat.create()` instead. This method will be
|
|
217
|
+
* removed in the next major version.
|
|
149
218
|
*/
|
|
150
|
-
async
|
|
151
|
-
this.
|
|
152
|
-
|
|
153
|
-
|
|
219
|
+
async createChat(messages, options = {}) {
|
|
220
|
+
return this.chat.create(messages, options);
|
|
221
|
+
}
|
|
222
|
+
/**
|
|
223
|
+
* Streaming chat — yields chunks as they arrive.
|
|
224
|
+
*
|
|
225
|
+
* @deprecated Use `client.chat.stream()` instead. This method will be
|
|
226
|
+
* removed in the next major version.
|
|
227
|
+
*/
|
|
228
|
+
async *createChatStream(messages, options = {}) {
|
|
229
|
+
yield* this.chat.stream(messages, options);
|
|
230
|
+
}
|
|
231
|
+
// -----------------------------------------------------------------------
|
|
232
|
+
// Cloud Streaming Inference (SSE)
|
|
233
|
+
// -----------------------------------------------------------------------
|
|
234
|
+
/**
|
|
235
|
+
* Stream tokens from the cloud inference endpoint via SSE.
|
|
236
|
+
*
|
|
237
|
+
* Consumes `POST /api/v1/inference/stream` and yields `StreamToken`
|
|
238
|
+
* objects as they arrive. Requires `serverUrl` and `apiKey` to be
|
|
239
|
+
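* configured; the constructor derives both from `options.auth` (`auth.serverUrl`, and `auth.apiKey` or `auth.bootstrapToken`).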
|
|
240
|
+
*
|
|
241
|
+
* @param modelId - Model identifier (e.g. `"phi-4-mini"`).
|
|
242
|
+
* @param input - Plain string prompt or chat-style messages.
|
|
243
|
+
* @param parameters - Generation parameters (temperature, max_tokens, etc.).
|
|
244
|
+
* @param signal - Optional AbortSignal for cancellation.
|
|
245
|
+
*/
|
|
246
|
+
async *predictStream(modelId, input, parameters, signal) {
|
|
247
|
+
if (!this.options.serverUrl || !this.options.apiKey) {
|
|
248
|
+
throw new OctomilError("INFERENCE_FAILED", "predictStream() requires serverUrl and apiKey to be configured.");
|
|
154
249
|
}
|
|
155
|
-
const
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
});
|
|
160
|
-
const start = performance.now();
|
|
161
|
-
let content = "";
|
|
162
|
-
const generator = streaming.stream(this.options.model, {
|
|
163
|
-
messages,
|
|
164
|
-
temperature: options.temperature,
|
|
165
|
-
max_tokens: options.maxTokens,
|
|
166
|
-
top_p: options.topP,
|
|
167
|
-
}, { modality: "text", signal: options.signal });
|
|
168
|
-
for await (const chunk of generator) {
|
|
169
|
-
if (typeof chunk.data === "string") {
|
|
170
|
-
content += chunk.data;
|
|
171
|
-
}
|
|
250
|
+
const url = `${this.options.serverUrl.replace(/\/+$/, "")}/api/v1/inference/stream`;
|
|
251
|
+
const body = { model_id: modelId };
|
|
252
|
+
if (typeof input === "string") {
|
|
253
|
+
body.input_data = input;
|
|
172
254
|
}
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
255
|
+
else {
|
|
256
|
+
body.messages = input;
|
|
257
|
+
}
|
|
258
|
+
if (parameters) {
|
|
259
|
+
body.parameters = parameters;
|
|
260
|
+
}
|
|
261
|
+
const headers = {
|
|
262
|
+
"Content-Type": "application/json",
|
|
263
|
+
Accept: "text/event-stream",
|
|
264
|
+
Authorization: `Bearer ${this.options.apiKey}`,
|
|
176
265
|
};
|
|
266
|
+
let response;
|
|
267
|
+
try {
|
|
268
|
+
response = await fetch(url, {
|
|
269
|
+
method: "POST",
|
|
270
|
+
headers,
|
|
271
|
+
body: JSON.stringify(body),
|
|
272
|
+
signal,
|
|
273
|
+
});
|
|
274
|
+
}
|
|
275
|
+
catch (err) {
|
|
276
|
+
throw new OctomilError("NETWORK_UNAVAILABLE", `predictStream request failed: ${String(err)}`, err);
|
|
277
|
+
}
|
|
278
|
+
if (!response.ok) {
|
|
279
|
+
throw new OctomilError("INFERENCE_FAILED", `predictStream failed: HTTP ${response.status}`);
|
|
280
|
+
}
|
|
281
|
+
if (!response.body) {
|
|
282
|
+
throw new OctomilError("INFERENCE_FAILED", "Server did not return a streaming body.");
|
|
283
|
+
}
|
|
284
|
+
const reader = response.body.getReader();
|
|
285
|
+
const decoder = new TextDecoder();
|
|
286
|
+
let buffer = "";
|
|
287
|
+
let chunkIndex = 0;
|
|
288
|
+
try {
|
|
289
|
+
while (true) {
|
|
290
|
+
const { done, value } = await reader.read();
|
|
291
|
+
if (done)
|
|
292
|
+
break;
|
|
293
|
+
buffer += decoder.decode(value, { stream: true });
|
|
294
|
+
const lines = buffer.split("\n");
|
|
295
|
+
buffer = lines.pop() ?? "";
|
|
296
|
+
for (const line of lines) {
|
|
297
|
+
if (!line.startsWith("data:"))
|
|
298
|
+
continue;
|
|
299
|
+
const data = line.slice(5).trim();
|
|
300
|
+
if (!data)
|
|
301
|
+
continue;
|
|
302
|
+
let parsed;
|
|
303
|
+
try {
|
|
304
|
+
parsed = JSON.parse(data);
|
|
305
|
+
}
|
|
306
|
+
catch {
|
|
307
|
+
continue;
|
|
308
|
+
}
|
|
309
|
+
this.telemetry?.reportChunkProduced(modelId, chunkIndex);
|
|
310
|
+
chunkIndex++;
|
|
311
|
+
yield {
|
|
312
|
+
token: parsed.token ?? "",
|
|
313
|
+
done: parsed.done ?? false,
|
|
314
|
+
provider: parsed.provider,
|
|
315
|
+
latencyMs: parsed.latency_ms,
|
|
316
|
+
sessionId: parsed.session_id,
|
|
317
|
+
};
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
finally {
|
|
322
|
+
reader.releaseLock();
|
|
323
|
+
}
|
|
177
324
|
}
|
|
325
|
+
// -----------------------------------------------------------------------
|
|
326
|
+
// Embeddings
|
|
327
|
+
// -----------------------------------------------------------------------
|
|
178
328
|
/**
|
|
179
|
-
*
|
|
329
|
+
* Generate embeddings via the Octomil cloud endpoint.
|
|
330
|
+
*
|
|
331
|
+
* Requires `serverUrl` and `apiKey` to be configured.
|
|
332
|
+
*
|
|
333
|
+
* @param modelId - Embedding model identifier (e.g. `"nomic-embed-text"`).
|
|
334
|
+
* @param input - A single string or array of strings to embed.
|
|
335
|
+
* @param signal - Optional AbortSignal for cancellation.
|
|
180
336
|
*/
|
|
181
|
-
async
|
|
182
|
-
this.
|
|
183
|
-
|
|
184
|
-
throw new OctomilError("INFERENCE_FAILED", "chatStream() requires serverUrl to be configured.");
|
|
185
|
-
}
|
|
186
|
-
const streaming = new StreamingInferenceEngine({
|
|
187
|
-
serverUrl: this.options.serverUrl,
|
|
188
|
-
apiKey: this.options.apiKey,
|
|
189
|
-
onTelemetry: (e) => this.trackEvent(e),
|
|
190
|
-
});
|
|
191
|
-
const generator = streaming.stream(this.options.model, {
|
|
192
|
-
messages,
|
|
193
|
-
temperature: options.temperature,
|
|
194
|
-
max_tokens: options.maxTokens,
|
|
195
|
-
top_p: options.topP,
|
|
196
|
-
}, { modality: "text", signal: options.signal });
|
|
197
|
-
for await (const chunk of generator) {
|
|
198
|
-
yield {
|
|
199
|
-
index: chunk.index,
|
|
200
|
-
content: typeof chunk.data === "string" ? chunk.data : JSON.stringify(chunk.data),
|
|
201
|
-
done: chunk.done,
|
|
202
|
-
role: "assistant",
|
|
203
|
-
};
|
|
337
|
+
async embed(modelId, input, signal) {
|
|
338
|
+
if (!this.options.serverUrl || !this.options.apiKey) {
|
|
339
|
+
throw new OctomilError("NETWORK_UNAVAILABLE", "embed() requires serverUrl and apiKey to be configured.");
|
|
204
340
|
}
|
|
341
|
+
return embedFn(this.options.serverUrl, this.options.apiKey, modelId, input, signal);
|
|
205
342
|
}
|
|
206
343
|
// -----------------------------------------------------------------------
|
|
207
344
|
// Cache
|
|
208
345
|
// -----------------------------------------------------------------------
|
|
209
346
|
/** Check whether the model binary is currently cached locally. */
|
|
210
347
|
async isCached() {
|
|
211
|
-
this.
|
|
348
|
+
this.ensureNotClosed();
|
|
212
349
|
return this.loader.isCached();
|
|
213
350
|
}
|
|
214
351
|
/** Remove the cached model binary. */
|
|
215
352
|
async clearCache() {
|
|
216
|
-
this.
|
|
353
|
+
this.ensureNotClosed();
|
|
217
354
|
return this.loader.clearCache();
|
|
218
355
|
}
|
|
219
356
|
/** Get cache metadata for the model. */
|
|
220
357
|
async cacheInfo() {
|
|
221
|
-
this.
|
|
358
|
+
this.ensureNotClosed();
|
|
222
359
|
return this.loader.getCacheInfo();
|
|
223
360
|
}
|
|
224
361
|
// -----------------------------------------------------------------------
|
|
@@ -226,47 +363,206 @@ export class Octomil {
|
|
|
226
363
|
// -----------------------------------------------------------------------
|
|
227
364
|
/** The inference backend currently in use (after `load()`). */
|
|
228
365
|
get activeBackend() {
|
|
229
|
-
return this.
|
|
366
|
+
return this.inferenceEngine?.activeBackend ?? null;
|
|
230
367
|
}
|
|
231
368
|
/** Input tensor names defined by the loaded model. */
|
|
232
369
|
get inputNames() {
|
|
233
370
|
this.ensureReady();
|
|
234
|
-
|
|
371
|
+
if (!this.inferenceEngine) {
|
|
372
|
+
throw new OctomilError("INVALID_INPUT", "inputNames not available with custom runtime");
|
|
373
|
+
}
|
|
374
|
+
return this.inferenceEngine.inputNames;
|
|
235
375
|
}
|
|
236
376
|
/** Output tensor names defined by the loaded model. */
|
|
237
377
|
get outputNames() {
|
|
238
378
|
this.ensureReady();
|
|
239
|
-
|
|
379
|
+
if (!this.inferenceEngine) {
|
|
380
|
+
throw new OctomilError("INVALID_INPUT", "outputNames not available with custom runtime");
|
|
381
|
+
}
|
|
382
|
+
return this.inferenceEngine.outputNames;
|
|
240
383
|
}
|
|
241
384
|
/** Whether `load()` has been called successfully. */
|
|
242
385
|
get isLoaded() {
|
|
243
386
|
return this.loaded;
|
|
244
387
|
}
|
|
245
388
|
// -----------------------------------------------------------------------
|
|
389
|
+
// Chat namespace (OpenAI-compatible chat completions)
|
|
390
|
+
// -----------------------------------------------------------------------
|
|
391
|
+
/**
|
|
392
|
+
* Lazily-created `ChatClient` providing `chat.create()` and
|
|
393
|
+
* `chat.stream()` methods for OpenAI-compatible chat completions.
|
|
394
|
+
*
|
|
395
|
+
* Uses a local responses runtime when configured, otherwise falls back to
|
|
396
|
+
* the configured server-backed responses client.
|
|
397
|
+
*
|
|
398
|
+
* @example
|
|
399
|
+
* ```ts
|
|
400
|
+
* const response = await client.chat.create([
|
|
401
|
+
* { role: 'user', content: 'Hello!' },
|
|
402
|
+
* ]);
|
|
403
|
+
* ```
|
|
404
|
+
*/
|
|
405
|
+
get chat() {
|
|
406
|
+
if (!this._chat) {
|
|
407
|
+
this._chat = new ChatClient({
|
|
408
|
+
model: this.options.model,
|
|
409
|
+
serverUrl: this.options.serverUrl,
|
|
410
|
+
apiKey: this.options.apiKey,
|
|
411
|
+
getResponses: () => this.responses,
|
|
412
|
+
ensureReady: () => this.ensureReady(),
|
|
413
|
+
});
|
|
414
|
+
}
|
|
415
|
+
return this._chat;
|
|
416
|
+
}
|
|
417
|
+
// -----------------------------------------------------------------------
|
|
418
|
+
// Responses namespace (Layer 2 — structured response API)
|
|
419
|
+
// -----------------------------------------------------------------------
|
|
420
|
+
/**
|
|
421
|
+
* Lazily-created `ResponsesClient` providing `responses.create()` and
|
|
422
|
+
* `responses.stream()` methods for the structured response API.
|
|
423
|
+
*
|
|
424
|
+
* Uses a configured local responses runtime when available; otherwise uses
|
|
425
|
+
* the server-backed responses API. `apiKey` is optional but recommended for
|
|
426
|
+
* server-backed usage.
|
|
427
|
+
*/
|
|
428
|
+
get responses() {
|
|
429
|
+
if (!this._responses) {
|
|
430
|
+
this._responses = new ResponsesClient({
|
|
431
|
+
serverUrl: this.options.serverUrl,
|
|
432
|
+
apiKey: this.options.apiKey,
|
|
433
|
+
telemetry: this.telemetry,
|
|
434
|
+
deviceContext: this.deviceContext,
|
|
435
|
+
localRuntime: this.options.responsesRuntime,
|
|
436
|
+
});
|
|
437
|
+
}
|
|
438
|
+
return this._responses;
|
|
439
|
+
}
|
|
440
|
+
// -----------------------------------------------------------------------
|
|
441
|
+
// Control namespace (device registration + heartbeat)
|
|
442
|
+
// -----------------------------------------------------------------------
|
|
443
|
+
/**
|
|
444
|
+
* Lazily-created `ControlClient` providing `control.register()`,
|
|
445
|
+
* `control.heartbeat()`, and `control.refresh()` methods.
|
|
446
|
+
*
|
|
447
|
+
* Uses the configured `serverUrl`, `apiKey`, and any `orgId`
|
|
448
|
+
* inferred from the options.
|
|
449
|
+
*/
|
|
450
|
+
get control() {
|
|
451
|
+
if (!this._control) {
|
|
452
|
+
this._control = new ControlClient({
|
|
453
|
+
serverUrl: this.options.serverUrl,
|
|
454
|
+
apiKey: this.options.apiKey,
|
|
455
|
+
orgId: this.options.auth?.type === "org_api_key" ? this.options.auth.orgId : undefined,
|
|
456
|
+
deviceContext: this.deviceContext,
|
|
457
|
+
telemetry: this.telemetry,
|
|
458
|
+
});
|
|
459
|
+
}
|
|
460
|
+
return this._control;
|
|
461
|
+
}
|
|
462
|
+
// -----------------------------------------------------------------------
|
|
463
|
+
// Capabilities namespace (device capability profiling)
|
|
464
|
+
// -----------------------------------------------------------------------
|
|
465
|
+
/**
|
|
466
|
+
* Lazily-created `CapabilitiesClient` providing `capabilities.current()`
|
|
467
|
+
* to detect the full device capability profile.
|
|
468
|
+
*/
|
|
469
|
+
get capabilities() {
|
|
470
|
+
if (!this._capabilities) {
|
|
471
|
+
this._capabilities = new CapabilitiesClient();
|
|
472
|
+
}
|
|
473
|
+
return this._capabilities;
|
|
474
|
+
}
|
|
475
|
+
// -----------------------------------------------------------------------
|
|
476
|
+
// Models namespace (status / load / unload / list / clearCache)
|
|
477
|
+
// -----------------------------------------------------------------------
|
|
478
|
+
/**
|
|
479
|
+
* Lazily-created `ModelsClient` providing `models.status()`,
|
|
480
|
+
* `models.load()`, `models.unload()`, `models.list()`, and
|
|
481
|
+
* `models.clearCache()`.
|
|
482
|
+
*/
|
|
483
|
+
get models() {
|
|
484
|
+
if (!this._models) {
|
|
485
|
+
this._models = new ModelsClient(this.options.model, this.loader, () => {
|
|
486
|
+
// When ModelsClient.load() succeeds, mark the engine as loaded
|
|
487
|
+
// so that predict()/chat() work without a separate load() call.
|
|
488
|
+
// Note: the engine session is NOT created here — callers should
|
|
489
|
+
// still use OctomilClient.load() for full setup. This callback
|
|
490
|
+
// ensures the downloading→ready state transition is tracked.
|
|
491
|
+
});
|
|
492
|
+
}
|
|
493
|
+
return this._models;
|
|
494
|
+
}
|
|
495
|
+
// -----------------------------------------------------------------------
|
|
496
|
+
// Audio namespace (transcriptions)
|
|
497
|
+
// -----------------------------------------------------------------------
|
|
498
|
+
/**
|
|
499
|
+
* Lazily-created `OctomilAudio` providing
|
|
500
|
+
* `audio.transcriptions.create()` for speech-to-text.
|
|
501
|
+
*
|
|
502
|
+
* Requires `serverUrl` and `apiKey` to be configured.
|
|
503
|
+
*
|
|
504
|
+
* @example
|
|
505
|
+
* ```ts
|
|
506
|
+
* const result = await client.audio.transcriptions.create({
|
|
507
|
+
* file: audioBlob,
|
|
508
|
+
* model: 'whisper-large-v3',
|
|
509
|
+
* });
|
|
510
|
+
* console.log(result.text);
|
|
511
|
+
* ```
|
|
512
|
+
*/
|
|
513
|
+
get audio() {
|
|
514
|
+
if (!this._audio) {
|
|
515
|
+
if (!this.options.serverUrl || !this.options.apiKey) {
|
|
516
|
+
throw new OctomilError("INVALID_INPUT", "audio requires serverUrl and apiKey to be configured.");
|
|
517
|
+
}
|
|
518
|
+
this._audio = new OctomilAudio(this.options.serverUrl, this.options.apiKey);
|
|
519
|
+
}
|
|
520
|
+
return this._audio;
|
|
521
|
+
}
|
|
522
|
+
/**
|
|
523
|
+
* Lazily-created `OctomilText` providing `text.predictions.create()`
|
|
524
|
+
* for browser-local text inference via the loaded model.
|
|
525
|
+
*/
|
|
526
|
+
get text() {
|
|
527
|
+
if (!this._text) {
|
|
528
|
+
this._text = new OctomilText((input) => this.predict(input));
|
|
529
|
+
}
|
|
530
|
+
return this._text;
|
|
531
|
+
}
|
|
532
|
+
// -----------------------------------------------------------------------
|
|
246
533
|
// Cleanup
|
|
247
534
|
// -----------------------------------------------------------------------
|
|
248
535
|
/** Release all resources (WASM memory, WebGPU device, telemetry). */
|
|
249
|
-
|
|
250
|
-
if (this.
|
|
536
|
+
close() {
|
|
537
|
+
if (this.closed)
|
|
251
538
|
return;
|
|
252
|
-
this.
|
|
539
|
+
this.closed = true;
|
|
253
540
|
this.loaded = false;
|
|
254
541
|
this.engine.dispose();
|
|
255
|
-
this.telemetry?.
|
|
542
|
+
this.telemetry?.close();
|
|
256
543
|
this.telemetry = null;
|
|
544
|
+
this._responses = null;
|
|
545
|
+
this._chat = null;
|
|
546
|
+
this._control?.stopHeartbeat();
|
|
547
|
+
this._control = null;
|
|
548
|
+
this._capabilities = null;
|
|
549
|
+
this._models = null;
|
|
550
|
+
this._audio = null;
|
|
551
|
+
this._text = null;
|
|
552
|
+
this._warmedUp = false;
|
|
257
553
|
}
|
|
258
554
|
// -----------------------------------------------------------------------
|
|
259
555
|
// Private helpers
|
|
260
556
|
// -----------------------------------------------------------------------
|
|
261
|
-
|
|
262
|
-
if (this.
|
|
263
|
-
throw new OctomilError("
|
|
557
|
+
ensureNotClosed() {
|
|
558
|
+
if (this.closed) {
|
|
559
|
+
throw new OctomilError("CANCELLED", "This OctomilClient instance has been closed. Create a new one.");
|
|
264
560
|
}
|
|
265
561
|
}
|
|
266
562
|
ensureReady() {
|
|
267
|
-
this.
|
|
563
|
+
this.ensureNotClosed();
|
|
268
564
|
if (!this.loaded) {
|
|
269
|
-
throw new OctomilError("
|
|
565
|
+
throw new OctomilError("MODEL_LOAD_FAILED", "Model not loaded. Call load() before predict() or chat().");
|
|
270
566
|
}
|
|
271
567
|
}
|
|
272
568
|
/**
|
|
@@ -280,7 +576,7 @@ export class Octomil {
|
|
|
280
576
|
}
|
|
281
577
|
// { raw, dims } — wrap in the first input name.
|
|
282
578
|
if ("raw" in input && "dims" in input) {
|
|
283
|
-
const name = this.
|
|
579
|
+
const name = this.inferenceEngine.inputNames[0];
|
|
284
580
|
if (!name) {
|
|
285
581
|
throw new OctomilError("INVALID_INPUT", "Model has no input tensors defined.");
|
|
286
582
|
}
|
|
@@ -290,7 +586,7 @@ export class Octomil {
|
|
|
290
586
|
// Real tokenization would require a tokenizer; this is a minimal
|
|
291
587
|
// placeholder that works for models expecting raw code-point inputs.
|
|
292
588
|
if ("text" in input) {
|
|
293
|
-
const name = this.
|
|
589
|
+
const name = this.inferenceEngine.inputNames[0];
|
|
294
590
|
if (!name) {
|
|
295
591
|
throw new OctomilError("INVALID_INPUT", "Model has no input tensors defined.");
|
|
296
592
|
}
|
|
@@ -350,7 +646,7 @@ export class Octomil {
|
|
|
350
646
|
float[pixels + i] = rgba[i * 4 + 1] / 255; // G
|
|
351
647
|
float[2 * pixels + i] = rgba[i * 4 + 2] / 255; // B
|
|
352
648
|
}
|
|
353
|
-
const name = this.
|
|
649
|
+
const name = this.inferenceEngine?.inputNames[0];
|
|
354
650
|
if (!name) {
|
|
355
651
|
throw new OctomilError("INVALID_INPUT", "Model has no input tensors defined.");
|
|
356
652
|
}
|
|
@@ -361,8 +657,43 @@ export class Octomil {
|
|
|
361
657
|
},
|
|
362
658
|
};
|
|
363
659
|
}
|
|
364
|
-
|
|
365
|
-
|
|
660
|
+
/**
|
|
661
|
+
* Attempt routing + cloud inference. Returns a PredictOutput if the
|
|
662
|
+
* routing decision is "cloud" and the cloud call succeeds, or `null`
|
|
663
|
+
* to fall back to local inference.
|
|
664
|
+
*/
|
|
665
|
+
async tryCloudInference(input) {
|
|
666
|
+
try {
|
|
667
|
+
if (!this.deviceCaps) {
|
|
668
|
+
this.deviceCaps = await detectDeviceCapabilities();
|
|
669
|
+
}
|
|
670
|
+
const routing = this.options.routing;
|
|
671
|
+
const decision = await this.routingClient.route(this.options.model, routing.modelParams ?? 0, routing.modelSizeMb ?? 0, this.deviceCaps);
|
|
672
|
+
if (!decision || decision.target !== "cloud") {
|
|
673
|
+
return null;
|
|
674
|
+
}
|
|
675
|
+
const start = performance.now();
|
|
676
|
+
const cloudResponse = await this.routingClient.cloudInfer(this.options.model, input);
|
|
677
|
+
const latencyMs = performance.now() - start;
|
|
678
|
+
this.telemetry?.reportInferenceCompleted(this.options.model, latencyMs, {
|
|
679
|
+
target: "cloud",
|
|
680
|
+
provider: cloudResponse.provider,
|
|
681
|
+
routingId: decision.id,
|
|
682
|
+
});
|
|
683
|
+
// Wrap the cloud output in PredictOutput shape.
|
|
684
|
+
return {
|
|
685
|
+
tensors: {},
|
|
686
|
+
latencyMs,
|
|
687
|
+
...(typeof cloudResponse.output === "object" &&
|
|
688
|
+
cloudResponse.output !== null
|
|
689
|
+
? cloudResponse.output
|
|
690
|
+
: { label: String(cloudResponse.output) }),
|
|
691
|
+
};
|
|
692
|
+
}
|
|
693
|
+
catch {
|
|
694
|
+
// Any failure in routing/cloud → fall back to local inference silently.
|
|
695
|
+
return null;
|
|
696
|
+
}
|
|
366
697
|
}
|
|
367
698
|
}
|
|
368
699
|
//# sourceMappingURL=octomil.js.map
|