npm - @dxos/plugin-transformer - Versions diffs - 0.7.5-staging.2ff1350 - Mend

@dxos/plugin-transformer 0.7.5-staging.2ff1350

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

package/LICENSE +8 -0
package/README.md +15 -0
package/dist/lib/browser/index.mjs +52 -0
package/dist/lib/browser/index.mjs.map +7 -0
package/dist/lib/browser/meta.json +1 -0
package/dist/lib/browser/types/index.mjs +1 -0
package/dist/lib/browser/types/index.mjs.map +7 -0
package/dist/lib/node/index.cjs +71 -0
package/dist/lib/node/index.cjs.map +7 -0
package/dist/lib/node/meta.json +1 -0
package/dist/lib/node/types/index.cjs +2 -0
package/dist/lib/node/types/index.cjs.map +7 -0
package/dist/lib/node-esm/index.mjs +54 -0
package/dist/lib/node-esm/index.mjs.map +7 -0
package/dist/lib/node-esm/meta.json +1 -0
package/dist/lib/node-esm/types/index.mjs +2 -0
package/dist/lib/node-esm/types/index.mjs.map +7 -0
package/dist/types/src/TransformerPlugin.d.ts +2 -0
package/dist/types/src/TransformerPlugin.d.ts.map +1 -0
package/dist/types/src/capabilities/index.d.ts +1 -0
package/dist/types/src/capabilities/index.d.ts.map +1 -0
package/dist/types/src/components/DebugInfo.d.ts +14 -0
package/dist/types/src/components/DebugInfo.d.ts.map +1 -0
package/dist/types/src/components/Voice.d.ts +7 -0
package/dist/types/src/components/Voice.d.ts.map +1 -0
package/dist/types/src/components/Voice.stories.d.ts +8 -0
package/dist/types/src/components/Voice.stories.d.ts.map +1 -0
package/dist/types/src/hooks/index.d.ts +3 -0
package/dist/types/src/hooks/index.d.ts.map +1 -0
package/dist/types/src/hooks/useAudioStream.d.ts +12 -0
package/dist/types/src/hooks/useAudioStream.d.ts.map +1 -0
package/dist/types/src/hooks/usePipeline.d.ts +41 -0
package/dist/types/src/hooks/usePipeline.d.ts.map +1 -0
package/dist/types/src/index.d.ts +3 -0
package/dist/types/src/index.d.ts.map +1 -0
package/dist/types/src/meta.d.ts +10 -0
package/dist/types/src/meta.d.ts.map +1 -0
package/dist/types/src/testing/model.test.d.ts +1 -0
package/dist/types/src/testing/model.test.d.ts.map +1 -0
package/dist/types/src/testing/node-pipeline.d.ts +12 -0
package/dist/types/src/testing/node-pipeline.d.ts.map +1 -0
package/dist/types/src/testing/pipeline.d.ts +28 -0
package/dist/types/src/testing/pipeline.d.ts.map +1 -0
package/dist/types/src/testing/pipeline.test.d.ts +2 -0
package/dist/types/src/testing/pipeline.test.d.ts.map +1 -0
package/dist/types/src/testing/web-pipeline.d.ts +12 -0
package/dist/types/src/testing/web-pipeline.d.ts.map +1 -0
package/dist/types/src/translations.d.ts +9 -0
package/dist/types/src/translations.d.ts.map +1 -0
package/dist/types/src/types/index.d.ts +1 -0
package/dist/types/src/types/index.d.ts.map +1 -0
package/dist/types/tsconfig.tsbuildinfo +1 -0
package/package.json +80 -0
package/src/TransformerPlugin.tsx +34 -0
package/src/capabilities/index.ts +3 -0
package/src/components/DebugInfo.tsx +79 -0
package/src/components/Voice.stories.tsx +32 -0
package/src/components/Voice.tsx +110 -0
package/src/hooks/index.ts +6 -0
package/src/hooks/useAudioStream.ts +252 -0
package/src/hooks/usePipeline.ts +153 -0
package/src/index.ts +7 -0
package/src/meta.ts +16 -0
package/src/testing/model.test.ts +3 -0
package/src/testing/node-pipeline.ts +35 -0
package/src/testing/pipeline.test.ts +90 -0
package/src/testing/pipeline.ts +74 -0
package/src/testing/web-pipeline.ts +46 -0
package/src/translations.ts +15 -0
package/src/types/index.ts +3 -0

package/package.json ADDED Viewed

@@ -0,0 +1,80 @@
+{
+  "name": "@dxos/plugin-transformer",
+  "version": "0.7.5-staging.2ff1350",
+  "description": "Client transformer",
+  "homepage": "https://dxos.org",
+  "bugs": "https://github.com/dxos/dxos/issues",
+  "license": "MIT",
+  "author": "DXOS.org",
+  "sideEffects": true,
+  "type": "module",
+  "exports": {
+    ".": {
+      "types": "./dist/types/src/index.d.ts",
+      "browser": "./dist/lib/browser/index.mjs",
+      "node": "./dist/lib/node-esm/index.mjs"
+    },
+    "./types": {
+      "types": "./dist/types/src/types/index.d.ts",
+      "browser": "./dist/lib/browser/types/index.mjs",
+      "node": "./dist/lib/node-esm/types/index.mjs"
+    }
+  },
+  "types": "dist/types/src/index.d.ts",
+  "typesVersions": {
+    "*": {
+      "types": [
+        "dist/types/src/types/index.d.ts"
+      ]
+    }
+  },
+  "files": [
+    "dist",
+    "src"
+  ],
+  "dependencies": {
+    "@effect/schema": "^0.75.5",
+    "@huggingface/transformers": "^3.3.3",
+    "@preact/signals-core": "^1.6.0",
+    "date-fns": "^3.3.1",
+    "lodash.get": "^4.4.2",
+    "@dxos/async": "0.7.5-staging.2ff1350",
+    "@dxos/app-framework": "0.7.5-staging.2ff1350",
+    "@dxos/display-name": "0.7.5-staging.2ff1350",
+    "@dxos/echo-schema": "0.7.5-staging.2ff1350",
+    "@dxos/invariant": "0.7.5-staging.2ff1350",
+    "@dxos/live-object": "0.7.5-staging.2ff1350",
+    "@dxos/log": "0.7.5-staging.2ff1350",
+    "@dxos/plugin-graph": "0.7.5-staging.2ff1350",
+    "@dxos/plugin-client": "0.7.5-staging.2ff1350",
+    "@dxos/react-client": "0.7.5-staging.2ff1350",
+    "@dxos/plugin-space": "0.7.5-staging.2ff1350",
+    "@dxos/react-ui-attention": "0.7.5-staging.2ff1350",
+    "@dxos/react-ui-stack": "0.7.5-staging.2ff1350",
+    "@dxos/util": "0.7.5-staging.2ff1350"
+  },
+  "devDependencies": {
+    "@phosphor-icons/react": "^2.1.5",
+    "@types/lodash.get": "^4.4.7",
+    "@types/react": "~18.2.0",
+    "@types/react-dom": "~18.2.0",
+    "@xenova/transformers": "^2.17.2",
+    "react": "~18.2.0",
+    "react-dom": "~18.2.0",
+    "vite": "5.4.7",
+    "@dxos/random": "0.7.5-staging.2ff1350",
+    "@dxos/react-ui": "0.7.5-staging.2ff1350",
+    "@dxos/react-ui-theme": "0.7.5-staging.2ff1350",
+    "@dxos/storybook-utils": "0.7.5-staging.2ff1350"
+  },
+  "peerDependencies": {
+    "@phosphor-icons/react": "^2.1.5",
+    "react": "~18.2.0",
+    "react-dom": "~18.2.0",
+    "@dxos/react-ui": "0.7.5-staging.2ff1350",
+    "@dxos/react-ui-theme": "0.7.5-staging.2ff1350"
+  },
+  "publishConfig": {
+    "access": "public"
+  }
+}

package/src/TransformerPlugin.tsx ADDED Viewed

@@ -0,0 +1,34 @@
+//
+// Copyright 2024 DXOS.org
+//
+import { Capabilities, contributes, defineModule, definePlugin, Events } from '@dxos/app-framework';
+import { ClientCapabilities, ClientEvents } from '@dxos/plugin-client';
+// import { IntentResolver } from './capabilities';
+import { meta } from './meta';
+import translations from './translations';
+/**
+ * Plugin factory. Builds the plugin definition from `meta` with three modules:
+ * translations, metadata and schema (the latter two currently contribute nothing).
+ */
+export const TransformerPlugin = () => {
+  // Contributes this package's translation resources during translation setup.
+  const translationsModule = defineModule({
+    id: `${meta.id}/module/translations`,
+    activatesOn: Events.SetupTranslations,
+    activate: () => contributes(Capabilities.Translations, translations),
+  });
+  // Placeholder: activates on metadata setup but returns no contributions.
+  const metadataModule = defineModule({
+    id: `${meta.id}/module/metadata`,
+    activatesOn: Events.SetupMetadata,
+    activate: () => [],
+  });
+  // Placeholder: contributes an empty schema list during client schema setup.
+  const schemaModule = defineModule({
+    id: `${meta.id}/module/schema`,
+    activatesOn: ClientEvents.SetupSchema,
+    activate: () => contributes(ClientCapabilities.Schema, []),
+  });
+  return definePlugin(meta, [
+    translationsModule,
+    metadataModule,
+    schemaModule,
+    // defineModule({
+    //   id: `${meta.id}/module/intent-resolver`,
+    //   activatesOn: Events.SetupIntentResolver,
+    //   activate: IntentResolver,
+    // }),
+  ]);
+};

package/src/capabilities/index.ts ADDED Viewed

@@ -0,0 +1,3 @@
+//
+// Copyright 2025 DXOS.org
+//

package/src/components/DebugInfo.tsx ADDED Viewed

@@ -0,0 +1,79 @@
+//
+// Copyright 2025 DXOS.org
+//
+import React, { type FC } from 'react';
+export type DebugInfoProps = { // Display state rendered by the DebugInfo component.
+  error: string; // Message shown in the red "Error:" banner when non-empty.
+  isModelLoading: boolean; // When true, the "Loading model..." notice is shown.
+  stream: MediaStream | null; // Non-null means the microphone is reported as active.
+  isTranscribing: boolean; // When true, "Processing audio..." is shown.
+  transcription: string; // Text shown under the "Transcription:" heading.
+  audioLevel: number; // Level-meter value; the bar width is computed as audioLevel / 255.
+  gpuInfo: string; // Shown on the "GPU:" line; 'Not available' is shown when empty.
+  model: string; // Shown on the "Model:" line.
+  debug: boolean; // Enables the level meter and the technical detail list.
+};
+/**
+ * Status panel for the voice pipeline.
+ *
+ * Shows an error banner, a model-loading notice, and either the active-microphone view
+ * (optional level meter, processing notice, debug details, transcription) or an
+ * "inactive" notice. All props are optional; `debug` defaults to false.
+ */
+export const DebugInfo: FC<Partial<DebugInfoProps>> = ({
+  error,
+  isModelLoading,
+  stream,
+  isTranscribing,
+  transcription,
+  audioLevel,
+  gpuInfo,
+  model,
+  debug = false,
+}) => {
+  // Test the type rather than truthiness: `debug && audioLevel && ...` evaluates to the
+  // number 0 for a silent input, and React renders that as a literal "0" text node.
+  const showLevelMeter = debug && typeof audioLevel === 'number';
+  return (
+    <div className='p-4'>
+      {error && (
+        <div className='mb-4 text-red-600'>
+          <strong>Error:</strong> {error}
+        </div>
+      )}
+      {isModelLoading && (
+        <div className='mb-4'>
+          <div>Loading model...</div>
+          <div className='text-sm text-gray-500'>This may take a few moments</div>
+        </div>
+      )}
+      {stream ? (
+        <div>
+          <div className='mb-2 text-green-600'>
+            <strong>Status:</strong> Microphone is active
+            {showLevelMeter && (
+              <div className='mt-2 w-48 h-5 bg-gray-200 rounded relative'>
+                <div
+                  className='h-full bg-green-500 transition-all duration-100 rounded'
+                  style={{ width: `${(audioLevel / 255) * 100}%` }}
+                />
+              </div>
+            )}
+          </div>
+          {isTranscribing && <div className='mb-2 text-gray-500'>Processing audio...</div>}
+          {debug && (
+            <div className='mb-4 text-sm text-gray-500 space-y-1'>
+              <div>Model: {model}</div>
+              <div>Sample Rate: 16000 Hz</div>
+              <div>Format: audio/wav</div>
+              <div>Chunk Size: 10 seconds</div>
+              <div>GPU: {gpuInfo || 'Not available'}</div>
+              <div>Backend: WebGPU</div>
+            </div>
+          )}
+          {transcription && (
+            <div className='mt-4'>
+              <strong>Transcription:</strong>
+              <p className='mt-2 p-4 bg-gray-100 rounded whitespace-pre-wrap'>{transcription}</p>
+            </div>
+          )}
+        </div>
+      ) : (
+        <div>{!isModelLoading && !error && <div className='text-gray-500'>Microphone is inactive</div>}</div>
+      )}
+    </div>
+  );
+};

package/src/components/Voice.stories.tsx ADDED Viewed

@@ -0,0 +1,32 @@
+//
+// Copyright 2025 DXOS.org
+//
+import '@dxos-theme';
+import type { Meta, StoryObj } from '@storybook/react';
+import { withLayout, withTheme } from '@dxos/storybook-utils';
+import { Voice } from './Voice';
+const meta: Meta<typeof Voice> = { // Storybook metadata for the Voice component stories.
+  title: 'plugins/plugin-transformer/Voice',
+  component: Voice,
+  decorators: [withTheme, withLayout()], // Decorators imported from @dxos/storybook-utils.
+  parameters: {
+    layout: 'centered',
+  },
+};
+export default meta;
+type Story = StoryObj<typeof Voice>;
+export const Default: Story = { // Default story: active, debug output on, whisper-tiny model.
+  args: {
+    debug: true,
+    active: true,
+    model: 'Xenova/whisper-tiny', // Overrides the component default ('Xenova/whisper-base').
+  },
+};

package/src/components/Voice.tsx ADDED Viewed

@@ -0,0 +1,110 @@
+//
+// Copyright 2025 DXOS.org
+//
+import React, { useState, useCallback, useEffect } from 'react';
+import { log } from '@dxos/log';
+import { DebugInfo } from './DebugInfo';
+import { useAudioStream, usePipeline } from '../hooks';
+export type VoiceProps = { // Props for the Voice component.
+  active?: boolean; // Forwarded to both usePipeline and useAudioStream.
+  debug?: boolean; // Forwarded to the hooks and DebugInfo; also gates the state logging effects.
+  model?: string; // Model id forwarded to usePipeline; Voice defaults it to 'Xenova/whisper-base'.
+};
+/**
+ * Voice transcription component.
+ *
+ * Loads a transcription pipeline (`usePipeline`) and opens the microphone (`useAudioStream`),
+ * then renders the combined state through `DebugInfo`.
+ *
+ * NOTE: `handleAudioData` is not yet passed to `useAudioStream` (the `onAudioData` option is
+ * commented out), so no audio is transcribed until it is wired up.
+ */
+export const Voice = ({ active, debug, model = 'Xenova/whisper-base' }: VoiceProps) => {
+  const [isTranscribing, setIsTranscribing] = useState(false);
+  const [transcription, setTranscription] = useState<string>('');
+  const {
+    transcribe,
+    gpuInfo,
+    isLoaded: isModelLoaded,
+    isLoading: isModelLoading,
+    error: pipelineError,
+  } = usePipeline({ active, debug, model });
+  const {
+    stream,
+    error: audioError,
+    audioLevel,
+  } = useAudioStream({
+    active,
+    debug,
+    // onAudioData: handleAudioData
+  });
+  // Transcribes one audio chunk and appends the recognized text to the running transcription.
+  const handleAudioData = useCallback(
+    async (audioData: Float32Array) => {
+      // Ignore chunks until the model is ready, and drop chunks that arrive mid-transcription.
+      if (!isModelLoaded || isTranscribing) {
+        return;
+      }
+      setIsTranscribing(true);
+      try {
+        const result = await transcribe(audioData, {
+          sampling_rate: 16000,
+          chunk_length_s: 5,
+          stride_length_s: 1,
+          return_timestamps: false,
+          language: 'english',
+        });
+        // Trim each segment and join with exactly one space, so the transcript has no
+        // leading blank and no doubled spaces between segments.
+        const text = result?.text?.trim();
+        if (text) {
+          setTranscription((prev) => (prev ? `${prev} ${text}` : text));
+        }
+      } catch (err) {
+        log.error('transcription error', { err });
+        throw err;
+      } finally {
+        setIsTranscribing(false);
+      }
+    },
+    // `isModelLoaded` is read above and must be listed; otherwise a callback created before
+    // the model finished loading would keep returning early.
+    [transcribe, isModelLoaded, isTranscribing],
+  );
+  log.info('handleAudioData', { handleAudioData });
+  // Debug trace of the audio capture state.
+  useEffect(() => {
+    if (debug) {
+      log.info('audio state', {
+        hasStream: !!stream,
+        audioError,
+        audioLevel,
+        shouldBeActive: active && isModelLoaded,
+      });
+    }
+  }, [debug, stream, audioError, audioLevel, active, isModelLoaded]);
+  // Debug trace of the model/transcription state.
+  useEffect(() => {
+    if (debug) {
+      log.info('transcription state', {
+        active,
+        isModelLoaded,
+        isModelLoading,
+        isTranscribing,
+        pipelineError,
+      });
+    }
+  }, [active, debug, isModelLoaded, isModelLoading, pipelineError, isTranscribing]);
+  return (
+    <DebugInfo
+      error={audioError || pipelineError || undefined}
+      isModelLoading={isModelLoading}
+      stream={stream}
+      isTranscribing={isTranscribing}
+      transcription={transcription}
+      audioLevel={audioLevel}
+      gpuInfo={gpuInfo}
+      model={model}
+      debug={debug}
+    />
+  );
+};

package/src/hooks/index.ts ADDED Viewed

@@ -0,0 +1,6 @@
+//
+// Copyright 2025 DXOS.org
+//
+export * from './useAudioStream';
+export * from './usePipeline';

package/src/hooks/useAudioStream.ts ADDED Viewed

@@ -0,0 +1,252 @@
+//
+// Copyright 2025 DXOS.org
+//
+import { useState, useRef, useEffect, useCallback } from 'react';
+import { log } from '@dxos/log';
+export type AudioStreamConfig = { // Options accepted by useAudioStream.
+  active?: boolean; // The stream is started only when true; resources are cleaned up when false.
+  debug?: boolean; // When true, the analyser is configured and audioLevel updates are started.
+  onAudioData?: (audioData: Float32Array) => Promise<void>; // Awaited for each 'audio-data' chunk posted by the worklet.
+};
+export type AudioStreamState = { // State returned by useAudioStream.
+  stream: MediaStream | null; // The captured MediaStream once setup succeeds; null otherwise.
+  error: string | null; // Error message from microphone access or audio processing, if any.
+  audioLevel: number; // Average of the analyser's byte frequency data; starts at 0.
+};
+/**
+ * React hook that captures microphone audio while `active` is true.
+ *
+ * Requests a single-channel 16 kHz stream via `getUserMedia`, connects it through an
+ * AnalyserNode to an AudioWorklet that accumulates samples, and awaits `onAudioData` for each
+ * batch the worklet posts (once at least 32000 samples have accumulated). When `debug` is
+ * set, the average of the analyser's byte frequency data is published as `audioLevel` on
+ * every animation frame.
+ *
+ * All audio resources are released when `active` becomes false, when a dependency changes,
+ * or when the component unmounts.
+ *
+ * @returns The current stream, the last error message (or null) and the audio level.
+ */
+export const useAudioStream = ({ active, debug, onAudioData }: AudioStreamConfig) => {
+  const [state, setState] = useState<AudioStreamState>({
+    stream: null,
+    error: null,
+    audioLevel: 0,
+  });
+  // TODO(burdon): Convert to class.
+  const audioContextRef = useRef<AudioContext | null>(null);
+  const analyserRef = useRef<AnalyserNode | null>(null);
+  const animationFrameRef = useRef<number>();
+  const workletNodeRef = useRef<AudioWorkletNode | null>(null);
+  const isProcessingRef = useRef(false);
+  const mediaStreamRef = useRef<MediaStream | null>(null);
+  const audioBufferRef = useRef<Float32Array[]>([]);
+  // Stats for visualization. Re-schedules itself until cleanup() clears the analyser.
+  const updateAudioLevel = useCallback(() => {
+    if (analyserRef.current) {
+      const dataArray = new Uint8Array(analyserRef.current.frequencyBinCount);
+      analyserRef.current.getByteFrequencyData(dataArray);
+      const average = dataArray.reduce((acc, val) => acc + val, 0) / dataArray.length;
+      setState((prev) => ({ ...prev, audioLevel: average }));
+      animationFrameRef.current = requestAnimationFrame(updateAudioLevel);
+    }
+  }, []);
+  // Releases every audio resource held in refs and resets the state (including `error`).
+  const cleanup = useCallback(() => {
+    log('cleaning up audio resources');
+    // Stop all tracks.
+    if (mediaStreamRef.current) {
+      mediaStreamRef.current.getTracks().forEach((track) => {
+        track.stop();
+        track.enabled = false;
+      });
+      mediaStreamRef.current = null;
+    }
+    // Disconnect and cleanup audio nodes.
+    if (workletNodeRef.current) {
+      workletNodeRef.current.disconnect();
+      workletNodeRef.current = null;
+    }
+    if (analyserRef.current) {
+      analyserRef.current.disconnect();
+      analyserRef.current = null;
+    }
+    if (audioContextRef.current) {
+      void audioContextRef.current.close();
+      audioContextRef.current = null;
+    }
+    if (animationFrameRef.current) {
+      cancelAnimationFrame(animationFrameRef.current);
+      animationFrameRef.current = undefined;
+    }
+    audioBufferRef.current = [];
+    setState({
+      stream: null,
+      error: null,
+      audioLevel: 0,
+    });
+  }, []);
+  useEffect(() => {
+    // Set to false by this effect's teardown; async steps check it after every await.
+    let mounted = true;
+    const startStream = async () => {
+      if (!active) {
+        return;
+      }
+      try {
+        cleanup();
+        log.info('initializing audio stream...');
+        const stream = await navigator.mediaDevices.getUserMedia({
+          audio: {
+            channelCount: 1,
+            sampleRate: 16_000,
+            echoCancellation: true,
+            noiseSuppression: true,
+            autoGainControl: true,
+          },
+          video: false,
+        });
+        if (!mounted) {
+          // Torn down while waiting for permission: release the stream we just obtained.
+          stream.getTracks().forEach((track) => {
+            track.stop();
+            track.enabled = false;
+          });
+          return;
+        }
+        mediaStreamRef.current = stream;
+        // Create AudioContext for proper audio format.
+        const context = new AudioContext({ sampleRate: 16_000 });
+        // Register the context right away so cleanup() can close it even if the effect is
+        // torn down while the worklet module is still loading.
+        audioContextRef.current = context;
+        // Add the audio worklet module.
+        const workletUrl = URL.createObjectURL(
+          new Blob(
+            [
+              `class AudioProcessor extends AudioWorkletProcessor {
+                    constructor() {
+                      super();
+                      this._buffer = [];
+                      this._samplesProcessed = 0;
+                    }
+                    process(inputs, outputs) {
+                      const input = inputs[0];
+                      const channel = input[0];
+                      if (channel) {
+                        this._buffer.push(new Float32Array(channel));
+                        this._samplesProcessed += channel.length;
+                        // Process every 2 seconds (32000 samples at 16kHz).
+                        if (this._samplesProcessed >= 32000) {
+                          const combinedLength = this._buffer.reduce((acc, curr) => acc + curr.length, 0);
+                          const combinedAudio = new Float32Array(combinedLength);
+                          let offset = 0;
+                          for (const buffer of this._buffer) {
+                            combinedAudio.set(buffer, offset);
+                            offset += buffer.length;
+                          }
+                          this.port.postMessage({ type: 'audio-data', data: combinedAudio });
+                          // Reset buffer and counter.
+                          this._buffer = [];
+                          this._samplesProcessed = 0;
+                        }
+                      }
+                      return true;
+                    }
+                  }
+                  registerProcessor('audio-processor', AudioProcessor);`,
+            ],
+            { type: 'application/javascript' },
+          ),
+        );
+        try {
+          await context.audioWorklet.addModule(workletUrl);
+        } finally {
+          // The module has been fetched (or has failed); release the Blob URL either way.
+          URL.revokeObjectURL(workletUrl);
+        }
+        if (!mounted) {
+          // The effect teardown already ran cleanup(), which closed the context and stopped
+          // the stream through the refs. Do not build the graph for a dead effect.
+          return;
+        }
+        const source = context.createMediaStreamSource(stream);
+        const analyser = context.createAnalyser();
+        analyserRef.current = analyser;
+        // Create and connect the audio worklet node.
+        const workletNode = new AudioWorkletNode(context, 'audio-processor');
+        workletNodeRef.current = workletNode;
+        workletNode.port.onmessage = async (event) => {
+          if (!mounted) {
+            return;
+          }
+          if (event.data.type === 'audio-data') {
+            isProcessingRef.current = true;
+            try {
+              const samples: Float32Array = event.data.data;
+              // Scan with a loop; spreading tens of thousands of samples into
+              // Math.min/Math.max can exceed the engine's argument-count limit.
+              let min = Infinity;
+              let max = -Infinity;
+              for (const sample of samples) {
+                if (sample < min) {
+                  min = sample;
+                }
+                if (sample > max) {
+                  max = sample;
+                }
+              }
+              log('processing audio', {
+                sampleRate: context.sampleRate,
+                length: samples.length,
+                min,
+                max,
+              });
+              await onAudioData?.(samples);
+            } catch (err) {
+              if (mounted) {
+                const message = err instanceof Error ? err.message : String(err);
+                setState((prev) => ({
+                  ...prev,
+                  error: 'Error processing audio: ' + message,
+                }));
+              }
+              log.error('audio processing error', { err });
+            } finally {
+              isProcessingRef.current = false;
+            }
+          }
+        };
+        // Connect the audio nodes.
+        source.connect(analyser);
+        analyser.connect(workletNode);
+        workletNode.connect(context.destination);
+        if (debug) {
+          analyser.fftSize = 256;
+          updateAudioLevel();
+        }
+        setState({
+          stream,
+          error: null,
+          audioLevel: 0,
+        });
+      } catch (err) {
+        log.error('microphone error', { err });
+        if (!mounted) {
+          // The teardown already released this run's resources; calling cleanup() here
+          // could destroy resources owned by a newer run of the effect.
+          return;
+        }
+        // Clean up first: cleanup() resets `error` to null, so the message must be set after
+        // it or it would be wiped out immediately.
+        cleanup();
+        const message = err instanceof Error ? err.message : String(err);
+        setState((prev) => ({
+          ...prev,
+          error: 'Error accessing microphone: ' + message,
+          stream: null,
+        }));
+      }
+    };
+    void startStream();
+    return () => {
+      mounted = false;
+      cleanup();
+    };
+  }, [active, debug, onAudioData, updateAudioLevel, cleanup]);
+  // Release resources as soon as the hook is deactivated.
+  useEffect(() => {
+    if (!active) {
+      cleanup();
+    }
+  }, [active, cleanup]);
+  return state;
+};