parakeet.js 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,327 @@
1
+ import React, { useState, useRef, useEffect } from 'react';
2
+ import { ParakeetModel, getParakeetModel } from 'parakeet.js';
3
+ import './App.css';
4
+
5
+ export default function App() {
6
+ const repoId = 'ysdede/parakeet-tdt-0.6b-v2-onnx';
7
+ const [backend, setBackend] = useState('webgpu-hybrid');
8
+ const [quant, setQuant] = useState('fp32');
9
+ const [preprocessor, setPreprocessor] = useState('nemo128');
10
+ const [status, setStatus] = useState('Idle');
11
+ const [progress, setProgress] = useState('');
12
+ const [progressText, setProgressText] = useState('');
13
+ const [progressPct, setProgressPct] = useState(null);
14
+ const [text, setText] = useState('');
15
+ const [latestMetrics, setLatestMetrics] = useState(null);
16
+ const [transcriptions, setTranscriptions] = useState([]);
17
+ const [isTranscribing, setIsTranscribing] = useState(false);
18
+ const [verboseLog, setVerboseLog] = useState(false);
19
+ const [decoderInt8, setDecoderInt8] = useState(true);
20
+ const [frameStride, setFrameStride] = useState(1);
21
+ const [dumpDetail, setDumpDetail] = useState(false);
22
+ const maxCores = navigator.hardwareConcurrency || 8;
23
+ const [cpuThreads, setCpuThreads] = useState(Math.max(1, maxCores - 2));
24
+ const modelRef = useRef(null);
25
+ const fileInputRef = useRef(null);
26
+
27
+ // Auto-adjust quant preset when backend changes
28
+ useEffect(() => {
29
+ if (backend.startsWith('webgpu')) {
30
+ setQuant('fp32');
31
+ } else if (backend === 'wasm') {
32
+ setQuant('int8');
33
+ }
34
+ }, [backend]);
35
+
36
+ async function loadModel() {
37
+ setStatus('Loading model…');
38
+ setProgress('');
39
+ setProgressText('');
40
+ setProgressPct(0);
41
+ console.time('LoadModel');
42
+
43
+ try {
44
+ const progressCallback = ({ loaded, total, file }) => {
45
+ const pct = total > 0 ? Math.round((loaded / total) * 100) : 0;
46
+ setProgressText(`${file}: ${pct}%`);
47
+ setProgressPct(pct);
48
+ };
49
+
50
+ // 1. Download all model files from HuggingFace Hub
51
+ const modelUrls = await getParakeetModel(repoId, {
52
+ quantization: quant,
53
+ preprocessor,
54
+ backend, // Pass backend to enable automatic fp32 selection for WebGPU
55
+ decoderInt8,
56
+ progress: progressCallback
57
+ });
58
+
59
+ // Show compiling sessions stage
60
+ setStatus('Creating sessions…');
61
+ setProgressText('Compiling model (this may take ~10 s)…');
62
+ setProgressPct(null);
63
+
64
+ // 2. Create the model instance with all file URLs
65
+ modelRef.current = await ParakeetModel.fromUrls({
66
+ ...modelUrls.urls,
67
+ filenames: modelUrls.filenames,
68
+ backend,
69
+ verbose: verboseLog,
70
+ decoderOnWasm: decoderInt8, // if we selected int8 decoder, keep it on WASM
71
+ decoderInt8,
72
+ cpuThreads,
73
+ });
74
+
75
+ // 3. Warm-up and verify
76
+ setStatus('Warming up & verifying…');
77
+ setProgressText('Running a test transcription…');
78
+ const expectedText = 'it is not life as we know or understand it';
79
+
80
+ try {
81
+ const audioRes = await fetch('/assets/life_Jim.wav');
82
+ const buf = await audioRes.arrayBuffer();
83
+ const audioCtx = new AudioContext({ sampleRate: 16000 });
84
+ const decoded = await audioCtx.decodeAudioData(buf);
85
+ const pcm = decoded.getChannelData(0);
86
+
87
+ const { utterance_text } = await modelRef.current.transcribe(pcm, 16000);
88
+
89
+ // Normalize both texts: lowercase and remove punctuation
90
+ const normalize = (str) => str.toLowerCase().replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g,"");
91
+
92
+ if (normalize(utterance_text).includes(normalize(expectedText))) {
93
+ console.log('[App] Model verification successful.');
94
+ setStatus('Model ready ✔');
95
+ } else {
96
+ console.error(`[App] Model verification failed! Expected: "${expectedText}", Got: "${utterance_text}"`);
97
+ setStatus('Model verification failed!');
98
+ }
99
+ } catch (err) {
100
+ console.error('[App] Warm-up transcription failed', err);
101
+ setStatus('Warm-up failed!');
102
+ }
103
+
104
+ console.timeEnd('LoadModel');
105
+ // setStatus('Model ready ✔'); // Status is now set by verification
106
+ setProgressText('');
107
+ setProgressPct(null);
108
+ } catch (e) {
109
+ console.error(e);
110
+ setStatus(`Failed: ${e.message}`);
111
+ setProgress('');
112
+ }
113
+ }
114
+
115
+ async function transcribeFile(e) {
116
+ if (!modelRef.current) return alert('Load model first');
117
+ const file = e.target.files?.[0];
118
+ if (!file) return;
119
+
120
+ setIsTranscribing(true);
121
+ setStatus(`Transcribing "${file.name}"…`);
122
+
123
+ try {
124
+ const buf = await file.arrayBuffer();
125
+ const audioCtx = new AudioContext({ sampleRate: 16000 });
126
+ const decoded = await audioCtx.decodeAudioData(buf);
127
+ const pcm = decoded.getChannelData(0);
128
+
129
+ console.time(`Transcribe-${file.name}`);
130
+ const res = await modelRef.current.transcribe(pcm, 16_000, {
131
+ returnTimestamps: true,
132
+ returnConfidences: true , frameStride
133
+ });
134
+ console.timeEnd(`Transcribe-${file.name}`);
135
+
136
+ if (dumpDetail) {
137
+ console.log('[Parakeet] Detailed transcription output', res);
138
+ }
139
+ setLatestMetrics(res.metrics);
140
+ // Add to transcriptions list
141
+ const newTranscription = {
142
+ id: Date.now(),
143
+ filename: file.name,
144
+ text: res.utterance_text,
145
+ timestamp: new Date().toLocaleTimeString(),
146
+ duration: pcm.length / 16000, // duration in seconds
147
+ wordCount: res.words?.length || 0,
148
+ confidence: res.confidence_scores?.overall_log_prob || null,
149
+ metrics: res.metrics
150
+ };
151
+
152
+ setTranscriptions(prev => [newTranscription, ...prev]);
153
+ setText(res.utterance_text); // Show latest transcription
154
+ setStatus('Model ready ✔'); // Ready for next file
155
+
156
+ } catch (error) {
157
+ console.error('Transcription failed:', error);
158
+ setStatus('Transcription failed');
159
+ alert(`Failed to transcribe "${file.name}": ${error.message}`);
160
+ } finally {
161
+ setIsTranscribing(false);
162
+ // Clear the file input so the same file can be selected again
163
+ if (fileInputRef.current) {
164
+ fileInputRef.current.value = '';
165
+ }
166
+ }
167
+ }
168
+
169
+ function clearTranscriptions() {
170
+ setTranscriptions([]);
171
+ setText('');
172
+ }
173
+
174
+ return (
175
+ <div className="app">
176
+ <h2>Parakeet JS React Demo</h2>
177
+
178
+ <div className="controls">
179
+ <p>
180
+ <strong>Model:</strong> {repoId}
181
+ </p>
182
+ </div>
183
+
184
+ <div className="controls">
185
+ <label>
186
+ Backend:
187
+ <select value={backend} onChange={e=>setBackend(e.target.value)}>
188
+ <option value="webgpu-hybrid">WebGPU (Hybrid)</option>
189
+ <option value="webgpu-strict">WebGPU (Strict)</option>
190
+ <option value="wasm">WASM (CPU)</option>
191
+ </select>
192
+ </label>
193
+ {' '}
194
+ <label>
195
+ Quant:
196
+ <select value={quant} onChange={e=>setQuant(e.target.value)}>
197
+ <option value="int8">int8 (faster)</option>
198
+ <option value="fp32">fp32 (higher quality)</option>
199
+ </select>
200
+ </label>
201
+ {' '}
202
+ {backend.startsWith('webgpu') && (
203
+ <label style={{ fontSize:'0.9em' }}>
204
+ <input type="checkbox" checked={decoderInt8} onChange={e=>setDecoderInt8(e.target.checked)} />
205
+ Decoder INT8 on CPU
206
+ </label>
207
+ )}
208
+ {' '}
209
+ <label>
210
+ Preprocessor:
211
+ <select value={preprocessor} onChange={e=>setPreprocessor(e.target.value)}>
212
+ <option value="nemo80">nemo80 (smaller)</option>
213
+ <option value="nemo128">nemo128 (default)</option>
214
+ </select>
215
+ </label>
216
+ {' '}
217
+ <label>
218
+ Stride:
219
+ <select value={frameStride} onChange={e=>setFrameStride(Number(e.target.value))}>
220
+ <option value={1}>1</option>
221
+ <option value={2}>2</option>
222
+ <option value={4}>4</option>
223
+ </select>
224
+ </label>
225
+ {' '}
226
+ <label>
227
+ <input type="checkbox" checked={verboseLog} onChange={e => setVerboseLog(e.target.checked)} />
228
+ Verbose Log
229
+ </label>
230
+ {' '}
231
+ <label style={{fontSize:'0.9em'}}>
232
+ <input type="checkbox" checked={dumpDetail} onChange={e=>setDumpDetail(e.target.checked)} />
233
+ Dump result to console
234
+ </label>
235
+ {(backend === 'wasm' || decoderInt8) && (
236
+ <label style={{fontSize:'0.9em'}}>
237
+ Threads:
238
+ <input type="number" min="1" max={maxCores} value={cpuThreads} onChange={e=>setCpuThreads(Number(e.target.value))} style={{width:'4rem'}} />
239
+ </label>
240
+ )}
241
+ <button
242
+ onClick={loadModel}
243
+ disabled={!status.toLowerCase().includes('fail') && status !== 'Idle'}
244
+ className="primary"
245
+ >
246
+ {status === 'Model ready ✔' ? 'Model Loaded' : 'Load Model'}
247
+ </button>
248
+ </div>
249
+
250
+ {typeof SharedArrayBuffer === 'undefined' && backend === 'wasm' && (
251
+ <div style={{
252
+ marginBottom: '1rem',
253
+ padding: '0.5rem',
254
+ backgroundColor: '#fff3cd',
255
+ border: '1px solid #ffeaa7',
256
+ borderRadius: '4px',
257
+ fontSize: '0.9em'
258
+ }}>
259
+ ⚠️ <strong>Performance Note:</strong> SharedArrayBuffer is not available.
260
+ WASM will run single-threaded. For better performance, serve over HTTPS
261
+ with proper headers or use WebGPU.
262
+ </div>
263
+ )}
264
+
265
+ <div className="controls">
266
+ <input
267
+ ref={fileInputRef}
268
+ type="file"
269
+ accept="audio/*"
270
+ onChange={transcribeFile}
271
+ disabled={status !== 'Model ready ✔' || isTranscribing}
272
+ />
273
+ {transcriptions.length > 0 && (
274
+ <button
275
+ onClick={clearTranscriptions}
276
+ style={{ marginLeft: '1rem', padding: '0.25rem 0.5rem' }}
277
+ >
278
+ Clear History
279
+ </button>
280
+ )}
281
+ </div>
282
+
283
+ <p>Status: {status}</p>
284
+ {progressPct!==null && (
285
+ <div className="progress-wrapper">
286
+ <div className="progress-bar"><div style={{ width: `${progressPct}%` }} /></div>
287
+ <p className="progress-text">{progressText}</p>
288
+ </div>
289
+ )}
290
+
291
+ {/* Latest transcription */}
292
+ <div className="controls">
293
+ <h3>Latest Transcription:</h3>
294
+ <textarea
295
+ value={text}
296
+ readOnly
297
+ className="textarea"
298
+ placeholder="Transcribed text will appear here..."
299
+ />
300
+ </div>
301
+
302
+ {/* Latest transcription performace info */}
303
+ {latestMetrics && (
304
+ <div className="performance">
305
+ <strong>RTF:</strong> {latestMetrics.rtf?.toFixed(2)}x &nbsp;|&nbsp; Total: {latestMetrics.total_ms} ms<br/>
306
+ Preprocess {latestMetrics.preprocess_ms} ms · Encode {latestMetrics.encode_ms} ms · Decode {latestMetrics.decode_ms} ms · Tokenize {latestMetrics.tokenize_ms} ms
307
+ </div>
308
+ )}
309
+
310
+ {/* Transcription history */}
311
+ {transcriptions.length > 0 && (
312
+ <div className="history">
313
+ <h3>Transcription History ({transcriptions.length} files):</h3>
314
+ <div style={{ maxHeight: '400px', overflowY: 'auto', border: '1px solid #ddd', borderRadius: '4px' }}>
315
+ {transcriptions.map((trans) => (
316
+ <div className="history-item" key={trans.id}>
317
+ <div className="history-meta"><strong>{trans.filename}</strong><span>{trans.timestamp}</span></div>
318
+ <div className="history-stats">Duration: {trans.duration.toFixed(1)}s | Words: {trans.wordCount}{trans.confidence && ` | Confidence: ${trans.confidence.toFixed(2)}`}{trans.metrics && ` | RTF: ${trans.metrics.rtf?.toFixed(2)}x`}</div>
319
+ <div className="history-text">{trans.text}</div>
320
+ </div>
321
+ ))}
322
+ </div>
323
+ </div>
324
+ )}
325
+ </div>
326
+ );
327
+ }
@@ -0,0 +1,6 @@
1
+ import React from 'react';
2
+ import { createRoot } from 'react-dom/client';
3
+ import App from './App.jsx';
4
+
5
+ const root = createRoot(document.getElementById('root'));
6
+ root.render(<App />);
@@ -0,0 +1,41 @@
1
+ import { defineConfig } from 'vite';
2
+ import react from '@vitejs/plugin-react';
3
+ import fs from 'fs';
4
+ import path from 'path';
5
+
6
+ // Optional HTTPS setup - only if certificates exist
7
+ let httpsConfig = false;
8
+ try {
9
+ const keyPath = path.resolve('./localhost-key.pem');
10
+ const certPath = path.resolve('./localhost.pem');
11
+
12
+ if (fs.existsSync(keyPath) && fs.existsSync(certPath)) {
13
+ httpsConfig = {
14
+ key: fs.readFileSync(keyPath),
15
+ cert: fs.readFileSync(certPath),
16
+ };
17
+ console.log('✅ HTTPS enabled with local certificates');
18
+ } else {
19
+ console.log('ℹ️ No local certificates found, running on HTTP');
20
+ }
21
+ } catch (err) {
22
+ console.log('ℹ️ HTTPS setup failed, running on HTTP:', err.message);
23
+ }
24
+
25
+ export default defineConfig({
26
+ plugins: [react()],
27
+ server: {
28
+ port: 5173,
29
+ ...(httpsConfig && { https: httpsConfig }),
30
+ headers: {
31
+ 'Cross-Origin-Opener-Policy': 'same-origin',
32
+ 'Cross-Origin-Embedder-Policy': 'require-corp',
33
+ },
34
+ },
35
+ optimizeDeps: {
36
+ include: ['onnxruntime-web'],
37
+ },
38
+ define: {
39
+ global: 'globalThis',
40
+ },
41
+ });
package/package.json ADDED
@@ -0,0 +1,30 @@
1
+ {
2
+ "name": "parakeet.js",
3
+ "version": "0.0.1",
4
+ "description": "NVIDIA Parakeet speech recognition for the browser (WebGPU/WASM) powered by ONNX Runtime Web.",
5
+ "type": "module",
6
+ "exports": {
7
+ ".": "./src/index.js"
8
+ },
9
+ "keywords": [
10
+ "parakeet",
11
+ "speech",
12
+ "onnx",
13
+ "webgpu",
14
+ "wasm",
15
+ "transcription"
16
+ ],
17
+ "dependencies": {
18
+ "onnxruntime-web": "1.22.0-dev.20250409-89f8206ba4"
19
+ },
20
+ "author": "Yunus Seyhan Dede",
21
+ "license": "MIT",
22
+ "homepage": "https://github.com/ysdede/parakeet.js",
23
+ "repository": {
24
+ "type": "git",
25
+ "url": "git+https://github.com/ysdede/parakeet.js.git"
26
+ },
27
+ "bugs": {
28
+ "url": "https://github.com/ysdede/parakeet.js/issues"
29
+ }
30
+ }
package/src/backend.js ADDED
@@ -0,0 +1,99 @@
1
// Back-end initialisation helper for ONNX Runtime Web.
// At runtime the caller can specify preferred backend ("webgpu", "wasm").
// The function resolves once ONNX Runtime is ready and returns the `ort` module.

/**
 * Initialise ONNX Runtime Web and pick the execution provider.
 * If WebGPU is requested but not supported, we transparently fall back to WASM.
 *
 * Side effects: mutates the global `ort.env.wasm` settings (paths, threads,
 * SIMD, proxy) and stashes the final backend choice on `ort._selectedBackend`.
 *
 * @param {Object} opts
 * @param {('webgpu'|'wasm')} [opts.backend='webgpu'] Desired backend.
 * @param {string} [opts.wasmPaths] Optional path prefix for WASM binaries.
 *   NOTE(review): accepted but not currently read in this body — confirm
 *   whether it should override the CDN default below.
 * @param {number} [opts.numThreads] WASM thread count; defaults to
 *   `navigator.hardwareConcurrency` (or 4) when SharedArrayBuffer exists.
 * @returns {Promise<typeof import('onnxruntime-web').default>}
 */
export async function initOrt({ backend = 'webgpu', wasmPaths, numThreads } = {}) {
  // Dynamic import to handle Vite bundling issues
  let ort;

  try {
    const ortModule = await import('onnxruntime-web');
    // Some bundlers expose the API on `default`, others on the namespace itself.
    ort = ortModule.default || ortModule;

    // Debug: Check the structure of ort
    console.log('[Parakeet.js] ORT structure:', {
      hasDefault: !!ortModule.default,
      hasEnv: !!ort.env,
      hasWasm: !!ort.env?.wasm,
      hasWebgpu: !!ort.env?.webgpu,
      keys: Object.keys(ort).slice(0, 10) // Show first 10 keys
    });

    // If still no env, try accessing it differently
    if (!ort.env) {
      console.log('[Parakeet.js] Trying alternative access patterns...');
      console.log('[Parakeet.js] ortModule keys:', Object.keys(ortModule));

      // Sometimes the module structure is nested
      if (ortModule.ort) {
        ort = ortModule.ort;
        console.log('[Parakeet.js] Found ort in ortModule.ort');
      }
    }
  } catch (e) {
    console.error('[Parakeet.js] Failed to import onnxruntime-web:', e);
    throw new Error('Failed to load ONNX Runtime Web. Please check your network connection.');
  }

  if (!ort || !ort.env) {
    throw new Error('ONNX Runtime Web loaded but env is not available. This might be a bundling issue.');
  }

  // Set up WASM paths first (needed for all backends)
  if (!ort.env.wasm.wasmPaths) {
    // Use the same version as in package.json
    const ver = '1.22.0-dev.20250409-89f8206ba4';
    ort.env.wasm.wasmPaths = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${ver}/dist/`;
  }

  // Configure WASM for better performance
  // NOTE(review): this check only matches 'wasm'/'webgpu' exactly; callers
  // passing values like 'webgpu-hybrid' would skip it — confirm callers
  // normalize the backend string first.
  if (backend === 'wasm' || backend === 'webgpu') {
    // Enable multi-threading if supported (requires cross-origin isolation)
    if (typeof SharedArrayBuffer !== 'undefined') {
      ort.env.wasm.numThreads = numThreads || navigator.hardwareConcurrency || 4;
      ort.env.wasm.simd = true;
      console.log(`[Parakeet.js] WASM configured with ${ort.env.wasm.numThreads} threads, SIMD enabled`);
    } else {
      console.warn('[Parakeet.js] SharedArrayBuffer not available - using single-threaded WASM');
      ort.env.wasm.numThreads = 1;
    }

    // Enable other WASM optimizations
    ort.env.wasm.proxy = false; // Direct execution for better performance
  }

  if (backend === 'webgpu') {
    // Check WebGPU support properly
    const webgpuSupported = 'gpu' in navigator;
    console.log(`[Parakeet.js] WebGPU supported: ${webgpuSupported}`);

    if (webgpuSupported) {
      try {
        // In newer versions of ONNX Runtime Web, WebGPU initialization is automatic
        // No need to call ort.env.webgpu.init() manually
        console.log('[Parakeet.js] WebGPU will be initialized automatically when creating session');
      } catch (error) {
        console.warn('[Parakeet.js] WebGPU initialization failed:', error);
        console.warn('[Parakeet.js] Falling back to WASM');
        backend = 'wasm';
      }
    } else {
      console.warn('[Parakeet.js] WebGPU not supported – falling back to WASM');
      backend = 'wasm';
    }
  }

  // Store the final backend choice for use in model selection
  ort._selectedBackend = backend;

  // Return the ort module for use in creating sessions and tensors
  return ort;
}