npm - @tryhamster/gerbil - Versions diffs - 1.0.0-rc.9 → 1.0.1 - Mend

@tryhamster/gerbil 1.0.0-rc.9 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (179) hide show

package/LICENSE +1 -1
package/README.md +318 -104
package/dist/architectures-C1I5V3Dt.mjs +6070 -0
package/dist/architectures-C1I5V3Dt.mjs.map +1 -0
package/dist/browser/index.d.ts +276 -590
package/dist/browser/index.d.ts.map +1 -1
package/dist/browser/index.js +592 -2334
package/dist/browser/index.js.map +1 -1
package/dist/cli.mjs +625 -1098
package/dist/cli.mjs.map +1 -1
package/dist/defaults-9komdrbY.mjs +24 -0
package/dist/defaults-9komdrbY.mjs.map +1 -0
package/dist/frameworks/express.d.mts +1 -3
package/dist/frameworks/express.d.mts.map +1 -1
package/dist/frameworks/express.mjs +7 -7
package/dist/frameworks/express.mjs.map +1 -1
package/dist/frameworks/fastify.d.mts +1 -1
package/dist/frameworks/fastify.d.mts.map +1 -1
package/dist/frameworks/fastify.mjs +3 -3
package/dist/frameworks/fastify.mjs.map +1 -1
package/dist/frameworks/hono.d.mts +1 -1
package/dist/frameworks/hono.d.mts.map +1 -1
package/dist/frameworks/hono.mjs +4 -4
package/dist/frameworks/hono.mjs.map +1 -1
package/dist/frameworks/next.d.mts +3 -2
package/dist/frameworks/next.d.mts.map +1 -1
package/dist/frameworks/next.mjs +4 -4
package/dist/frameworks/next.mjs.map +1 -1
package/dist/frameworks/react.d.mts +1 -1
package/dist/frameworks/trpc.d.mts +1 -1
package/dist/frameworks/trpc.d.mts.map +1 -1
package/dist/frameworks/trpc.mjs +4 -4
package/dist/frameworks/trpc.mjs.map +1 -1
package/dist/gerbil-BetB5xb0.d.mts +488 -0
package/dist/gerbil-BetB5xb0.d.mts.map +1 -0
package/dist/gerbil-CTZUa8EZ.mjs +4 -0
package/dist/gerbil-DNniplr4.mjs +1656 -0
package/dist/gerbil-DNniplr4.mjs.map +1 -0
package/dist/gpu/hooks.d.mts +640 -0
package/dist/gpu/hooks.d.mts.map +1 -0
package/dist/gpu/hooks.mjs +1369 -0
package/dist/gpu/hooks.mjs.map +1 -0
package/dist/gpu/index.d.mts +2 -0
package/dist/gpu/index.mjs +6 -0
package/dist/gpu-DFuglcEx.mjs +3790 -0
package/dist/gpu-DFuglcEx.mjs.map +1 -0
package/dist/index-Dgmb2kE3.d.mts +245 -0
package/dist/index-Dgmb2kE3.d.mts.map +1 -0
package/dist/index-DukkJRMj.d.mts +2114 -0
package/dist/index-DukkJRMj.d.mts.map +1 -0
package/dist/index.d.mts +22 -487
package/dist/index.d.mts.map +1 -1
package/dist/index.mjs +13 -8
package/dist/index.mjs.map +1 -1
package/dist/indexeddb-store-BWIMtxxH.mjs +103 -0
package/dist/indexeddb-store-BWIMtxxH.mjs.map +1 -0
package/dist/indexeddb-store-ClH12Xnl.mjs +4 -0
package/dist/integrations/ai-sdk.d.mts +75 -6
package/dist/integrations/ai-sdk.d.mts.map +1 -1
package/dist/integrations/ai-sdk.mjs +131 -15
package/dist/integrations/ai-sdk.mjs.map +1 -1
package/dist/integrations/langchain.d.mts +1 -1
package/dist/integrations/langchain.d.mts.map +1 -1
package/dist/integrations/langchain.mjs +5 -5
package/dist/integrations/langchain.mjs.map +1 -1
package/dist/integrations/llamaindex.d.mts +1 -1
package/dist/integrations/llamaindex.d.mts.map +1 -1
package/dist/integrations/llamaindex.mjs +5 -5
package/dist/integrations/llamaindex.mjs.map +1 -1
package/dist/integrations/mcp-client.mjs +3 -3
package/dist/integrations/mcp-client.mjs.map +1 -1
package/dist/integrations/mcp.d.mts +3 -2
package/dist/integrations/mcp.d.mts.map +1 -1
package/dist/integrations/mcp.mjs +5 -5
package/dist/{mcp-BvbriaBy.mjs → mcp-D2vvH1Xc.mjs} +4 -4
package/dist/mcp-D2vvH1Xc.mjs.map +1 -0
package/dist/memory/index.d.mts +3 -0
package/dist/memory/index.mjs +6 -0
package/dist/memory-D1P7Tmda.mjs +4 -0
package/dist/memory-DVN0MnIG.mjs +132 -0
package/dist/memory-DVN0MnIG.mjs.map +1 -0
package/dist/memory-Dj0J1v88.mjs +294 -0
package/dist/memory-Dj0J1v88.mjs.map +1 -0
package/dist/moonshine-stt-17dpP1kr.mjs +4 -0
package/dist/moonshine-stt-4ojLtMq7.mjs +11962 -0
package/dist/moonshine-stt-4ojLtMq7.mjs.map +1 -0
package/dist/{one-liner-s-lD8rCC.mjs → one-liner-JhdIPxzF.mjs} +14 -16
package/dist/one-liner-JhdIPxzF.mjs.map +1 -0
package/dist/repl-BDRkwPGX.mjs +9 -0
package/dist/skills/index.d.mts +270 -320
package/dist/skills/index.d.mts.map +1 -1
package/dist/skills/index.mjs +5 -5
package/dist/{skills-CD3Orlex.mjs → skills-CU694Dc8.mjs} +187 -32
package/dist/skills-CU694Dc8.mjs.map +1 -0
package/dist/{tools-Bi1P7Xoy.mjs → tools-DQ1mPUw5.mjs} +34 -22
package/dist/tools-DQ1mPUw5.mjs.map +1 -0
package/dist/types-DQBe2lFo.d.mts +165 -0
package/dist/types-DQBe2lFo.d.mts.map +1 -0
package/dist/{types-CiTc7ez3.d.mts → types-LlyYILII.d.mts} +112 -14
package/dist/types-LlyYILII.d.mts.map +1 -0
package/dist/{utils-CZBZ8dgR.mjs → utils-DKO55ZmZ.mjs} +1 -1
package/dist/{utils-CZBZ8dgR.mjs.map → utils-DKO55ZmZ.mjs.map} +1 -1
package/dist/vector-B0panuy6.mjs +95 -0
package/dist/vector-B0panuy6.mjs.map +1 -0
package/docs/PROJECT-STATE.md +321 -0
package/docs/adding-a-model-family.md +280 -0
package/docs/ai-sdk.md +70 -61
package/docs/architecture/overview.md +17 -7
package/docs/browser.md +203 -8
package/docs/embeddings.md +156 -0
package/docs/gerbil-site-native-migration.md +217 -0
package/docs/gpu-engine/architectures.md +398 -0
package/docs/gpu-engine/ir.md +372 -0
package/docs/gpu-engine/kernels.md +718 -0
package/docs/gpu-engine/paper.html +1759 -0
package/docs/gpu-engine/paper.md +2109 -0
package/docs/gpu-engine/safetensors.md +312 -0
package/docs/gpu-engine/tokenizer.md +302 -0
package/docs/memory-rag.md +91 -0
package/docs/metal-safari-intel.md +190 -0
package/docs/mobile-failure-diagnosis.md +124 -0
package/docs/mobile.md +99 -0
package/docs/observability.md +230 -0
package/docs/onnx-removal-plan.md +339 -0
package/docs/research/autoresearch-portable.md +904 -0
package/docs/research/dispatch-reduction-hivemind.md +84 -0
package/docs/research/ios-safari-model-caching.md +117 -0
package/docs/research/mobile-webgpu-speed-fusion.md +135 -0
package/docs/research/native-stt-model-selection.md +49 -0
package/docs/research/native-tts-model-selection.md +90 -0
package/docs/research/native-vs-chromium-decision.md +152 -0
package/docs/research/nemotron-mamba2-inference.md +910 -0
package/docs/research/qwen35-multimodal.md +293 -0
package/docs/research/qwen36-gemma4-targets.md +337 -0
package/docs/research/sota-embedding-models.md +179 -0
package/docs/research/sota-mobile-models-2026.md +263 -0
package/docs/research/sota-modality-models.md +202 -0
package/docs/research/tps-baselines.md +71 -0
package/docs/research/webgpu-m4-reference.md +104 -0
package/docs/site-update-plan.md +155 -0
package/docs/structured-output.md +123 -0
package/docs/stt.md +63 -446
package/docs/tts.md +77 -499
package/docs/vision.md +100 -338
package/package.json +22 -7
package/dist/chrome-backend-CORwaIyC.mjs +0 -1212
package/dist/chrome-backend-CORwaIyC.mjs.map +0 -1
package/dist/chrome-backend-DIKYoWj-.mjs +0 -3
package/dist/gerbil-CJ3ifloF.mjs +0 -4
package/dist/gerbil-Dw4Qj77e.mjs +0 -1631
package/dist/gerbil-Dw4Qj77e.mjs.map +0 -1
package/dist/gerbil-qOTe1nl2.d.mts +0 -431
package/dist/gerbil-qOTe1nl2.d.mts.map +0 -1
package/dist/kokoro-BNTb6egA.mjs +0 -20210
package/dist/kokoro-BNTb6egA.mjs.map +0 -1
package/dist/kokoro-CMOGDSgT.js +0 -20212
package/dist/kokoro-CMOGDSgT.js.map +0 -1
package/dist/mcp-BvbriaBy.mjs.map +0 -1
package/dist/one-liner-s-lD8rCC.mjs.map +0 -1
package/dist/repl-DveXw36T.mjs +0 -9
package/dist/skills-CD3Orlex.mjs.map +0 -1
package/dist/stt-Bu-E23Sc.js +0 -433
package/dist/stt-Bu-E23Sc.js.map +0 -1
package/dist/stt-CpLYbGFd.mjs +0 -433
package/dist/stt-CpLYbGFd.mjs.map +0 -1
package/dist/stt-DRPLEEHB.mjs +0 -3
package/dist/tools-Bi1P7Xoy.mjs.map +0 -1
package/dist/transformers.web-DiD1gTwk.js +0 -44695
package/dist/transformers.web-DiD1gTwk.js.map +0 -1
package/dist/transformers.web-u34VxRFM.js +0 -3
package/dist/tts-CqroPaSK.js +0 -724
package/dist/tts-CqroPaSK.js.map +0 -1
package/dist/tts-DXgsKGCe.mjs +0 -3
package/dist/tts-DeGANMNV.mjs +0 -730
package/dist/tts-DeGANMNV.mjs.map +0 -1
package/dist/types-CiTc7ez3.d.mts.map +0 -1
/package/dist/{auto-update-S9s5-g0C.mjs → auto-update-BVaLXcDE.mjs} +0 -0
/package/dist/{chunk-CkXuGtQK.mjs → chunk-B9cbKln6.mjs} +0 -0
/package/dist/{microphone-DaMZFRuR.mjs → microphone-Bqmoz9_K.mjs} +0 -0

package/docs/structured-output.md ADDED Viewed

@@ -0,0 +1,123 @@
+# Structured Output
+Gerbil can make a model return a **JSON object** instead of free-form text. The engine
+generates, extracts the JSON from the output, validates it, and — if it fails — retries with
+a corrective nudge until the result is valid or the retry budget is exhausted.
+This is available at four levels:
+- **Engine** — `WebGPUEngine.generateObject()` (lowest level)
+- **`Gerbil` class** — `g.generateObject()` (Node-facing wrapper)
+- **One-liner** — `generateObject()` (zero-setup singleton)
+- **React** — the `useObject` hook (`@tryhamster/gerbil/gpu/hooks`)
+> `generateObject` is distinct from the Zod-driven `Gerbil.json()`. `json()` parses against a
+> Zod schema; `generateObject()` validates with a predicate or a minimal
+> `{ required: [...] }` schema and is the same code path the `useObject` hook uses.
+## Validation
+The `schema` option is optional. When provided it is an `ObjectValidator`; the three cases are:
+- **A predicate** `(o: unknown) => boolean` — full control; return `true` when the parsed
+  value is acceptable.
+- **A minimal schema object** `{ required: string[]; properties?: Record<string, unknown> }`
+  — only the `required` key set is enforced (every listed key must exist on the parsed object).
+- **Omitted** — any syntactically valid JSON value (object or array) is accepted.
+`maxRetries` (default `4`) is the number of retries **after** the first attempt, so up to
+`maxRetries + 1` generations run. The result includes `attempts` (1 = first try).
+## Engine
+```typescript
+import { WebGPUEngine } from "@tryhamster/gerbil/gpu";
+const engine = await WebGPUEngine.create({ repo: "mlx-community/Qwen3.5-0.8B-4bit" });
+const { object, text, attempts } = await engine.generateObject(
+  'Extract {name, age} from: "I am Sarah, 28"',
+  { schema: { required: ["name", "age"] }, maxRetries: 4 },
+);
+// object === { name: "Sarah", age: 28 }
+```
+`generateObject` extends the engine's `GenerateOptions`, so `maxTokens`, `systemPrompt`, and
+`sampling` (e.g. `{ temperature: 0.2 }`) are all available.
+## `Gerbil` class
+```typescript
+import { Gerbil } from "@tryhamster/gerbil";
+const g = new Gerbil();
+await g.loadModel("qwen3.5-0.8b");
+const { object } = await g.generateObject<{ primes: number[] }>(
+  "List the first 3 primes as {primes: number[]}",
+  { schema: (o) => Array.isArray((o as any).primes) },
+);
+```
+## One-liner
+```typescript
+import { generateObject } from "@tryhamster/gerbil";
+const { object } = await generateObject(
+  "Return {ok: true}",
+  { model: "qwen3.5-0.8b", schema: { required: ["ok"] } },
+);
+```
+## React (`useObject`)
+```tsx
+import { useObject } from "@tryhamster/gerbil/gpu/hooks";
+function CityExtractor() {
+  const { generate, object, isGenerating, attempts } = useObject<{ city: string }>({
+    model: "mlx-community/Qwen3.5-0.8B-4bit",
+  });
+  return (
+    <div>
+      <button
+        disabled={isGenerating}
+        onClick={() =>
+          generate("Extract the city: I live in Paris", { schema: { required: ["city"] } })
+        }
+      >
+        Extract
+      </button>
+      {object && <pre>{JSON.stringify(object, null, 2)} ({attempts} attempts)</pre>}
+    </div>
+  );
+}
+```
+## CLI
+```bash
+# Inline, no schema (any valid JSON accepted)
+gerbil object "Return {ok: true} as JSON"
+# With a schema file (a minimal { required: [...] } validator)
+echo '{ "required": ["name", "age"] }' > person.json
+gerbil object "Extract {name, age}: I am Sarah, 28" --schema person.json
+# Flags: --model, --max-tokens, --temperature, --retries, --schema
+```
+The command prints the parsed object as pretty JSON followed by the attempt count.
+## Types
+```typescript
+import type {
+  GenerateObjectOptions,
+  GenerateObjectResult,
+  ObjectSchema,
+  ObjectValidator,
+} from "@tryhamster/gerbil";
+```

package/docs/stt.md CHANGED Viewed

@@ -1,494 +1,111 @@
 # Speech-to-Text (STT)
-Gerbil provides local speech-to-text transcription using Whisper ONNX models via transformers.js. No API keys required - everything runs on-device.
+Gerbil transcribes audio with **Moonshine** — a native, raw-waveform encoder/decoder STT
+model (no FFT, no log-mel) running on the WebGPU engine. It uses a Conv1d front-end over the
+raw waveform, encodes once, then autoregressively decodes with cross-attention into the
+frozen encoder K/V. No ONNX, no API keys.
+> **Pre-1.0.** Moonshine is the only STT path. The old Whisper ONNX/transformers.js lane has
+> been removed. The `Gerbil`-class `transcribe()` method still works but now runs native
+> Moonshine under the hood (see [below](#gerbil-class-transcribe-native-wrapper)).
 ## Quick Start
-### Node.js
+Moonshine has its own engine class, `MoonshineSTT`. Feed it raw **16 kHz mono PCM**
+(`Float32Array`).
 ```typescript
-import { Gerbil } from "@tryhamster/gerbil";
-import { readFileSync } from "fs";
+import { MoonshineSTT } from "@tryhamster/gerbil/gpu";
-const g = new Gerbil();
+const stt = await MoonshineSTT.create({ repo: "UsefulSensors/moonshine-base" });
-// Transcribe a WAV file
-const audioData = new Uint8Array(readFileSync("audio.wav"));
-const result = await g.transcribe(audioData);
+// pcm: Float32Array, 16 kHz mono
+const result = await stt.transcribe(pcm);
 console.log(result.text);
-```
-### CLI
-```bash
-# Transcribe audio file
-gerbil transcribe audio.wav
-# With timestamps
-gerbil transcribe audio.wav --timestamps
-# List available models
-gerbil transcribe --list-models
-# Use a different model
-gerbil transcribe audio.wav --model whisper-base.en
+stt.destroy();
 ```
-## Available Models
-| Model | Size | Languages | Description |
-|-------|------|-----------|-------------|
-| `whisper-tiny.en` | 39M | English | Fastest, good for simple audio |
-| `whisper-tiny` | 39M | Multi | Multilingual tiny model |
-| `whisper-base.en` | 74M | English | Good balance of speed/accuracy |
-| `whisper-base` | 74M | Multi | Multilingual base model |
-| `whisper-small.en` | 244M | English | High quality transcription |
-| `whisper-small` | 244M | Multi | High quality multilingual |
-| `whisper-large-v3-turbo` | 809M | 80+ langs | Best quality, 5.4x faster than v3 |
+## API
-## API Reference
-### Gerbil Class Methods
+### `MoonshineSTT.create(options?)`
 ```typescript
-// Load STT model explicitly (optional - auto-loads on first transcribe)
-await g.loadSTT("whisper-tiny.en", {
-  onProgress: (p) => console.log(p.status),
-});
-// Transcribe audio
-const result = await g.transcribe(audioData, {
-  language: "en",      // Language hint (multilingual models only)
-  timestamps: true,    // Return segment timestamps
-  onProgress: (p) => console.log(p.status),
-});
-// Result structure
-console.log(result.text);       // Full transcription
-console.log(result.duration);   // Audio duration in seconds
-console.log(result.totalTime);  // Processing time in ms
-console.log(result.segments);   // Timestamped segments (if requested)
-// Check if loaded
-console.log(g.isSTTLoaded());
-// List available models
-const models = await g.listSTTModels();
-```
-### Audio Input Formats
-```typescript
-// WAV file (Uint8Array) - automatically decoded
-const wavData = new Uint8Array(fs.readFileSync("audio.wav"));
-await g.transcribe(wavData);
-// Raw audio (Float32Array at 16kHz mono)
-const raw16k = new Float32Array([...]); // 16kHz mono samples
-await g.transcribe(raw16k);
-// WAV at any sample rate - automatically resampled to 16kHz
-const wav44k = new Uint8Array(fs.readFileSync("audio-44khz.wav"));
-await g.transcribe(wav44k); // Resampled internally
-```
-### Transcription with Timestamps
-```typescript
-const result = await g.transcribe(audioData, { timestamps: true });
-for (const segment of result.segments || []) {
-  console.log(`[${segment.start.toFixed(1)}s - ${segment.end.toFixed(1)}s] ${segment.text}`);
+interface MoonshineSTTOptions {
+  /** HF repo (default "UsefulSensors/moonshine-base"). */
+  repo?: string;
+  revision?: string;
+  hfToken?: string;
+  cacheDir?: string;
+  onProgress?: (loaded: number, total: number, message: string) => void;
 }
-// [0.0s - 5.2s] Hello world, this is a test
-// [5.2s - 8.0s] of the transcription system
-```
-### Language Detection (Multilingual Models)
-```typescript
-// Use multilingual model
-await g.loadSTT("whisper-base");
-// Let Whisper detect language
-const result = await g.transcribe(audioData);
-console.log(result.language); // "en", "es", "fr", etc.
-// Or provide a language hint
-const spanishResult = await g.transcribe(audioData, { language: "es" });
-```
-## Direct WhisperSTT Class
-For lower-level control:
-```typescript
-import { WhisperSTT, decodeWav, resampleAudio } from "@tryhamster/gerbil/core/stt";
-const stt = new WhisperSTT("whisper-tiny.en");
-await stt.load({
-  onProgress: (p) => console.log(p.status),
-});
-const result = await stt.transcribe(audioData, {
-  timestamps: true,
-});
-console.log(result.text);
-stt.dispose();
-```
-### Audio Utilities
-```typescript
-import { decodeWav, resampleAudio } from "@tryhamster/gerbil/core/stt";
-// Decode WAV to Float32Array
-const { audio, sampleRate } = decodeWav(wavUint8Array);
-// Resample to 16kHz (Whisper requirement)
-const audio16k = resampleAudio(audio, sampleRate, 16000);
-```
-## Streaming Transcription (Real-time)
-Transcribe audio in chunks as it comes in - perfect for live captioning, call transcription, or real-time subtitles:
-```typescript
-import { Gerbil } from "@tryhamster/gerbil";
-const g = new Gerbil();
-await g.loadSTT("whisper-tiny.en");
-// Create streaming session
-const session = await g.createStreamingTranscription({
-  chunkDuration: 1500,  // Transcribe every 1.5 seconds (default)
-  onChunk: (text, chunkIndex) => {
-    console.log(`Chunk ${chunkIndex}: ${text}`);
-  },
-  onTranscript: (fullText) => {
-    console.log("Full transcript:", fullText);
-  },
-  onError: (err) => console.error(err),
-});
-// Start the session (begins automatic interval-based transcription)
-session.start();
-// Feed audio data as it comes in (Float32Array at 16kHz)
-// This could come from a microphone, WebRTC stream, etc.
-session.feedAudio(audioChunk);
-session.feedAudio(moreAudioChunk);
-// Or manually trigger transcription of buffered audio
-await session.flush();
-// Get current state
-console.log(session.getTranscript());   // Full accumulated text
-console.log(session.getChunkCount());   // Number of chunks processed
-console.log(session.isRunning());       // Whether session is active
-// Stop and get final transcript
-const finalText = await session.stop();
-console.log("Final:", finalText);
-// Reset for reuse
-session.reset();
 ```
-### Session API
+### `transcribe(pcm, options?)`
 ```typescript
-interface StreamingTranscriptionSession {
-  feedAudio(audio: Float32Array): void;  // Add audio to buffer
-  flush(): Promise<string>;              // Transcribe buffer now
-  start(): void;                         // Start auto-transcription
-  stop(): Promise<string>;               // Stop and get final text
-  isRunning(): boolean;                  // Check if running
-  getTranscript(): string;               // Get current full text
-  getChunkCount(): number;               // Number of chunks processed
-  reset(): void;                         // Clear buffer and transcript
+interface TranscribeOptions {
+  /** Stop after this many decoded tokens (default 194). */
+  maxNewTokens?: number;
 }
-interface StreamingTranscriptionOptions {
-  chunkDuration?: number;    // Interval in ms (default: 1500)
-  minChunkSize?: number;     // Min samples before transcribing (default: 8000)
-  onChunk?: (text: string, chunkIndex: number) => void;
-  onTranscript?: (fullText: string) => void;
-  onError?: (error: string) => void;
-  language?: string;         // Language hint for multilingual models
+interface TranscribeResult {
+  text: string;
+  /** Decoded token ids (excluding the start token, including trailing EOS). */
+  tokens: number[];
+  /** Encoder frames produced by the conv frontend. */
+  encoderFrames: number;
+  /** Audio duration in seconds (samples / 16000). */
+  audioSeconds: number;
 }
 ```
-### Streaming with Microphone (Node.js)
+`pcm` must be raw 16 kHz mono `Float32Array` samples. If your audio is a WAV file or at a
+different sample rate, decode and resample it to 16 kHz first.
-```typescript
-import { Gerbil, StreamingMicrophone } from "@tryhamster/gerbil";
-const g = new Gerbil();
-await g.loadSTT("whisper-tiny.en");
-// Create streaming transcription session
-const session = await g.createStreamingTranscription({
-  chunkDuration: 2500,
-  onChunk: (text, idx) => console.log(`[${idx}] ${text}`),
-  onTranscript: (full) => console.log("Transcript:", full),
-});
-session.start();
-// Create streaming microphone that feeds to session
-const mic = new StreamingMicrophone({
-  onAudioChunk: (audio) => session.feedAudio(audio),
-  chunkDuration: 500,  // 0.5s audio chunks
-});
-await mic.start();
-console.log("Recording... Press Ctrl+C to stop");
-// Handle graceful shutdown
-process.on("SIGINT", async () => {
-  await mic.stop();
-  const transcript = await session.stop();
-  console.log("\nFinal:", transcript);
-  process.exit(0);
-});
-```
-## React Hook (Browser)
-Use `useVoiceInput` for browser-based voice recording and transcription:
-```tsx
-import { useVoiceInput } from "@tryhamster/gerbil/browser";
-function VoiceInput() {
-  const {
-    startRecording,
-    stopRecording,
-    isRecording,
-    isTranscribing,
-    transcript,
-    error,
-  } = useVoiceInput({
-    model: "whisper-tiny.en",
-    onTranscript: (text) => console.log("User said:", text),
-  });
-  return (
-    <div>
-      <button onClick={isRecording ? stopRecording : startRecording}>
-        {isRecording ? "🔴 Stop" : "🎤 Record"}
-      </button>
-      {isTranscribing && <span>Transcribing...</span>}
-      {transcript && <p>{transcript}</p>}
-    </div>
-  );
-}
-```
-### Hook Options
+### Decoding / resampling helpers
 ```typescript
-const {
-  startRecording,   // () => Promise<void> - start recording
-  stopRecording,    // () => Promise<string> - stop and transcribe
-  cancelRecording,  // () => void - stop without transcribing
-  transcribe,       // (audio: Float32Array) => Promise<string>
-  isRecording,      // boolean - currently recording
-  isTranscribing,   // boolean - transcribing audio
-  isLoading,        // boolean - model loading
-  isReady,          // boolean - model ready
-  transcript,       // string - latest result
-  loadingProgress,  // STTProgress - loading status
-  error,            // string | null
-  load,             // () => void - manually load model
-} = useVoiceInput({
-  model: "whisper-tiny.en",  // STT model ID
-  autoLoad: false,           // loads on first record
-  onReady: () => {},         // called when model ready
-  onTranscript: (text) => {},// called on each transcription
-  onError: (error) => {},    // called on errors
-  onProgress: (p) => {},     // loading progress
-});
-```
-## CLI Commands
-### Transcribe
-```bash
-# Basic transcription
-gerbil transcribe recording.wav
-# With timestamps
-gerbil transcribe recording.wav --timestamps
-# Save to file
-gerbil transcribe recording.wav --output transcript.txt
-# Use specific model
-gerbil transcribe recording.wav --model whisper-base.en
+// Reuse the audio utilities from the core package to get 16 kHz mono PCM.
+import { decodeWav, resampleAudio } from "@tryhamster/gerbil/core/stt";
-# Multilingual with language hint
-gerbil transcribe recording.wav --model whisper-base --language es
+const { audio, sampleRate } = decodeWav(wavUint8Array);
+const pcm16k = sampleRate === 16000 ? audio : resampleAudio(audio, sampleRate, 16000);
-# List models
-gerbil transcribe --list-models
+const { text } = await stt.transcribe(pcm16k);
 ```
-### Voice Chat (STT → LLM → TTS)
-Complete voice conversation loop from audio file:
-```bash
-# Voice chat with audio file
-gerbil voice question.wav
+## Model
-# Customize models and voice
-gerbil voice question.wav --model qwen3-1.7b --voice bf_emma
+### Moonshine
-# With custom system prompt
-gerbil voice question.wav --system "You are a pirate. Speak like one!"
+- **Architecture:** `MoonshineForConditionalGeneration` (encoder-decoder)
+- **Front-end:** raw-waveform Conv1d (no FFT / log-mel)
+- **Default repo:** `UsefulSensors/moonshine-base`
+- **Sample rate:** 16 kHz mono
+- **Language:** English (the native engine is English-only; multilingual Whisper has been removed).
-# Enable thinking mode
-gerbil voice question.wav --thinking
-```
-This command:
-1. Transcribes your audio (Whisper)
-2. Generates a response (LLM)
-3. Speaks the response (Kokoro TTS)
+---
-### Microphone Recording
+## `Gerbil`-class `transcribe()` (native wrapper)
-Record directly from your microphone:
+> The Whisper ONNX/transformers.js lane (and its multilingual support) has been removed. The
+> `Gerbil`-class `transcribe()` method now runs native Moonshine: it requires WebGPU,
+> accepts 16 kHz mono `Float32Array` PCM (not WAV bytes), is English-only, and does not
+> produce timestamps. The browser `useVoiceInput` hook is gone — use `useSTT` from
+> `@tryhamster/gerbil/gpu/hooks`. The AI SDK `gerbil.transcription()` provider also routes
+> through this native path.
 ```typescript
 import { Gerbil } from "@tryhamster/gerbil";
 const g = new Gerbil();
-// Record for 5 seconds and transcribe
-const result = await g.listen(5000);
+const pcm16kMono = /* Float32Array @ 16 kHz */;
+const result = await g.transcribe(pcm16kMono);
 console.log(result.text);
-// Check if microphone is available
-const available = await g.isMicrophoneAvailable();
-```
-**Requirements:** SoX must be installed for microphone recording:
-- macOS: `brew install sox`
-- Ubuntu: `sudo apt install sox`
-- Windows: Download from https://sox.sourceforge.net/
-### REPL Voice Input
-In the Gerbil REPL chat:
-- Press `Ctrl+R` to record from microphone (5 seconds)
-- The transcribed text appears in the input field
-- Combined with voice mode (`Ctrl+V`), enables full voice conversations
-```bash
-gerbil chat
-# In chat: Press ^R to record, ^V to enable voice responses
-```
-## Performance
-Transcription speed on Apple M1:
-| Model | Audio Length | Time | Speed |
-|-------|--------------|------|-------|
-| whisper-tiny.en | 11s | ~250ms | 44x realtime |
-| whisper-base.en | 11s | ~500ms | 22x realtime |
-| whisper-small.en | 11s | ~1.5s | 7x realtime |
-## Troubleshooting
-### "Unsupported bit depth"
-Only 16-bit WAV files are supported. Convert with:
-```bash
-ffmpeg -i audio.mp3 -ar 16000 -ac 1 -sample_fmt s16 output.wav
-```
-### Slow first transcription
-The first call downloads the model (~40-250MB). Subsequent calls use the cached model.
-### Memory usage
-Whisper models require:
-- tiny: ~150MB RAM
-- base: ~300MB RAM
-- small: ~600MB RAM
-- large-v3-turbo: ~1.5GB RAM
-## useVoiceChat Hook (Full Voice Conversation)
-For complete voice-to-voice conversations (STT → LLM → TTS):
-```tsx
-import { useVoiceChat } from "@tryhamster/gerbil/browser";
-function VoiceAssistant() {
-  const {
-    messages,
-    startListening,
-    stopListening,
-    isListening,
-    isSpeaking,
-    stage,  // "idle" | "listening" | "transcribing" | "thinking" | "speaking"
-    isReady,
-  } = useVoiceChat({
-    llmModel: "qwen3-0.6b",
-    sttModel: "whisper-tiny.en",
-    voice: "af_bella",
-    system: "You are a helpful voice assistant.",
-  });
-  return (
-    <div>
-      {messages.map(m => <p key={m.id}>{m.role}: {m.content}</p>)}
-      <button
-        onMouseDown={startListening}
-        onMouseUp={stopListening}
-      >
-        {stage === "idle" ? "🎤 Hold to Speak" : stage}
-      </button>
-    </div>
-  );
-}
-```
-## Adding Custom Models
-Gerbil supports any ONNX model with the `transformers.js` tag on HuggingFace:
-```typescript
-// Use any onnx-community model
-await g.loadSTT("whisper-large-v3-turbo");
-// Browse compatible models at:
-// https://huggingface.co/models?library=transformers.js&pipeline_tag=automatic-speech-recognition
-```
-To add a new model to the registry, edit `src/core/stt.ts`:
-```typescript
-{
-  id: "my-custom-model",
-  repo: "onnx-community/my-model",
-  description: "My custom model",
-  size: "100M",
-  multilingual: true,
-  languages: ["en", "es"],
-  sampleRate: 16000,
-}
 ```
 ## See Also
-- [Text-to-Speech (TTS)](./tts.md) - Speech synthesis
-- [Browser Hooks](./browser.md) - useChat, useVoiceInput, useVoiceChat
+- [Text-to-Speech (TTS)](./tts.md) — native Kani-TTS-2
+- [Browser Hooks](./browser.md) — React hooks