npm - workers-ai-provider - Versions diffs - 3.0.5 → 3.1.1 - Mend

workers-ai-provider 3.0.5 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/README.md +102 -13
package/dist/index.d.ts +143 -2
package/dist/index.js +606 -235
package/dist/index.js.map +1 -1
package/package.json +3 -3
package/src/index.ts +85 -0
package/src/streaming.ts +90 -24
package/src/utils.ts +84 -2
package/src/workersai-models.ts +27 -0
package/src/workersai-reranking-model.ts +106 -0
package/src/workersai-reranking-settings.ts +1 -0
package/src/workersai-speech-model.ts +141 -0
package/src/workersai-speech-settings.ts +1 -0
package/src/workersai-transcription-model.ts +244 -0
package/src/workersai-transcription-settings.ts +13 -0

package/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # workers-ai-provider
-[Workers AI](https://developers.cloudflare.com/workers-ai/) provider for the [AI SDK](https://sdk.vercel.ai/). Use Cloudflare's models for chat, tool calling, structured output, embeddings, image generation, and [AI Search](https://developers.cloudflare.com/ai-search/).
+[Workers AI](https://developers.cloudflare.com/workers-ai/) provider for the [AI SDK](https://sdk.vercel.ai/). Run Cloudflare's models for chat, embeddings, image generation, transcription, text-to-speech, reranking, and [AI Search](https://developers.cloudflare.com/ai-search/) — all from a single provider.
 ## Quick Start
@@ -71,13 +71,19 @@ Browse the full catalog at [developers.cloudflare.com/workers-ai/models](https:/
 Some good defaults:
-| Task       | Model                                      | Notes                       |
-| ---------- | ------------------------------------------ | --------------------------- |
-| Chat       | `@cf/meta/llama-4-scout-17b-16e-instruct`  | Fast, strong tool calling   |
-| Chat       | `@cf/meta/llama-3.3-70b-instruct-fp8-fast` | Largest Llama, best quality |
-| Reasoning  | `@cf/qwen/qwq-32b`                         | Emits `reasoning_content`   |
-| Embeddings | `@cf/baai/bge-base-en-v1.5`                | 768-dim, English            |
-| Images     | `@cf/black-forest-labs/flux-1-schnell`     | Fast image generation       |
+| Task           | Model                                      | Notes                            |
+| -------------- | ------------------------------------------ | -------------------------------- |
+| Chat           | `@cf/meta/llama-4-scout-17b-16e-instruct`  | Fast, strong tool calling        |
+| Chat           | `@cf/meta/llama-3.3-70b-instruct-fp8-fast` | Largest Llama, best quality      |
+| Chat           | `@cf/openai/gpt-oss-120b`                  | OpenAI open-weights, reasoning   |
+| Reasoning      | `@cf/qwen/qwq-32b`                         | Emits `reasoning_content`        |
+| Embeddings     | `@cf/baai/bge-base-en-v1.5`                | 768-dim, English                 |
+| Embeddings     | `@cf/google/embeddinggemma-300m`           | 100+ languages, by Google        |
+| Images         | `@cf/black-forest-labs/flux-1-schnell`     | Fast image generation            |
+| Transcription  | `@cf/openai/whisper-large-v3-turbo`        | Best accuracy, multilingual      |
+| Transcription  | `@cf/deepgram/nova-3`                      | Fast, high accuracy              |
+| Text-to-Speech | `@cf/deepgram/aura-2-en`                   | Context-aware, natural pacing    |
+| Reranking      | `@cf/baai/bge-reranker-base`               | Fast document reranking          |
 ## Text Generation
@@ -169,6 +175,80 @@ const { images } = await generateImage({
 // images[0].uint8Array contains the PNG bytes
 ```
+## Transcription (Speech-to-Text)
+Transcribe audio using Whisper or Deepgram Nova-3 models.
+```ts
+import { experimental_transcribe as transcribe } from "ai";
+import { readFile } from "node:fs/promises";
+const { text, segments } = await transcribe({
+	model: workersai.transcription("@cf/openai/whisper-large-v3-turbo"),
+	audio: await readFile("./audio.mp3"),
+	mediaType: "audio/mpeg",
+});
+```
+With language hints (Whisper only):
+```ts
+const { text } = await transcribe({
+	model: workersai.transcription("@cf/openai/whisper-large-v3-turbo", {
+		language: "fr",
+	}),
+	audio: audioBuffer,
+	mediaType: "audio/wav",
+});
+```
+Deepgram Nova-3 is also supported and detects language automatically:
+```ts
+const { text } = await transcribe({
+	model: workersai.transcription("@cf/deepgram/nova-3"),
+	audio: audioBuffer,
+	mediaType: "audio/wav",
+});
+```
+## Text-to-Speech
+Generate spoken audio from text using Deepgram Aura-2.
+```ts
+import { experimental_generateSpeech as speech } from "ai";
+const { audio } = await speech({
+	model: workersai.speech("@cf/deepgram/aura-2-en"),
+	text: "Hello from Cloudflare Workers AI!",
+	voice: "asteria",
+});
+// audio is a Uint8Array of MP3 bytes
+```
+## Reranking
+Reorder documents by relevance to a query — useful for RAG pipelines.
+```ts
+import { rerank } from "ai";
+const { results } = await rerank({
+	model: workersai.reranking("@cf/baai/bge-reranker-base"),
+	query: "What is Cloudflare Workers?",
+	documents: [
+		"Cloudflare Workers lets you run JavaScript at the edge.",
+		"A cookie is a small piece of data stored in the browser.",
+		"Workers AI runs inference on Cloudflare's global network.",
+	],
+	topN: 2,
+});
+// results is sorted by relevance score
+```
 ## AI Search
 [AI Search](https://developers.cloudflare.com/ai-search/) is Cloudflare's managed RAG service. Connect your data and query it with natural language.
@@ -192,7 +272,7 @@ const { text } = await generateText({
 });
 ```
-Streaming works the same way -- use `streamText` instead of `generateText`.
+Streaming works the same way — use `streamText` instead of `generateText`.
 > `createAutoRAG` still works but is deprecated. Use `createAISearch` instead.
@@ -207,18 +287,27 @@ Streaming works the same way -- use `streamText` instead of `generateText`.
 | `apiKey`    | `string`         | Cloudflare API token. Required with `accountId`.                             |
 | `gateway`   | `GatewayOptions` | Optional [AI Gateway](https://developers.cloudflare.com/ai-gateway/) config. |
-Returns a provider with model factories for each AI SDK function:
+Returns a provider with model factories:
 ```ts
-// For generateText / streamText:
+// Chat — for generateText / streamText
 workersai(modelId);
 workersai.chat(modelId);
-// For embedMany / embed:
+// Embeddings — for embedMany / embed
 workersai.textEmbedding(modelId);
-// For generateImage:
+// Images — for generateImage
 workersai.image(modelId);
+// Transcription — for transcribe
+workersai.transcription(modelId, settings?);
+// Text-to-Speech — for speech
+workersai.speech(modelId);
+// Reranking — for rerank
+workersai.reranking(modelId);
 ```
 ### `createAISearch(options)`

package/dist/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { LanguageModelV3, EmbeddingModelV3, EmbeddingModelV3CallOptions, EmbeddingModelV3Result, ImageModelV3 } from '@ai-sdk/provider';
+import { LanguageModelV3, EmbeddingModelV3, EmbeddingModelV3CallOptions, EmbeddingModelV3Result, ImageModelV3, TranscriptionModelV3, SpeechModelV3, RerankingModelV3 } from '@ai-sdk/provider';
 type AISearchChatSettings = {
     /**
@@ -21,6 +21,24 @@ type ImageGenerationModels = value2key<AiModels, BaseAiTextToImage>;
  * The names of the BaseAiTextEmbeddings models.
  */
 type EmbeddingModels = value2key<AiModels, BaseAiTextEmbeddings>;
+/**
+ * Workers AI models that support speech-to-text transcription.
+ *
+ * Includes Whisper variants from `@cloudflare/workers-types` plus
+ * Deepgram partner models that may not be in the typed interface yet.
+ */
+type TranscriptionModels = value2key<AiModels, BaseAiAutomaticSpeechRecognition> | "@cf/deepgram/nova-3";
+/**
+ * Workers AI models that support text-to-speech.
+ *
+ * Includes models from `@cloudflare/workers-types` plus Deepgram partner
+ * models that may not be in the typed interface yet.
+ */
+type SpeechModels = value2key<AiModels, BaseAiTextToSpeech> | "@cf/deepgram/aura-1" | "@cf/deepgram/aura-2-en" | "@cf/deepgram/aura-2-es";
+/**
+ * Workers AI models that support reranking.
+ */
+type RerankingModels = "@cf/baai/bge-reranker-base" | "@cf/baai/bge-reranker-v2-m3";
 type value2key<T, V> = {
     [K in keyof T]: T[K] extends V ? K : never;
 }[keyof T];
@@ -141,6 +159,114 @@ declare class WorkersAIImageModel implements ImageModelV3 {
     doGenerate({ prompt, n, size, aspectRatio, seed, }: Parameters<ImageModelV3["doGenerate"]>[0]): Promise<Awaited<ReturnType<ImageModelV3["doGenerate"]>>>;
 }
+type WorkersAITranscriptionSettings = {
+    /**
+     * Language of the audio, as an ISO-639-1 code (e.g. "en", "fr").
+     * Only supported by Whisper models. Nova-3 detects language automatically.
+     */
+    language?: string;
+    /**
+     * Initial prompt / context to guide the transcription.
+     * Mapped to `initial_prompt` for Whisper models.
+     */
+    prompt?: string;
+};
+/**
+ * Parameters for configuring the Cloudflare-based AI runner.
+ */
+interface CreateRunConfig {
+    /** Your Cloudflare account identifier. */
+    accountId: string;
+    /** Cloudflare API token/key with appropriate permissions. */
+    apiKey: string;
+}
+type WorkersAITranscriptionConfig = {
+    provider: string;
+    binding: Ai;
+    gateway?: GatewayOptions;
+    /**
+     * Whether the binding is a real `env.AI` binding (true) or a REST shim (false).
+     * Nova-3 uses different upload paths depending on this.
+     */
+    isBinding: boolean;
+    /**
+     * REST credentials, only set when `isBinding` is false.
+     * Needed for Nova-3 which requires binary upload, bypassing the JSON-based REST shim.
+     */
+    credentials?: CreateRunConfig;
+};
+/**
+ * Workers AI transcription model implementing the AI SDK's `TranscriptionModelV3` interface.
+ *
+ * Supports:
+ * - Whisper models (`@cf/openai/whisper`, `whisper-tiny-en`, `whisper-large-v3-turbo`)
+ * - Deepgram Nova-3 (`@cf/deepgram/nova-3`) — uses a different input/output format
+ */
+declare class WorkersAITranscriptionModel implements TranscriptionModelV3 {
+    readonly modelId: TranscriptionModels;
+    readonly settings: WorkersAITranscriptionSettings;
+    readonly config: WorkersAITranscriptionConfig;
+    readonly specificationVersion = "v3";
+    get provider(): string;
+    constructor(modelId: TranscriptionModels, settings: WorkersAITranscriptionSettings, config: WorkersAITranscriptionConfig);
+    doGenerate(options: Parameters<TranscriptionModelV3["doGenerate"]>[0]): Promise<Awaited<ReturnType<TranscriptionModelV3["doGenerate"]>>>;
+    private runWhisper;
+    private normalizeWhisperResponse;
+    private runNova3;
+    private normalizeNova3Response;
+}
+type WorkersAISpeechSettings = {};
+type WorkersAISpeechConfig = {
+    provider: string;
+    binding: Ai;
+    gateway?: GatewayOptions;
+};
+/**
+ * Workers AI speech (text-to-speech) model implementing the AI SDK's `SpeechModelV3` interface.
+ *
+ * Supports Deepgram Aura models (`@cf/deepgram/aura-1`, `@cf/deepgram/aura-2-en`, `@cf/deepgram/aura-2-es`).
+ * The model accepts `{ text, voice?, speed? }` and returns raw audio bytes.
+ */
+declare class WorkersAISpeechModel implements SpeechModelV3 {
+    readonly modelId: SpeechModels;
+    readonly settings: WorkersAISpeechSettings;
+    readonly config: WorkersAISpeechConfig;
+    readonly specificationVersion = "v3";
+    get provider(): string;
+    constructor(modelId: SpeechModels, settings: WorkersAISpeechSettings, config: WorkersAISpeechConfig);
+    doGenerate(options: Parameters<SpeechModelV3["doGenerate"]>[0]): Promise<Awaited<ReturnType<SpeechModelV3["doGenerate"]>>>;
+}
+type WorkersAIRerankingSettings = {};
+type WorkersAIRerankingConfig = {
+    provider: string;
+    binding: Ai;
+    gateway?: GatewayOptions;
+};
+/**
+ * Workers AI reranking model implementing the AI SDK's `RerankingModelV3` interface.
+ *
+ * Supports BGE reranker models (`@cf/baai/bge-reranker-base`, `bge-reranker-v2-m3`).
+ *
+ * Workers AI reranking API:
+ * - Input: `{ query, contexts: [{ text }], top_k? }`
+ * - Output: `{ response: [{ id, score }] }`
+ */
+declare class WorkersAIRerankingModel implements RerankingModelV3 {
+    readonly modelId: RerankingModels;
+    readonly settings: WorkersAIRerankingSettings;
+    readonly config: WorkersAIRerankingConfig;
+    readonly specificationVersion = "v3";
+    get provider(): string;
+    constructor(modelId: RerankingModels, settings: WorkersAIRerankingSettings, config: WorkersAIRerankingConfig);
+    doRerank(options: Parameters<RerankingModelV3["doRerank"]>[0]): Promise<Awaited<ReturnType<RerankingModelV3["doRerank"]>>>;
+}
 /**
  * @deprecated Use `AISearchChatLanguageModel` instead. AutoRAG has been renamed to AI Search.
  * @see https://developers.cloudflare.com/ai-search/
@@ -194,6 +320,21 @@ interface WorkersAI {
      **/
     image(modelId: ImageGenerationModels, settings?: WorkersAIImageSettings): WorkersAIImageModel;
     imageModel(modelId: ImageGenerationModels, settings?: WorkersAIImageSettings): WorkersAIImageModel;
+    /**
+     * Creates a model for speech-to-text transcription.
+     **/
+    transcription(modelId: TranscriptionModels, settings?: WorkersAITranscriptionSettings): WorkersAITranscriptionModel;
+    transcriptionModel(modelId: TranscriptionModels, settings?: WorkersAITranscriptionSettings): WorkersAITranscriptionModel;
+    /**
+     * Creates a model for text-to-speech synthesis.
+     **/
+    speech(modelId: SpeechModels, settings?: WorkersAISpeechSettings): WorkersAISpeechModel;
+    speechModel(modelId: SpeechModels, settings?: WorkersAISpeechSettings): WorkersAISpeechModel;
+    /**
+     * Creates a model for document reranking.
+     **/
+    reranking(modelId: RerankingModels, settings?: WorkersAIRerankingSettings): WorkersAIRerankingModel;
+    rerankingModel(modelId: RerankingModels, settings?: WorkersAIRerankingSettings): WorkersAIRerankingModel;
 }
 /**
  * Create a Workers AI provider instance.
@@ -234,4 +375,4 @@ type AutoRAGProvider = AISearchProvider;
  */
 declare function createAutoRAG(options: AISearchSettings): AISearchProvider;
-export { AISearchChatLanguageModel, type AISearchChatSettings, type AISearchProvider, type AISearchSettings, AutoRAGChatLanguageModel, type AutoRAGChatSettings, type AutoRAGProvider, type AutoRAGSettings, type WorkersAI, type WorkersAISettings, createAISearch, createAutoRAG, createWorkersAI };
+export { AISearchChatLanguageModel, type AISearchChatSettings, type AISearchProvider, type AISearchSettings, AutoRAGChatLanguageModel, type AutoRAGChatSettings, type AutoRAGProvider, type AutoRAGSettings, type WorkersAI, WorkersAIRerankingModel, type WorkersAIRerankingSettings, type WorkersAISettings, WorkersAISpeechModel, type WorkersAISpeechSettings, WorkersAITranscriptionModel, type WorkersAITranscriptionSettings, createAISearch, createAutoRAG, createWorkersAI };