npm - @huggingface/inference - Versions diffs - 2.6.7 → 2.7.1 - Mend

@huggingface/inference 2.6.7 → 2.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (129)

package/README.md +126 -27
package/dist/index.cjs +92 -24
package/dist/index.js +90 -24
package/dist/src/HfInference.d.ts +28 -0
package/dist/src/HfInference.d.ts.map +1 -0
package/dist/src/index.d.ts +5 -0
package/dist/src/index.d.ts.map +1 -0
package/dist/src/lib/InferenceOutputError.d.ts +4 -0
package/dist/src/lib/InferenceOutputError.d.ts.map +1 -0
package/dist/src/lib/getDefaultTask.d.ts +12 -0
package/dist/src/lib/getDefaultTask.d.ts.map +1 -0
package/dist/src/lib/isUrl.d.ts +2 -0
package/dist/src/lib/isUrl.d.ts.map +1 -0
package/dist/src/lib/makeRequestOptions.d.ts +18 -0
package/dist/src/lib/makeRequestOptions.d.ts.map +1 -0
package/dist/src/tasks/audio/audioClassification.d.ts +24 -0
package/dist/src/tasks/audio/audioClassification.d.ts.map +1 -0
package/dist/src/tasks/audio/audioToAudio.d.ts +28 -0
package/dist/src/tasks/audio/audioToAudio.d.ts.map +1 -0
package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts +19 -0
package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts.map +1 -0
package/dist/src/tasks/audio/textToSpeech.d.ts +14 -0
package/dist/src/tasks/audio/textToSpeech.d.ts.map +1 -0
package/dist/src/tasks/custom/request.d.ts +13 -0
package/dist/src/tasks/custom/request.d.ts.map +1 -0
package/dist/src/tasks/custom/streamingRequest.d.ts +13 -0
package/dist/src/tasks/custom/streamingRequest.d.ts.map +1 -0
package/dist/src/tasks/cv/imageClassification.d.ts +24 -0
package/dist/src/tasks/cv/imageClassification.d.ts.map +1 -0
package/dist/src/tasks/cv/imageSegmentation.d.ts +28 -0
package/dist/src/tasks/cv/imageSegmentation.d.ts.map +1 -0
package/dist/src/tasks/cv/imageToImage.d.ts +55 -0
package/dist/src/tasks/cv/imageToImage.d.ts.map +1 -0
package/dist/src/tasks/cv/imageToText.d.ts +18 -0
package/dist/src/tasks/cv/imageToText.d.ts.map +1 -0
package/dist/src/tasks/cv/objectDetection.d.ts +33 -0
package/dist/src/tasks/cv/objectDetection.d.ts.map +1 -0
package/dist/src/tasks/cv/textToImage.d.ts +36 -0
package/dist/src/tasks/cv/textToImage.d.ts.map +1 -0
package/dist/src/tasks/cv/zeroShotImageClassification.d.ts +26 -0
package/dist/src/tasks/cv/zeroShotImageClassification.d.ts.map +1 -0
package/dist/src/tasks/index.d.ts +32 -0
package/dist/src/tasks/index.d.ts.map +1 -0
package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts +35 -0
package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts.map +1 -0
package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts +27 -0
package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts.map +1 -0
package/dist/src/tasks/nlp/chatCompletion.d.ts +7 -0
package/dist/src/tasks/nlp/chatCompletion.d.ts.map +1 -0
package/dist/src/tasks/nlp/chatCompletionStream.d.ts +7 -0
package/dist/src/tasks/nlp/chatCompletionStream.d.ts.map +1 -0
package/dist/src/tasks/nlp/featureExtraction.d.ts +19 -0
package/dist/src/tasks/nlp/featureExtraction.d.ts.map +1 -0
package/dist/src/tasks/nlp/fillMask.d.ts +27 -0
package/dist/src/tasks/nlp/fillMask.d.ts.map +1 -0
package/dist/src/tasks/nlp/questionAnswering.d.ts +30 -0
package/dist/src/tasks/nlp/questionAnswering.d.ts.map +1 -0
package/dist/src/tasks/nlp/sentenceSimilarity.d.ts +19 -0
package/dist/src/tasks/nlp/sentenceSimilarity.d.ts.map +1 -0
package/dist/src/tasks/nlp/summarization.d.ts +48 -0
package/dist/src/tasks/nlp/summarization.d.ts.map +1 -0
package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts +36 -0
package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts.map +1 -0
package/dist/src/tasks/nlp/textClassification.d.ts +22 -0
package/dist/src/tasks/nlp/textClassification.d.ts.map +1 -0
package/dist/src/tasks/nlp/textGeneration.d.ts +8 -0
package/dist/src/tasks/nlp/textGeneration.d.ts.map +1 -0
package/dist/src/tasks/nlp/textGenerationStream.d.ts +81 -0
package/dist/src/tasks/nlp/textGenerationStream.d.ts.map +1 -0
package/dist/src/tasks/nlp/tokenClassification.d.ts +51 -0
package/dist/src/tasks/nlp/tokenClassification.d.ts.map +1 -0
package/dist/src/tasks/nlp/translation.d.ts +19 -0
package/dist/src/tasks/nlp/translation.d.ts.map +1 -0
package/dist/src/tasks/nlp/zeroShotClassification.d.ts +28 -0
package/dist/src/tasks/nlp/zeroShotClassification.d.ts.map +1 -0
package/dist/src/tasks/tabular/tabularClassification.d.ts +20 -0
package/dist/src/tasks/tabular/tabularClassification.d.ts.map +1 -0
package/dist/src/tasks/tabular/tabularRegression.d.ts +20 -0
package/dist/src/tasks/tabular/tabularRegression.d.ts.map +1 -0
package/dist/src/types.d.ts +69 -0
package/dist/src/types.d.ts.map +1 -0
package/dist/src/utils/base64FromBytes.d.ts +2 -0
package/dist/src/utils/base64FromBytes.d.ts.map +1 -0
package/dist/src/utils/distributive-omit.d.ts +9 -0
package/dist/src/utils/distributive-omit.d.ts.map +1 -0
package/dist/src/utils/isBackend.d.ts +2 -0
package/dist/src/utils/isBackend.d.ts.map +1 -0
package/dist/src/utils/isFrontend.d.ts +2 -0
package/dist/src/utils/isFrontend.d.ts.map +1 -0
package/dist/src/utils/omit.d.ts +5 -0
package/dist/src/utils/omit.d.ts.map +1 -0
package/dist/src/utils/pick.d.ts +5 -0
package/dist/src/utils/pick.d.ts.map +1 -0
package/dist/src/utils/toArray.d.ts +2 -0
package/dist/src/utils/toArray.d.ts.map +1 -0
package/dist/src/utils/typedInclude.d.ts +2 -0
package/dist/src/utils/typedInclude.d.ts.map +1 -0
package/dist/src/vendor/fetch-event-source/parse.d.ts +69 -0
package/dist/src/vendor/fetch-event-source/parse.d.ts.map +1 -0
package/dist/src/vendor/fetch-event-source/parse.spec.d.ts +2 -0
package/dist/src/vendor/fetch-event-source/parse.spec.d.ts.map +1 -0
package/dist/test/HfInference.spec.d.ts +2 -0
package/dist/test/HfInference.spec.d.ts.map +1 -0
package/dist/test/expect-closeto.d.ts +2 -0
package/dist/test/expect-closeto.d.ts.map +1 -0
package/dist/test/test-files.d.ts +2 -0
package/dist/test/test-files.d.ts.map +1 -0
package/dist/test/vcr.d.ts +2 -0
package/dist/test/vcr.d.ts.map +1 -0
package/package.json +9 -7
package/src/HfInference.ts +4 -4
package/src/lib/makeRequestOptions.ts +17 -7
package/src/tasks/custom/request.ts +5 -0
package/src/tasks/custom/streamingRequest.ts +8 -0
package/src/tasks/cv/imageToImage.ts +1 -1
package/src/tasks/cv/zeroShotImageClassification.ts +1 -1
package/src/tasks/index.ts +2 -0
package/src/tasks/multimodal/documentQuestionAnswering.ts +1 -1
package/src/tasks/multimodal/visualQuestionAnswering.ts +1 -1
package/src/tasks/nlp/chatCompletion.ts +32 -0
package/src/tasks/nlp/chatCompletionStream.ts +17 -0
package/src/tasks/nlp/textGeneration.ts +9 -206
package/src/tasks/nlp/textGenerationStream.ts +2 -1
package/src/types.ts +14 -3
package/src/utils/base64FromBytes.ts +11 -0
package/src/utils/{distributive-omit.d.ts → distributive-omit.ts} +0 -2
package/src/utils/isBackend.ts +6 -0
package/src/utils/isFrontend.ts +3 -0
package/dist/index.d.ts +0 -1536

package/README.md CHANGED Viewed

@@ -5,7 +5,7 @@ It works with both [Inference API (serverless)](https://huggingface.co/docs/api-
 Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README).
-You can also try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference), see some demos on [hf.co/huggingfacejs](https://huggingface.co/huggingfacejs), or watch a [Scrimba tutorial that explains how Inference Endpoints works](https://scrimba.com/scrim/cod8248f5adfd6e129582c523).
+You can also try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference), see some demos on [hf.co/huggingfacejs](https://huggingface.co/huggingfacejs), or watch a [Scrimba tutorial that explains how Inference Endpoints works](https://scrimba.com/scrim/cod8248f5adfd6e129582c523).
 ## Getting Started
@@ -30,7 +30,6 @@ import { HfInference } from "https://esm.sh/@huggingface/inference"
 import { HfInference } from "npm:@huggingface/inference"
 ```
 ### Initialize
 ```typescript
@@ -43,7 +42,6 @@ const hf = new HfInference('your access token')
 Your access token should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the access token.
 #### Tree-shaking
 You can import the functions you need directly from the module instead of using the `HfInference` class.
@@ -63,6 +61,85 @@ This will enable tree-shaking by your bundler.
 ## Natural Language Processing
+### Text Generation
+Generates text from an input prompt.
+[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-text-generation)
+```typescript
+await hf.textGeneration({
+  model: 'gpt2',
+  inputs: 'The answer to the universe is'
+})
+for await (const output of hf.textGenerationStream({
+  model: "google/flan-t5-xxl",
+  inputs: 'repeat "one two three four"',
+  parameters: { max_new_tokens: 250 }
+})) {
+  console.log(output.token.text, output.generated_text);
+}
+```
+### Text Generation (Chat Completion API Compatible)
+Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://api-inference.huggingface.co/framework/text-generation-inference) on Hugging Face support Messages API.
+[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-chat-completion)
+```typescript
+// Non-streaming API
+const out = await hf.chatCompletion({
+  model: "mistralai/Mistral-7B-Instruct-v0.2",
+  messages: [{ role: "user", content: "Complete the this sentence with words one plus one is equal " }],
+  max_tokens: 500,
+  temperature: 0.1,
+  seed: 0,
+});
+// Streaming API
+let out = "";
+for await (const chunk of hf.chatCompletionStream({
+  model: "mistralai/Mistral-7B-Instruct-v0.2",
+  messages: [
+    { role: "user", content: "Complete the equation 1+1= ,just the answer" },
+  ],
+  max_tokens: 500,
+  temperature: 0.1,
+  seed: 0,
+})) {
+  if (chunk.choices && chunk.choices.length > 0) {
+    out += chunk.choices[0].delta.content;
+  }
+}
+```
+It's also possible to call Mistral or OpenAI endpoints directly:
+```typescript
+const openai = new HfInference(OPENAI_TOKEN).endpoint("https://api.openai.com");
+let out = "";
+for await (const chunk of openai.chatCompletionStream({
+  model: "gpt-3.5-turbo",
+  messages: [
+    { role: "user", content: "Complete the equation 1+1= ,just the answer" },
+  ],
+  max_tokens: 500,
+  temperature: 0.1,
+  seed: 0,
+})) {
+  if (chunk.choices && chunk.choices.length > 0) {
+    out += chunk.choices[0].delta.content;
+  }
+}
+// For mistral AI:
+// endpointUrl: "https://api.mistral.ai"
+// model: "mistral-tiny"
+```
 ### Fill Mask
 Tries to fill in a hole with a missing word (token to be precise).
@@ -131,27 +208,6 @@ await hf.textClassification({
 })
 ```
-### Text Generation
-Generates text from an input prompt.
-[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-text-generation)
-```typescript
-await hf.textGeneration({
-  model: 'gpt2',
-  inputs: 'The answer to the universe is'
-})
-for await (const output of hf.textGenerationStream({
-  model: "google/flan-t5-xxl",
-  inputs: 'repeat "one two three four"',
-  parameters: { max_new_tokens: 250 }
-})) {
-  console.log(output.token.text, output.generated_text);
-}
-```
 ### Token Classification
 Used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -177,9 +233,9 @@ await hf.translation({
   model: 'facebook/mbart-large-50-many-to-many-mmt',
   inputs: textToTranslate,
   parameters: {
-		"src_lang": "en_XX",
-		"tgt_lang": "fr_XX"
-	}
+  "src_lang": "en_XX",
+  "tgt_lang": "fr_XX"
+ }
 })
 ```
@@ -497,6 +553,26 @@ for await (const output of hf.streamingRequest({
 }
 ```
+You can use any Chat Completion API-compatible provider with the `chatCompletion` method.
+```typescript
+// Chat Completion Example
+const MISTRAL_KEY = process.env.MISTRAL_KEY;
+const hf = new HfInference(MISTRAL_KEY);
+const ep = hf.endpoint("https://api.mistral.ai");
+const stream = ep.chatCompletionStream({
+  model: "mistral-tiny",
+  messages: [{ role: "user", content: "Complete the equation one + one = , just the answer" }],
+});
+let out = "";
+for await (const chunk of stream) {
+  if (chunk.choices && chunk.choices.length > 0) {
+    out += chunk.choices[0].delta.content;
+    console.log(out);
+  }
+}
+```
 ## Custom Inference Endpoints
 Learn more about using your own inference endpoints [here](https://hf.co/docs/inference-endpoints/)
@@ -504,6 +580,25 @@ Learn more about using your own inference endpoints [here](https://hf.co/docs/in
 ```typescript
 const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
 const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'});
+// Chat Completion Example
+const ep = hf.endpoint(
+  "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
+);
+const stream = ep.chatCompletionStream({
+  model: "tgi",
+  messages: [{ role: "user", content: "Complete the equation 1+1= ,just the answer" }],
+  max_tokens: 500,
+  temperature: 0.1,
+  seed: 0,
+});
+let out = "";
+for await (const chunk of stream) {
+  if (chunk.choices && chunk.choices.length > 0) {
+    out += chunk.choices[0].delta.content;
+    console.log(out);
+  }
+}
 ```
 By default, all calls to the inference endpoint will wait until the model is
@@ -532,3 +627,7 @@ HF_TOKEN="your access token" pnpm run test
 We have an informative documentation project called [Tasks](https://huggingface.co/tasks) to list available models for each task and explain how each task works in detail.
 It also contains demos, example outputs, and other resources should you want to dig deeper into the ML side of things.
+## Dependencies
+- `@huggingface/tasks` : Typings only

package/dist/index.cjs CHANGED Viewed

@@ -1,4 +1,3 @@
-/// <reference path="./index.d.ts" />
 "use strict";
 var __defProp = Object.defineProperty;
 var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
@@ -27,6 +26,8 @@ __export(src_exports, {
   audioClassification: () => audioClassification,
   audioToAudio: () => audioToAudio,
   automaticSpeechRecognition: () => automaticSpeechRecognition,
+  chatCompletion: () => chatCompletion,
+  chatCompletionStream: () => chatCompletionStream,
   documentQuestionAnswering: () => documentQuestionAnswering,
   featureExtraction: () => featureExtraction,
   fillMask: () => fillMask,
@@ -62,6 +63,8 @@ __export(tasks_exports, {
   audioClassification: () => audioClassification,
   audioToAudio: () => audioToAudio,
   automaticSpeechRecognition: () => automaticSpeechRecognition,
+  chatCompletion: () => chatCompletion,
+  chatCompletionStream: () => chatCompletionStream,
   documentQuestionAnswering: () => documentQuestionAnswering,
   featureExtraction: () => featureExtraction,
   fillMask: () => fillMask,
@@ -90,6 +93,30 @@ __export(tasks_exports, {
   zeroShotImageClassification: () => zeroShotImageClassification
 });
+// src/utils/pick.ts
+function pick(o, props) {
+  return Object.assign(
+    {},
+    ...props.map((prop) => {
+      if (o[prop] !== void 0) {
+        return { [prop]: o[prop] };
+      }
+    })
+  );
+}
+// src/utils/typedInclude.ts
+function typedInclude(arr, v) {
+  return arr.includes(v);
+}
+// src/utils/omit.ts
+function omit(o, props) {
+  const propsArr = Array.isArray(props) ? props : [props];
+  const letsKeep = Object.keys(o).filter((prop) => !typedInclude(propsArr, prop));
+  return pick(o, letsKeep);
+}
 // src/lib/isUrl.ts
 function isUrl(modelOrUrl) {
   return /^http(s?):/.test(modelOrUrl) || modelOrUrl.startsWith("/");
@@ -130,7 +157,7 @@ async function getDefaultTask(model, accessToken, options) {
 var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co";
 var tasks = null;
 async function makeRequestOptions(args, options) {
-  const { accessToken, model: _model, ...otherArgs } = args;
+  const { accessToken, endpointUrl, ...otherArgs } = args;
   let { model } = args;
   const {
     forceTask: task,
@@ -139,7 +166,7 @@ async function makeRequestOptions(args, options) {
     wait_for_model,
     use_cache,
     dont_load_model,
-    ...otherOptions
+    chatCompletion: chatCompletion2
   } = options ?? {};
   const headers = {};
   if (accessToken) {
@@ -173,15 +200,25 @@ async function makeRequestOptions(args, options) {
   if (dont_load_model) {
     headers["X-Load-Model"] = "0";
   }
-  const url = (() => {
+  let url = (() => {
+    if (endpointUrl && isUrl(model)) {
+      throw new TypeError("Both model and endpointUrl cannot be URLs");
+    }
     if (isUrl(model)) {
+      console.warn("Using a model URL is deprecated, please use the `endpointUrl` parameter instead");
       return model;
     }
+    if (endpointUrl) {
+      return endpointUrl;
+    }
     if (task) {
       return `${HF_INFERENCE_API_BASE_URL}/pipeline/${task}/${model}`;
     }
     return `${HF_INFERENCE_API_BASE_URL}/models/${model}`;
   })();
+  if (chatCompletion2 && !url.endsWith("/chat/completions")) {
+    url += "/v1/chat/completions";
+  }
   let credentials;
   if (typeof includeCredentials === "string") {
     credentials = includeCredentials;
@@ -192,8 +229,7 @@ async function makeRequestOptions(args, options) {
     headers,
     method: "POST",
     body: binary ? args.data : JSON.stringify({
-      ...otherArgs,
-      options: options && otherOptions
+      ...otherArgs.model && isUrl(otherArgs.model) ? omit(otherArgs, "model") : otherArgs
     }),
     ...credentials && { credentials },
     signal: options?.signal
@@ -214,6 +250,9 @@ async function request(args, options) {
   if (!response.ok) {
     if (response.headers.get("Content-Type")?.startsWith("application/json")) {
       const output = await response.json();
+      if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
+        throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
+      }
       if (output.error) {
         throw new Error(output.error);
       }
@@ -338,6 +377,9 @@ async function* streamingRequest(args, options) {
   if (!response.ok) {
     if (response.headers.get("Content-Type")?.startsWith("application/json")) {
       const output = await response.json();
+      if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
+        throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
+      }
       if (output.error) {
         throw new Error(output.error);
       }
@@ -374,6 +416,9 @@ async function* streamingRequest(args, options) {
       onChunk(value);
       for (const event of events) {
         if (event.data.length > 0) {
+          if (event.data === "[DONE]") {
+            return;
+          }
           const data = JSON.parse(event.data);
           if (typeof data === "object" && data !== null && "error" in data) {
             throw new Error(data.error);
@@ -520,7 +565,7 @@ async function textToImage(args, options) {
   return res;
 }
-// ../shared/src/base64FromBytes.ts
+// src/utils/base64FromBytes.ts
 function base64FromBytes(arr) {
   if (globalThis.Buffer) {
     return globalThis.Buffer.from(arr).toString("base64");
@@ -533,10 +578,6 @@ function base64FromBytes(arr) {
   }
 }
-// ../shared/src/isBackend.ts
-var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
-var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
 // src/tasks/cv/imageToImage.ts
 async function imageToImage(args, options) {
   let reqArgs;
@@ -699,12 +740,22 @@ async function textClassification(args, options) {
   return res;
 }
+// src/utils/toArray.ts
+function toArray(obj) {
+  if (Array.isArray(obj)) {
+    return obj;
+  }
+  return [obj];
+}
 // src/tasks/nlp/textGeneration.ts
 async function textGeneration(args, options) {
-  const res = await request(args, {
-    ...options,
-    taskHint: "text-generation"
-  });
+  const res = toArray(
+    await request(args, {
+      ...options,
+      taskHint: "text-generation"
+    })
+  );
   const isValidOutput = Array.isArray(res) && res.every((x) => typeof x?.generated_text === "string");
   if (!isValidOutput) {
     throw new InferenceOutputError("Expected Array<{generated_text: string}>");
@@ -720,14 +771,6 @@ async function* textGenerationStream(args, options) {
   });
 }
-// src/utils/toArray.ts
-function toArray(obj) {
-  if (Array.isArray(obj)) {
-    return obj;
-  }
-  return [obj];
-}
 // src/tasks/nlp/tokenClassification.ts
 async function tokenClassification(args, options) {
   const res = toArray(
@@ -777,6 +820,29 @@ async function zeroShotClassification(args, options) {
   return res;
 }
+// src/tasks/nlp/chatCompletion.ts
+async function chatCompletion(args, options) {
+  const res = await request(args, {
+    ...options,
+    taskHint: "text-generation",
+    chatCompletion: true
+  });
+  const isValidOutput = typeof res === "object" && Array.isArray(res?.choices) && typeof res?.created === "number" && typeof res?.id === "string" && typeof res?.model === "string" && typeof res?.system_fingerprint === "string" && typeof res?.usage === "object";
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected ChatCompletionOutput");
+  }
+  return res;
+}
+// src/tasks/nlp/chatCompletionStream.ts
+async function* chatCompletionStream(args, options) {
+  yield* streamingRequest(args, {
+    ...options,
+    taskHint: "text-generation",
+    chatCompletion: true
+  });
+}
 // src/tasks/multimodal/documentQuestionAnswering.ts
 async function documentQuestionAnswering(args, options) {
   const reqArgs = {
@@ -888,7 +954,7 @@ var HfInferenceEndpoint = class {
         enumerable: false,
         value: (params, options) => (
           // eslint-disable-next-line @typescript-eslint/no-explicit-any
-          fn({ ...params, accessToken, model: endpointUrl }, { ...defaultOptions, ...options })
+          fn({ ...params, accessToken, endpointUrl }, { ...defaultOptions, ...options })
         )
       });
     }
@@ -902,6 +968,8 @@ var HfInferenceEndpoint = class {
   audioClassification,
   audioToAudio,
   automaticSpeechRecognition,
+  chatCompletion,
+  chatCompletionStream,
   documentQuestionAnswering,
   featureExtraction,
   fillMask,

package/dist/index.js CHANGED Viewed

@@ -1,4 +1,3 @@
-/// <reference path="./index.d.ts" />
 var __defProp = Object.defineProperty;
 var __export = (target, all) => {
   for (var name in all)
@@ -11,6 +10,8 @@ __export(tasks_exports, {
   audioClassification: () => audioClassification,
   audioToAudio: () => audioToAudio,
   automaticSpeechRecognition: () => automaticSpeechRecognition,
+  chatCompletion: () => chatCompletion,
+  chatCompletionStream: () => chatCompletionStream,
   documentQuestionAnswering: () => documentQuestionAnswering,
   featureExtraction: () => featureExtraction,
   fillMask: () => fillMask,
@@ -39,6 +40,30 @@ __export(tasks_exports, {
   zeroShotImageClassification: () => zeroShotImageClassification
 });
+// src/utils/pick.ts
+function pick(o, props) {
+  return Object.assign(
+    {},
+    ...props.map((prop) => {
+      if (o[prop] !== void 0) {
+        return { [prop]: o[prop] };
+      }
+    })
+  );
+}
+// src/utils/typedInclude.ts
+function typedInclude(arr, v) {
+  return arr.includes(v);
+}
+// src/utils/omit.ts
+function omit(o, props) {
+  const propsArr = Array.isArray(props) ? props : [props];
+  const letsKeep = Object.keys(o).filter((prop) => !typedInclude(propsArr, prop));
+  return pick(o, letsKeep);
+}
 // src/lib/isUrl.ts
 function isUrl(modelOrUrl) {
   return /^http(s?):/.test(modelOrUrl) || modelOrUrl.startsWith("/");
@@ -79,7 +104,7 @@ async function getDefaultTask(model, accessToken, options) {
 var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co";
 var tasks = null;
 async function makeRequestOptions(args, options) {
-  const { accessToken, model: _model, ...otherArgs } = args;
+  const { accessToken, endpointUrl, ...otherArgs } = args;
   let { model } = args;
   const {
     forceTask: task,
@@ -88,7 +113,7 @@ async function makeRequestOptions(args, options) {
     wait_for_model,
     use_cache,
     dont_load_model,
-    ...otherOptions
+    chatCompletion: chatCompletion2
   } = options ?? {};
   const headers = {};
   if (accessToken) {
@@ -122,15 +147,25 @@ async function makeRequestOptions(args, options) {
   if (dont_load_model) {
     headers["X-Load-Model"] = "0";
   }
-  const url = (() => {
+  let url = (() => {
+    if (endpointUrl && isUrl(model)) {
+      throw new TypeError("Both model and endpointUrl cannot be URLs");
+    }
     if (isUrl(model)) {
+      console.warn("Using a model URL is deprecated, please use the `endpointUrl` parameter instead");
       return model;
     }
+    if (endpointUrl) {
+      return endpointUrl;
+    }
     if (task) {
       return `${HF_INFERENCE_API_BASE_URL}/pipeline/${task}/${model}`;
     }
     return `${HF_INFERENCE_API_BASE_URL}/models/${model}`;
   })();
+  if (chatCompletion2 && !url.endsWith("/chat/completions")) {
+    url += "/v1/chat/completions";
+  }
   let credentials;
   if (typeof includeCredentials === "string") {
     credentials = includeCredentials;
@@ -141,8 +176,7 @@ async function makeRequestOptions(args, options) {
     headers,
     method: "POST",
     body: binary ? args.data : JSON.stringify({
-      ...otherArgs,
-      options: options && otherOptions
+      ...otherArgs.model && isUrl(otherArgs.model) ? omit(otherArgs, "model") : otherArgs
     }),
     ...credentials && { credentials },
     signal: options?.signal
@@ -163,6 +197,9 @@ async function request(args, options) {
   if (!response.ok) {
     if (response.headers.get("Content-Type")?.startsWith("application/json")) {
       const output = await response.json();
+      if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
+        throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
+      }
       if (output.error) {
         throw new Error(output.error);
       }
@@ -287,6 +324,9 @@ async function* streamingRequest(args, options) {
   if (!response.ok) {
     if (response.headers.get("Content-Type")?.startsWith("application/json")) {
       const output = await response.json();
+      if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
+        throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
+      }
       if (output.error) {
         throw new Error(output.error);
       }
@@ -323,6 +363,9 @@ async function* streamingRequest(args, options) {
       onChunk(value);
       for (const event of events) {
         if (event.data.length > 0) {
+          if (event.data === "[DONE]") {
+            return;
+          }
           const data = JSON.parse(event.data);
           if (typeof data === "object" && data !== null && "error" in data) {
             throw new Error(data.error);
@@ -469,7 +512,7 @@ async function textToImage(args, options) {
   return res;
 }
-// ../shared/src/base64FromBytes.ts
+// src/utils/base64FromBytes.ts
 function base64FromBytes(arr) {
   if (globalThis.Buffer) {
     return globalThis.Buffer.from(arr).toString("base64");
@@ -482,10 +525,6 @@ function base64FromBytes(arr) {
   }
 }
-// ../shared/src/isBackend.ts
-var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
-var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
 // src/tasks/cv/imageToImage.ts
 async function imageToImage(args, options) {
   let reqArgs;
@@ -648,12 +687,22 @@ async function textClassification(args, options) {
   return res;
 }
+// src/utils/toArray.ts
+function toArray(obj) {
+  if (Array.isArray(obj)) {
+    return obj;
+  }
+  return [obj];
+}
 // src/tasks/nlp/textGeneration.ts
 async function textGeneration(args, options) {
-  const res = await request(args, {
-    ...options,
-    taskHint: "text-generation"
-  });
+  const res = toArray(
+    await request(args, {
+      ...options,
+      taskHint: "text-generation"
+    })
+  );
   const isValidOutput = Array.isArray(res) && res.every((x) => typeof x?.generated_text === "string");
   if (!isValidOutput) {
     throw new InferenceOutputError("Expected Array<{generated_text: string}>");
@@ -669,14 +718,6 @@ async function* textGenerationStream(args, options) {
   });
 }
-// src/utils/toArray.ts
-function toArray(obj) {
-  if (Array.isArray(obj)) {
-    return obj;
-  }
-  return [obj];
-}
 // src/tasks/nlp/tokenClassification.ts
 async function tokenClassification(args, options) {
   const res = toArray(
@@ -726,6 +767,29 @@ async function zeroShotClassification(args, options) {
   return res;
 }
+// src/tasks/nlp/chatCompletion.ts
+async function chatCompletion(args, options) {
+  const res = await request(args, {
+    ...options,
+    taskHint: "text-generation",
+    chatCompletion: true
+  });
+  const isValidOutput = typeof res === "object" && Array.isArray(res?.choices) && typeof res?.created === "number" && typeof res?.id === "string" && typeof res?.model === "string" && typeof res?.system_fingerprint === "string" && typeof res?.usage === "object";
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected ChatCompletionOutput");
+  }
+  return res;
+}
+// src/tasks/nlp/chatCompletionStream.ts
+async function* chatCompletionStream(args, options) {
+  yield* streamingRequest(args, {
+    ...options,
+    taskHint: "text-generation",
+    chatCompletion: true
+  });
+}
 // src/tasks/multimodal/documentQuestionAnswering.ts
 async function documentQuestionAnswering(args, options) {
   const reqArgs = {
@@ -837,7 +901,7 @@ var HfInferenceEndpoint = class {
         enumerable: false,
         value: (params, options) => (
           // eslint-disable-next-line @typescript-eslint/no-explicit-any
-          fn({ ...params, accessToken, model: endpointUrl }, { ...defaultOptions, ...options })
+          fn({ ...params, accessToken, endpointUrl }, { ...defaultOptions, ...options })
         )
       });
     }
@@ -850,6 +914,8 @@ export {
   audioClassification,
   audioToAudio,
   automaticSpeechRecognition,
+  chatCompletion,
+  chatCompletionStream,
   documentQuestionAnswering,
   featureExtraction,
   fillMask,