@huggingface/inference 2.6.6 → 2.7.0

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (129)
  1. package/README.md +126 -27
  2. package/dist/index.cjs +81 -17
  3. package/dist/index.js +79 -17
  4. package/dist/src/HfInference.d.ts +28 -0
  5. package/dist/src/HfInference.d.ts.map +1 -0
  6. package/dist/src/index.d.ts +5 -0
  7. package/dist/src/index.d.ts.map +1 -0
  8. package/dist/src/lib/InferenceOutputError.d.ts +4 -0
  9. package/dist/src/lib/InferenceOutputError.d.ts.map +1 -0
  10. package/dist/src/lib/getDefaultTask.d.ts +12 -0
  11. package/dist/src/lib/getDefaultTask.d.ts.map +1 -0
  12. package/dist/src/lib/isUrl.d.ts +2 -0
  13. package/dist/src/lib/isUrl.d.ts.map +1 -0
  14. package/dist/src/lib/makeRequestOptions.d.ts +18 -0
  15. package/dist/src/lib/makeRequestOptions.d.ts.map +1 -0
  16. package/dist/src/tasks/audio/audioClassification.d.ts +24 -0
  17. package/dist/src/tasks/audio/audioClassification.d.ts.map +1 -0
  18. package/dist/src/tasks/audio/audioToAudio.d.ts +28 -0
  19. package/dist/src/tasks/audio/audioToAudio.d.ts.map +1 -0
  20. package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts +19 -0
  21. package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts.map +1 -0
  22. package/dist/src/tasks/audio/textToSpeech.d.ts +14 -0
  23. package/dist/src/tasks/audio/textToSpeech.d.ts.map +1 -0
  24. package/dist/src/tasks/custom/request.d.ts +13 -0
  25. package/dist/src/tasks/custom/request.d.ts.map +1 -0
  26. package/dist/src/tasks/custom/streamingRequest.d.ts +13 -0
  27. package/dist/src/tasks/custom/streamingRequest.d.ts.map +1 -0
  28. package/dist/src/tasks/cv/imageClassification.d.ts +24 -0
  29. package/dist/src/tasks/cv/imageClassification.d.ts.map +1 -0
  30. package/dist/src/tasks/cv/imageSegmentation.d.ts +28 -0
  31. package/dist/src/tasks/cv/imageSegmentation.d.ts.map +1 -0
  32. package/dist/src/tasks/cv/imageToImage.d.ts +55 -0
  33. package/dist/src/tasks/cv/imageToImage.d.ts.map +1 -0
  34. package/dist/src/tasks/cv/imageToText.d.ts +18 -0
  35. package/dist/src/tasks/cv/imageToText.d.ts.map +1 -0
  36. package/dist/src/tasks/cv/objectDetection.d.ts +33 -0
  37. package/dist/src/tasks/cv/objectDetection.d.ts.map +1 -0
  38. package/dist/src/tasks/cv/textToImage.d.ts +36 -0
  39. package/dist/src/tasks/cv/textToImage.d.ts.map +1 -0
  40. package/dist/src/tasks/cv/zeroShotImageClassification.d.ts +26 -0
  41. package/dist/src/tasks/cv/zeroShotImageClassification.d.ts.map +1 -0
  42. package/dist/src/tasks/index.d.ts +32 -0
  43. package/dist/src/tasks/index.d.ts.map +1 -0
  44. package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts +35 -0
  45. package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts.map +1 -0
  46. package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts +27 -0
  47. package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts.map +1 -0
  48. package/dist/src/tasks/nlp/chatCompletion.d.ts +7 -0
  49. package/dist/src/tasks/nlp/chatCompletion.d.ts.map +1 -0
  50. package/dist/src/tasks/nlp/chatCompletionStream.d.ts +7 -0
  51. package/dist/src/tasks/nlp/chatCompletionStream.d.ts.map +1 -0
  52. package/dist/src/tasks/nlp/featureExtraction.d.ts +19 -0
  53. package/dist/src/tasks/nlp/featureExtraction.d.ts.map +1 -0
  54. package/dist/src/tasks/nlp/fillMask.d.ts +27 -0
  55. package/dist/src/tasks/nlp/fillMask.d.ts.map +1 -0
  56. package/dist/src/tasks/nlp/questionAnswering.d.ts +30 -0
  57. package/dist/src/tasks/nlp/questionAnswering.d.ts.map +1 -0
  58. package/dist/src/tasks/nlp/sentenceSimilarity.d.ts +19 -0
  59. package/dist/src/tasks/nlp/sentenceSimilarity.d.ts.map +1 -0
  60. package/dist/src/tasks/nlp/summarization.d.ts +48 -0
  61. package/dist/src/tasks/nlp/summarization.d.ts.map +1 -0
  62. package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts +36 -0
  63. package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts.map +1 -0
  64. package/dist/src/tasks/nlp/textClassification.d.ts +22 -0
  65. package/dist/src/tasks/nlp/textClassification.d.ts.map +1 -0
  66. package/dist/src/tasks/nlp/textGeneration.d.ts +8 -0
  67. package/dist/src/tasks/nlp/textGeneration.d.ts.map +1 -0
  68. package/dist/src/tasks/nlp/textGenerationStream.d.ts +81 -0
  69. package/dist/src/tasks/nlp/textGenerationStream.d.ts.map +1 -0
  70. package/dist/src/tasks/nlp/tokenClassification.d.ts +51 -0
  71. package/dist/src/tasks/nlp/tokenClassification.d.ts.map +1 -0
  72. package/dist/src/tasks/nlp/translation.d.ts +19 -0
  73. package/dist/src/tasks/nlp/translation.d.ts.map +1 -0
  74. package/dist/src/tasks/nlp/zeroShotClassification.d.ts +28 -0
  75. package/dist/src/tasks/nlp/zeroShotClassification.d.ts.map +1 -0
  76. package/dist/src/tasks/tabular/tabularClassification.d.ts +20 -0
  77. package/dist/src/tasks/tabular/tabularClassification.d.ts.map +1 -0
  78. package/dist/src/tasks/tabular/tabularRegression.d.ts +20 -0
  79. package/dist/src/tasks/tabular/tabularRegression.d.ts.map +1 -0
  80. package/dist/src/types.d.ts +69 -0
  81. package/dist/src/types.d.ts.map +1 -0
  82. package/dist/src/utils/base64FromBytes.d.ts +2 -0
  83. package/dist/src/utils/base64FromBytes.d.ts.map +1 -0
  84. package/dist/src/utils/distributive-omit.d.ts +9 -0
  85. package/dist/src/utils/distributive-omit.d.ts.map +1 -0
  86. package/dist/src/utils/isBackend.d.ts +2 -0
  87. package/dist/src/utils/isBackend.d.ts.map +1 -0
  88. package/dist/src/utils/isFrontend.d.ts +2 -0
  89. package/dist/src/utils/isFrontend.d.ts.map +1 -0
  90. package/dist/src/utils/omit.d.ts +5 -0
  91. package/dist/src/utils/omit.d.ts.map +1 -0
  92. package/dist/src/utils/pick.d.ts +5 -0
  93. package/dist/src/utils/pick.d.ts.map +1 -0
  94. package/dist/src/utils/toArray.d.ts +2 -0
  95. package/dist/src/utils/toArray.d.ts.map +1 -0
  96. package/dist/src/utils/typedInclude.d.ts +2 -0
  97. package/dist/src/utils/typedInclude.d.ts.map +1 -0
  98. package/dist/src/vendor/fetch-event-source/parse.d.ts +69 -0
  99. package/dist/src/vendor/fetch-event-source/parse.d.ts.map +1 -0
  100. package/dist/src/vendor/fetch-event-source/parse.spec.d.ts +2 -0
  101. package/dist/src/vendor/fetch-event-source/parse.spec.d.ts.map +1 -0
  102. package/dist/test/HfInference.spec.d.ts +2 -0
  103. package/dist/test/HfInference.spec.d.ts.map +1 -0
  104. package/dist/test/expect-closeto.d.ts +2 -0
  105. package/dist/test/expect-closeto.d.ts.map +1 -0
  106. package/dist/test/test-files.d.ts +2 -0
  107. package/dist/test/test-files.d.ts.map +1 -0
  108. package/dist/test/vcr.d.ts +2 -0
  109. package/dist/test/vcr.d.ts.map +1 -0
  110. package/package.json +9 -7
  111. package/src/HfInference.ts +7 -6
  112. package/src/lib/makeRequestOptions.ts +23 -18
  113. package/src/tasks/custom/request.ts +5 -0
  114. package/src/tasks/custom/streamingRequest.ts +8 -0
  115. package/src/tasks/cv/imageToImage.ts +1 -1
  116. package/src/tasks/cv/zeroShotImageClassification.ts +1 -1
  117. package/src/tasks/index.ts +2 -0
  118. package/src/tasks/multimodal/documentQuestionAnswering.ts +1 -1
  119. package/src/tasks/multimodal/visualQuestionAnswering.ts +1 -1
  120. package/src/tasks/nlp/chatCompletion.ts +32 -0
  121. package/src/tasks/nlp/chatCompletionStream.ts +17 -0
  122. package/src/tasks/nlp/textGeneration.ts +3 -1
  123. package/src/tasks/nlp/textGenerationStream.ts +2 -2
  124. package/src/types.ts +13 -2
  125. package/src/utils/base64FromBytes.ts +11 -0
  126. package/src/utils/{distributive-omit.d.ts → distributive-omit.ts} +0 -2
  127. package/src/utils/isBackend.ts +6 -0
  128. package/src/utils/isFrontend.ts +3 -0
  129. package/dist/index.d.ts +0 -1341
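The headline changes in 2.7.0 are the OpenAI-compatible `chatCompletion`/`chatCompletionStream` tasks and a new `endpointUrl` request argument that separates the request URL from the `model` field sent in the request body. A minimal sketch of both, pieced together from the README and dist diffs below (token, URL, and prompts are placeholders; assumes an ESM context with top-level await):

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder token

// New OpenAI-compatible chat completion task
const out = await hf.chatCompletion({
  model: "mistralai/Mistral-7B-Instruct-v0.2",
  messages: [{ role: "user", content: "One plus one is" }],
  max_tokens: 64,
});
console.log(out.choices[0].message);

// New `endpointUrl` argument: the URL to call and the `model` sent in the
// JSON body are now independent (previously a URL had to be passed as `model`)
await hf.chatCompletion({
  endpointUrl: "https://my-endpoint.example.com", // hypothetical endpoint
  model: "tgi",
  messages: [{ role: "user", content: "Hello" }],
});
```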
package/README.md CHANGED
@@ -5,7 +5,7 @@ It works with both [Inference API (serverless)](https://huggingface.co/docs/api-
 
 Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README).
 
-You can also try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference), see some demos on [hf.co/huggingfacejs](https://huggingface.co/huggingfacejs), or watch a [Scrimba tutorial that explains how Inference Endpoints works](https://scrimba.com/scrim/cod8248f5adfd6e129582c523).
+You can also try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference), see some demos on [hf.co/huggingfacejs](https://huggingface.co/huggingfacejs), or watch a [Scrimba tutorial that explains how Inference Endpoints works](https://scrimba.com/scrim/cod8248f5adfd6e129582c523).
 
 ## Getting Started
 
@@ -30,7 +30,6 @@ import { HfInference } from "https://esm.sh/@huggingface/inference"
 import { HfInference } from "npm:@huggingface/inference"
 ```
 
-
 ### Initialize
 
 ```typescript
@@ -43,7 +42,6 @@ const hf = new HfInference('your access token')
 
 Your access token should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the access token.
 
-
 #### Tree-shaking
 
 You can import the functions you need directly from the module instead of using the `HfInference` class.
@@ -63,6 +61,85 @@ This will enable tree-shaking by your bundler.
 
 ## Natural Language Processing
 
+### Text Generation
+
+Generates text from an input prompt.
+
+[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-text-generation)
+
+```typescript
+await hf.textGeneration({
+  model: 'gpt2',
+  inputs: 'The answer to the universe is'
+})
+
+for await (const output of hf.textGenerationStream({
+  model: "google/flan-t5-xxl",
+  inputs: 'repeat "one two three four"',
+  parameters: { max_new_tokens: 250 }
+})) {
+  console.log(output.token.text, output.generated_text);
+}
+```
+
+### Text Generation (Chat Completion API Compatible)
+
+Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://api-inference.huggingface.co/framework/text-generation-inference) on Hugging Face support the Messages API.
+
+[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-chat-completion)
+
+```typescript
+// Non-streaming API
+const out = await hf.chatCompletion({
+  model: "mistralai/Mistral-7B-Instruct-v0.2",
+  messages: [{ role: "user", content: "Complete this sentence with words: one plus one is equal " }],
+  max_tokens: 500,
+  temperature: 0.1,
+  seed: 0,
+});
+
+// Streaming API
+let streamOut = "";
+for await (const chunk of hf.chatCompletionStream({
+  model: "mistralai/Mistral-7B-Instruct-v0.2",
+  messages: [
+    { role: "user", content: "Complete the equation 1+1=, just the answer" },
+  ],
+  max_tokens: 500,
+  temperature: 0.1,
+  seed: 0,
+})) {
+  if (chunk.choices && chunk.choices.length > 0) {
+    streamOut += chunk.choices[0].delta.content;
+  }
+}
+```
+
+It's also possible to call Mistral or OpenAI endpoints directly:
+
+```typescript
+const openai = new HfInference(OPENAI_TOKEN).endpoint("https://api.openai.com");
+
+let out = "";
+for await (const chunk of openai.chatCompletionStream({
+  model: "gpt-3.5-turbo",
+  messages: [
+    { role: "user", content: "Complete the equation 1+1=, just the answer" },
+  ],
+  max_tokens: 500,
+  temperature: 0.1,
+  seed: 0,
+})) {
+  if (chunk.choices && chunk.choices.length > 0) {
+    out += chunk.choices[0].delta.content;
+  }
+}
+
+// For Mistral AI:
+// endpointUrl: "https://api.mistral.ai"
+// model: "mistral-tiny"
+```
+
 ### Fill Mask
 
 Tries to fill in a hole with a missing word (token to be precise).
@@ -131,27 +208,6 @@ await hf.textClassification({
 })
 ```
 
-### Text Generation
-
-Generates text from an input prompt.
-
-[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-text-generation)
-
-```typescript
-await hf.textGeneration({
-  model: 'gpt2',
-  inputs: 'The answer to the universe is'
-})
-
-for await (const output of hf.textGenerationStream({
-  model: "google/flan-t5-xxl",
-  inputs: 'repeat "one two three four"',
-  parameters: { max_new_tokens: 250 }
-})) {
-  console.log(output.token.text, output.generated_text);
-}
-```
-
 ### Token Classification
 
 Used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -177,9 +233,9 @@ await hf.translation({
   model: 'facebook/mbart-large-50-many-to-many-mmt',
   inputs: textToTranslate,
   parameters: {
-    "src_lang": "en_XX",
-    "tgt_lang": "fr_XX"
-  }
+    "src_lang": "en_XX",
+    "tgt_lang": "fr_XX"
+  }
 })
 ```
 
@@ -497,6 +553,26 @@ for await (const output of hf.streamingRequest({
 }
 ```
 
+You can use any Chat Completion API-compatible provider with the `chatCompletion` method.
+
+```typescript
+// Chat Completion Example
+const MISTRAL_KEY = process.env.MISTRAL_KEY;
+const hf = new HfInference(MISTRAL_KEY);
+const ep = hf.endpoint("https://api.mistral.ai");
+const stream = ep.chatCompletionStream({
+  model: "mistral-tiny",
+  messages: [{ role: "user", content: "Complete the equation one + one =, just the answer" }],
+});
+let out = "";
+for await (const chunk of stream) {
+  if (chunk.choices && chunk.choices.length > 0) {
+    out += chunk.choices[0].delta.content;
+    console.log(out);
+  }
+}
+```
+
 ## Custom Inference Endpoints
 
 Learn more about using your own inference endpoints [here](https://hf.co/docs/inference-endpoints/)
@@ -504,6 +580,25 @@ Learn more about using your own inference endpoints [here](https://hf.co/docs/inference-endpoints/)
 ```typescript
 const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
 const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'});
+
+// Chat Completion Example
+const ep = hf.endpoint(
+  "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
+);
+const stream = ep.chatCompletionStream({
+  model: "tgi",
+  messages: [{ role: "user", content: "Complete the equation 1+1=, just the answer" }],
+  max_tokens: 500,
+  temperature: 0.1,
+  seed: 0,
+});
+let out = "";
+for await (const chunk of stream) {
+  if (chunk.choices && chunk.choices.length > 0) {
+    out += chunk.choices[0].delta.content;
+    console.log(out);
+  }
+}
 ```
 
 By default, all calls to the inference endpoint will wait until the model is
@@ -532,3 +627,7 @@ HF_TOKEN="your access token" pnpm run test
 We have an informative documentation project called [Tasks](https://huggingface.co/tasks) to list available models for each task and explain how each task works in detail.
 
 It also contains demos, example outputs, and other resources should you want to dig deeper into the ML side of things.
+
+## Dependencies
+
+- `@huggingface/tasks`: Typings only
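Both new tasks are also added to the package's standalone exports (see the `tasks_exports` hunks in the dist bundles below), so the tree-shakable import style the README already documents should apply to them as well. A short sketch under that assumption, with a placeholder token:

```typescript
import { chatCompletionStream } from "@huggingface/inference";

// Standalone task functions take `accessToken` in their args instead of
// being bound to an HfInference instance.
for await (const chunk of chatCompletionStream({
  accessToken: "hf_...", // placeholder token
  model: "mistralai/Mistral-7B-Instruct-v0.2",
  messages: [{ role: "user", content: "Say hello" }],
})) {
  if (chunk.choices && chunk.choices.length > 0) {
    process.stdout.write(chunk.choices[0].delta.content ?? "");
  }
}
```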
package/dist/index.cjs CHANGED
@@ -1,4 +1,3 @@
-/// <reference path="./index.d.ts" />
 "use strict";
 var __defProp = Object.defineProperty;
 var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
@@ -27,6 +26,8 @@ __export(src_exports, {
   audioClassification: () => audioClassification,
   audioToAudio: () => audioToAudio,
   automaticSpeechRecognition: () => automaticSpeechRecognition,
+  chatCompletion: () => chatCompletion,
+  chatCompletionStream: () => chatCompletionStream,
   documentQuestionAnswering: () => documentQuestionAnswering,
   featureExtraction: () => featureExtraction,
   fillMask: () => fillMask,
@@ -62,6 +63,8 @@ __export(tasks_exports, {
   audioClassification: () => audioClassification,
   audioToAudio: () => audioToAudio,
   automaticSpeechRecognition: () => automaticSpeechRecognition,
+  chatCompletion: () => chatCompletion,
+  chatCompletionStream: () => chatCompletionStream,
   documentQuestionAnswering: () => documentQuestionAnswering,
   featureExtraction: () => featureExtraction,
   fillMask: () => fillMask,
@@ -90,6 +93,30 @@ __export(tasks_exports, {
   zeroShotImageClassification: () => zeroShotImageClassification
 });
 
+// src/utils/pick.ts
+function pick(o, props) {
+  return Object.assign(
+    {},
+    ...props.map((prop) => {
+      if (o[prop] !== void 0) {
+        return { [prop]: o[prop] };
+      }
+    })
+  );
+}
+
+// src/utils/typedInclude.ts
+function typedInclude(arr, v) {
+  return arr.includes(v);
+}
+
+// src/utils/omit.ts
+function omit(o, props) {
+  const propsArr = Array.isArray(props) ? props : [props];
+  const letsKeep = Object.keys(o).filter((prop) => !typedInclude(propsArr, prop));
+  return pick(o, letsKeep);
+}
+
 // src/lib/isUrl.ts
 function isUrl(modelOrUrl) {
   return /^http(s?):/.test(modelOrUrl) || modelOrUrl.startsWith("/");
@@ -130,7 +157,7 @@ async function getDefaultTask(model, accessToken, options) {
 var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co";
 var tasks = null;
 async function makeRequestOptions(args, options) {
-  const { accessToken, model: _model, ...otherArgs } = args;
+  const { accessToken, endpointUrl, ...otherArgs } = args;
   let { model } = args;
   const {
     forceTask: task,
@@ -139,7 +166,7 @@ async function makeRequestOptions(args, options) {
     wait_for_model,
     use_cache,
     dont_load_model,
-    ...otherOptions
+    chatCompletion: chatCompletion2
   } = options ?? {};
   const headers = {};
   if (accessToken) {
@@ -173,31 +200,38 @@ async function makeRequestOptions(args, options) {
   if (dont_load_model) {
     headers["X-Load-Model"] = "0";
   }
-  const url = (() => {
+  let url = (() => {
+    if (endpointUrl && isUrl(model)) {
+      throw new TypeError("Both model and endpointUrl cannot be URLs");
+    }
     if (isUrl(model)) {
+      console.warn("Using a model URL is deprecated, please use the `endpointUrl` parameter instead");
       return model;
     }
+    if (endpointUrl) {
+      return endpointUrl;
+    }
     if (task) {
       return `${HF_INFERENCE_API_BASE_URL}/pipeline/${task}/${model}`;
     }
     return `${HF_INFERENCE_API_BASE_URL}/models/${model}`;
   })();
+  if (chatCompletion2 && !url.endsWith("/chat/completions")) {
+    url += "/v1/chat/completions";
+  }
   let credentials;
   if (typeof includeCredentials === "string") {
     credentials = includeCredentials;
-  } else if (typeof includeCredentials === "boolean") {
-    credentials = includeCredentials ? "include" : void 0;
-  } else if (includeCredentials === void 0) {
-    credentials = "same-origin";
+  } else if (includeCredentials === true) {
+    credentials = "include";
   }
   const info = {
     headers,
     method: "POST",
     body: binary ? args.data : JSON.stringify({
-      ...otherArgs,
-      options: options && otherOptions
+      ...otherArgs.model && isUrl(otherArgs.model) ? omit(otherArgs, "model") : otherArgs
     }),
-    credentials,
+    ...credentials && { credentials },
     signal: options?.signal
   };
   return { url, info };
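The `makeRequestOptions` hunk above changes three things: `endpointUrl` now takes precedence over the default Inference API URL, passing a full URL as `model` still works but logs a deprecation warning, and chat-completion requests get the OpenAI-style `/v1/chat/completions` path appended. It also drops the old `credentials: "same-origin"` default, so `credentials` is now only attached when `includeCredentials` is a string or `true`. A standalone restatement of the URL logic for illustration (not an exported API; the `forceTask` branch is omitted for brevity):

```typescript
// Mirrors the resolution order in the hunk above.
function resolveUrl(model: string, endpointUrl?: string, chatCompletion?: boolean): string {
  const isUrl = (s: string) => /^http(s?):/.test(s) || s.startsWith("/");
  if (endpointUrl && isUrl(model)) {
    throw new TypeError("Both model and endpointUrl cannot be URLs");
  }
  let url: string;
  if (isUrl(model)) {
    url = model; // deprecated, warns at runtime
  } else if (endpointUrl) {
    url = endpointUrl;
  } else {
    url = `https://api-inference.huggingface.co/models/${model}`;
  }
  if (chatCompletion && !url.endsWith("/chat/completions")) {
    url += "/v1/chat/completions";
  }
  return url;
}

// resolveUrl("tgi", "https://my-endpoint.example.com", true)
//   -> "https://my-endpoint.example.com/v1/chat/completions"
```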
@@ -216,6 +250,9 @@ async function request(args, options) {
   if (!response.ok) {
     if (response.headers.get("Content-Type")?.startsWith("application/json")) {
       const output = await response.json();
+      if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
+        throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
+      }
       if (output.error) {
         throw new Error(output.error);
       }
@@ -340,6 +377,9 @@ async function* streamingRequest(args, options) {
   if (!response.ok) {
     if (response.headers.get("Content-Type")?.startsWith("application/json")) {
       const output = await response.json();
+      if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
+        throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
+      }
       if (output.error) {
         throw new Error(output.error);
       }
@@ -376,6 +416,9 @@ async function* streamingRequest(args, options) {
       onChunk(value);
       for (const event of events) {
         if (event.data.length > 0) {
+          if (event.data === "[DONE]") {
+            return;
+          }
           const data = JSON.parse(event.data);
           if (typeof data === "object" && data !== null && "error" in data) {
             throw new Error(data.error);
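The `[DONE]` guard matters because OpenAI-compatible servers terminate their SSE stream with a literal `[DONE]` data event, which is not valid JSON; without the early return, the `JSON.parse` below it would throw at the end of every chat-completion stream. A self-contained sketch of that decode loop (the event shape is illustrative, not the package's internal type):

```typescript
type SSEEvent = { data: string };

function* decodeEvents(events: Iterable<SSEEvent>): Generator<unknown> {
  for (const event of events) {
    if (event.data.length === 0) continue;
    if (event.data === "[DONE]") return; // sentinel, not JSON: stop cleanly
    yield JSON.parse(event.data); // would throw if it saw "[DONE]"
  }
}

// [...decodeEvents([{ data: '{"x":1}' }, { data: "[DONE]" }])] -> [{ x: 1 }]
```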
@@ -522,7 +565,7 @@ async function textToImage(args, options) {
   return res;
 }
 
-// ../shared/src/base64FromBytes.ts
+// src/utils/base64FromBytes.ts
 function base64FromBytes(arr) {
   if (globalThis.Buffer) {
     return globalThis.Buffer.from(arr).toString("base64");
@@ -535,10 +578,6 @@ function base64FromBytes(arr) {
   }
 }
 
-// ../shared/src/isBackend.ts
-var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
-var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
-
 // src/tasks/cv/imageToImage.ts
 async function imageToImage(args, options) {
   let reqArgs;
@@ -779,6 +818,29 @@ async function zeroShotClassification(args, options) {
   return res;
 }
 
+// src/tasks/nlp/chatCompletion.ts
+async function chatCompletion(args, options) {
+  const res = await request(args, {
+    ...options,
+    taskHint: "text-generation",
+    chatCompletion: true
+  });
+  const isValidOutput = typeof res === "object" && Array.isArray(res?.choices) && typeof res?.created === "number" && typeof res?.id === "string" && typeof res?.model === "string" && typeof res?.system_fingerprint === "string" && typeof res?.usage === "object";
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected ChatCompletionOutput");
+  }
+  return res;
+}
+
+// src/tasks/nlp/chatCompletionStream.ts
+async function* chatCompletionStream(args, options) {
+  yield* streamingRequest(args, {
+    ...options,
+    taskHint: "text-generation",
+    chatCompletion: true
+  });
+}
+
 // src/tasks/multimodal/documentQuestionAnswering.ts
 async function documentQuestionAnswering(args, options) {
   const reqArgs = {
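The `isValidOutput` check above amounts to a structural test against the OpenAI-style response envelope. Spelled out as a type for readability (a sketch inferred from that check, not the package's published typings):

```typescript
interface ChatCompletionOutputLike {
  id: string;
  created: number; // Unix timestamp
  model: string;
  system_fingerprint: string;
  choices: unknown[]; // only Array.isArray is verified above
  usage: object; // token counts in OpenAI-compatible responses
}
```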
@@ -890,7 +952,7 @@ var HfInferenceEndpoint = class {
       enumerable: false,
       value: (params, options) => (
         // eslint-disable-next-line @typescript-eslint/no-explicit-any
-        fn({ ...params, accessToken, model: endpointUrl }, { ...defaultOptions, ...options })
+        fn({ ...params, accessToken, endpointUrl }, { ...defaultOptions, ...options })
       )
     });
   }
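Because `endpoint()` now forwards `endpointUrl` instead of overwriting `model`, an endpoint-bound call can still carry a meaningful `model` value in the JSON body, which TGI's Messages API expects. A short sketch (the token and endpoint URL are placeholders):

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder token
const ep = hf.endpoint("https://my-tgi.example.com");

const res = await ep.chatCompletion({
  model: "tgi", // goes into the request body; the URL comes from endpoint()
  messages: [{ role: "user", content: "1+1=" }],
});
```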
@@ -904,6 +966,8 @@ var HfInferenceEndpoint = class {
   audioClassification,
   audioToAudio,
   automaticSpeechRecognition,
+  chatCompletion,
+  chatCompletionStream,
   documentQuestionAnswering,
   featureExtraction,
   fillMask,
package/dist/index.js CHANGED
@@ -1,4 +1,3 @@
-/// <reference path="./index.d.ts" />
 var __defProp = Object.defineProperty;
 var __export = (target, all) => {
   for (var name in all)
@@ -11,6 +10,8 @@ __export(tasks_exports, {
   audioClassification: () => audioClassification,
   audioToAudio: () => audioToAudio,
   automaticSpeechRecognition: () => automaticSpeechRecognition,
+  chatCompletion: () => chatCompletion,
+  chatCompletionStream: () => chatCompletionStream,
   documentQuestionAnswering: () => documentQuestionAnswering,
   featureExtraction: () => featureExtraction,
   fillMask: () => fillMask,
@@ -39,6 +40,30 @@ __export(tasks_exports, {
   zeroShotImageClassification: () => zeroShotImageClassification
 });
 
+// src/utils/pick.ts
+function pick(o, props) {
+  return Object.assign(
+    {},
+    ...props.map((prop) => {
+      if (o[prop] !== void 0) {
+        return { [prop]: o[prop] };
+      }
+    })
+  );
+}
+
+// src/utils/typedInclude.ts
+function typedInclude(arr, v) {
+  return arr.includes(v);
+}
+
+// src/utils/omit.ts
+function omit(o, props) {
+  const propsArr = Array.isArray(props) ? props : [props];
+  const letsKeep = Object.keys(o).filter((prop) => !typedInclude(propsArr, prop));
+  return pick(o, letsKeep);
+}
+
 // src/lib/isUrl.ts
 function isUrl(modelOrUrl) {
   return /^http(s?):/.test(modelOrUrl) || modelOrUrl.startsWith("/");
@@ -79,7 +104,7 @@ async function getDefaultTask(model, accessToken, options) {
 var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co";
 var tasks = null;
 async function makeRequestOptions(args, options) {
-  const { accessToken, model: _model, ...otherArgs } = args;
+  const { accessToken, endpointUrl, ...otherArgs } = args;
   let { model } = args;
   const {
     forceTask: task,
@@ -88,7 +113,7 @@ async function makeRequestOptions(args, options) {
     wait_for_model,
     use_cache,
     dont_load_model,
-    ...otherOptions
+    chatCompletion: chatCompletion2
   } = options ?? {};
   const headers = {};
   if (accessToken) {
@@ -122,31 +147,38 @@ async function makeRequestOptions(args, options) {
   if (dont_load_model) {
     headers["X-Load-Model"] = "0";
   }
-  const url = (() => {
+  let url = (() => {
+    if (endpointUrl && isUrl(model)) {
+      throw new TypeError("Both model and endpointUrl cannot be URLs");
+    }
     if (isUrl(model)) {
+      console.warn("Using a model URL is deprecated, please use the `endpointUrl` parameter instead");
      return model;
    }
+    if (endpointUrl) {
+      return endpointUrl;
+    }
     if (task) {
       return `${HF_INFERENCE_API_BASE_URL}/pipeline/${task}/${model}`;
     }
     return `${HF_INFERENCE_API_BASE_URL}/models/${model}`;
   })();
+  if (chatCompletion2 && !url.endsWith("/chat/completions")) {
+    url += "/v1/chat/completions";
+  }
   let credentials;
   if (typeof includeCredentials === "string") {
     credentials = includeCredentials;
-  } else if (typeof includeCredentials === "boolean") {
-    credentials = includeCredentials ? "include" : void 0;
-  } else if (includeCredentials === void 0) {
-    credentials = "same-origin";
+  } else if (includeCredentials === true) {
+    credentials = "include";
   }
   const info = {
     headers,
     method: "POST",
     body: binary ? args.data : JSON.stringify({
-      ...otherArgs,
-      options: options && otherOptions
+      ...otherArgs.model && isUrl(otherArgs.model) ? omit(otherArgs, "model") : otherArgs
     }),
-    credentials,
+    ...credentials && { credentials },
     signal: options?.signal
   };
   return { url, info };
@@ -165,6 +197,9 @@ async function request(args, options) {
   if (!response.ok) {
     if (response.headers.get("Content-Type")?.startsWith("application/json")) {
       const output = await response.json();
+      if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
+        throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
+      }
       if (output.error) {
         throw new Error(output.error);
       }
@@ -289,6 +324,9 @@ async function* streamingRequest(args, options) {
   if (!response.ok) {
     if (response.headers.get("Content-Type")?.startsWith("application/json")) {
       const output = await response.json();
+      if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
+        throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
+      }
       if (output.error) {
         throw new Error(output.error);
       }
@@ -325,6 +363,9 @@ async function* streamingRequest(args, options) {
       onChunk(value);
       for (const event of events) {
         if (event.data.length > 0) {
+          if (event.data === "[DONE]") {
+            return;
+          }
           const data = JSON.parse(event.data);
           if (typeof data === "object" && data !== null && "error" in data) {
             throw new Error(data.error);
@@ -471,7 +512,7 @@ async function textToImage(args, options) {
   return res;
 }
 
-// ../shared/src/base64FromBytes.ts
+// src/utils/base64FromBytes.ts
 function base64FromBytes(arr) {
   if (globalThis.Buffer) {
     return globalThis.Buffer.from(arr).toString("base64");
@@ -484,10 +525,6 @@ function base64FromBytes(arr) {
   }
 }
 
-// ../shared/src/isBackend.ts
-var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
-var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
-
 // src/tasks/cv/imageToImage.ts
 async function imageToImage(args, options) {
   let reqArgs;
@@ -728,6 +765,29 @@ async function zeroShotClassification(args, options) {
   return res;
 }
 
+// src/tasks/nlp/chatCompletion.ts
+async function chatCompletion(args, options) {
+  const res = await request(args, {
+    ...options,
+    taskHint: "text-generation",
+    chatCompletion: true
+  });
+  const isValidOutput = typeof res === "object" && Array.isArray(res?.choices) && typeof res?.created === "number" && typeof res?.id === "string" && typeof res?.model === "string" && typeof res?.system_fingerprint === "string" && typeof res?.usage === "object";
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected ChatCompletionOutput");
+  }
+  return res;
+}
+
+// src/tasks/nlp/chatCompletionStream.ts
+async function* chatCompletionStream(args, options) {
+  yield* streamingRequest(args, {
+    ...options,
+    taskHint: "text-generation",
+    chatCompletion: true
+  });
+}
+
 // src/tasks/multimodal/documentQuestionAnswering.ts
 async function documentQuestionAnswering(args, options) {
   const reqArgs = {
@@ -839,7 +899,7 @@ var HfInferenceEndpoint = class {
       enumerable: false,
       value: (params, options) => (
         // eslint-disable-next-line @typescript-eslint/no-explicit-any
-        fn({ ...params, accessToken, model: endpointUrl }, { ...defaultOptions, ...options })
+        fn({ ...params, accessToken, endpointUrl }, { ...defaultOptions, ...options })
       )
     });
   }
@@ -852,6 +912,8 @@ export {
   audioClassification,
   audioToAudio,
   automaticSpeechRecognition,
+  chatCompletion,
+  chatCompletionStream,
   documentQuestionAnswering,
   featureExtraction,
   fillMask,
package/dist/src/HfInference.d.ts ADDED
@@ -0,0 +1,28 @@
+import * as tasks from "./tasks";
+import type { Options } from "./types";
+import type { DistributiveOmit } from "./utils/distributive-omit";
+type Task = typeof tasks;
+type TaskWithNoAccessToken = {
+    [key in keyof Task]: (args: DistributiveOmit<Parameters<Task[key]>[0], "accessToken">, options?: Parameters<Task[key]>[1]) => ReturnType<Task[key]>;
+};
+type TaskWithNoAccessTokenNoEndpointUrl = {
+    [key in keyof Task]: (args: DistributiveOmit<Parameters<Task[key]>[0], "accessToken" | "endpointUrl">, options?: Parameters<Task[key]>[1]) => ReturnType<Task[key]>;
+};
+export declare class HfInference {
+    private readonly accessToken;
+    private readonly defaultOptions;
+    constructor(accessToken?: string, defaultOptions?: Options);
+    /**
+     * Returns copy of HfInference tied to a specified endpoint.
+     */
+    endpoint(endpointUrl: string): HfInferenceEndpoint;
+}
+export declare class HfInferenceEndpoint {
+    constructor(endpointUrl: string, accessToken?: string, defaultOptions?: Options);
+}
+export interface HfInference extends TaskWithNoAccessToken {
+}
+export interface HfInferenceEndpoint extends TaskWithNoAccessTokenNoEndpointUrl {
+}
+export {};
+//# sourceMappingURL=HfInference.d.ts.map
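The mapped types above rely on `DistributiveOmit` (note file 126 in the list: `distributive-omit.d.ts` became a regular `.ts` module this release). The point of the distributive form is that plain `Omit` applied to a union collapses it to the keys shared by all members, while distributing over the union preserves each member's own fields. A common definition, shown for illustration; the package ships its own in `src/utils/distributive-omit.ts`:

```typescript
// Conditional types distribute over naked type parameters, so each union
// member gets its own Omit.
type DistributiveOmit<T, K extends keyof any> = T extends unknown ? Omit<T, K> : never;

type Args =
  | { accessToken?: string; model: string; inputs: string }
  | { accessToken?: string; model: string; data: Blob };

// keyof a union yields only the shared keys, so plain Omit drops `inputs`/`data`:
type Collapsed = Omit<Args, "accessToken">; // { model: string }
// The distributive form keeps both members intact:
type PerMember = DistributiveOmit<Args, "accessToken">;
// { model: string; inputs: string } | { model: string; data: Blob }
```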
package/dist/src/HfInference.d.ts.map ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"HfInference.d.ts","sourceRoot":"","sources":["../../src/HfInference.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,KAAK,MAAM,SAAS,CAAC;AACjC,OAAO,KAAK,EAAE,OAAO,EAAe,MAAM,SAAS,CAAC;AACpD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAKlE,KAAK,IAAI,GAAG,OAAO,KAAK,CAAC;AAEzB,KAAK,qBAAqB,GAAG;KAC3B,GAAG,IAAI,MAAM,IAAI,GAAG,CACpB,IAAI,EAAE,gBAAgB,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,EAC/D,OAAO,CAAC,EAAE,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAC9B,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;CAC1B,CAAC;AAEF,KAAK,kCAAkC,GAAG;KACxC,GAAG,IAAI,MAAM,IAAI,GAAG,CACpB,IAAI,EAAE,gBAAgB,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,aAAa,GAAG,aAAa,CAAC,EAC/E,OAAO,CAAC,EAAE,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAC9B,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;CAC1B,CAAC;AAEF,qBAAa,WAAW;IACvB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAU;gBAE7B,WAAW,SAAK,EAAE,cAAc,GAAE,OAAY;IAc1D;;OAEG;IACI,QAAQ,CAAC,WAAW,EAAE,MAAM,GAAG,mBAAmB;CAGzD;AAED,qBAAa,mBAAmB;gBACnB,WAAW,EAAE,MAAM,EAAE,WAAW,SAAK,EAAE,cAAc,GAAE,OAAY;CAa/E;AAED,MAAM,WAAW,WAAY,SAAQ,qBAAqB;CAAG;AAE7D,MAAM,WAAW,mBAAoB,SAAQ,kCAAkC;CAAG"}
package/dist/src/index.d.ts ADDED
@@ -0,0 +1,5 @@
+export { HfInference, HfInferenceEndpoint } from "./HfInference";
+export { InferenceOutputError } from "./lib/InferenceOutputError";
+export * from "./types";
+export * from "./tasks";
+//# sourceMappingURL=index.d.ts.map
package/dist/src/index.d.ts.map ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAC;AACjE,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAClE,cAAc,SAAS,CAAC;AACxB,cAAc,SAAS,CAAC"}
package/dist/src/lib/InferenceOutputError.d.ts ADDED
@@ -0,0 +1,4 @@
+export declare class InferenceOutputError extends TypeError {
+    constructor(message: string);
+}
+//# sourceMappingURL=InferenceOutputError.d.ts.map
package/dist/src/lib/InferenceOutputError.d.ts.map ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"InferenceOutputError.d.ts","sourceRoot":"","sources":["../../../src/lib/InferenceOutputError.ts"],"names":[],"mappings":"AAAA,qBAAa,oBAAqB,SAAQ,SAAS;gBACtC,OAAO,EAAE,MAAM;CAM3B"}