@huggingface/inference 2.6.7 → 2.7.0

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (129)
  1. package/README.md +126 -27
  2. package/dist/index.cjs +78 -12
  3. package/dist/index.js +76 -12
  4. package/dist/src/HfInference.d.ts +28 -0
  5. package/dist/src/HfInference.d.ts.map +1 -0
  6. package/dist/src/index.d.ts +5 -0
  7. package/dist/src/index.d.ts.map +1 -0
  8. package/dist/src/lib/InferenceOutputError.d.ts +4 -0
  9. package/dist/src/lib/InferenceOutputError.d.ts.map +1 -0
  10. package/dist/src/lib/getDefaultTask.d.ts +12 -0
  11. package/dist/src/lib/getDefaultTask.d.ts.map +1 -0
  12. package/dist/src/lib/isUrl.d.ts +2 -0
  13. package/dist/src/lib/isUrl.d.ts.map +1 -0
  14. package/dist/src/lib/makeRequestOptions.d.ts +18 -0
  15. package/dist/src/lib/makeRequestOptions.d.ts.map +1 -0
  16. package/dist/src/tasks/audio/audioClassification.d.ts +24 -0
  17. package/dist/src/tasks/audio/audioClassification.d.ts.map +1 -0
  18. package/dist/src/tasks/audio/audioToAudio.d.ts +28 -0
  19. package/dist/src/tasks/audio/audioToAudio.d.ts.map +1 -0
  20. package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts +19 -0
  21. package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts.map +1 -0
  22. package/dist/src/tasks/audio/textToSpeech.d.ts +14 -0
  23. package/dist/src/tasks/audio/textToSpeech.d.ts.map +1 -0
  24. package/dist/src/tasks/custom/request.d.ts +13 -0
  25. package/dist/src/tasks/custom/request.d.ts.map +1 -0
  26. package/dist/src/tasks/custom/streamingRequest.d.ts +13 -0
  27. package/dist/src/tasks/custom/streamingRequest.d.ts.map +1 -0
  28. package/dist/src/tasks/cv/imageClassification.d.ts +24 -0
  29. package/dist/src/tasks/cv/imageClassification.d.ts.map +1 -0
  30. package/dist/src/tasks/cv/imageSegmentation.d.ts +28 -0
  31. package/dist/src/tasks/cv/imageSegmentation.d.ts.map +1 -0
  32. package/dist/src/tasks/cv/imageToImage.d.ts +55 -0
  33. package/dist/src/tasks/cv/imageToImage.d.ts.map +1 -0
  34. package/dist/src/tasks/cv/imageToText.d.ts +18 -0
  35. package/dist/src/tasks/cv/imageToText.d.ts.map +1 -0
  36. package/dist/src/tasks/cv/objectDetection.d.ts +33 -0
  37. package/dist/src/tasks/cv/objectDetection.d.ts.map +1 -0
  38. package/dist/src/tasks/cv/textToImage.d.ts +36 -0
  39. package/dist/src/tasks/cv/textToImage.d.ts.map +1 -0
  40. package/dist/src/tasks/cv/zeroShotImageClassification.d.ts +26 -0
  41. package/dist/src/tasks/cv/zeroShotImageClassification.d.ts.map +1 -0
  42. package/dist/src/tasks/index.d.ts +32 -0
  43. package/dist/src/tasks/index.d.ts.map +1 -0
  44. package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts +35 -0
  45. package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts.map +1 -0
  46. package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts +27 -0
  47. package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts.map +1 -0
  48. package/dist/src/tasks/nlp/chatCompletion.d.ts +7 -0
  49. package/dist/src/tasks/nlp/chatCompletion.d.ts.map +1 -0
  50. package/dist/src/tasks/nlp/chatCompletionStream.d.ts +7 -0
  51. package/dist/src/tasks/nlp/chatCompletionStream.d.ts.map +1 -0
  52. package/dist/src/tasks/nlp/featureExtraction.d.ts +19 -0
  53. package/dist/src/tasks/nlp/featureExtraction.d.ts.map +1 -0
  54. package/dist/src/tasks/nlp/fillMask.d.ts +27 -0
  55. package/dist/src/tasks/nlp/fillMask.d.ts.map +1 -0
  56. package/dist/src/tasks/nlp/questionAnswering.d.ts +30 -0
  57. package/dist/src/tasks/nlp/questionAnswering.d.ts.map +1 -0
  58. package/dist/src/tasks/nlp/sentenceSimilarity.d.ts +19 -0
  59. package/dist/src/tasks/nlp/sentenceSimilarity.d.ts.map +1 -0
  60. package/dist/src/tasks/nlp/summarization.d.ts +48 -0
  61. package/dist/src/tasks/nlp/summarization.d.ts.map +1 -0
  62. package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts +36 -0
  63. package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts.map +1 -0
  64. package/dist/src/tasks/nlp/textClassification.d.ts +22 -0
  65. package/dist/src/tasks/nlp/textClassification.d.ts.map +1 -0
  66. package/dist/src/tasks/nlp/textGeneration.d.ts +8 -0
  67. package/dist/src/tasks/nlp/textGeneration.d.ts.map +1 -0
  68. package/dist/src/tasks/nlp/textGenerationStream.d.ts +81 -0
  69. package/dist/src/tasks/nlp/textGenerationStream.d.ts.map +1 -0
  70. package/dist/src/tasks/nlp/tokenClassification.d.ts +51 -0
  71. package/dist/src/tasks/nlp/tokenClassification.d.ts.map +1 -0
  72. package/dist/src/tasks/nlp/translation.d.ts +19 -0
  73. package/dist/src/tasks/nlp/translation.d.ts.map +1 -0
  74. package/dist/src/tasks/nlp/zeroShotClassification.d.ts +28 -0
  75. package/dist/src/tasks/nlp/zeroShotClassification.d.ts.map +1 -0
  76. package/dist/src/tasks/tabular/tabularClassification.d.ts +20 -0
  77. package/dist/src/tasks/tabular/tabularClassification.d.ts.map +1 -0
  78. package/dist/src/tasks/tabular/tabularRegression.d.ts +20 -0
  79. package/dist/src/tasks/tabular/tabularRegression.d.ts.map +1 -0
  80. package/dist/src/types.d.ts +69 -0
  81. package/dist/src/types.d.ts.map +1 -0
  82. package/dist/src/utils/base64FromBytes.d.ts +2 -0
  83. package/dist/src/utils/base64FromBytes.d.ts.map +1 -0
  84. package/dist/src/utils/distributive-omit.d.ts +9 -0
  85. package/dist/src/utils/distributive-omit.d.ts.map +1 -0
  86. package/dist/src/utils/isBackend.d.ts +2 -0
  87. package/dist/src/utils/isBackend.d.ts.map +1 -0
  88. package/dist/src/utils/isFrontend.d.ts +2 -0
  89. package/dist/src/utils/isFrontend.d.ts.map +1 -0
  90. package/dist/src/utils/omit.d.ts +5 -0
  91. package/dist/src/utils/omit.d.ts.map +1 -0
  92. package/dist/src/utils/pick.d.ts +5 -0
  93. package/dist/src/utils/pick.d.ts.map +1 -0
  94. package/dist/src/utils/toArray.d.ts +2 -0
  95. package/dist/src/utils/toArray.d.ts.map +1 -0
  96. package/dist/src/utils/typedInclude.d.ts +2 -0
  97. package/dist/src/utils/typedInclude.d.ts.map +1 -0
  98. package/dist/src/vendor/fetch-event-source/parse.d.ts +69 -0
  99. package/dist/src/vendor/fetch-event-source/parse.d.ts.map +1 -0
  100. package/dist/src/vendor/fetch-event-source/parse.spec.d.ts +2 -0
  101. package/dist/src/vendor/fetch-event-source/parse.spec.d.ts.map +1 -0
  102. package/dist/test/HfInference.spec.d.ts +2 -0
  103. package/dist/test/HfInference.spec.d.ts.map +1 -0
  104. package/dist/test/expect-closeto.d.ts +2 -0
  105. package/dist/test/expect-closeto.d.ts.map +1 -0
  106. package/dist/test/test-files.d.ts +2 -0
  107. package/dist/test/test-files.d.ts.map +1 -0
  108. package/dist/test/vcr.d.ts +2 -0
  109. package/dist/test/vcr.d.ts.map +1 -0
  110. package/package.json +9 -7
  111. package/src/HfInference.ts +4 -4
  112. package/src/lib/makeRequestOptions.ts +17 -7
  113. package/src/tasks/custom/request.ts +5 -0
  114. package/src/tasks/custom/streamingRequest.ts +8 -0
  115. package/src/tasks/cv/imageToImage.ts +1 -1
  116. package/src/tasks/cv/zeroShotImageClassification.ts +1 -1
  117. package/src/tasks/index.ts +2 -0
  118. package/src/tasks/multimodal/documentQuestionAnswering.ts +1 -1
  119. package/src/tasks/multimodal/visualQuestionAnswering.ts +1 -1
  120. package/src/tasks/nlp/chatCompletion.ts +32 -0
  121. package/src/tasks/nlp/chatCompletionStream.ts +17 -0
  122. package/src/tasks/nlp/textGeneration.ts +2 -202
  123. package/src/tasks/nlp/textGenerationStream.ts +2 -1
  124. package/src/types.ts +14 -3
  125. package/src/utils/base64FromBytes.ts +11 -0
  126. package/src/utils/{distributive-omit.d.ts → distributive-omit.ts} +0 -2
  127. package/src/utils/isBackend.ts +6 -0
  128. package/src/utils/isFrontend.ts +3 -0
  129. package/dist/index.d.ts +0 -1536
package/README.md CHANGED
@@ -5,7 +5,7 @@ It works with both [Inference API (serverless)](https://huggingface.co/docs/api-

  Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README).

- You can also try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference), see some demos on [hf.co/huggingfacejs](https://huggingface.co/huggingfacejs), or watch a [Scrimba tutorial that explains how Inference Endpoints works](https://scrimba.com/scrim/cod8248f5adfd6e129582c523).
+ You can also try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference), see some demos on [hf.co/huggingfacejs](https://huggingface.co/huggingfacejs), or watch a [Scrimba tutorial that explains how Inference Endpoints works](https://scrimba.com/scrim/cod8248f5adfd6e129582c523).

  ## Getting Started

@@ -30,7 +30,6 @@ import { HfInference } from "https://esm.sh/@huggingface/inference"
  import { HfInference } from "npm:@huggingface/inference"
  ```

-
  ### Initialize

  ```typescript
@@ -43,7 +42,6 @@ const hf = new HfInference('your access token')

  Your access token should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the access token.

-
  #### Tree-shaking

  You can import the functions you need directly from the module instead of using the `HfInference` class.
@@ -63,6 +61,85 @@ This will enable tree-shaking by your bundler.

  ## Natural Language Processing

+ ### Text Generation
+
+ Generates text from an input prompt.
+
+ [Demo](https://huggingface.co/spaces/huggingfacejs/streaming-text-generation)
+
+ ```typescript
+ await hf.textGeneration({
+   model: 'gpt2',
+   inputs: 'The answer to the universe is'
+ })
+
+ for await (const output of hf.textGenerationStream({
+   model: "google/flan-t5-xxl",
+   inputs: 'repeat "one two three four"',
+   parameters: { max_new_tokens: 250 }
+ })) {
+   console.log(output.token.text, output.generated_text);
+ }
+ ```
+
+ ### Text Generation (Chat Completion API Compatible)
+
+ Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://api-inference.huggingface.co/framework/text-generation-inference) on Hugging Face support Messages API.
+
+ [Demo](https://huggingface.co/spaces/huggingfacejs/streaming-chat-completion)
+
+ ```typescript
+ // Non-streaming API
+ const out = await hf.chatCompletion({
+   model: "mistralai/Mistral-7B-Instruct-v0.2",
+   messages: [{ role: "user", content: "Complete the this sentence with words one plus one is equal " }],
+   max_tokens: 500,
+   temperature: 0.1,
+   seed: 0,
+ });
+
+ // Streaming API
+ let out = "";
+ for await (const chunk of hf.chatCompletionStream({
+   model: "mistralai/Mistral-7B-Instruct-v0.2",
+   messages: [
+     { role: "user", content: "Complete the equation 1+1= ,just the answer" },
+   ],
+   max_tokens: 500,
+   temperature: 0.1,
+   seed: 0,
+ })) {
+   if (chunk.choices && chunk.choices.length > 0) {
+     out += chunk.choices[0].delta.content;
+   }
+ }
+ ```
+
+ It's also possible to call Mistral or OpenAI endpoints directly:
+
+ ```typescript
+ const openai = new HfInference(OPENAI_TOKEN).endpoint("https://api.openai.com");
+
+ let out = "";
+ for await (const chunk of openai.chatCompletionStream({
+   model: "gpt-3.5-turbo",
+   messages: [
+     { role: "user", content: "Complete the equation 1+1= ,just the answer" },
+   ],
+   max_tokens: 500,
+   temperature: 0.1,
+   seed: 0,
+ })) {
+   if (chunk.choices && chunk.choices.length > 0) {
+     out += chunk.choices[0].delta.content;
+   }
+ }
+
+ // For mistral AI:
+ // endpointUrl: "https://api.mistral.ai"
+ // model: "mistral-tiny"
+ ```
+
  ### Fill Mask

  Tries to fill in a hole with a missing word (token to be precise).
@@ -131,27 +208,6 @@ await hf.textClassification({
  })
  ```

- ### Text Generation
-
- Generates text from an input prompt.
-
- [Demo](https://huggingface.co/spaces/huggingfacejs/streaming-text-generation)
-
- ```typescript
- await hf.textGeneration({
-   model: 'gpt2',
-   inputs: 'The answer to the universe is'
- })
-
- for await (const output of hf.textGenerationStream({
-   model: "google/flan-t5-xxl",
-   inputs: 'repeat "one two three four"',
-   parameters: { max_new_tokens: 250 }
- })) {
-   console.log(output.token.text, output.generated_text);
- }
- ```
-
  ### Token Classification

  Used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -177,9 +233,9 @@ await hf.translation({
    model: 'facebook/mbart-large-50-many-to-many-mmt',
    inputs: textToTranslate,
    parameters: {
-     "src_lang": "en_XX",
-     "tgt_lang": "fr_XX"
-   }
+     "src_lang": "en_XX",
+     "tgt_lang": "fr_XX"
+   }
  })
  ```

@@ -497,6 +553,26 @@ for await (const output of hf.streamingRequest({
  }
  ```

+ You can use any Chat Completion API-compatible provider with the `chatCompletion` method.
+
+ ```typescript
+ // Chat Completion Example
+ const MISTRAL_KEY = process.env.MISTRAL_KEY;
+ const hf = new HfInference(MISTRAL_KEY);
+ const ep = hf.endpoint("https://api.mistral.ai");
+ const stream = ep.chatCompletionStream({
+   model: "mistral-tiny",
+   messages: [{ role: "user", content: "Complete the equation one + one = , just the answer" }],
+ });
+ let out = "";
+ for await (const chunk of stream) {
+   if (chunk.choices && chunk.choices.length > 0) {
+     out += chunk.choices[0].delta.content;
+     console.log(out);
+   }
+ }
+ ```
+
  ## Custom Inference Endpoints

  Learn more about using your own inference endpoints [here](https://hf.co/docs/inference-endpoints/)
@@ -504,6 +580,25 @@ Learn more about using your own inference endpoints [here](https://hf.co/docs/in
  ```typescript
  const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
  const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'});
+
+ // Chat Completion Example
+ const ep = hf.endpoint(
+   "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
+ );
+ const stream = ep.chatCompletionStream({
+   model: "tgi",
+   messages: [{ role: "user", content: "Complete the equation 1+1= ,just the answer" }],
+   max_tokens: 500,
+   temperature: 0.1,
+   seed: 0,
+ });
+ let out = "";
+ for await (const chunk of stream) {
+   if (chunk.choices && chunk.choices.length > 0) {
+     out += chunk.choices[0].delta.content;
+     console.log(out);
+   }
+ }
  ```

  By default, all calls to the inference endpoint will wait until the model is
@@ -532,3 +627,7 @@ HF_TOKEN="your access token" pnpm run test
  We have an informative documentation project called [Tasks](https://huggingface.co/tasks) to list available models for each task and explain how each task works in detail.

  It also contains demos, example outputs, and other resources should you want to dig deeper into the ML side of things.
+
+ ## Dependencies
+
+ - `@huggingface/tasks` : Typings only
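
The headline change in this release is the pair of OpenAI-compatible tasks documented above, `chatCompletion` and `chatCompletionStream`. As with every other task, they are also exported as standalone, tree-shakeable functions (see the export lists in the bundles below), so they can be called without the `HfInference` class. A minimal sketch, assuming a placeholder access token:

```typescript
import { chatCompletion } from "@huggingface/inference";

// Standalone task functions take the access token as an argument
// instead of binding it through the HfInference constructor.
const res = await chatCompletion({
  accessToken: "hf_...", // placeholder, not a real token
  model: "mistralai/Mistral-7B-Instruct-v0.2",
  messages: [{ role: "user", content: "One plus one equals" }],
  max_tokens: 16,
});
console.log(res.choices[0].message);
```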
package/dist/index.cjs CHANGED
@@ -1,4 +1,3 @@
- /// <reference path="./index.d.ts" />
  "use strict";
  var __defProp = Object.defineProperty;
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
@@ -27,6 +26,8 @@ __export(src_exports, {
    audioClassification: () => audioClassification,
    audioToAudio: () => audioToAudio,
    automaticSpeechRecognition: () => automaticSpeechRecognition,
+   chatCompletion: () => chatCompletion,
+   chatCompletionStream: () => chatCompletionStream,
    documentQuestionAnswering: () => documentQuestionAnswering,
    featureExtraction: () => featureExtraction,
    fillMask: () => fillMask,
@@ -62,6 +63,8 @@ __export(tasks_exports, {
    audioClassification: () => audioClassification,
    audioToAudio: () => audioToAudio,
    automaticSpeechRecognition: () => automaticSpeechRecognition,
+   chatCompletion: () => chatCompletion,
+   chatCompletionStream: () => chatCompletionStream,
    documentQuestionAnswering: () => documentQuestionAnswering,
    featureExtraction: () => featureExtraction,
    fillMask: () => fillMask,
@@ -90,6 +93,30 @@ __export(tasks_exports, {
    zeroShotImageClassification: () => zeroShotImageClassification
  });

+ // src/utils/pick.ts
+ function pick(o, props) {
+   return Object.assign(
+     {},
+     ...props.map((prop) => {
+       if (o[prop] !== void 0) {
+         return { [prop]: o[prop] };
+       }
+     })
+   );
+ }
+
+ // src/utils/typedInclude.ts
+ function typedInclude(arr, v) {
+   return arr.includes(v);
+ }
+
+ // src/utils/omit.ts
+ function omit(o, props) {
+   const propsArr = Array.isArray(props) ? props : [props];
+   const letsKeep = Object.keys(o).filter((prop) => !typedInclude(propsArr, prop));
+   return pick(o, letsKeep);
+ }
+
  // src/lib/isUrl.ts
  function isUrl(modelOrUrl) {
    return /^http(s?):/.test(modelOrUrl) || modelOrUrl.startsWith("/");
@@ -130,7 +157,7 @@ async function getDefaultTask(model, accessToken, options) {
  var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co";
  var tasks = null;
  async function makeRequestOptions(args, options) {
-   const { accessToken, model: _model, ...otherArgs } = args;
+   const { accessToken, endpointUrl, ...otherArgs } = args;
    let { model } = args;
    const {
      forceTask: task,
@@ -139,7 +166,7 @@ async function makeRequestOptions(args, options) {
      wait_for_model,
      use_cache,
      dont_load_model,
-     ...otherOptions
+     chatCompletion: chatCompletion2
    } = options ?? {};
    const headers = {};
    if (accessToken) {
@@ -173,15 +200,25 @@ async function makeRequestOptions(args, options) {
    if (dont_load_model) {
      headers["X-Load-Model"] = "0";
    }
-   const url = (() => {
+   let url = (() => {
+     if (endpointUrl && isUrl(model)) {
+       throw new TypeError("Both model and endpointUrl cannot be URLs");
+     }
      if (isUrl(model)) {
+       console.warn("Using a model URL is deprecated, please use the `endpointUrl` parameter instead");
        return model;
      }
+     if (endpointUrl) {
+       return endpointUrl;
+     }
      if (task) {
        return `${HF_INFERENCE_API_BASE_URL}/pipeline/${task}/${model}`;
      }
      return `${HF_INFERENCE_API_BASE_URL}/models/${model}`;
    })();
+   if (chatCompletion2 && !url.endsWith("/chat/completions")) {
+     url += "/v1/chat/completions";
+   }
    let credentials;
    if (typeof includeCredentials === "string") {
      credentials = includeCredentials;
@@ -192,8 +229,7 @@ async function makeRequestOptions(args, options) {
    headers,
    method: "POST",
    body: binary ? args.data : JSON.stringify({
-     ...otherArgs,
-     options: options && otherOptions
+     ...otherArgs.model && isUrl(otherArgs.model) ? omit(otherArgs, "model") : otherArgs
    }),
    ...credentials && { credentials },
    signal: options?.signal
@@ -214,6 +250,9 @@ async function request(args, options) {
    if (!response.ok) {
      if (response.headers.get("Content-Type")?.startsWith("application/json")) {
        const output = await response.json();
+       if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
+         throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
+       }
        if (output.error) {
          throw new Error(output.error);
        }
@@ -338,6 +377,9 @@ async function* streamingRequest(args, options) {
    if (!response.ok) {
      if (response.headers.get("Content-Type")?.startsWith("application/json")) {
        const output = await response.json();
+       if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
+         throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
+       }
        if (output.error) {
          throw new Error(output.error);
        }
@@ -374,6 +416,9 @@ async function* streamingRequest(args, options) {
    onChunk(value);
    for (const event of events) {
      if (event.data.length > 0) {
+       if (event.data === "[DONE]") {
+         return;
+       }
        const data = JSON.parse(event.data);
        if (typeof data === "object" && data !== null && "error" in data) {
          throw new Error(data.error);
@@ -520,7 +565,7 @@ async function textToImage(args, options) {
    return res;
  }

- // ../shared/src/base64FromBytes.ts
+ // src/utils/base64FromBytes.ts
  function base64FromBytes(arr) {
    if (globalThis.Buffer) {
      return globalThis.Buffer.from(arr).toString("base64");
@@ -533,10 +578,6 @@ function base64FromBytes(arr) {
    }
  }

- // ../shared/src/isBackend.ts
- var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
- var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
-
  // src/tasks/cv/imageToImage.ts
  async function imageToImage(args, options) {
    let reqArgs;
@@ -777,6 +818,29 @@ async function zeroShotClassification(args, options) {
    return res;
  }

+ // src/tasks/nlp/chatCompletion.ts
+ async function chatCompletion(args, options) {
+   const res = await request(args, {
+     ...options,
+     taskHint: "text-generation",
+     chatCompletion: true
+   });
+   const isValidOutput = typeof res === "object" && Array.isArray(res?.choices) && typeof res?.created === "number" && typeof res?.id === "string" && typeof res?.model === "string" && typeof res?.system_fingerprint === "string" && typeof res?.usage === "object";
+   if (!isValidOutput) {
+     throw new InferenceOutputError("Expected ChatCompletionOutput");
+   }
+   return res;
+ }
+
+ // src/tasks/nlp/chatCompletionStream.ts
+ async function* chatCompletionStream(args, options) {
+   yield* streamingRequest(args, {
+     ...options,
+     taskHint: "text-generation",
+     chatCompletion: true
+   });
+ }
+
  // src/tasks/multimodal/documentQuestionAnswering.ts
  async function documentQuestionAnswering(args, options) {
    const reqArgs = {
@@ -888,7 +952,7 @@ var HfInferenceEndpoint = class {
      enumerable: false,
      value: (params, options) => (
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-       fn({ ...params, accessToken, model: endpointUrl }, { ...defaultOptions, ...options })
+       fn({ ...params, accessToken, endpointUrl }, { ...defaultOptions, ...options })
      )
    });
  }
@@ -902,6 +966,8 @@ var HfInferenceEndpoint = class {
    audioClassification,
    audioToAudio,
    automaticSpeechRecognition,
+   chatCompletion,
+   chatCompletionStream,
    documentQuestionAnswering,
    featureExtraction,
    fillMask,
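
The `makeRequestOptions` hunks above carry the two routing changes in 2.7.0: the new `endpointUrl` request argument (passing a URL as `model` still works but now logs a deprecation warning), and the `/v1/chat/completions` suffix appended for chat-completion calls. A condensed restatement of the resolution order, paraphrased from the bundled source above (the `forceTask` pipeline branch is omitted):

```typescript
// Paraphrase for illustration; not the library's exact code.
function resolveUrl(model: string, endpointUrl?: string, chatCompletion?: boolean): string {
  const isUrl = (s: string) => /^http(s?):/.test(s) || s.startsWith("/");
  if (endpointUrl && isUrl(model)) {
    throw new TypeError("Both model and endpointUrl cannot be URLs");
  }
  let url: string;
  if (isUrl(model)) {
    url = model; // deprecated path: pass endpointUrl instead
  } else if (endpointUrl) {
    url = endpointUrl;
  } else {
    url = `https://api-inference.huggingface.co/models/${model}`;
  }
  // Chat completion is routed to the OpenAI-compatible route.
  if (chatCompletion && !url.endsWith("/chat/completions")) {
    url += "/v1/chat/completions";
  }
  return url;
}
```

Note also that the JSON body no longer nests request options under an `options` key, and a `model` field that is itself a URL is stripped from the payload via the new `omit` helper.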
package/dist/index.js CHANGED
@@ -1,4 +1,3 @@
- /// <reference path="./index.d.ts" />
  var __defProp = Object.defineProperty;
  var __export = (target, all) => {
    for (var name in all)
@@ -11,6 +10,8 @@ __export(tasks_exports, {
    audioClassification: () => audioClassification,
    audioToAudio: () => audioToAudio,
    automaticSpeechRecognition: () => automaticSpeechRecognition,
+   chatCompletion: () => chatCompletion,
+   chatCompletionStream: () => chatCompletionStream,
    documentQuestionAnswering: () => documentQuestionAnswering,
    featureExtraction: () => featureExtraction,
    fillMask: () => fillMask,
@@ -39,6 +40,30 @@ __export(tasks_exports, {
    zeroShotImageClassification: () => zeroShotImageClassification
  });

+ // src/utils/pick.ts
+ function pick(o, props) {
+   return Object.assign(
+     {},
+     ...props.map((prop) => {
+       if (o[prop] !== void 0) {
+         return { [prop]: o[prop] };
+       }
+     })
+   );
+ }
+
+ // src/utils/typedInclude.ts
+ function typedInclude(arr, v) {
+   return arr.includes(v);
+ }
+
+ // src/utils/omit.ts
+ function omit(o, props) {
+   const propsArr = Array.isArray(props) ? props : [props];
+   const letsKeep = Object.keys(o).filter((prop) => !typedInclude(propsArr, prop));
+   return pick(o, letsKeep);
+ }
+
  // src/lib/isUrl.ts
  function isUrl(modelOrUrl) {
    return /^http(s?):/.test(modelOrUrl) || modelOrUrl.startsWith("/");
@@ -79,7 +104,7 @@ async function getDefaultTask(model, accessToken, options) {
  var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co";
  var tasks = null;
  async function makeRequestOptions(args, options) {
-   const { accessToken, model: _model, ...otherArgs } = args;
+   const { accessToken, endpointUrl, ...otherArgs } = args;
    let { model } = args;
    const {
      forceTask: task,
@@ -88,7 +113,7 @@ async function makeRequestOptions(args, options) {
      wait_for_model,
      use_cache,
      dont_load_model,
-     ...otherOptions
+     chatCompletion: chatCompletion2
    } = options ?? {};
    const headers = {};
    if (accessToken) {
@@ -122,15 +147,25 @@ async function makeRequestOptions(args, options) {
    if (dont_load_model) {
      headers["X-Load-Model"] = "0";
    }
-   const url = (() => {
+   let url = (() => {
+     if (endpointUrl && isUrl(model)) {
+       throw new TypeError("Both model and endpointUrl cannot be URLs");
+     }
      if (isUrl(model)) {
+       console.warn("Using a model URL is deprecated, please use the `endpointUrl` parameter instead");
        return model;
      }
+     if (endpointUrl) {
+       return endpointUrl;
+     }
      if (task) {
        return `${HF_INFERENCE_API_BASE_URL}/pipeline/${task}/${model}`;
      }
      return `${HF_INFERENCE_API_BASE_URL}/models/${model}`;
    })();
+   if (chatCompletion2 && !url.endsWith("/chat/completions")) {
+     url += "/v1/chat/completions";
+   }
    let credentials;
    if (typeof includeCredentials === "string") {
      credentials = includeCredentials;
@@ -141,8 +176,7 @@ async function makeRequestOptions(args, options) {
    headers,
    method: "POST",
    body: binary ? args.data : JSON.stringify({
-     ...otherArgs,
-     options: options && otherOptions
+     ...otherArgs.model && isUrl(otherArgs.model) ? omit(otherArgs, "model") : otherArgs
    }),
    ...credentials && { credentials },
    signal: options?.signal
@@ -163,6 +197,9 @@ async function request(args, options) {
    if (!response.ok) {
      if (response.headers.get("Content-Type")?.startsWith("application/json")) {
        const output = await response.json();
+       if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
+         throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
+       }
        if (output.error) {
          throw new Error(output.error);
        }
@@ -287,6 +324,9 @@ async function* streamingRequest(args, options) {
    if (!response.ok) {
      if (response.headers.get("Content-Type")?.startsWith("application/json")) {
        const output = await response.json();
+       if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
+         throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
+       }
        if (output.error) {
          throw new Error(output.error);
        }
@@ -323,6 +363,9 @@ async function* streamingRequest(args, options) {
    onChunk(value);
    for (const event of events) {
      if (event.data.length > 0) {
+       if (event.data === "[DONE]") {
+         return;
+       }
        const data = JSON.parse(event.data);
        if (typeof data === "object" && data !== null && "error" in data) {
          throw new Error(data.error);
@@ -469,7 +512,7 @@ async function textToImage(args, options) {
    return res;
  }

- // ../shared/src/base64FromBytes.ts
+ // src/utils/base64FromBytes.ts
  function base64FromBytes(arr) {
    if (globalThis.Buffer) {
      return globalThis.Buffer.from(arr).toString("base64");
@@ -482,10 +525,6 @@ function base64FromBytes(arr) {
    }
  }

- // ../shared/src/isBackend.ts
- var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
- var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
-
  // src/tasks/cv/imageToImage.ts
  async function imageToImage(args, options) {
    let reqArgs;
@@ -726,6 +765,29 @@ async function zeroShotClassification(args, options) {
    return res;
  }

+ // src/tasks/nlp/chatCompletion.ts
+ async function chatCompletion(args, options) {
+   const res = await request(args, {
+     ...options,
+     taskHint: "text-generation",
+     chatCompletion: true
+   });
+   const isValidOutput = typeof res === "object" && Array.isArray(res?.choices) && typeof res?.created === "number" && typeof res?.id === "string" && typeof res?.model === "string" && typeof res?.system_fingerprint === "string" && typeof res?.usage === "object";
+   if (!isValidOutput) {
+     throw new InferenceOutputError("Expected ChatCompletionOutput");
+   }
+   return res;
+ }
+
+ // src/tasks/nlp/chatCompletionStream.ts
+ async function* chatCompletionStream(args, options) {
+   yield* streamingRequest(args, {
+     ...options,
+     taskHint: "text-generation",
+     chatCompletion: true
+   });
+ }
+
  // src/tasks/multimodal/documentQuestionAnswering.ts
  async function documentQuestionAnswering(args, options) {
    const reqArgs = {
@@ -837,7 +899,7 @@ var HfInferenceEndpoint = class {
      enumerable: false,
      value: (params, options) => (
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-       fn({ ...params, accessToken, model: endpointUrl }, { ...defaultOptions, ...options })
+       fn({ ...params, accessToken, endpointUrl }, { ...defaultOptions, ...options })
      )
    });
  }
@@ -850,6 +912,8 @@ export {
    audioClassification,
    audioToAudio,
    automaticSpeechRecognition,
+   chatCompletion,
+   chatCompletionStream,
    documentQuestionAnswering,
    featureExtraction,
    fillMask,
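
The same changes land in the ESM bundle. Two of them are easy to miss in the hunks above: `streamingRequest` now returns cleanly when a server emits the OpenAI-style `[DONE]` sentinel, and chat-completion failures with status 400/404/422/500 throw a dedicated "does not seem to support chat completion" error. For a consumer, this means a plain `for await` loop over `chatCompletionStream` simply terminates at end of stream; a sketch with a placeholder token:

```typescript
import { chatCompletionStream } from "@huggingface/inference";

let out = "";
for await (const chunk of chatCompletionStream({
  accessToken: "hf_...", // placeholder
  model: "mistralai/Mistral-7B-Instruct-v0.2",
  messages: [{ role: "user", content: "Say hello" }],
})) {
  // Each chunk is an OpenAI-style delta; the loop exits when the
  // server sends "[DONE]".
  out += chunk.choices[0]?.delta?.content ?? "";
}
console.log(out);
```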
package/dist/src/HfInference.d.ts ADDED
@@ -0,0 +1,28 @@
+ import * as tasks from "./tasks";
+ import type { Options } from "./types";
+ import type { DistributiveOmit } from "./utils/distributive-omit";
+ type Task = typeof tasks;
+ type TaskWithNoAccessToken = {
+   [key in keyof Task]: (args: DistributiveOmit<Parameters<Task[key]>[0], "accessToken">, options?: Parameters<Task[key]>[1]) => ReturnType<Task[key]>;
+ };
+ type TaskWithNoAccessTokenNoEndpointUrl = {
+   [key in keyof Task]: (args: DistributiveOmit<Parameters<Task[key]>[0], "accessToken" | "endpointUrl">, options?: Parameters<Task[key]>[1]) => ReturnType<Task[key]>;
+ };
+ export declare class HfInference {
+   private readonly accessToken;
+   private readonly defaultOptions;
+   constructor(accessToken?: string, defaultOptions?: Options);
+   /**
+    * Returns copy of HfInference tied to a specified endpoint.
+    */
+   endpoint(endpointUrl: string): HfInferenceEndpoint;
+ }
+ export declare class HfInferenceEndpoint {
+   constructor(endpointUrl: string, accessToken?: string, defaultOptions?: Options);
+ }
+ export interface HfInference extends TaskWithNoAccessToken {
+ }
+ export interface HfInferenceEndpoint extends TaskWithNoAccessTokenNoEndpointUrl {
+ }
+ export {};
+ //# sourceMappingURL=HfInference.d.ts.map
package/dist/src/HfInference.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"HfInference.d.ts","sourceRoot":"","sources":["../../src/HfInference.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,KAAK,MAAM,SAAS,CAAC;AACjC,OAAO,KAAK,EAAE,OAAO,EAAe,MAAM,SAAS,CAAC;AACpD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAKlE,KAAK,IAAI,GAAG,OAAO,KAAK,CAAC;AAEzB,KAAK,qBAAqB,GAAG;KAC3B,GAAG,IAAI,MAAM,IAAI,GAAG,CACpB,IAAI,EAAE,gBAAgB,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,EAC/D,OAAO,CAAC,EAAE,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAC9B,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;CAC1B,CAAC;AAEF,KAAK,kCAAkC,GAAG;KACxC,GAAG,IAAI,MAAM,IAAI,GAAG,CACpB,IAAI,EAAE,gBAAgB,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,aAAa,GAAG,aAAa,CAAC,EAC/E,OAAO,CAAC,EAAE,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAC9B,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;CAC1B,CAAC;AAEF,qBAAa,WAAW;IACvB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAU;gBAE7B,WAAW,SAAK,EAAE,cAAc,GAAE,OAAY;IAc1D;;OAEG;IACI,QAAQ,CAAC,WAAW,EAAE,MAAM,GAAG,mBAAmB;CAGzD;AAED,qBAAa,mBAAmB;gBACnB,WAAW,EAAE,MAAM,EAAE,WAAW,SAAK,EAAE,cAAc,GAAE,OAAY;CAa/E;AAED,MAAM,WAAW,WAAY,SAAQ,qBAAqB;CAAG;AAE7D,MAAM,WAAW,mBAAoB,SAAQ,kCAAkC;CAAG"}
package/dist/src/index.d.ts ADDED
@@ -0,0 +1,5 @@
+ export { HfInference, HfInferenceEndpoint } from "./HfInference";
+ export { InferenceOutputError } from "./lib/InferenceOutputError";
+ export * from "./types";
+ export * from "./tasks";
+ //# sourceMappingURL=index.d.ts.map
package/dist/src/index.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAC;AACjE,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAC;AAClE,cAAc,SAAS,CAAC;AACxB,cAAc,SAAS,CAAC"}
package/dist/src/lib/InferenceOutputError.d.ts ADDED
@@ -0,0 +1,4 @@
+ export declare class InferenceOutputError extends TypeError {
+   constructor(message: string);
+ }
+ //# sourceMappingURL=InferenceOutputError.d.ts.map
package/dist/src/lib/InferenceOutputError.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"InferenceOutputError.d.ts","sourceRoot":"","sources":["../../../src/lib/InferenceOutputError.ts"],"names":[],"mappings":"AAAA,qBAAa,oBAAqB,SAAQ,SAAS;gBACtC,OAAO,EAAE,MAAM;CAM3B"}
package/dist/src/lib/getDefaultTask.d.ts ADDED
@@ -0,0 +1,12 @@
+ export declare const HF_HUB_URL = "https://huggingface.co";
+ export interface DefaultTaskOptions {
+   fetch?: typeof fetch;
+ }
+ /**
+  * Get the default task. Use a LRU cache of 1000 items with 10 minutes expiration
+  * to avoid making too many calls to the HF hub.
+  *
+  * @returns The default task for the model, or `null` if it was impossible to get it
+  */
+ export declare function getDefaultTask(model: string, accessToken: string | undefined, options?: DefaultTaskOptions): Promise<string | null>;
+ //# sourceMappingURL=getDefaultTask.d.ts.map
package/dist/src/lib/getDefaultTask.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"getDefaultTask.d.ts","sourceRoot":"","sources":["../../../src/lib/getDefaultTask.ts"],"names":[],"mappings":"AAUA,eAAO,MAAM,UAAU,2BAA2B,CAAC;AAEnD,MAAM,WAAW,kBAAkB;IAClC,KAAK,CAAC,EAAE,OAAO,KAAK,CAAC;CACrB;AAED;;;;;GAKG;AACH,wBAAsB,cAAc,CACnC,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM,GAAG,SAAS,EAC/B,OAAO,CAAC,EAAE,kBAAkB,GAC1B,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAkCxB"}
package/dist/src/lib/isUrl.d.ts ADDED
@@ -0,0 +1,2 @@
+ export declare function isUrl(modelOrUrl: string): boolean;
+ //# sourceMappingURL=isUrl.d.ts.map
package/dist/src/lib/isUrl.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"isUrl.d.ts","sourceRoot":"","sources":["../../../src/lib/isUrl.ts"],"names":[],"mappings":"AAAA,wBAAgB,KAAK,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAEjD"}