@huggingface/inference 2.6.7 → 2.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +126 -27
- package/dist/index.cjs +92 -24
- package/dist/index.js +90 -24
- package/dist/src/HfInference.d.ts +28 -0
- package/dist/src/HfInference.d.ts.map +1 -0
- package/dist/src/index.d.ts +5 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/lib/InferenceOutputError.d.ts +4 -0
- package/dist/src/lib/InferenceOutputError.d.ts.map +1 -0
- package/dist/src/lib/getDefaultTask.d.ts +12 -0
- package/dist/src/lib/getDefaultTask.d.ts.map +1 -0
- package/dist/src/lib/isUrl.d.ts +2 -0
- package/dist/src/lib/isUrl.d.ts.map +1 -0
- package/dist/src/lib/makeRequestOptions.d.ts +18 -0
- package/dist/src/lib/makeRequestOptions.d.ts.map +1 -0
- package/dist/src/tasks/audio/audioClassification.d.ts +24 -0
- package/dist/src/tasks/audio/audioClassification.d.ts.map +1 -0
- package/dist/src/tasks/audio/audioToAudio.d.ts +28 -0
- package/dist/src/tasks/audio/audioToAudio.d.ts.map +1 -0
- package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts +19 -0
- package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts.map +1 -0
- package/dist/src/tasks/audio/textToSpeech.d.ts +14 -0
- package/dist/src/tasks/audio/textToSpeech.d.ts.map +1 -0
- package/dist/src/tasks/custom/request.d.ts +13 -0
- package/dist/src/tasks/custom/request.d.ts.map +1 -0
- package/dist/src/tasks/custom/streamingRequest.d.ts +13 -0
- package/dist/src/tasks/custom/streamingRequest.d.ts.map +1 -0
- package/dist/src/tasks/cv/imageClassification.d.ts +24 -0
- package/dist/src/tasks/cv/imageClassification.d.ts.map +1 -0
- package/dist/src/tasks/cv/imageSegmentation.d.ts +28 -0
- package/dist/src/tasks/cv/imageSegmentation.d.ts.map +1 -0
- package/dist/src/tasks/cv/imageToImage.d.ts +55 -0
- package/dist/src/tasks/cv/imageToImage.d.ts.map +1 -0
- package/dist/src/tasks/cv/imageToText.d.ts +18 -0
- package/dist/src/tasks/cv/imageToText.d.ts.map +1 -0
- package/dist/src/tasks/cv/objectDetection.d.ts +33 -0
- package/dist/src/tasks/cv/objectDetection.d.ts.map +1 -0
- package/dist/src/tasks/cv/textToImage.d.ts +36 -0
- package/dist/src/tasks/cv/textToImage.d.ts.map +1 -0
- package/dist/src/tasks/cv/zeroShotImageClassification.d.ts +26 -0
- package/dist/src/tasks/cv/zeroShotImageClassification.d.ts.map +1 -0
- package/dist/src/tasks/index.d.ts +32 -0
- package/dist/src/tasks/index.d.ts.map +1 -0
- package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts +35 -0
- package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts.map +1 -0
- package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts +27 -0
- package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts.map +1 -0
- package/dist/src/tasks/nlp/chatCompletion.d.ts +7 -0
- package/dist/src/tasks/nlp/chatCompletion.d.ts.map +1 -0
- package/dist/src/tasks/nlp/chatCompletionStream.d.ts +7 -0
- package/dist/src/tasks/nlp/chatCompletionStream.d.ts.map +1 -0
- package/dist/src/tasks/nlp/featureExtraction.d.ts +19 -0
- package/dist/src/tasks/nlp/featureExtraction.d.ts.map +1 -0
- package/dist/src/tasks/nlp/fillMask.d.ts +27 -0
- package/dist/src/tasks/nlp/fillMask.d.ts.map +1 -0
- package/dist/src/tasks/nlp/questionAnswering.d.ts +30 -0
- package/dist/src/tasks/nlp/questionAnswering.d.ts.map +1 -0
- package/dist/src/tasks/nlp/sentenceSimilarity.d.ts +19 -0
- package/dist/src/tasks/nlp/sentenceSimilarity.d.ts.map +1 -0
- package/dist/src/tasks/nlp/summarization.d.ts +48 -0
- package/dist/src/tasks/nlp/summarization.d.ts.map +1 -0
- package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts +36 -0
- package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts.map +1 -0
- package/dist/src/tasks/nlp/textClassification.d.ts +22 -0
- package/dist/src/tasks/nlp/textClassification.d.ts.map +1 -0
- package/dist/src/tasks/nlp/textGeneration.d.ts +8 -0
- package/dist/src/tasks/nlp/textGeneration.d.ts.map +1 -0
- package/dist/src/tasks/nlp/textGenerationStream.d.ts +81 -0
- package/dist/src/tasks/nlp/textGenerationStream.d.ts.map +1 -0
- package/dist/src/tasks/nlp/tokenClassification.d.ts +51 -0
- package/dist/src/tasks/nlp/tokenClassification.d.ts.map +1 -0
- package/dist/src/tasks/nlp/translation.d.ts +19 -0
- package/dist/src/tasks/nlp/translation.d.ts.map +1 -0
- package/dist/src/tasks/nlp/zeroShotClassification.d.ts +28 -0
- package/dist/src/tasks/nlp/zeroShotClassification.d.ts.map +1 -0
- package/dist/src/tasks/tabular/tabularClassification.d.ts +20 -0
- package/dist/src/tasks/tabular/tabularClassification.d.ts.map +1 -0
- package/dist/src/tasks/tabular/tabularRegression.d.ts +20 -0
- package/dist/src/tasks/tabular/tabularRegression.d.ts.map +1 -0
- package/dist/src/types.d.ts +69 -0
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/utils/base64FromBytes.d.ts +2 -0
- package/dist/src/utils/base64FromBytes.d.ts.map +1 -0
- package/dist/src/utils/distributive-omit.d.ts +9 -0
- package/dist/src/utils/distributive-omit.d.ts.map +1 -0
- package/dist/src/utils/isBackend.d.ts +2 -0
- package/dist/src/utils/isBackend.d.ts.map +1 -0
- package/dist/src/utils/isFrontend.d.ts +2 -0
- package/dist/src/utils/isFrontend.d.ts.map +1 -0
- package/dist/src/utils/omit.d.ts +5 -0
- package/dist/src/utils/omit.d.ts.map +1 -0
- package/dist/src/utils/pick.d.ts +5 -0
- package/dist/src/utils/pick.d.ts.map +1 -0
- package/dist/src/utils/toArray.d.ts +2 -0
- package/dist/src/utils/toArray.d.ts.map +1 -0
- package/dist/src/utils/typedInclude.d.ts +2 -0
- package/dist/src/utils/typedInclude.d.ts.map +1 -0
- package/dist/src/vendor/fetch-event-source/parse.d.ts +69 -0
- package/dist/src/vendor/fetch-event-source/parse.d.ts.map +1 -0
- package/dist/src/vendor/fetch-event-source/parse.spec.d.ts +2 -0
- package/dist/src/vendor/fetch-event-source/parse.spec.d.ts.map +1 -0
- package/dist/test/HfInference.spec.d.ts +2 -0
- package/dist/test/HfInference.spec.d.ts.map +1 -0
- package/dist/test/expect-closeto.d.ts +2 -0
- package/dist/test/expect-closeto.d.ts.map +1 -0
- package/dist/test/test-files.d.ts +2 -0
- package/dist/test/test-files.d.ts.map +1 -0
- package/dist/test/vcr.d.ts +2 -0
- package/dist/test/vcr.d.ts.map +1 -0
- package/package.json +9 -7
- package/src/HfInference.ts +4 -4
- package/src/lib/makeRequestOptions.ts +17 -7
- package/src/tasks/custom/request.ts +5 -0
- package/src/tasks/custom/streamingRequest.ts +8 -0
- package/src/tasks/cv/imageToImage.ts +1 -1
- package/src/tasks/cv/zeroShotImageClassification.ts +1 -1
- package/src/tasks/index.ts +2 -0
- package/src/tasks/multimodal/documentQuestionAnswering.ts +1 -1
- package/src/tasks/multimodal/visualQuestionAnswering.ts +1 -1
- package/src/tasks/nlp/chatCompletion.ts +32 -0
- package/src/tasks/nlp/chatCompletionStream.ts +17 -0
- package/src/tasks/nlp/textGeneration.ts +9 -206
- package/src/tasks/nlp/textGenerationStream.ts +2 -1
- package/src/types.ts +14 -3
- package/src/utils/base64FromBytes.ts +11 -0
- package/src/utils/{distributive-omit.d.ts → distributive-omit.ts} +0 -2
- package/src/utils/isBackend.ts +6 -0
- package/src/utils/isFrontend.ts +3 -0
- package/dist/index.d.ts +0 -1536
package/README.md
CHANGED
|
@@ -5,7 +5,7 @@ It works with both [Inference API (serverless)](https://huggingface.co/docs/api-
|
|
|
5
5
|
|
|
6
6
|
Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README).
|
|
7
7
|
|
|
8
|
-
You can also try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference), see some demos on [hf.co/huggingfacejs](https://huggingface.co/huggingfacejs), or watch a [Scrimba tutorial that explains how Inference Endpoints works](https://scrimba.com/scrim/cod8248f5adfd6e129582c523).
|
|
8
|
+
You can also try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference), see some demos on [hf.co/huggingfacejs](https://huggingface.co/huggingfacejs), or watch a [Scrimba tutorial that explains how Inference Endpoints works](https://scrimba.com/scrim/cod8248f5adfd6e129582c523).
|
|
9
9
|
|
|
10
10
|
## Getting Started
|
|
11
11
|
|
|
@@ -30,7 +30,6 @@ import { HfInference } from "https://esm.sh/@huggingface/inference"
|
|
|
30
30
|
import { HfInference } from "npm:@huggingface/inference"
|
|
31
31
|
```
|
|
32
32
|
|
|
33
|
-
|
|
34
33
|
### Initialize
|
|
35
34
|
|
|
36
35
|
```typescript
|
|
@@ -43,7 +42,6 @@ const hf = new HfInference('your access token')
|
|
|
43
42
|
|
|
44
43
|
Your access token should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the access token.
|
|
45
44
|
|
|
46
|
-
|
|
47
45
|
#### Tree-shaking
|
|
48
46
|
|
|
49
47
|
You can import the functions you need directly from the module instead of using the `HfInference` class.
|
|
@@ -63,6 +61,85 @@ This will enable tree-shaking by your bundler.
|
|
|
63
61
|
|
|
64
62
|
## Natural Language Processing
|
|
65
63
|
|
|
64
|
+
### Text Generation
|
|
65
|
+
|
|
66
|
+
Generates text from an input prompt.
|
|
67
|
+
|
|
68
|
+
[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-text-generation)
|
|
69
|
+
|
|
70
|
+
```typescript
|
|
71
|
+
await hf.textGeneration({
|
|
72
|
+
model: 'gpt2',
|
|
73
|
+
inputs: 'The answer to the universe is'
|
|
74
|
+
})
|
|
75
|
+
|
|
76
|
+
for await (const output of hf.textGenerationStream({
|
|
77
|
+
model: "google/flan-t5-xxl",
|
|
78
|
+
inputs: 'repeat "one two three four"',
|
|
79
|
+
parameters: { max_new_tokens: 250 }
|
|
80
|
+
})) {
|
|
81
|
+
console.log(output.token.text, output.generated_text);
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Text Generation (Chat Completion API Compatible)
|
|
86
|
+
|
|
87
|
+
Using the `chatCompletion` method, you can generate text with models compatible with the OpenAI Chat Completion API. All models served by [TGI](https://api-inference.huggingface.co/framework/text-generation-inference) on Hugging Face support Messages API.
|
|
88
|
+
|
|
89
|
+
[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-chat-completion)
|
|
90
|
+
|
|
91
|
+
```typescript
|
|
92
|
+
// Non-streaming API
|
|
93
|
+
const out = await hf.chatCompletion({
|
|
94
|
+
model: "mistralai/Mistral-7B-Instruct-v0.2",
|
|
95
|
+
messages: [{ role: "user", content: "Complete the this sentence with words one plus one is equal " }],
|
|
96
|
+
max_tokens: 500,
|
|
97
|
+
temperature: 0.1,
|
|
98
|
+
seed: 0,
|
|
99
|
+
});
|
|
100
|
+
|
|
101
|
+
// Streaming API
|
|
102
|
+
let out = "";
|
|
103
|
+
for await (const chunk of hf.chatCompletionStream({
|
|
104
|
+
model: "mistralai/Mistral-7B-Instruct-v0.2",
|
|
105
|
+
messages: [
|
|
106
|
+
{ role: "user", content: "Complete the equation 1+1= ,just the answer" },
|
|
107
|
+
],
|
|
108
|
+
max_tokens: 500,
|
|
109
|
+
temperature: 0.1,
|
|
110
|
+
seed: 0,
|
|
111
|
+
})) {
|
|
112
|
+
if (chunk.choices && chunk.choices.length > 0) {
|
|
113
|
+
out += chunk.choices[0].delta.content;
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
It's also possible to call Mistral or OpenAI endpoints directly:
|
|
119
|
+
|
|
120
|
+
```typescript
|
|
121
|
+
const openai = new HfInference(OPENAI_TOKEN).endpoint("https://api.openai.com");
|
|
122
|
+
|
|
123
|
+
let out = "";
|
|
124
|
+
for await (const chunk of openai.chatCompletionStream({
|
|
125
|
+
model: "gpt-3.5-turbo",
|
|
126
|
+
messages: [
|
|
127
|
+
{ role: "user", content: "Complete the equation 1+1= ,just the answer" },
|
|
128
|
+
],
|
|
129
|
+
max_tokens: 500,
|
|
130
|
+
temperature: 0.1,
|
|
131
|
+
seed: 0,
|
|
132
|
+
})) {
|
|
133
|
+
if (chunk.choices && chunk.choices.length > 0) {
|
|
134
|
+
out += chunk.choices[0].delta.content;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
// For mistral AI:
|
|
139
|
+
// endpointUrl: "https://api.mistral.ai"
|
|
140
|
+
// model: "mistral-tiny"
|
|
141
|
+
```
|
|
142
|
+
|
|
66
143
|
### Fill Mask
|
|
67
144
|
|
|
68
145
|
Tries to fill in a hole with a missing word (token to be precise).
|
|
@@ -131,27 +208,6 @@ await hf.textClassification({
|
|
|
131
208
|
})
|
|
132
209
|
```
|
|
133
210
|
|
|
134
|
-
### Text Generation
|
|
135
|
-
|
|
136
|
-
Generates text from an input prompt.
|
|
137
|
-
|
|
138
|
-
[Demo](https://huggingface.co/spaces/huggingfacejs/streaming-text-generation)
|
|
139
|
-
|
|
140
|
-
```typescript
|
|
141
|
-
await hf.textGeneration({
|
|
142
|
-
model: 'gpt2',
|
|
143
|
-
inputs: 'The answer to the universe is'
|
|
144
|
-
})
|
|
145
|
-
|
|
146
|
-
for await (const output of hf.textGenerationStream({
|
|
147
|
-
model: "google/flan-t5-xxl",
|
|
148
|
-
inputs: 'repeat "one two three four"',
|
|
149
|
-
parameters: { max_new_tokens: 250 }
|
|
150
|
-
})) {
|
|
151
|
-
console.log(output.token.text, output.generated_text);
|
|
152
|
-
}
|
|
153
|
-
```
|
|
154
|
-
|
|
155
211
|
### Token Classification
|
|
156
212
|
|
|
157
213
|
Used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
|
|
@@ -177,9 +233,9 @@ await hf.translation({
|
|
|
177
233
|
model: 'facebook/mbart-large-50-many-to-many-mmt',
|
|
178
234
|
inputs: textToTranslate,
|
|
179
235
|
parameters: {
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
236
|
+
"src_lang": "en_XX",
|
|
237
|
+
"tgt_lang": "fr_XX"
|
|
238
|
+
}
|
|
183
239
|
})
|
|
184
240
|
```
|
|
185
241
|
|
|
@@ -497,6 +553,26 @@ for await (const output of hf.streamingRequest({
|
|
|
497
553
|
}
|
|
498
554
|
```
|
|
499
555
|
|
|
556
|
+
You can use any Chat Completion API-compatible provider with the `chatCompletion` method.
|
|
557
|
+
|
|
558
|
+
```typescript
|
|
559
|
+
// Chat Completion Example
|
|
560
|
+
const MISTRAL_KEY = process.env.MISTRAL_KEY;
|
|
561
|
+
const hf = new HfInference(MISTRAL_KEY);
|
|
562
|
+
const ep = hf.endpoint("https://api.mistral.ai");
|
|
563
|
+
const stream = ep.chatCompletionStream({
|
|
564
|
+
model: "mistral-tiny",
|
|
565
|
+
messages: [{ role: "user", content: "Complete the equation one + one = , just the answer" }],
|
|
566
|
+
});
|
|
567
|
+
let out = "";
|
|
568
|
+
for await (const chunk of stream) {
|
|
569
|
+
if (chunk.choices && chunk.choices.length > 0) {
|
|
570
|
+
out += chunk.choices[0].delta.content;
|
|
571
|
+
console.log(out);
|
|
572
|
+
}
|
|
573
|
+
}
|
|
574
|
+
```
|
|
575
|
+
|
|
500
576
|
## Custom Inference Endpoints
|
|
501
577
|
|
|
502
578
|
Learn more about using your own inference endpoints [here](https://hf.co/docs/inference-endpoints/)
|
|
@@ -504,6 +580,25 @@ Learn more about using your own inference endpoints [here](https://hf.co/docs/in
|
|
|
504
580
|
```typescript
|
|
505
581
|
const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
|
|
506
582
|
const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'});
|
|
583
|
+
|
|
584
|
+
// Chat Completion Example
|
|
585
|
+
const ep = hf.endpoint(
|
|
586
|
+
"https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
|
|
587
|
+
);
|
|
588
|
+
const stream = ep.chatCompletionStream({
|
|
589
|
+
model: "tgi",
|
|
590
|
+
messages: [{ role: "user", content: "Complete the equation 1+1= ,just the answer" }],
|
|
591
|
+
max_tokens: 500,
|
|
592
|
+
temperature: 0.1,
|
|
593
|
+
seed: 0,
|
|
594
|
+
});
|
|
595
|
+
let out = "";
|
|
596
|
+
for await (const chunk of stream) {
|
|
597
|
+
if (chunk.choices && chunk.choices.length > 0) {
|
|
598
|
+
out += chunk.choices[0].delta.content;
|
|
599
|
+
console.log(out);
|
|
600
|
+
}
|
|
601
|
+
}
|
|
507
602
|
```
|
|
508
603
|
|
|
509
604
|
By default, all calls to the inference endpoint will wait until the model is
|
|
@@ -532,3 +627,7 @@ HF_TOKEN="your access token" pnpm run test
|
|
|
532
627
|
We have an informative documentation project called [Tasks](https://huggingface.co/tasks) to list available models for each task and explain how each task works in detail.
|
|
533
628
|
|
|
534
629
|
It also contains demos, example outputs, and other resources should you want to dig deeper into the ML side of things.
|
|
630
|
+
|
|
631
|
+
## Dependencies
|
|
632
|
+
|
|
633
|
+
- `@huggingface/tasks` : Typings only
|
package/dist/index.cjs
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
/// <reference path="./index.d.ts" />
|
|
2
1
|
"use strict";
|
|
3
2
|
var __defProp = Object.defineProperty;
|
|
4
3
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
@@ -27,6 +26,8 @@ __export(src_exports, {
|
|
|
27
26
|
audioClassification: () => audioClassification,
|
|
28
27
|
audioToAudio: () => audioToAudio,
|
|
29
28
|
automaticSpeechRecognition: () => automaticSpeechRecognition,
|
|
29
|
+
chatCompletion: () => chatCompletion,
|
|
30
|
+
chatCompletionStream: () => chatCompletionStream,
|
|
30
31
|
documentQuestionAnswering: () => documentQuestionAnswering,
|
|
31
32
|
featureExtraction: () => featureExtraction,
|
|
32
33
|
fillMask: () => fillMask,
|
|
@@ -62,6 +63,8 @@ __export(tasks_exports, {
|
|
|
62
63
|
audioClassification: () => audioClassification,
|
|
63
64
|
audioToAudio: () => audioToAudio,
|
|
64
65
|
automaticSpeechRecognition: () => automaticSpeechRecognition,
|
|
66
|
+
chatCompletion: () => chatCompletion,
|
|
67
|
+
chatCompletionStream: () => chatCompletionStream,
|
|
65
68
|
documentQuestionAnswering: () => documentQuestionAnswering,
|
|
66
69
|
featureExtraction: () => featureExtraction,
|
|
67
70
|
fillMask: () => fillMask,
|
|
@@ -90,6 +93,30 @@ __export(tasks_exports, {
|
|
|
90
93
|
zeroShotImageClassification: () => zeroShotImageClassification
|
|
91
94
|
});
|
|
92
95
|
|
|
96
|
+
// src/utils/pick.ts
|
|
97
|
+
function pick(o, props) {
|
|
98
|
+
return Object.assign(
|
|
99
|
+
{},
|
|
100
|
+
...props.map((prop) => {
|
|
101
|
+
if (o[prop] !== void 0) {
|
|
102
|
+
return { [prop]: o[prop] };
|
|
103
|
+
}
|
|
104
|
+
})
|
|
105
|
+
);
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// src/utils/typedInclude.ts
|
|
109
|
+
function typedInclude(arr, v) {
|
|
110
|
+
return arr.includes(v);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// src/utils/omit.ts
|
|
114
|
+
function omit(o, props) {
|
|
115
|
+
const propsArr = Array.isArray(props) ? props : [props];
|
|
116
|
+
const letsKeep = Object.keys(o).filter((prop) => !typedInclude(propsArr, prop));
|
|
117
|
+
return pick(o, letsKeep);
|
|
118
|
+
}
|
|
119
|
+
|
|
93
120
|
// src/lib/isUrl.ts
|
|
94
121
|
function isUrl(modelOrUrl) {
|
|
95
122
|
return /^http(s?):/.test(modelOrUrl) || modelOrUrl.startsWith("/");
|
|
@@ -130,7 +157,7 @@ async function getDefaultTask(model, accessToken, options) {
|
|
|
130
157
|
var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co";
|
|
131
158
|
var tasks = null;
|
|
132
159
|
async function makeRequestOptions(args, options) {
|
|
133
|
-
const { accessToken, model: _model, ...otherArgs } = args;
|
|
160
|
+
const { accessToken, endpointUrl, ...otherArgs } = args;
|
|
134
161
|
let { model } = args;
|
|
135
162
|
const {
|
|
136
163
|
forceTask: task,
|
|
@@ -139,7 +166,7 @@ async function makeRequestOptions(args, options) {
|
|
|
139
166
|
wait_for_model,
|
|
140
167
|
use_cache,
|
|
141
168
|
dont_load_model,
|
|
142
|
-
...otherOptions
|
|
169
|
+
chatCompletion: chatCompletion2
|
|
143
170
|
} = options ?? {};
|
|
144
171
|
const headers = {};
|
|
145
172
|
if (accessToken) {
|
|
@@ -173,15 +200,25 @@ async function makeRequestOptions(args, options) {
|
|
|
173
200
|
if (dont_load_model) {
|
|
174
201
|
headers["X-Load-Model"] = "0";
|
|
175
202
|
}
|
|
176
|
-
const url = (() => {
|
|
203
|
+
let url = (() => {
|
|
204
|
+
if (endpointUrl && isUrl(model)) {
|
|
205
|
+
throw new TypeError("Both model and endpointUrl cannot be URLs");
|
|
206
|
+
}
|
|
177
207
|
if (isUrl(model)) {
|
|
208
|
+
console.warn("Using a model URL is deprecated, please use the `endpointUrl` parameter instead");
|
|
178
209
|
return model;
|
|
179
210
|
}
|
|
211
|
+
if (endpointUrl) {
|
|
212
|
+
return endpointUrl;
|
|
213
|
+
}
|
|
180
214
|
if (task) {
|
|
181
215
|
return `${HF_INFERENCE_API_BASE_URL}/pipeline/${task}/${model}`;
|
|
182
216
|
}
|
|
183
217
|
return `${HF_INFERENCE_API_BASE_URL}/models/${model}`;
|
|
184
218
|
})();
|
|
219
|
+
if (chatCompletion2 && !url.endsWith("/chat/completions")) {
|
|
220
|
+
url += "/v1/chat/completions";
|
|
221
|
+
}
|
|
185
222
|
let credentials;
|
|
186
223
|
if (typeof includeCredentials === "string") {
|
|
187
224
|
credentials = includeCredentials;
|
|
@@ -192,8 +229,7 @@ async function makeRequestOptions(args, options) {
|
|
|
192
229
|
headers,
|
|
193
230
|
method: "POST",
|
|
194
231
|
body: binary ? args.data : JSON.stringify({
|
|
195
|
-
...otherArgs,
|
|
196
|
-
options: options && otherOptions
|
|
232
|
+
...otherArgs.model && isUrl(otherArgs.model) ? omit(otherArgs, "model") : otherArgs
|
|
197
233
|
}),
|
|
198
234
|
...credentials && { credentials },
|
|
199
235
|
signal: options?.signal
|
|
@@ -214,6 +250,9 @@ async function request(args, options) {
|
|
|
214
250
|
if (!response.ok) {
|
|
215
251
|
if (response.headers.get("Content-Type")?.startsWith("application/json")) {
|
|
216
252
|
const output = await response.json();
|
|
253
|
+
if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
|
|
254
|
+
throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
|
|
255
|
+
}
|
|
217
256
|
if (output.error) {
|
|
218
257
|
throw new Error(output.error);
|
|
219
258
|
}
|
|
@@ -338,6 +377,9 @@ async function* streamingRequest(args, options) {
|
|
|
338
377
|
if (!response.ok) {
|
|
339
378
|
if (response.headers.get("Content-Type")?.startsWith("application/json")) {
|
|
340
379
|
const output = await response.json();
|
|
380
|
+
if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
|
|
381
|
+
throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
|
|
382
|
+
}
|
|
341
383
|
if (output.error) {
|
|
342
384
|
throw new Error(output.error);
|
|
343
385
|
}
|
|
@@ -374,6 +416,9 @@ async function* streamingRequest(args, options) {
|
|
|
374
416
|
onChunk(value);
|
|
375
417
|
for (const event of events) {
|
|
376
418
|
if (event.data.length > 0) {
|
|
419
|
+
if (event.data === "[DONE]") {
|
|
420
|
+
return;
|
|
421
|
+
}
|
|
377
422
|
const data = JSON.parse(event.data);
|
|
378
423
|
if (typeof data === "object" && data !== null && "error" in data) {
|
|
379
424
|
throw new Error(data.error);
|
|
@@ -520,7 +565,7 @@ async function textToImage(args, options) {
|
|
|
520
565
|
return res;
|
|
521
566
|
}
|
|
522
567
|
|
|
523
|
-
// ../shared/src/base64FromBytes.ts
|
|
568
|
+
// src/utils/base64FromBytes.ts
|
|
524
569
|
function base64FromBytes(arr) {
|
|
525
570
|
if (globalThis.Buffer) {
|
|
526
571
|
return globalThis.Buffer.from(arr).toString("base64");
|
|
@@ -533,10 +578,6 @@ function base64FromBytes(arr) {
|
|
|
533
578
|
}
|
|
534
579
|
}
|
|
535
580
|
|
|
536
|
-
// ../shared/src/isBackend.ts
|
|
537
|
-
var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
538
|
-
var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
|
|
539
|
-
|
|
540
581
|
// src/tasks/cv/imageToImage.ts
|
|
541
582
|
async function imageToImage(args, options) {
|
|
542
583
|
let reqArgs;
|
|
@@ -699,12 +740,22 @@ async function textClassification(args, options) {
|
|
|
699
740
|
return res;
|
|
700
741
|
}
|
|
701
742
|
|
|
743
|
+
// src/utils/toArray.ts
|
|
744
|
+
function toArray(obj) {
|
|
745
|
+
if (Array.isArray(obj)) {
|
|
746
|
+
return obj;
|
|
747
|
+
}
|
|
748
|
+
return [obj];
|
|
749
|
+
}
|
|
750
|
+
|
|
702
751
|
// src/tasks/nlp/textGeneration.ts
|
|
703
752
|
async function textGeneration(args, options) {
|
|
704
|
-
const res = await request(args, {
|
|
705
|
-
...options,
|
|
706
|
-
taskHint: "text-generation"
|
|
707
|
-
});
|
|
753
|
+
const res = toArray(
|
|
754
|
+
await request(args, {
|
|
755
|
+
...options,
|
|
756
|
+
taskHint: "text-generation"
|
|
757
|
+
})
|
|
758
|
+
);
|
|
708
759
|
const isValidOutput = Array.isArray(res) && res.every((x) => typeof x?.generated_text === "string");
|
|
709
760
|
if (!isValidOutput) {
|
|
710
761
|
throw new InferenceOutputError("Expected Array<{generated_text: string}>");
|
|
@@ -720,14 +771,6 @@ async function* textGenerationStream(args, options) {
|
|
|
720
771
|
});
|
|
721
772
|
}
|
|
722
773
|
|
|
723
|
-
// src/utils/toArray.ts
|
|
724
|
-
function toArray(obj) {
|
|
725
|
-
if (Array.isArray(obj)) {
|
|
726
|
-
return obj;
|
|
727
|
-
}
|
|
728
|
-
return [obj];
|
|
729
|
-
}
|
|
730
|
-
|
|
731
774
|
// src/tasks/nlp/tokenClassification.ts
|
|
732
775
|
async function tokenClassification(args, options) {
|
|
733
776
|
const res = toArray(
|
|
@@ -777,6 +820,29 @@ async function zeroShotClassification(args, options) {
|
|
|
777
820
|
return res;
|
|
778
821
|
}
|
|
779
822
|
|
|
823
|
+
// src/tasks/nlp/chatCompletion.ts
|
|
824
|
+
async function chatCompletion(args, options) {
|
|
825
|
+
const res = await request(args, {
|
|
826
|
+
...options,
|
|
827
|
+
taskHint: "text-generation",
|
|
828
|
+
chatCompletion: true
|
|
829
|
+
});
|
|
830
|
+
const isValidOutput = typeof res === "object" && Array.isArray(res?.choices) && typeof res?.created === "number" && typeof res?.id === "string" && typeof res?.model === "string" && typeof res?.system_fingerprint === "string" && typeof res?.usage === "object";
|
|
831
|
+
if (!isValidOutput) {
|
|
832
|
+
throw new InferenceOutputError("Expected ChatCompletionOutput");
|
|
833
|
+
}
|
|
834
|
+
return res;
|
|
835
|
+
}
|
|
836
|
+
|
|
837
|
+
// src/tasks/nlp/chatCompletionStream.ts
|
|
838
|
+
async function* chatCompletionStream(args, options) {
|
|
839
|
+
yield* streamingRequest(args, {
|
|
840
|
+
...options,
|
|
841
|
+
taskHint: "text-generation",
|
|
842
|
+
chatCompletion: true
|
|
843
|
+
});
|
|
844
|
+
}
|
|
845
|
+
|
|
780
846
|
// src/tasks/multimodal/documentQuestionAnswering.ts
|
|
781
847
|
async function documentQuestionAnswering(args, options) {
|
|
782
848
|
const reqArgs = {
|
|
@@ -888,7 +954,7 @@ var HfInferenceEndpoint = class {
|
|
|
888
954
|
enumerable: false,
|
|
889
955
|
value: (params, options) => (
|
|
890
956
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
891
|
-
fn({ ...params, accessToken, model: endpointUrl }, { ...defaultOptions, ...options })
|
|
957
|
+
fn({ ...params, accessToken, endpointUrl }, { ...defaultOptions, ...options })
|
|
892
958
|
)
|
|
893
959
|
});
|
|
894
960
|
}
|
|
@@ -902,6 +968,8 @@ var HfInferenceEndpoint = class {
|
|
|
902
968
|
audioClassification,
|
|
903
969
|
audioToAudio,
|
|
904
970
|
automaticSpeechRecognition,
|
|
971
|
+
chatCompletion,
|
|
972
|
+
chatCompletionStream,
|
|
905
973
|
documentQuestionAnswering,
|
|
906
974
|
featureExtraction,
|
|
907
975
|
fillMask,
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
/// <reference path="./index.d.ts" />
|
|
2
1
|
var __defProp = Object.defineProperty;
|
|
3
2
|
var __export = (target, all) => {
|
|
4
3
|
for (var name in all)
|
|
@@ -11,6 +10,8 @@ __export(tasks_exports, {
|
|
|
11
10
|
audioClassification: () => audioClassification,
|
|
12
11
|
audioToAudio: () => audioToAudio,
|
|
13
12
|
automaticSpeechRecognition: () => automaticSpeechRecognition,
|
|
13
|
+
chatCompletion: () => chatCompletion,
|
|
14
|
+
chatCompletionStream: () => chatCompletionStream,
|
|
14
15
|
documentQuestionAnswering: () => documentQuestionAnswering,
|
|
15
16
|
featureExtraction: () => featureExtraction,
|
|
16
17
|
fillMask: () => fillMask,
|
|
@@ -39,6 +40,30 @@ __export(tasks_exports, {
|
|
|
39
40
|
zeroShotImageClassification: () => zeroShotImageClassification
|
|
40
41
|
});
|
|
41
42
|
|
|
43
|
+
// src/utils/pick.ts
|
|
44
|
+
function pick(o, props) {
|
|
45
|
+
return Object.assign(
|
|
46
|
+
{},
|
|
47
|
+
...props.map((prop) => {
|
|
48
|
+
if (o[prop] !== void 0) {
|
|
49
|
+
return { [prop]: o[prop] };
|
|
50
|
+
}
|
|
51
|
+
})
|
|
52
|
+
);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// src/utils/typedInclude.ts
|
|
56
|
+
function typedInclude(arr, v) {
|
|
57
|
+
return arr.includes(v);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// src/utils/omit.ts
|
|
61
|
+
function omit(o, props) {
|
|
62
|
+
const propsArr = Array.isArray(props) ? props : [props];
|
|
63
|
+
const letsKeep = Object.keys(o).filter((prop) => !typedInclude(propsArr, prop));
|
|
64
|
+
return pick(o, letsKeep);
|
|
65
|
+
}
|
|
66
|
+
|
|
42
67
|
// src/lib/isUrl.ts
|
|
43
68
|
function isUrl(modelOrUrl) {
|
|
44
69
|
return /^http(s?):/.test(modelOrUrl) || modelOrUrl.startsWith("/");
|
|
@@ -79,7 +104,7 @@ async function getDefaultTask(model, accessToken, options) {
|
|
|
79
104
|
var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co";
|
|
80
105
|
var tasks = null;
|
|
81
106
|
async function makeRequestOptions(args, options) {
|
|
82
|
-
const { accessToken, model: _model, ...otherArgs } = args;
|
|
107
|
+
const { accessToken, endpointUrl, ...otherArgs } = args;
|
|
83
108
|
let { model } = args;
|
|
84
109
|
const {
|
|
85
110
|
forceTask: task,
|
|
@@ -88,7 +113,7 @@ async function makeRequestOptions(args, options) {
|
|
|
88
113
|
wait_for_model,
|
|
89
114
|
use_cache,
|
|
90
115
|
dont_load_model,
|
|
91
|
-
...otherOptions
|
|
116
|
+
chatCompletion: chatCompletion2
|
|
92
117
|
} = options ?? {};
|
|
93
118
|
const headers = {};
|
|
94
119
|
if (accessToken) {
|
|
@@ -122,15 +147,25 @@ async function makeRequestOptions(args, options) {
|
|
|
122
147
|
if (dont_load_model) {
|
|
123
148
|
headers["X-Load-Model"] = "0";
|
|
124
149
|
}
|
|
125
|
-
const url = (() => {
|
|
150
|
+
let url = (() => {
|
|
151
|
+
if (endpointUrl && isUrl(model)) {
|
|
152
|
+
throw new TypeError("Both model and endpointUrl cannot be URLs");
|
|
153
|
+
}
|
|
126
154
|
if (isUrl(model)) {
|
|
155
|
+
console.warn("Using a model URL is deprecated, please use the `endpointUrl` parameter instead");
|
|
127
156
|
return model;
|
|
128
157
|
}
|
|
158
|
+
if (endpointUrl) {
|
|
159
|
+
return endpointUrl;
|
|
160
|
+
}
|
|
129
161
|
if (task) {
|
|
130
162
|
return `${HF_INFERENCE_API_BASE_URL}/pipeline/${task}/${model}`;
|
|
131
163
|
}
|
|
132
164
|
return `${HF_INFERENCE_API_BASE_URL}/models/${model}`;
|
|
133
165
|
})();
|
|
166
|
+
if (chatCompletion2 && !url.endsWith("/chat/completions")) {
|
|
167
|
+
url += "/v1/chat/completions";
|
|
168
|
+
}
|
|
134
169
|
let credentials;
|
|
135
170
|
if (typeof includeCredentials === "string") {
|
|
136
171
|
credentials = includeCredentials;
|
|
@@ -141,8 +176,7 @@ async function makeRequestOptions(args, options) {
|
|
|
141
176
|
headers,
|
|
142
177
|
method: "POST",
|
|
143
178
|
body: binary ? args.data : JSON.stringify({
|
|
144
|
-
...otherArgs,
|
|
145
|
-
options: options && otherOptions
|
|
179
|
+
...otherArgs.model && isUrl(otherArgs.model) ? omit(otherArgs, "model") : otherArgs
|
|
146
180
|
}),
|
|
147
181
|
...credentials && { credentials },
|
|
148
182
|
signal: options?.signal
|
|
@@ -163,6 +197,9 @@ async function request(args, options) {
|
|
|
163
197
|
if (!response.ok) {
|
|
164
198
|
if (response.headers.get("Content-Type")?.startsWith("application/json")) {
|
|
165
199
|
const output = await response.json();
|
|
200
|
+
if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
|
|
201
|
+
throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
|
|
202
|
+
}
|
|
166
203
|
if (output.error) {
|
|
167
204
|
throw new Error(output.error);
|
|
168
205
|
}
|
|
@@ -287,6 +324,9 @@ async function* streamingRequest(args, options) {
|
|
|
287
324
|
if (!response.ok) {
|
|
288
325
|
if (response.headers.get("Content-Type")?.startsWith("application/json")) {
|
|
289
326
|
const output = await response.json();
|
|
327
|
+
if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
|
|
328
|
+
throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`);
|
|
329
|
+
}
|
|
290
330
|
if (output.error) {
|
|
291
331
|
throw new Error(output.error);
|
|
292
332
|
}
|
|
@@ -323,6 +363,9 @@ async function* streamingRequest(args, options) {
|
|
|
323
363
|
onChunk(value);
|
|
324
364
|
for (const event of events) {
|
|
325
365
|
if (event.data.length > 0) {
|
|
366
|
+
if (event.data === "[DONE]") {
|
|
367
|
+
return;
|
|
368
|
+
}
|
|
326
369
|
const data = JSON.parse(event.data);
|
|
327
370
|
if (typeof data === "object" && data !== null && "error" in data) {
|
|
328
371
|
throw new Error(data.error);
|
|
@@ -469,7 +512,7 @@ async function textToImage(args, options) {
|
|
|
469
512
|
return res;
|
|
470
513
|
}
|
|
471
514
|
|
|
472
|
-
//
|
|
515
|
+
// src/utils/base64FromBytes.ts
|
|
473
516
|
function base64FromBytes(arr) {
|
|
474
517
|
if (globalThis.Buffer) {
|
|
475
518
|
return globalThis.Buffer.from(arr).toString("base64");
|
|
@@ -482,10 +525,6 @@ function base64FromBytes(arr) {
|
|
|
482
525
|
}
|
|
483
526
|
}
|
|
484
527
|
|
|
485
|
-
// ../shared/src/isBackend.ts
|
|
486
|
-
var isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
487
|
-
var isWebWorker = typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
|
|
488
|
-
|
|
489
528
|
// src/tasks/cv/imageToImage.ts
|
|
490
529
|
async function imageToImage(args, options) {
|
|
491
530
|
let reqArgs;
|
|
@@ -648,12 +687,22 @@ async function textClassification(args, options) {
|
|
|
648
687
|
return res;
|
|
649
688
|
}
|
|
650
689
|
|
|
690
|
+
// src/utils/toArray.ts
|
|
691
|
+
function toArray(obj) {
|
|
692
|
+
if (Array.isArray(obj)) {
|
|
693
|
+
return obj;
|
|
694
|
+
}
|
|
695
|
+
return [obj];
|
|
696
|
+
}
|
|
697
|
+
|
|
651
698
|
// src/tasks/nlp/textGeneration.ts
|
|
652
699
|
async function textGeneration(args, options) {
|
|
653
|
-
const res =
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
700
|
+
const res = toArray(
|
|
701
|
+
await request(args, {
|
|
702
|
+
...options,
|
|
703
|
+
taskHint: "text-generation"
|
|
704
|
+
})
|
|
705
|
+
);
|
|
657
706
|
const isValidOutput = Array.isArray(res) && res.every((x) => typeof x?.generated_text === "string");
|
|
658
707
|
if (!isValidOutput) {
|
|
659
708
|
throw new InferenceOutputError("Expected Array<{generated_text: string}>");
|
|
@@ -669,14 +718,6 @@ async function* textGenerationStream(args, options) {
|
|
|
669
718
|
});
|
|
670
719
|
}
|
|
671
720
|
|
|
672
|
-
// src/utils/toArray.ts
|
|
673
|
-
function toArray(obj) {
|
|
674
|
-
if (Array.isArray(obj)) {
|
|
675
|
-
return obj;
|
|
676
|
-
}
|
|
677
|
-
return [obj];
|
|
678
|
-
}
|
|
679
|
-
|
|
680
721
|
// src/tasks/nlp/tokenClassification.ts
|
|
681
722
|
async function tokenClassification(args, options) {
|
|
682
723
|
const res = toArray(
|
|
@@ -726,6 +767,29 @@ async function zeroShotClassification(args, options) {
|
|
|
726
767
|
return res;
|
|
727
768
|
}
|
|
728
769
|
|
|
770
|
+
// src/tasks/nlp/chatCompletion.ts
|
|
771
|
+
async function chatCompletion(args, options) {
|
|
772
|
+
const res = await request(args, {
|
|
773
|
+
...options,
|
|
774
|
+
taskHint: "text-generation",
|
|
775
|
+
chatCompletion: true
|
|
776
|
+
});
|
|
777
|
+
const isValidOutput = typeof res === "object" && Array.isArray(res?.choices) && typeof res?.created === "number" && typeof res?.id === "string" && typeof res?.model === "string" && typeof res?.system_fingerprint === "string" && typeof res?.usage === "object";
|
|
778
|
+
if (!isValidOutput) {
|
|
779
|
+
throw new InferenceOutputError("Expected ChatCompletionOutput");
|
|
780
|
+
}
|
|
781
|
+
return res;
|
|
782
|
+
}
|
|
783
|
+
|
|
784
|
+
// src/tasks/nlp/chatCompletionStream.ts
|
|
785
|
+
async function* chatCompletionStream(args, options) {
|
|
786
|
+
yield* streamingRequest(args, {
|
|
787
|
+
...options,
|
|
788
|
+
taskHint: "text-generation",
|
|
789
|
+
chatCompletion: true
|
|
790
|
+
});
|
|
791
|
+
}
|
|
792
|
+
|
|
729
793
|
// src/tasks/multimodal/documentQuestionAnswering.ts
|
|
730
794
|
async function documentQuestionAnswering(args, options) {
|
|
731
795
|
const reqArgs = {
|
|
@@ -837,7 +901,7 @@ var HfInferenceEndpoint = class {
|
|
|
837
901
|
enumerable: false,
|
|
838
902
|
value: (params, options) => (
|
|
839
903
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
840
|
-
fn({ ...params, accessToken,
|
|
904
|
+
fn({ ...params, accessToken, endpointUrl }, { ...defaultOptions, ...options })
|
|
841
905
|
)
|
|
842
906
|
});
|
|
843
907
|
}
|
|
@@ -850,6 +914,8 @@ export {
|
|
|
850
914
|
audioClassification,
|
|
851
915
|
audioToAudio,
|
|
852
916
|
automaticSpeechRecognition,
|
|
917
|
+
chatCompletion,
|
|
918
|
+
chatCompletionStream,
|
|
853
919
|
documentQuestionAnswering,
|
|
854
920
|
featureExtraction,
|
|
855
921
|
fillMask,
|