@huggingface/inference 2.6.5 → 2.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/dist/index.cjs +21 -16
- package/dist/index.d.ts +196 -1
- package/dist/index.js +21 -16
- package/package.json +2 -2
- package/src/HfInference.ts +3 -2
- package/src/lib/makeRequestOptions.ts +25 -22
- package/src/tasks/nlp/textGeneration.ts +203 -1
- package/src/tasks/nlp/textGenerationStream.ts +1 -2
- package/src/types.ts +1 -1
package/README.md
CHANGED
@@ -506,6 +506,21 @@ const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/
 const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'});
 ```
 
+By default, all calls to the inference endpoint will wait until the model is
+loaded. When [scaling to
+0](https://huggingface.co/docs/inference-endpoints/en/autoscaling#scaling-to-0)
+is enabled on the endpoint, this can result in non-trivial waiting time. If
+you'd rather disable this behavior and handle the endpoint's returned 500 HTTP
+errors yourself, you can do so like so:
+
+```typescript
+const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
+const { generated_text } = await gpt2.textGeneration(
+  {inputs: 'The answer to the universe is'},
+  {retry_on_error: false},
+);
+```
+
 ## Running tests
 
 ```console
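The README addition above already shows how to turn retries off; for completeness, here is a minimal sketch of handling the endpoint's 500 responses yourself with `retry_on_error: false`. The access token and endpoint URL are placeholders, and the try/catch shape is an assumption rather than part of the diff:

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_xxx"); // placeholder token
const gpt2 = hf.endpoint("https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2");

try {
  const { generated_text } = await gpt2.textGeneration(
    { inputs: "The answer to the universe is" },
    { retry_on_error: false },
  );
  console.log(generated_text);
} catch (error) {
  // With retry_on_error disabled, a scaled-to-zero endpoint's 500 response
  // surfaces here instead of the client waiting for the model to load.
  console.error("Endpoint not ready:", error);
}
```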
package/dist/index.cjs
CHANGED
@@ -132,7 +132,15 @@ var tasks = null;
 async function makeRequestOptions(args, options) {
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const {
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
   const headers = {};
   if (accessToken) {
     headers["Authorization"] = `Bearer ${accessToken}`;
@@ -155,16 +163,15 @@ async function makeRequestOptions(args, options) {
   const binary = "data" in args && !!args.data;
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  }
-
-
-
-
-
-
-
-
-  }
+  }
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
   const url = (() => {
     if (isUrl(model)) {
@@ -178,10 +185,8 @@ async function makeRequestOptions(args, options) {
   let credentials;
   if (typeof includeCredentials === "string") {
     credentials = includeCredentials;
-  } else if (
-    credentials =
-  } else if (includeCredentials === void 0) {
-    credentials = "same-origin";
+  } else if (includeCredentials === true) {
+    credentials = "include";
   }
   const info = {
     headers,
@@ -190,7 +195,7 @@ async function makeRequestOptions(args, options) {
       ...otherArgs,
       options: options && otherOptions
     }),
-    credentials,
+    ...credentials && { credentials },
     signal: options?.signal
   };
   return { url, info };
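The `dist/index.cjs` change above wires three request options through to Inference API headers: `wait_for_model` → `X-Wait-For-Model: true`, `use_cache: false` → `X-Use-Cache: false`, and `dont_load_model` → `X-Load-Model: 0`. A short usage sketch of passing those options from the public API (model id and token are placeholders):

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_xxx"); // placeholder token

// The second argument is the Options object destructured in the diff above;
// wait_for_model and use_cache end up as X-Wait-For-Model / X-Use-Cache headers.
const output = await hf.textGeneration(
  { model: "gpt2", inputs: "The answer to the universe is" },
  { wait_for_model: true, use_cache: false },
);

console.log(output.generated_text);
```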
package/dist/index.d.ts
CHANGED
@@ -31,7 +31,7 @@ export interface Options {
   signal?: AbortSignal;
 
   /**
-   *
+   * Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all (which defaults to "same-origin" inside browsers).
    */
   includeCredentials?: string | boolean;
 }
@@ -702,6 +702,201 @@ export function textClassification(
   args: TextClassificationArgs,
   options?: Options
 ): Promise<TextClassificationOutput>;
+/**
+ * The reason why the generation was stopped.
+ *
+ * length: The generated sequence reached the maximum allowed length
+ *
+ * eos_token: The model generated an end-of-sentence (EOS) token
+ *
+ * stop_sequence: One of the sequence in stop_sequences was generated
+ */
+export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
+/**
+ * Inputs for Text Generation inference
+ */
+export interface TextGenerationInput {
+  /**
+   * The text to initialize generation with
+   */
+  inputs: string;
+  /**
+   * Additional inference parameters
+   */
+  parameters?: TextGenerationParameters;
+  /**
+   * Whether to stream output tokens
+   */
+  stream?: boolean;
+  [property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Text Generation
+ */
+export interface TextGenerationParameters {
+  /**
+   * The number of sampling queries to run. Only the best one (in terms of total logprob) will
+   * be returned.
+   */
+  best_of?: number;
+  /**
+   * Whether or not to output decoder input details
+   */
+  decoder_input_details?: boolean;
+  /**
+   * Whether or not to output details
+   */
+  details?: boolean;
+  /**
+   * Whether to use logits sampling instead of greedy decoding when generating new tokens.
+   */
+  do_sample?: boolean;
+  /**
+   * The maximum number of tokens to generate.
+   */
+  max_new_tokens?: number;
+  /**
+   * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
+   * paper](https://hf.co/papers/1909.05858) for more details.
+   */
+  repetition_penalty?: number;
+  /**
+   * Whether to prepend the prompt to the generated text.
+   */
+  return_full_text?: boolean;
+  /**
+   * The random sampling seed.
+   */
+  seed?: number;
+  /**
+   * Stop generating tokens if a member of `stop_sequences` is generated.
+   */
+  stop_sequences?: string[];
+  /**
+   * The value used to modulate the logits distribution.
+   */
+  temperature?: number;
+  /**
+   * The number of highest probability vocabulary tokens to keep for top-k-filtering.
+   */
+  top_k?: number;
+  /**
+   * If set to < 1, only the smallest set of most probable tokens with probabilities that add
+   * up to `top_p` or higher are kept for generation.
+   */
+  top_p?: number;
+  /**
+   * Truncate input tokens to the given size.
+   */
+  truncate?: number;
+  /**
+   * Typical Decoding mass. See [Typical Decoding for Natural Language
+   * Generation](https://hf.co/papers/2202.00666) for more information
+   */
+  typical_p?: number;
+  /**
+   * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
+   */
+  watermark?: boolean;
+  [property: string]: unknown;
+}
+/**
+ * Outputs for Text Generation inference
+ */
+export interface TextGenerationOutput {
+  /**
+   * When enabled, details about the generation
+   */
+  details?: TextGenerationOutputDetails;
+  /**
+   * The generated text
+   */
+  generated_text: string;
+  [property: string]: unknown;
+}
+/**
+ * When enabled, details about the generation
+ */
+export interface TextGenerationOutputDetails {
+  /**
+   * Details about additional sequences when best_of is provided
+   */
+  best_of_sequences?: TextGenerationOutputSequenceDetails[];
+  /**
+   * The reason why the generation was stopped.
+   */
+  finish_reason: TextGenerationFinishReason;
+  /**
+   * The number of generated tokens
+   */
+  generated_tokens: number;
+  prefill: TextGenerationPrefillToken[];
+  /**
+   * The random seed used for generation
+   */
+  seed?: number;
+  /**
+   * The generated tokens and associated details
+   */
+  tokens: TextGenerationOutputToken[];
+  /**
+   * Most likely tokens
+   */
+  top_tokens?: Array<TextGenerationOutputToken[]>;
+  [property: string]: unknown;
+}
+export interface TextGenerationOutputSequenceDetails {
+  finish_reason: TextGenerationFinishReason;
+  /**
+   * The generated text
+   */
+  generated_text: string;
+  /**
+   * The number of generated tokens
+   */
+  generated_tokens: number;
+  prefill: TextGenerationPrefillToken[];
+  /**
+   * The random seed used for generation
+   */
+  seed?: number;
+  /**
+   * The generated tokens and associated details
+   */
+  tokens: TextGenerationOutputToken[];
+  /**
+   * Most likely tokens
+   */
+  top_tokens?: Array<TextGenerationOutputToken[]>;
+  [property: string]: unknown;
+}
+export interface TextGenerationPrefillToken {
+  id: number;
+  logprob: number;
+  /**
+   * The text associated with that token
+   */
+  text: string;
+  [property: string]: unknown;
+}
+/**
+ * Generated token.
+ */
+export interface TextGenerationOutputToken {
+  id: number;
+  logprob?: number;
+  /**
+   * Whether or not that token is a special one
+   */
+  special: boolean;
+  /**
+   * The text associated with that token
+   */
+  text: string;
+  [property: string]: unknown;
+}
 /**
  * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
  */
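A sketch of how the types added to `dist/index.d.ts` above are meant to be consumed; the model id, prompt, and parameter values are illustrative, and passing `details: true` to populate `output.details` follows the declarations rather than anything stated elsewhere in the diff:

```typescript
import { HfInference, type TextGenerationOutput } from "@huggingface/inference";

const hf = new HfInference("hf_xxx"); // placeholder token

const output: TextGenerationOutput = await hf.textGeneration({
  model: "gpt2",
  inputs: "The answer to the universe is",
  parameters: {
    max_new_tokens: 20,
    temperature: 0.7,
    details: true, // per TextGenerationParameters, enables output.details
  },
});

console.log(output.generated_text);
// TextGenerationFinishReason is "length" | "eos_token" | "stop_sequence"
console.log(output.details?.finish_reason, output.details?.generated_tokens);
```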
package/dist/index.js
CHANGED
@@ -81,7 +81,15 @@ var tasks = null;
 async function makeRequestOptions(args, options) {
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const {
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
   const headers = {};
   if (accessToken) {
     headers["Authorization"] = `Bearer ${accessToken}`;
@@ -104,16 +112,15 @@ async function makeRequestOptions(args, options) {
   const binary = "data" in args && !!args.data;
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  }
-
-
-
-
-
-
-
-
-  }
+  }
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
   const url = (() => {
     if (isUrl(model)) {
@@ -127,10 +134,8 @@ async function makeRequestOptions(args, options) {
   let credentials;
   if (typeof includeCredentials === "string") {
     credentials = includeCredentials;
-  } else if (
-    credentials =
-  } else if (includeCredentials === void 0) {
-    credentials = "same-origin";
+  } else if (includeCredentials === true) {
+    credentials = "include";
   }
   const info = {
     headers,
@@ -139,7 +144,7 @@ async function makeRequestOptions(args, options) {
       ...otherArgs,
       options: options && otherOptions
     }),
-    credentials,
+    ...credentials && { credentials },
     signal: options?.signal
   };
   return { url, info };
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@huggingface/inference",
-  "version": "2.6.5",
+  "version": "2.6.7",
   "packageManager": "pnpm@8.10.5",
   "license": "MIT",
   "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
@@ -40,7 +40,7 @@
   "type": "module",
   "devDependencies": {
     "@types/node": "18.13.0",
-    "@huggingface/tasks": "^0.
+    "@huggingface/tasks": "^0.8.0"
   },
   "resolutions": {},
   "scripts": {
package/src/HfInference.ts
CHANGED
@@ -2,6 +2,9 @@ import * as tasks from "./tasks";
 import type { Options, RequestArgs } from "./types";
 import type { DistributiveOmit } from "./utils/distributive-omit";
 
+/* eslint-disable @typescript-eslint/no-empty-interface */
+/* eslint-disable @typescript-eslint/no-unsafe-declaration-merging */
+
 type Task = typeof tasks;
 
 type TaskWithNoAccessToken = {
@@ -60,8 +63,6 @@ export class HfInferenceEndpoint {
   }
 }
 
-// eslint-disable-next-line @typescript-eslint/no-empty-interface
 export interface HfInference extends TaskWithNoAccessToken {}
 
-// eslint-disable-next-line @typescript-eslint/no-empty-interface
 export interface HfInferenceEndpoint extends TaskWithNoAccessTokenNoModel {}
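For context, the eslint directives hoisted to the top of `HfInference.ts` cover the declaration-merging pattern the class relies on: the empty `interface HfInference extends TaskWithNoAccessToken {}` merges every task function onto the class as a method. A rough sketch of the resulting call sites (token, model, and URL are placeholders):

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_xxx"); // placeholder token

// textGeneration() is not defined on the class body; it comes from the
// merged `interface HfInference extends TaskWithNoAccessToken {}`.
await hf.textGeneration({ model: "gpt2", inputs: "Hello" });

// HfInferenceEndpoint merges TaskWithNoAccessTokenNoModel instead, so the
// model argument is dropped because the endpoint URL already selects it.
const endpoint = hf.endpoint("https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2");
await endpoint.textGeneration({ inputs: "Hello" });
```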
package/src/lib/makeRequestOptions.ts
CHANGED
@@ -27,7 +27,15 @@ export async function makeRequestOptions(
   // eslint-disable-next-line @typescript-eslint/no-unused-vars
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const {
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
 
   const headers: Record<string, string> = {};
   if (accessToken) {
@@ -57,16 +65,16 @@ export async function makeRequestOptions(
 
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  }
-
-
-
-
-
-
-
-
-
+  }
+
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
 
   const url = (() => {
@@ -81,19 +89,14 @@ export async function makeRequestOptions(
     return `${HF_INFERENCE_API_BASE_URL}/models/${model}`;
   })();
 
-
-
-
-  // So in order to make this backwards compatible, if it's undefined we go to "same-origin" (default behaviour before).
-  // If it's a boolean and set to true then set to "include". If false, don't define credentials at all (useful for edge runtimes)
-  // Then finally, if it's a string, use it as-is.
+  /**
+   * For edge runtimes, leave 'credentials' undefined, otherwise cloudflare workers will error
+   */
   let credentials: RequestCredentials | undefined;
   if (typeof includeCredentials === "string") {
     credentials = includeCredentials as RequestCredentials;
-  } else if (
-    credentials =
-  } else if (includeCredentials === undefined) {
-    credentials = "same-origin";
+  } else if (includeCredentials === true) {
+    credentials = "include";
   }
 
   const info: RequestInit = {
@@ -105,7 +108,7 @@ export async function makeRequestOptions(
       ...otherArgs,
       options: options && otherOptions,
     }),
-    credentials,
+    ...(credentials && { credentials }),
     signal: options?.signal,
   };
 
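The credentials branch rewritten above changes what each `includeCredentials` value produces in the outgoing `fetch` call; here is a hedged sketch of the three cases from the caller's side (token and model are placeholders):

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_xxx"); // placeholder token
const args = { model: "gpt2", inputs: "Hello" };

// A string is forwarded to fetch() unchanged as RequestCredentials.
await hf.textGeneration(args, { includeCredentials: "same-origin" });

// `true` becomes credentials: "include".
await hf.textGeneration(args, { includeCredentials: true });

// false (or leaving it undefined) now omits `credentials` entirely, which is
// what keeps requests working on edge runtimes such as Cloudflare Workers.
await hf.textGeneration(args, { includeCredentials: false });
```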
package/src/tasks/nlp/textGeneration.ts
CHANGED
@@ -1,8 +1,210 @@
-import type { TextGenerationInput, TextGenerationOutput } from "@huggingface/tasks/src/tasks/text-generation/inference";
 import { InferenceOutputError } from "../../lib/InferenceOutputError";
 import type { BaseArgs, Options } from "../../types";
 import { request } from "../custom/request";
 
+/**
+ * Inputs for Text Generation inference
+ */
+export interface TextGenerationInput {
+  /**
+   * The text to initialize generation with
+   */
+  inputs: string;
+  /**
+   * Additional inference parameters
+   */
+  parameters?: TextGenerationParameters;
+  /**
+   * Whether to stream output tokens
+   */
+  stream?: boolean;
+  [property: string]: unknown;
+}
+
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Text Generation
+ */
+export interface TextGenerationParameters {
+  /**
+   * The number of sampling queries to run. Only the best one (in terms of total logprob) will
+   * be returned.
+   */
+  best_of?: number;
+  /**
+   * Whether or not to output decoder input details
+   */
+  decoder_input_details?: boolean;
+  /**
+   * Whether or not to output details
+   */
+  details?: boolean;
+  /**
+   * Whether to use logits sampling instead of greedy decoding when generating new tokens.
+   */
+  do_sample?: boolean;
+  /**
+   * The maximum number of tokens to generate.
+   */
+  max_new_tokens?: number;
+  /**
+   * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
+   * paper](https://hf.co/papers/1909.05858) for more details.
+   */
+  repetition_penalty?: number;
+  /**
+   * Whether to prepend the prompt to the generated text.
+   */
+  return_full_text?: boolean;
+  /**
+   * The random sampling seed.
+   */
+  seed?: number;
+  /**
+   * Stop generating tokens if a member of `stop_sequences` is generated.
+   */
+  stop_sequences?: string[];
+  /**
+   * The value used to modulate the logits distribution.
+   */
+  temperature?: number;
+  /**
+   * The number of highest probability vocabulary tokens to keep for top-k-filtering.
+   */
+  top_k?: number;
+  /**
+   * If set to < 1, only the smallest set of most probable tokens with probabilities that add
+   * up to `top_p` or higher are kept for generation.
+   */
+  top_p?: number;
+  /**
+   * Truncate input tokens to the given size.
+   */
+  truncate?: number;
+  /**
+   * Typical Decoding mass. See [Typical Decoding for Natural Language
+   * Generation](https://hf.co/papers/2202.00666) for more information
+   */
+  typical_p?: number;
+  /**
+   * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
+   */
+  watermark?: boolean;
+  [property: string]: unknown;
+}
+
+/**
+ * Outputs for Text Generation inference
+ */
+export interface TextGenerationOutput {
+  /**
+   * When enabled, details about the generation
+   */
+  details?: TextGenerationOutputDetails;
+  /**
+   * The generated text
+   */
+  generated_text: string;
+  [property: string]: unknown;
+}
+
+/**
+ * When enabled, details about the generation
+ */
+export interface TextGenerationOutputDetails {
+  /**
+   * Details about additional sequences when best_of is provided
+   */
+  best_of_sequences?: TextGenerationOutputSequenceDetails[];
+  /**
+   * The reason why the generation was stopped.
+   */
+  finish_reason: TextGenerationFinishReason;
+  /**
+   * The number of generated tokens
+   */
+  generated_tokens: number;
+  prefill: TextGenerationPrefillToken[];
+  /**
+   * The random seed used for generation
+   */
+  seed?: number;
+  /**
+   * The generated tokens and associated details
+   */
+  tokens: TextGenerationOutputToken[];
+  /**
+   * Most likely tokens
+   */
+  top_tokens?: Array<TextGenerationOutputToken[]>;
+  [property: string]: unknown;
+}
+
+export interface TextGenerationOutputSequenceDetails {
+  finish_reason: TextGenerationFinishReason;
+  /**
+   * The generated text
+   */
+  generated_text: string;
+  /**
+   * The number of generated tokens
+   */
+  generated_tokens: number;
+  prefill: TextGenerationPrefillToken[];
+  /**
+   * The random seed used for generation
+   */
+  seed?: number;
+  /**
+   * The generated tokens and associated details
+   */
+  tokens: TextGenerationOutputToken[];
+  /**
+   * Most likely tokens
+   */
+  top_tokens?: Array<TextGenerationOutputToken[]>;
+  [property: string]: unknown;
+}
+
+export interface TextGenerationPrefillToken {
+  id: number;
+  logprob: number;
+  /**
+   * The text associated with that token
+   */
+  text: string;
+  [property: string]: unknown;
+}
+
+/**
+ * Generated token.
+ */
+export interface TextGenerationOutputToken {
+  id: number;
+  logprob?: number;
+  /**
+   * Whether or not that token is a special one
+   */
+  special: boolean;
+  /**
+   * The text associated with that token
+   */
+  text: string;
+  [property: string]: unknown;
+}
+
+/**
+ * The reason why the generation was stopped.
+ *
+ * length: The generated sequence reached the maximum allowed length
+ *
+ * eos_token: The model generated an end-of-sentence (EOS) token
+ *
+ * stop_sequence: One of the sequence in stop_sequences was generated
+ */
+export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
+
 /**
  * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
  */
package/src/tasks/nlp/textGenerationStream.ts
CHANGED
@@ -1,7 +1,6 @@
 import type { BaseArgs, Options } from "../../types";
 import { streamingRequest } from "../custom/streamingRequest";
-
-import type { TextGenerationInput } from "@huggingface/tasks/src/tasks/text-generation/inference";
+import type { TextGenerationInput } from "./textGeneration";
 
 export interface TextGenerationStreamToken {
   /** Token ID from the model tokenizer */
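Since `textGenerationStream` now pulls `TextGenerationInput` from the local `textGeneration` module, here is a brief sketch of the streaming call it types; the model, prompt, and parameter values are placeholders, and the `chunk.token.text` access assumes the package's streaming output shape:

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_xxx"); // placeholder token

// textGenerationStream() is an async generator that yields one chunk per token.
for await (const chunk of hf.textGenerationStream({
  model: "gpt2",
  inputs: "The answer to the universe is",
  parameters: { max_new_tokens: 20 },
})) {
  process.stdout.write(chunk.token.text);
}
```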
package/src/types.ts
CHANGED
@@ -32,7 +32,7 @@ export interface Options {
   signal?: AbortSignal;
 
   /**
-   *
+   * Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all (which defaults to "same-origin" inside browsers).
    */
   includeCredentials?: string | boolean;
 }