@huggingface/inference 2.6.5 → 2.6.7

This diff reflects the changes between publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
package/README.md CHANGED
@@ -506,6 +506,21 @@ const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/
 const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'});
 ```
 
+By default, all calls to the inference endpoint will wait until the model is
+loaded. When [scaling to
+0](https://huggingface.co/docs/inference-endpoints/en/autoscaling#scaling-to-0)
+is enabled on the endpoint, this can result in non-trivial waiting time. If
+you'd rather disable this behavior and handle the endpoint's returned 500 HTTP
+errors yourself, you can do so like so:
+
+```typescript
+const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
+const { generated_text } = await gpt2.textGeneration(
+  {inputs: 'The answer to the universe is'},
+  {retry_on_error: false},
+);
+```
+
 ## Running tests
 
 ```console
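
The new README section above documents `retry_on_error: false`; below is a minimal sketch of handling the resulting failure yourself. The try/catch wrapper and the `hf_...` token placeholder are illustrative, not part of the diff:

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder access token
const gpt2 = hf.endpoint("https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2");

try {
  const { generated_text } = await gpt2.textGeneration(
    { inputs: "The answer to the universe is" },
    { retry_on_error: false }, // do not wait for a scaled-to-zero endpoint to come up
  );
  console.log(generated_text);
} catch (error) {
  // With retries disabled, the 500-level response from a cold endpoint is
  // surfaced as a thrown error instead of being waited out.
  console.warn("Endpoint not ready:", error);
}
```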
package/dist/index.cjs CHANGED
@@ -132,7 +132,15 @@ var tasks = null;
 async function makeRequestOptions(args, options) {
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const { forceTask: task, includeCredentials, taskHint, ...otherOptions } = options ?? {};
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
   const headers = {};
   if (accessToken) {
     headers["Authorization"] = `Bearer ${accessToken}`;
@@ -155,16 +163,15 @@ async function makeRequestOptions(args, options) {
   const binary = "data" in args && !!args.data;
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  } else {
-    if (options?.wait_for_model) {
-      headers["X-Wait-For-Model"] = "true";
-    }
-    if (options?.use_cache === false) {
-      headers["X-Use-Cache"] = "false";
-    }
-    if (options?.dont_load_model) {
-      headers["X-Load-Model"] = "0";
-    }
+  }
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
   const url = (() => {
     if (isUrl(model)) {
@@ -178,10 +185,8 @@ async function makeRequestOptions(args, options) {
   let credentials;
   if (typeof includeCredentials === "string") {
     credentials = includeCredentials;
-  } else if (typeof includeCredentials === "boolean") {
-    credentials = includeCredentials ? "include" : void 0;
-  } else if (includeCredentials === void 0) {
-    credentials = "same-origin";
+  } else if (includeCredentials === true) {
+    credentials = "include";
   }
   const info = {
     headers,
@@ -190,7 +195,7 @@ async function makeRequestOptions(args, options) {
       ...otherArgs,
       options: options && otherOptions
     }),
-    credentials,
+    ...credentials && { credentials },
     signal: options?.signal
   };
   return { url, info };
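
In both dist bundles, `wait_for_model`, `use_cache` and `dont_load_model` are now destructured out of `options` up front and applied to every request (not only binary ones), and they no longer end up in the JSON `options` payload. A rough usage sketch, with an arbitrary model and prompt and a placeholder token:

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder access token

// These options now translate into the X-Wait-For-Model and X-Use-Cache
// headers for JSON requests as well.
const output = await hf.textGeneration(
  { model: "gpt2", inputs: "The answer to the universe is" },
  { wait_for_model: true, use_cache: false },
);
console.log(output.generated_text);
```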
package/dist/index.d.ts CHANGED
@@ -31,7 +31,7 @@ export interface Options {
   signal?: AbortSignal;
 
   /**
-   * (Default: "same-origin"). String | Boolean. Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all.
+   * Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all (which defaults to "same-origin" inside browsers).
   */
   includeCredentials?: string | boolean;
 }
@@ -702,6 +702,201 @@ export function textClassification(
   args: TextClassificationArgs,
   options?: Options
 ): Promise<TextClassificationOutput>;
+/**
+ * The reason why the generation was stopped.
+ *
+ * length: The generated sequence reached the maximum allowed length
+ *
+ * eos_token: The model generated an end-of-sentence (EOS) token
+ *
+ * stop_sequence: One of the sequence in stop_sequences was generated
+ */
+export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
+/**
+ * Inputs for Text Generation inference
+ */
+export interface TextGenerationInput {
+  /**
+   * The text to initialize generation with
+   */
+  inputs: string;
+  /**
+   * Additional inference parameters
+   */
+  parameters?: TextGenerationParameters;
+  /**
+   * Whether to stream output tokens
+   */
+  stream?: boolean;
+  [property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Text Generation
+ */
+export interface TextGenerationParameters {
+  /**
+   * The number of sampling queries to run. Only the best one (in terms of total logprob) will
+   * be returned.
+   */
+  best_of?: number;
+  /**
+   * Whether or not to output decoder input details
+   */
+  decoder_input_details?: boolean;
+  /**
+   * Whether or not to output details
+   */
+  details?: boolean;
+  /**
+   * Whether to use logits sampling instead of greedy decoding when generating new tokens.
+   */
+  do_sample?: boolean;
+  /**
+   * The maximum number of tokens to generate.
+   */
+  max_new_tokens?: number;
+  /**
+   * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
+   * paper](https://hf.co/papers/1909.05858) for more details.
+   */
+  repetition_penalty?: number;
+  /**
+   * Whether to prepend the prompt to the generated text.
+   */
+  return_full_text?: boolean;
+  /**
+   * The random sampling seed.
+   */
+  seed?: number;
+  /**
+   * Stop generating tokens if a member of `stop_sequences` is generated.
+   */
+  stop_sequences?: string[];
+  /**
+   * The value used to modulate the logits distribution.
+   */
+  temperature?: number;
+  /**
+   * The number of highest probability vocabulary tokens to keep for top-k-filtering.
+   */
+  top_k?: number;
+  /**
+   * If set to < 1, only the smallest set of most probable tokens with probabilities that add
+   * up to `top_p` or higher are kept for generation.
+   */
+  top_p?: number;
+  /**
+   * Truncate input tokens to the given size.
+   */
+  truncate?: number;
+  /**
+   * Typical Decoding mass. See [Typical Decoding for Natural Language
+   * Generation](https://hf.co/papers/2202.00666) for more information
+   */
+  typical_p?: number;
+  /**
+   * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
+   */
+  watermark?: boolean;
+  [property: string]: unknown;
+}
+/**
+ * Outputs for Text Generation inference
+ */
+export interface TextGenerationOutput {
+  /**
+   * When enabled, details about the generation
+   */
+  details?: TextGenerationOutputDetails;
+  /**
+   * The generated text
+   */
+  generated_text: string;
+  [property: string]: unknown;
+}
+/**
+ * When enabled, details about the generation
+ */
+export interface TextGenerationOutputDetails {
+  /**
+   * Details about additional sequences when best_of is provided
+   */
+  best_of_sequences?: TextGenerationOutputSequenceDetails[];
+  /**
+   * The reason why the generation was stopped.
+   */
+  finish_reason: TextGenerationFinishReason;
+  /**
+   * The number of generated tokens
+   */
+  generated_tokens: number;
+  prefill: TextGenerationPrefillToken[];
+  /**
+   * The random seed used for generation
+   */
+  seed?: number;
+  /**
+   * The generated tokens and associated details
+   */
+  tokens: TextGenerationOutputToken[];
+  /**
+   * Most likely tokens
+   */
+  top_tokens?: Array<TextGenerationOutputToken[]>;
+  [property: string]: unknown;
+}
+export interface TextGenerationOutputSequenceDetails {
+  finish_reason: TextGenerationFinishReason;
+  /**
+   * The generated text
+   */
+  generated_text: string;
+  /**
+   * The number of generated tokens
+   */
+  generated_tokens: number;
+  prefill: TextGenerationPrefillToken[];
+  /**
+   * The random seed used for generation
+   */
+  seed?: number;
+  /**
+   * The generated tokens and associated details
+   */
+  tokens: TextGenerationOutputToken[];
+  /**
+   * Most likely tokens
+   */
+  top_tokens?: Array<TextGenerationOutputToken[]>;
+  [property: string]: unknown;
+}
+export interface TextGenerationPrefillToken {
+  id: number;
+  logprob: number;
+  /**
+   * The text associated with that token
+   */
+  text: string;
+  [property: string]: unknown;
+}
+/**
+ * Generated token.
+ */
+export interface TextGenerationOutputToken {
+  id: number;
+  logprob?: number;
+  /**
+   * Whether or not that token is a special one
+   */
+  special: boolean;
+  /**
+   * The text associated with that token
+   */
+  text: string;
+  [property: string]: unknown;
+}
 /**
  * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
  */
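
The text-generation types are now inlined in the published `index.d.ts`. As a rough illustration of how they surface to callers, requesting `details` populates `TextGenerationOutput["details"]` when the backing model supports it; the model, prompt and parameters below are arbitrary and the token is a placeholder:

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder access token

const output = await hf.textGeneration({
  model: "gpt2",
  inputs: "The answer to the universe is",
  parameters: {
    max_new_tokens: 20,
    temperature: 0.7,
    details: true, // ask for TextGenerationOutputDetails in the response
  },
});

console.log(output.generated_text);
if (output.details) {
  // finish_reason is a TextGenerationFinishReason: "length" | "eos_token" | "stop_sequence"
  console.log(output.details.finish_reason, output.details.generated_tokens);
}
```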
package/dist/index.js CHANGED
@@ -81,7 +81,15 @@ var tasks = null;
 async function makeRequestOptions(args, options) {
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const { forceTask: task, includeCredentials, taskHint, ...otherOptions } = options ?? {};
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
   const headers = {};
   if (accessToken) {
     headers["Authorization"] = `Bearer ${accessToken}`;
@@ -104,16 +112,15 @@ async function makeRequestOptions(args, options) {
   const binary = "data" in args && !!args.data;
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  } else {
-    if (options?.wait_for_model) {
-      headers["X-Wait-For-Model"] = "true";
-    }
-    if (options?.use_cache === false) {
-      headers["X-Use-Cache"] = "false";
-    }
-    if (options?.dont_load_model) {
-      headers["X-Load-Model"] = "0";
-    }
+  }
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
   const url = (() => {
     if (isUrl(model)) {
@@ -127,10 +134,8 @@ async function makeRequestOptions(args, options) {
   let credentials;
   if (typeof includeCredentials === "string") {
     credentials = includeCredentials;
-  } else if (typeof includeCredentials === "boolean") {
-    credentials = includeCredentials ? "include" : void 0;
-  } else if (includeCredentials === void 0) {
-    credentials = "same-origin";
+  } else if (includeCredentials === true) {
+    credentials = "include";
   }
   const info = {
     headers,
@@ -139,7 +144,7 @@ async function makeRequestOptions(args, options) {
       ...otherArgs,
       options: options && otherOptions
     }),
-    credentials,
+    ...credentials && { credentials },
     signal: options?.signal
   };
   return { url, info };
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@huggingface/inference",
-  "version": "2.6.5",
+  "version": "2.6.7",
   "packageManager": "pnpm@8.10.5",
   "license": "MIT",
   "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
@@ -40,7 +40,7 @@
   "type": "module",
   "devDependencies": {
     "@types/node": "18.13.0",
-    "@huggingface/tasks": "^0.6.0"
+    "@huggingface/tasks": "^0.8.0"
   },
   "resolutions": {},
   "scripts": {
@@ -2,6 +2,9 @@ import * as tasks from "./tasks";
 import type { Options, RequestArgs } from "./types";
 import type { DistributiveOmit } from "./utils/distributive-omit";
 
+/* eslint-disable @typescript-eslint/no-empty-interface */
+/* eslint-disable @typescript-eslint/no-unsafe-declaration-merging */
+
 type Task = typeof tasks;
 
 type TaskWithNoAccessToken = {
@@ -60,8 +63,6 @@ export class HfInferenceEndpoint {
   }
 }
 
-// eslint-disable-next-line @typescript-eslint/no-empty-interface
 export interface HfInference extends TaskWithNoAccessToken {}
 
-// eslint-disable-next-line @typescript-eslint/no-empty-interface
 export interface HfInferenceEndpoint extends TaskWithNoAccessTokenNoModel {}
@@ -27,7 +27,15 @@ export async function makeRequestOptions(
   // eslint-disable-next-line @typescript-eslint/no-unused-vars
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const { forceTask: task, includeCredentials, taskHint, ...otherOptions } = options ?? {};
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
 
   const headers: Record<string, string> = {};
   if (accessToken) {
@@ -57,16 +65,16 @@
 
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  } else {
-    if (options?.wait_for_model) {
-      headers["X-Wait-For-Model"] = "true";
-    }
-    if (options?.use_cache === false) {
-      headers["X-Use-Cache"] = "false";
-    }
-    if (options?.dont_load_model) {
-      headers["X-Load-Model"] = "0";
-    }
+  }
+
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
 
   const url = (() => {
@@ -81,19 +89,14 @@ export async function makeRequestOptions(
     return `${HF_INFERENCE_API_BASE_URL}/models/${model}`;
   })();
 
-  // Let users configure credentials, or disable them all together (or keep default behavior).
-  // ---
-  // This used to be an internal property only and never exposed to users. This means that most usages will never define this value
-  // So in order to make this backwards compatible, if it's undefined we go to "same-origin" (default behaviour before).
-  // If it's a boolean and set to true then set to "include". If false, don't define credentials at all (useful for edge runtimes)
-  // Then finally, if it's a string, use it as-is.
+  /**
+   * For edge runtimes, leave 'credentials' undefined, otherwise cloudflare workers will error
+   */
   let credentials: RequestCredentials | undefined;
   if (typeof includeCredentials === "string") {
     credentials = includeCredentials as RequestCredentials;
-  } else if (typeof includeCredentials === "boolean") {
-    credentials = includeCredentials ? "include" : undefined;
-  } else if (includeCredentials === undefined) {
-    credentials = "same-origin";
+  } else if (includeCredentials === true) {
+    credentials = "include";
   }
 
   const info: RequestInit = {
@@ -105,7 +108,7 @@ export async function makeRequestOptions(
       ...otherArgs,
       options: options && otherOptions,
     }),
-    credentials,
+    ...(credentials && { credentials }),
    signal: options?.signal,
  };
 
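
The `...(credentials && { credentials })` spread only adds the `credentials` key when a value was resolved, so runtimes that reject the field (the comment above mentions Cloudflare Workers) never see it. A small standalone sketch of the pattern; the `buildInit` helper is illustrative, not library code:

```typescript
// Conditionally include a property: when `credentials` is undefined, the key
// is omitted entirely rather than being set to undefined.
function buildInit(credentials?: RequestCredentials): RequestInit {
  return {
    method: "POST",
    ...(credentials && { credentials }),
  };
}

console.log(buildInit()); // { method: "POST" }
console.log(buildInit("include")); // { method: "POST", credentials: "include" }
```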
@@ -1,8 +1,210 @@
-import type { TextGenerationInput, TextGenerationOutput } from "@huggingface/tasks/src/tasks/text-generation/inference";
 import { InferenceOutputError } from "../../lib/InferenceOutputError";
 import type { BaseArgs, Options } from "../../types";
 import { request } from "../custom/request";
 
+/**
+ * Inputs for Text Generation inference
+ */
+export interface TextGenerationInput {
+  /**
+   * The text to initialize generation with
+   */
+  inputs: string;
+  /**
+   * Additional inference parameters
+   */
+  parameters?: TextGenerationParameters;
+  /**
+   * Whether to stream output tokens
+   */
+  stream?: boolean;
+  [property: string]: unknown;
+}
+
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Text Generation
+ */
+export interface TextGenerationParameters {
+  /**
+   * The number of sampling queries to run. Only the best one (in terms of total logprob) will
+   * be returned.
+   */
+  best_of?: number;
+  /**
+   * Whether or not to output decoder input details
+   */
+  decoder_input_details?: boolean;
+  /**
+   * Whether or not to output details
+   */
+  details?: boolean;
+  /**
+   * Whether to use logits sampling instead of greedy decoding when generating new tokens.
+   */
+  do_sample?: boolean;
+  /**
+   * The maximum number of tokens to generate.
+   */
+  max_new_tokens?: number;
+  /**
+   * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
+   * paper](https://hf.co/papers/1909.05858) for more details.
+   */
+  repetition_penalty?: number;
+  /**
+   * Whether to prepend the prompt to the generated text.
+   */
+  return_full_text?: boolean;
+  /**
+   * The random sampling seed.
+   */
+  seed?: number;
+  /**
+   * Stop generating tokens if a member of `stop_sequences` is generated.
+   */
+  stop_sequences?: string[];
+  /**
+   * The value used to modulate the logits distribution.
+   */
+  temperature?: number;
+  /**
+   * The number of highest probability vocabulary tokens to keep for top-k-filtering.
+   */
+  top_k?: number;
+  /**
+   * If set to < 1, only the smallest set of most probable tokens with probabilities that add
+   * up to `top_p` or higher are kept for generation.
+   */
+  top_p?: number;
+  /**
+   * Truncate input tokens to the given size.
+   */
+  truncate?: number;
+  /**
+   * Typical Decoding mass. See [Typical Decoding for Natural Language
+   * Generation](https://hf.co/papers/2202.00666) for more information
+   */
+  typical_p?: number;
+  /**
+   * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
+   */
+  watermark?: boolean;
+  [property: string]: unknown;
+}
+
+/**
+ * Outputs for Text Generation inference
+ */
+export interface TextGenerationOutput {
+  /**
+   * When enabled, details about the generation
+   */
+  details?: TextGenerationOutputDetails;
+  /**
+   * The generated text
+   */
+  generated_text: string;
+  [property: string]: unknown;
+}
+
+/**
+ * When enabled, details about the generation
+ */
+export interface TextGenerationOutputDetails {
+  /**
+   * Details about additional sequences when best_of is provided
+   */
+  best_of_sequences?: TextGenerationOutputSequenceDetails[];
+  /**
+   * The reason why the generation was stopped.
+   */
+  finish_reason: TextGenerationFinishReason;
+  /**
+   * The number of generated tokens
+   */
+  generated_tokens: number;
+  prefill: TextGenerationPrefillToken[];
+  /**
+   * The random seed used for generation
+   */
+  seed?: number;
+  /**
+   * The generated tokens and associated details
+   */
+  tokens: TextGenerationOutputToken[];
+  /**
+   * Most likely tokens
+   */
+  top_tokens?: Array<TextGenerationOutputToken[]>;
+  [property: string]: unknown;
+}
+
+export interface TextGenerationOutputSequenceDetails {
+  finish_reason: TextGenerationFinishReason;
+  /**
+   * The generated text
+   */
+  generated_text: string;
+  /**
+   * The number of generated tokens
+   */
+  generated_tokens: number;
+  prefill: TextGenerationPrefillToken[];
+  /**
+   * The random seed used for generation
+   */
+  seed?: number;
+  /**
+   * The generated tokens and associated details
+   */
+  tokens: TextGenerationOutputToken[];
+  /**
+   * Most likely tokens
+   */
+  top_tokens?: Array<TextGenerationOutputToken[]>;
+  [property: string]: unknown;
+}
+
+export interface TextGenerationPrefillToken {
+  id: number;
+  logprob: number;
+  /**
+   * The text associated with that token
+   */
+  text: string;
+  [property: string]: unknown;
+}
+
+/**
+ * Generated token.
+ */
+export interface TextGenerationOutputToken {
+  id: number;
+  logprob?: number;
+  /**
+   * Whether or not that token is a special one
+   */
+  special: boolean;
+  /**
+   * The text associated with that token
+   */
+  text: string;
+  [property: string]: unknown;
+}
+
+/**
+ * The reason why the generation was stopped.
+ *
+ * length: The generated sequence reached the maximum allowed length
+ *
+ * eos_token: The model generated an end-of-sentence (EOS) token
+ *
+ * stop_sequence: One of the sequence in stop_sequences was generated
+ */
+export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
+
 /**
  * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
  */
@@ -1,7 +1,6 @@
 import type { BaseArgs, Options } from "../../types";
 import { streamingRequest } from "../custom/streamingRequest";
-
-import type { TextGenerationInput } from "@huggingface/tasks/src/tasks/text-generation/inference";
+import type { TextGenerationInput } from "./textGeneration";
 
 export interface TextGenerationStreamToken {
   /** Token ID from the model tokenizer */
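
`TextGenerationInput` is now imported from the sibling `textGeneration` module rather than from `@huggingface/tasks`; streaming usage itself is unchanged. A brief sketch, with an arbitrary model and prompt and a placeholder token:

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder access token

// Each yielded chunk carries the newly generated token in `token.text`.
for await (const chunk of hf.textGenerationStream({
  model: "gpt2",
  inputs: "The answer to the universe is",
  parameters: { max_new_tokens: 20 },
})) {
  process.stdout.write(chunk.token.text);
}
```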
package/src/types.ts CHANGED
@@ -32,7 +32,7 @@ export interface Options {
   signal?: AbortSignal;
 
   /**
-   * (Default: "same-origin"). String | Boolean. Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all.
+   * Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all (which defaults to "same-origin" inside browsers).
   */
   includeCredentials?: string | boolean;
 }
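
Given the revised `includeCredentials` semantics (a string is forwarded verbatim to `fetch`, `true` becomes `"include"`, and anything else leaves `credentials` unset so browsers fall back to their `"same-origin"` default), here is a speculative usage sketch; the model, inputs and token are arbitrary placeholders:

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder access token

// A string value is passed straight through as fetch's `credentials` option.
await hf.textClassification(
  { model: "distilbert-base-uncased-finetuned-sst-2-english", inputs: "Great release!" },
  { includeCredentials: "same-origin" },
);

// `true` maps to "include"; `false` or omitting the option now leaves
// `credentials` undefined, which avoids errors on edge runtimes such as
// Cloudflare Workers.
await hf.textClassification(
  { model: "distilbert-base-uncased-finetuned-sst-2-english", inputs: "Great release!" },
  { includeCredentials: true },
);
```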