@huggingface/inference 1.6.3 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -76,6 +76,14 @@ await hf.textGeneration({
   inputs: 'The answer to the universe is'
 })
 
+for await (const output of hf.textGenerationStream({
+  model: "google/flan-t5-xxl",
+  inputs: 'repeat "one two three four"',
+  parameters: { max_new_tokens: 250 }
+})) {
+  console.log(output.token.text, output.generated_text);
+}
+
 await hf.tokenClassification({
   model: 'dbmdz/bert-large-cased-finetuned-conll03-english',
   inputs: 'My name is Sarah Jessica Parker but you can call me Jessica'
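
The new README example streams tokens as they are generated. Per the type declarations below, `generated_text` is only populated on the final event, so to rebuild the output incrementally you can concatenate token texts and skip special tokens. A minimal sketch of that pattern (the token source and accumulation logic are illustrative, not part of the package):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference(process.env.HF_TOKEN); // hypothetical env var holding an access token

let text = "";
for await (const output of hf.textGenerationStream({
  model: "google/flan-t5-xxl",
  inputs: 'repeat "one two three four"',
  parameters: { max_new_tokens: 250 },
})) {
  // `special` marks tokens such as end-of-sequence that should not be concatenated
  if (!output.token.special) {
    text += output.token.text;
  }
}
console.log(text);
```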
package/dist/index.d.ts CHANGED
@@ -206,6 +206,80 @@ interface TextGenerationReturn {
    */
   generated_text: string;
 }
+interface TextGenerationStreamToken {
+  /** Token ID from the model tokenizer */
+  id: number;
+  /** Token text */
+  text: string;
+  /** Logprob */
+  logprob: number;
+  /**
+   * Is the token a special token
+   * Can be used to ignore tokens when concatenating
+   */
+  special: boolean;
+}
+interface TextGenerationStreamPrefillToken {
+  /** Token ID from the model tokenizer */
+  id: number;
+  /** Token text */
+  text: string;
+  /**
+   * Logprob
+   * Optional since the logprob of the first token cannot be computed
+   */
+  logprob?: number;
+}
+interface TextGenerationStreamBestOfSequence {
+  /** Generated text */
+  generated_text: string;
+  /** Generation finish reason */
+  finish_reason: TextGenerationStreamFinishReason;
+  /** Number of generated tokens */
+  generated_tokens: number;
+  /** Sampling seed if sampling was activated */
+  seed?: number;
+  /** Prompt tokens */
+  prefill: TextGenerationStreamPrefillToken[];
+  /** Generated tokens */
+  tokens: TextGenerationStreamToken[];
+}
+declare enum TextGenerationStreamFinishReason {
+  /** number of generated tokens == `max_new_tokens` */
+  Length = "length",
+  /** the model generated its end of sequence token */
+  EndOfSequenceToken = "eos_token",
+  /** the model generated a text included in `stop_sequences` */
+  StopSequence = "stop_sequence"
+}
+interface TextGenerationStreamDetails {
+  /** Generation finish reason */
+  finish_reason: TextGenerationStreamFinishReason;
+  /** Number of generated tokens */
+  generated_tokens: number;
+  /** Sampling seed if sampling was activated */
+  seed?: number;
+  /** Prompt tokens */
+  prefill: TextGenerationStreamPrefillToken[];
+  /** Generated tokens */
+  tokens: TextGenerationStreamToken[];
+  /** Additional sequences when using the `best_of` parameter */
+  best_of_sequences?: TextGenerationStreamBestOfSequence[];
+}
+interface TextGenerationStreamReturn {
+  /** Generated token, one at a time */
+  token: TextGenerationStreamToken;
+  /**
+   * Complete generated text
+   * Only available when the generation is finished
+   */
+  generated_text?: string;
+  /**
+   * Generation details
+   * Only available when the generation is finished
+   */
+  details?: TextGenerationStreamDetails;
+}
 type TokenClassificationArgs = Args & {
   /**
    * A string to be classified
@@ -486,6 +560,10 @@ declare class HfInference {
    * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
    */
   textGeneration(args: TextGenerationArgs, options?: Options): Promise<TextGenerationReturn>;
+  /**
+   * Use to continue text from a prompt. Same as `textGeneration` but returns a generator that can be read one token at a time.
+   */
+  textGenerationStream(args: TextGenerationArgs, options?: Options): AsyncGenerator<TextGenerationStreamReturn>;
   /**
    * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
    */
@@ -537,6 +615,10 @@ declare class HfInference {
    * Recommended model: stabilityai/stable-diffusion-2
    */
   textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageReturn>;
+  /**
+   * Helper that prepares request arguments
+   */
+  private makeRequestOptions;
   request<T>(args: Args & {
     data?: Blob | ArrayBuffer;
   }, options?: Options & {
@@ -545,6 +627,17 @@ declare class HfInference {
     /** For internal HF use, which is why it's not exposed in {@link Options} */
     includeCredentials?: boolean;
   }): Promise<T>;
+  /**
+   * Make a request that uses server-sent events and returns the response as a generator
+   */
+  streamingRequest<T>(args: Args & {
+    data?: Blob | ArrayBuffer;
+  }, options?: Options & {
+    binary?: boolean;
+    blob?: boolean;
+    /** For internal HF use, which is why it's not exposed in {@link Options} */
+    includeCredentials?: boolean;
+  }): AsyncGenerator<T>;
 }
 
-export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
+export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextGenerationStreamBestOfSequence, TextGenerationStreamDetails, TextGenerationStreamFinishReason, TextGenerationStreamPrefillToken, TextGenerationStreamReturn, TextGenerationStreamToken, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
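
These declarations type each streamed event: every event carries `token`, while `generated_text` and `details` appear only on the final event. A hedged sketch of consuming the finish reason through the newly exported enum (model choice and token source are illustrative):

```ts
import { HfInference, TextGenerationStreamFinishReason } from "@huggingface/inference";

const hf = new HfInference(process.env.HF_TOKEN); // hypothetical token source

for await (const event of hf.textGenerationStream({
  model: "google/flan-t5-xxl",
  inputs: "The answer to the universe is",
  parameters: { max_new_tokens: 50 },
})) {
  if (event.details) {
    // `details` is only present on the final event
    const { finish_reason, generated_tokens } = event.details;
    if (finish_reason === TextGenerationStreamFinishReason.Length) {
      console.log(`stopped at max_new_tokens after ${generated_tokens} tokens`);
    }
  }
}
```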
package/dist/index.js CHANGED
@@ -19,7 +19,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/index.ts
 var src_exports = {};
 __export(src_exports, {
-  HfInference: () => HfInference
+  HfInference: () => HfInference,
+  TextGenerationStreamFinishReason: () => TextGenerationStreamFinishReason
 });
 module.exports = __toCommonJS(src_exports);
 
@@ -31,7 +32,113 @@ function toArray(obj) {
   return [obj];
 }
 
+// src/vendor/fetch-event-source/parse.ts
+function getLines(onLine) {
+  let buffer;
+  let position;
+  let fieldLength;
+  let discardTrailingNewline = false;
+  return function onChunk(arr) {
+    if (buffer === void 0) {
+      buffer = arr;
+      position = 0;
+      fieldLength = -1;
+    } else {
+      buffer = concat(buffer, arr);
+    }
+    const bufLength = buffer.length;
+    let lineStart = 0;
+    while (position < bufLength) {
+      if (discardTrailingNewline) {
+        if (buffer[position] === 10 /* NewLine */) {
+          lineStart = ++position;
+        }
+        discardTrailingNewline = false;
+      }
+      let lineEnd = -1;
+      for (; position < bufLength && lineEnd === -1; ++position) {
+        switch (buffer[position]) {
+          case 58 /* Colon */:
+            if (fieldLength === -1) {
+              fieldLength = position - lineStart;
+            }
+            break;
+          case 13 /* CarriageReturn */:
+            discardTrailingNewline = true;
+          case 10 /* NewLine */:
+            lineEnd = position;
+            break;
+        }
+      }
+      if (lineEnd === -1) {
+        break;
+      }
+      onLine(buffer.subarray(lineStart, lineEnd), fieldLength);
+      lineStart = position;
+      fieldLength = -1;
+    }
+    if (lineStart === bufLength) {
+      buffer = void 0;
+    } else if (lineStart !== 0) {
+      buffer = buffer.subarray(lineStart);
+      position -= lineStart;
+    }
+  };
+}
+function getMessages(onId, onRetry, onMessage) {
+  let message = newMessage();
+  const decoder = new TextDecoder();
+  return function onLine(line, fieldLength) {
+    if (line.length === 0) {
+      onMessage?.(message);
+      message = newMessage();
+    } else if (fieldLength > 0) {
+      const field = decoder.decode(line.subarray(0, fieldLength));
+      const valueOffset = fieldLength + (line[fieldLength + 1] === 32 /* Space */ ? 2 : 1);
+      const value = decoder.decode(line.subarray(valueOffset));
+      switch (field) {
+        case "data":
+          message.data = message.data ? message.data + "\n" + value : value;
+          break;
+        case "event":
+          message.event = value;
+          break;
+        case "id":
+          onId(message.id = value);
+          break;
+        case "retry":
+          const retry = parseInt(value, 10);
+          if (!isNaN(retry)) {
+            onRetry(message.retry = retry);
+          }
+          break;
+      }
+    }
+  };
+}
+function concat(a, b) {
+  const res = new Uint8Array(a.length + b.length);
+  res.set(a);
+  res.set(b, a.length);
+  return res;
+}
+function newMessage() {
+  return {
+    data: "",
+    event: "",
+    id: "",
+    retry: void 0
+  };
+}
+
 // src/HfInference.ts
+var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/";
+var TextGenerationStreamFinishReason = /* @__PURE__ */ ((TextGenerationStreamFinishReason2) => {
+  TextGenerationStreamFinishReason2["Length"] = "length";
+  TextGenerationStreamFinishReason2["EndOfSequenceToken"] = "eos_token";
+  TextGenerationStreamFinishReason2["StopSequence"] = "stop_sequence";
+  return TextGenerationStreamFinishReason2;
+})(TextGenerationStreamFinishReason || {});
 var HfInference = class {
   apiKey;
   defaultOptions;
@@ -113,6 +220,12 @@ var HfInference = class {
     }
     return res?.[0];
   }
+  /**
+   * Use to continue text from a prompt. Same as `textGeneration` but returns a generator that can be read one token at a time.
+   */
+  async *textGenerationStream(args, options) {
+    yield* this.streamingRequest(args, options);
+  }
   /**
    * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
    */
@@ -273,7 +386,10 @@ var HfInference = class {
     }
     return res;
   }
-  async request(args, options) {
+  /**
+   * Helper that prepares request arguments
+   */
+  makeRequestOptions(args, options) {
     const mergedOptions = { ...this.defaultOptions, ...options };
     const { model, ...otherArgs } = args;
     const headers = {};
@@ -294,7 +410,8 @@ var HfInference = class {
         headers["X-Load-Model"] = "0";
       }
     }
-    const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
+    const url = `${HF_INFERENCE_API_BASE_URL}${model}`;
+    const info = {
       headers,
       method: "POST",
       body: options?.binary ? args.data : JSON.stringify({
@@ -302,7 +419,12 @@ var HfInference = class {
         options: mergedOptions
       }),
       credentials: options?.includeCredentials ? "include" : "same-origin"
-    });
+    };
+    return { url, info, mergedOptions };
+  }
+  async request(args, options) {
+    const { url, info, mergedOptions } = this.makeRequestOptions(args, options);
+    const response = await fetch(url, info);
     if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
       return this.request(args, {
         ...mergedOptions,
@@ -321,8 +443,64 @@ var HfInference = class {
     }
     return output;
   }
+  /**
+   * Make a request that uses server-sent events and returns the response as a generator
+   */
+  async *streamingRequest(args, options) {
+    const { url, info, mergedOptions } = this.makeRequestOptions({ ...args, stream: true }, options);
+    const response = await fetch(url, info);
+    if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
+      return this.streamingRequest(args, {
+        ...mergedOptions,
+        wait_for_model: true
+      });
+    }
+    if (!response.ok) {
+      if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+        const output = await response.json();
+        if (output.error) {
+          throw new Error(output.error);
+        }
+      }
+      throw new Error(`Server response contains error: ${response.status}`);
+    }
+    if (response.headers.get("content-type") !== "text/event-stream") {
+      throw new Error(`Server does not support event stream content type`);
+    }
+    const reader = response.body.getReader();
+    const events = [];
+    const onEvent = (event) => {
+      events.push(event);
+    };
+    const onChunk = getLines(
+      getMessages(
+        () => {},
+        () => {},
+        onEvent
+      )
+    );
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done)
+          return;
+        onChunk(value);
+        while (events.length > 0) {
+          const event = events.shift();
+          if (event.data.length > 0) {
+            yield JSON.parse(event.data);
+          }
+        }
+      }
+    } finally {
+      reader.releaseLock();
+    }
+  }
 };
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
-  HfInference
+  HfInference,
+  TextGenerationStreamFinishReason
 });
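
To trace the new runtime path: `streamingRequest` reuses `makeRequestOptions` with `stream: true` added to the request body, feeds `response.body` through the vendored fetch-event-source parsers, and yields `JSON.parse(event.data)` for each non-empty SSE `data:` payload. Since `streamingRequest<T>` is public in the declarations above, it can also be called directly with a caller-supplied event type; a hedged sketch (the event shape shown is an assumption mirroring `TextGenerationStreamReturn`):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference(process.env.HF_TOKEN); // hypothetical token source

// textGenerationStream is just `yield* this.streamingRequest(args, options)`
// with a narrower return type; the generic layer behaves the same way.
for await (const event of hf.streamingRequest<{
  token: { id: number; text: string; logprob: number; special: boolean };
  generated_text?: string;
}>({
  model: "google/flan-t5-xxl",
  inputs: "The answer to the universe is",
})) {
  process.stdout.write(event.token.text);
}
```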
package/dist/index.mjs CHANGED
@@ -6,7 +6,113 @@ function toArray(obj) {
   return [obj];
 }
 
+// src/vendor/fetch-event-source/parse.ts
+function getLines(onLine) {
+  let buffer;
+  let position;
+  let fieldLength;
+  let discardTrailingNewline = false;
+  return function onChunk(arr) {
+    if (buffer === void 0) {
+      buffer = arr;
+      position = 0;
+      fieldLength = -1;
+    } else {
+      buffer = concat(buffer, arr);
+    }
+    const bufLength = buffer.length;
+    let lineStart = 0;
+    while (position < bufLength) {
+      if (discardTrailingNewline) {
+        if (buffer[position] === 10 /* NewLine */) {
+          lineStart = ++position;
+        }
+        discardTrailingNewline = false;
+      }
+      let lineEnd = -1;
+      for (; position < bufLength && lineEnd === -1; ++position) {
+        switch (buffer[position]) {
+          case 58 /* Colon */:
+            if (fieldLength === -1) {
+              fieldLength = position - lineStart;
+            }
+            break;
+          case 13 /* CarriageReturn */:
+            discardTrailingNewline = true;
+          case 10 /* NewLine */:
+            lineEnd = position;
+            break;
+        }
+      }
+      if (lineEnd === -1) {
+        break;
+      }
+      onLine(buffer.subarray(lineStart, lineEnd), fieldLength);
+      lineStart = position;
+      fieldLength = -1;
+    }
+    if (lineStart === bufLength) {
+      buffer = void 0;
+    } else if (lineStart !== 0) {
+      buffer = buffer.subarray(lineStart);
+      position -= lineStart;
+    }
+  };
+}
+function getMessages(onId, onRetry, onMessage) {
+  let message = newMessage();
+  const decoder = new TextDecoder();
+  return function onLine(line, fieldLength) {
+    if (line.length === 0) {
+      onMessage?.(message);
+      message = newMessage();
+    } else if (fieldLength > 0) {
+      const field = decoder.decode(line.subarray(0, fieldLength));
+      const valueOffset = fieldLength + (line[fieldLength + 1] === 32 /* Space */ ? 2 : 1);
+      const value = decoder.decode(line.subarray(valueOffset));
+      switch (field) {
+        case "data":
+          message.data = message.data ? message.data + "\n" + value : value;
+          break;
+        case "event":
+          message.event = value;
+          break;
+        case "id":
+          onId(message.id = value);
+          break;
+        case "retry":
+          const retry = parseInt(value, 10);
+          if (!isNaN(retry)) {
+            onRetry(message.retry = retry);
+          }
+          break;
+      }
+    }
+  };
+}
+function concat(a, b) {
+  const res = new Uint8Array(a.length + b.length);
+  res.set(a);
+  res.set(b, a.length);
+  return res;
+}
+function newMessage() {
+  return {
+    data: "",
+    event: "",
+    id: "",
+    retry: void 0
+  };
+}
+
 // src/HfInference.ts
+var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/";
+var TextGenerationStreamFinishReason = /* @__PURE__ */ ((TextGenerationStreamFinishReason2) => {
+  TextGenerationStreamFinishReason2["Length"] = "length";
+  TextGenerationStreamFinishReason2["EndOfSequenceToken"] = "eos_token";
+  TextGenerationStreamFinishReason2["StopSequence"] = "stop_sequence";
+  return TextGenerationStreamFinishReason2;
+})(TextGenerationStreamFinishReason || {});
 var HfInference = class {
   apiKey;
   defaultOptions;
@@ -88,6 +194,12 @@ var HfInference = class {
     }
     return res?.[0];
   }
+  /**
+   * Use to continue text from a prompt. Same as `textGeneration` but returns a generator that can be read one token at a time.
+   */
+  async *textGenerationStream(args, options) {
+    yield* this.streamingRequest(args, options);
+  }
   /**
    * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
    */
@@ -248,7 +360,10 @@ var HfInference = class {
     }
     return res;
   }
-  async request(args, options) {
+  /**
+   * Helper that prepares request arguments
+   */
+  makeRequestOptions(args, options) {
     const mergedOptions = { ...this.defaultOptions, ...options };
     const { model, ...otherArgs } = args;
     const headers = {};
@@ -269,7 +384,8 @@ var HfInference = class {
         headers["X-Load-Model"] = "0";
       }
     }
-    const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
+    const url = `${HF_INFERENCE_API_BASE_URL}${model}`;
+    const info = {
       headers,
       method: "POST",
       body: options?.binary ? args.data : JSON.stringify({
@@ -277,7 +393,12 @@ var HfInference = class {
         options: mergedOptions
       }),
       credentials: options?.includeCredentials ? "include" : "same-origin"
-    });
+    };
+    return { url, info, mergedOptions };
+  }
+  async request(args, options) {
+    const { url, info, mergedOptions } = this.makeRequestOptions(args, options);
+    const response = await fetch(url, info);
     if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
       return this.request(args, {
         ...mergedOptions,
@@ -296,7 +417,63 @@ var HfInference = class {
     }
     return output;
   }
+  /**
+   * Make a request that uses server-sent events and returns the response as a generator
+   */
+  async *streamingRequest(args, options) {
+    const { url, info, mergedOptions } = this.makeRequestOptions({ ...args, stream: true }, options);
+    const response = await fetch(url, info);
+    if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
+      return this.streamingRequest(args, {
+        ...mergedOptions,
+        wait_for_model: true
+      });
+    }
+    if (!response.ok) {
+      if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+        const output = await response.json();
+        if (output.error) {
+          throw new Error(output.error);
+        }
+      }
+      throw new Error(`Server response contains error: ${response.status}`);
+    }
+    if (response.headers.get("content-type") !== "text/event-stream") {
+      throw new Error(`Server does not support event stream content type`);
+    }
+    const reader = response.body.getReader();
+    const events = [];
+    const onEvent = (event) => {
+      events.push(event);
+    };
+    const onChunk = getLines(
+      getMessages(
+        () => {},
+        () => {},
+        onEvent
+      )
+    );
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done)
+          return;
+        onChunk(value);
+        while (events.length > 0) {
+          const event = events.shift();
+          if (event.data.length > 0) {
+            yield JSON.parse(event.data);
+          }
+        }
+      }
+    } finally {
+      reader.releaseLock();
+    }
+  }
 };
 export {
-  HfInference
+  HfInference,
+  TextGenerationStreamFinishReason
 };
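
The ESM bundle mirrors the CommonJS changes above. One behavior visible in both bundles: when a request returns 503 and `retry_on_error` is not disabled, `streamingRequest` retries the call with `wait_for_model: true`. Passing that option up front avoids the extra round trip while a cold model loads; a short hedged sketch:

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference(process.env.HF_TOKEN); // hypothetical token source

for await (const output of hf.textGenerationStream(
  { model: "google/flan-t5-xxl", inputs: "The answer to the universe is" },
  { wait_for_model: true } // wait for a cold model instead of going through the 503 retry path
)) {
  console.log(output.token.text);
}
```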
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@huggingface/inference",
-  "version": "1.6.3",
+  "version": "1.7.1",
   "license": "MIT",
   "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
   "description": "Typescript wrapper for the Hugging Face Inference API",