@huggingface/inference 1.6.3 → 1.7.1
This diff represents the changes between publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- package/README.md +8 -0
- package/dist/index.d.ts +94 -1
- package/dist/index.js +183 -5
- package/dist/index.mjs +181 -4
- package/package.json +1 -1
- package/src/HfInference.ts +189 -5
- package/src/vendor/fetch-event-source/parse.spec.ts +389 -0
- package/src/vendor/fetch-event-source/parse.ts +216 -0
package/README.md
CHANGED
```diff
@@ -76,6 +76,14 @@ await hf.textGeneration({
   inputs: 'The answer to the universe is'
 })
 
+for await (const output of hf.textGenerationStream({
+  model: "google/flan-t5-xxl",
+  inputs: 'repeat "one two three four"',
+  parameters: { max_new_tokens: 250 }
+})) {
+  console.log(output.token.text, output.generated_text);
+}
+
 await hf.tokenClassification({
   model: 'dbmdz/bert-large-cased-finetuned-conll03-english',
   inputs: 'My name is Sarah Jessica Parker but you can call me Jessica'
```
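The new README example prints each token as it arrives. As a complementary sketch (not part of the package README), here is one way to accumulate the stream into a single string, using the `special` flag that the new token type exposes so that control tokens such as the end-of-sequence marker are skipped:

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("your access token");

let text = "";
for await (const output of hf.textGenerationStream({
  model: "google/flan-t5-xxl",
  inputs: 'repeat "one two three four"',
  parameters: { max_new_tokens: 250 },
})) {
  // Skip special tokens (e.g. the end-of-sequence token) when concatenating
  if (!output.token.special) {
    text += output.token.text;
  }
}
console.log(text);
```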
package/dist/index.d.ts
CHANGED
```diff
@@ -206,6 +206,80 @@ interface TextGenerationReturn {
      */
     generated_text: string;
 }
+interface TextGenerationStreamToken {
+    /** Token ID from the model tokenizer */
+    id: number;
+    /** Token text */
+    text: string;
+    /** Logprob */
+    logprob: number;
+    /**
+     * Is the token a special token
+     * Can be used to ignore tokens when concatenating
+     */
+    special: boolean;
+}
+interface TextGenerationStreamPrefillToken {
+    /** Token ID from the model tokenizer */
+    id: number;
+    /** Token text */
+    text: string;
+    /**
+     * Logprob
+     * Optional since the logprob of the first token cannot be computed
+     */
+    logprob?: number;
+}
+interface TextGenerationStreamBestOfSequence {
+    /** Generated text */
+    generated_text: string;
+    /** Generation finish reason */
+    finish_reason: TextGenerationStreamFinishReason;
+    /** Number of generated tokens */
+    generated_tokens: number;
+    /** Sampling seed if sampling was activated */
+    seed?: number;
+    /** Prompt tokens */
+    prefill: TextGenerationStreamPrefillToken[];
+    /** Generated tokens */
+    tokens: TextGenerationStreamToken[];
+}
+declare enum TextGenerationStreamFinishReason {
+    /** number of generated tokens == `max_new_tokens` */
+    Length = "length",
+    /** the model generated its end of sequence token */
+    EndOfSequenceToken = "eos_token",
+    /** the model generated a text included in `stop_sequences` */
+    StopSequence = "stop_sequence"
+}
+interface TextGenerationStreamDetails {
+    /** Generation finish reason */
+    finish_reason: TextGenerationStreamFinishReason;
+    /** Number of generated tokens */
+    generated_tokens: number;
+    /** Sampling seed if sampling was activated */
+    seed?: number;
+    /** Prompt tokens */
+    prefill: TextGenerationStreamPrefillToken[];
+    /** */
+    tokens: TextGenerationStreamToken[];
+    /** Additional sequences when using the `best_of` parameter */
+    best_of_sequences?: TextGenerationStreamBestOfSequence[];
+}
+interface TextGenerationStreamReturn {
+    /** Generated token, one at a time */
+    token: TextGenerationStreamToken;
+    /**
+     * Complete generated text
+     * Only available when the generation is finished
+     */
+    generated_text?: string;
+    /**
+     * Generation details
+     * Only available when the generation is finished
+     */
+    details?: TextGenerationStreamDetails;
+}
 type TokenClassificationArgs = Args & {
     /**
      * A string to be classified
@@ -486,6 +560,10 @@ declare class HfInference {
      * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
      */
    textGeneration(args: TextGenerationArgs, options?: Options): Promise<TextGenerationReturn>;
+    /**
+     * Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time
+     */
+    textGenerationStream(args: TextGenerationArgs, options?: Options): AsyncGenerator<TextGenerationStreamReturn>;
    /**
      * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
      */
@@ -537,6 +615,10 @@ declare class HfInference {
      * Recommended model: stabilityai/stable-diffusion-2
      */
    textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageReturn>;
+    /**
+     * Helper that prepares request arguments
+     */
+    private makeRequestOptions;
    request<T>(args: Args & {
        data?: Blob | ArrayBuffer;
    }, options?: Options & {
@@ -545,6 +627,17 @@ declare class HfInference {
        /** For internal HF use, which is why it's not exposed in {@link Options} */
        includeCredentials?: boolean;
    }): Promise<T>;
+    /**
+     * Make request that uses server-sent events and returns response as a generator
+     */
+    streamingRequest<T>(args: Args & {
+        data?: Blob | ArrayBuffer;
+    }, options?: Options & {
+        binary?: boolean;
+        blob?: boolean;
+        /** For internal HF use, which is why it's not exposed in {@link Options} */
+        includeCredentials?: boolean;
+    }): AsyncGenerator<T>;
 }
 
-export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
+export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextGenerationStreamBestOfSequence, TextGenerationStreamDetails, TextGenerationStreamFinishReason, TextGenerationStreamPrefillToken, TextGenerationStreamReturn, TextGenerationStreamToken, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
```
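Since `TextGenerationStreamFinishReason` is a `declare enum` (a value, not just a type), consumers can branch on how a generation ended. A minimal sketch of hypothetical consumer code against these declarations; per the doc comments, only the final event carries `generated_text` and `details`:

```ts
import {
  HfInference,
  TextGenerationStreamFinishReason,
  TextGenerationStreamReturn,
} from "@huggingface/inference";

async function streamToString(hf: HfInference, inputs: string): Promise<string> {
  let last: TextGenerationStreamReturn | undefined;
  for await (const output of hf.textGenerationStream({ model: "google/flan-t5-xxl", inputs })) {
    // Every event carries `token`; the final event also carries
    // `generated_text` and `details`.
    last = output;
  }
  if (last?.details?.finish_reason === TextGenerationStreamFinishReason.Length) {
    console.warn("Hit max_new_tokens; output may be truncated");
  }
  return last?.generated_text ?? "";
}
```

Because the return type is `AsyncGenerator<TextGenerationStreamReturn>`, a plain `for await` loop is all a consumer needs; no manual stream-reader handling is required.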
package/dist/index.js
CHANGED
```diff
@@ -19,7 +19,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/index.ts
 var src_exports = {};
 __export(src_exports, {
-  HfInference: () => HfInference
+  HfInference: () => HfInference,
+  TextGenerationStreamFinishReason: () => TextGenerationStreamFinishReason
 });
 module.exports = __toCommonJS(src_exports);
 
@@ -31,7 +32,113 @@ function toArray(obj) {
   return [obj];
 }
 
+// src/vendor/fetch-event-source/parse.ts
+function getLines(onLine) {
+  let buffer;
+  let position;
+  let fieldLength;
+  let discardTrailingNewline = false;
+  return function onChunk(arr) {
+    if (buffer === void 0) {
+      buffer = arr;
+      position = 0;
+      fieldLength = -1;
+    } else {
+      buffer = concat(buffer, arr);
+    }
+    const bufLength = buffer.length;
+    let lineStart = 0;
+    while (position < bufLength) {
+      if (discardTrailingNewline) {
+        if (buffer[position] === 10 /* NewLine */) {
+          lineStart = ++position;
+        }
+        discardTrailingNewline = false;
+      }
+      let lineEnd = -1;
+      for (; position < bufLength && lineEnd === -1; ++position) {
+        switch (buffer[position]) {
+          case 58 /* Colon */:
+            if (fieldLength === -1) {
+              fieldLength = position - lineStart;
+            }
+            break;
+          case 13 /* CarriageReturn */:
+            discardTrailingNewline = true;
+          case 10 /* NewLine */:
+            lineEnd = position;
+            break;
+        }
+      }
+      if (lineEnd === -1) {
+        break;
+      }
+      onLine(buffer.subarray(lineStart, lineEnd), fieldLength);
+      lineStart = position;
+      fieldLength = -1;
+    }
+    if (lineStart === bufLength) {
+      buffer = void 0;
+    } else if (lineStart !== 0) {
+      buffer = buffer.subarray(lineStart);
+      position -= lineStart;
+    }
+  };
+}
+function getMessages(onId, onRetry, onMessage) {
+  let message = newMessage();
+  const decoder = new TextDecoder();
+  return function onLine(line, fieldLength) {
+    if (line.length === 0) {
+      onMessage?.(message);
+      message = newMessage();
+    } else if (fieldLength > 0) {
+      const field = decoder.decode(line.subarray(0, fieldLength));
+      const valueOffset = fieldLength + (line[fieldLength + 1] === 32 /* Space */ ? 2 : 1);
+      const value = decoder.decode(line.subarray(valueOffset));
+      switch (field) {
+        case "data":
+          message.data = message.data ? message.data + "\n" + value : value;
+          break;
+        case "event":
+          message.event = value;
+          break;
+        case "id":
+          onId(message.id = value);
+          break;
+        case "retry":
+          const retry = parseInt(value, 10);
+          if (!isNaN(retry)) {
+            onRetry(message.retry = retry);
+          }
+          break;
+      }
+    }
+  };
+}
+function concat(a, b) {
+  const res = new Uint8Array(a.length + b.length);
+  res.set(a);
+  res.set(b, a.length);
+  return res;
+}
+function newMessage() {
+  return {
+    data: "",
+    event: "",
+    id: "",
+    retry: void 0
+  };
+}
+
 // src/HfInference.ts
+var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/";
+var TextGenerationStreamFinishReason = /* @__PURE__ */ ((TextGenerationStreamFinishReason2) => {
+  TextGenerationStreamFinishReason2["Length"] = "length";
+  TextGenerationStreamFinishReason2["EndOfSequenceToken"] = "eos_token";
+  TextGenerationStreamFinishReason2["StopSequence"] = "stop_sequence";
+  return TextGenerationStreamFinishReason2;
+})(TextGenerationStreamFinishReason || {});
 var HfInference = class {
   apiKey;
   defaultOptions;
@@ -113,6 +220,12 @@ var HfInference = class {
     }
     return res?.[0];
   }
+  /**
+   * Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time
+   */
+  async *textGenerationStream(args, options) {
+    yield* this.streamingRequest(args, options);
+  }
   /**
    * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
    */
@@ -273,7 +386,10 @@ var HfInference = class {
     }
     return res;
   }
-  async request(args, options) {
+  /**
+   * Helper that prepares request arguments
+   */
+  makeRequestOptions(args, options) {
     const mergedOptions = { ...this.defaultOptions, ...options };
     const { model, ...otherArgs } = args;
     const headers = {};
@@ -294,7 +410,8 @@ var HfInference = class {
         headers["X-Load-Model"] = "0";
       }
     }
-    const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
+    const url = `${HF_INFERENCE_API_BASE_URL}${model}`;
+    const info = {
       headers,
       method: "POST",
      body: options?.binary ? args.data : JSON.stringify({
@@ -302,7 +419,12 @@ var HfInference = class {
        options: mergedOptions
      }),
      credentials: options?.includeCredentials ? "include" : "same-origin"
-    });
+    };
+    return { url, info, mergedOptions };
+  }
+  async request(args, options) {
+    const { url, info, mergedOptions } = this.makeRequestOptions(args, options);
+    const response = await fetch(url, info);
     if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
       return this.request(args, {
         ...mergedOptions,
@@ -321,8 +443,64 @@
     }
     return output;
   }
+  /**
+   * Make request that uses server-sent events and returns response as a generator
+   */
+  async *streamingRequest(args, options) {
+    const { url, info, mergedOptions } = this.makeRequestOptions({ ...args, stream: true }, options);
+    const response = await fetch(url, info);
+    if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
+      return this.streamingRequest(args, {
+        ...mergedOptions,
+        wait_for_model: true
+      });
+    }
+    if (!response.ok) {
+      if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+        const output = await response.json();
+        if (output.error) {
+          throw new Error(output.error);
+        }
+      }
+      throw new Error(`Server response contains error: ${response.status}`);
+    }
+    if (response.headers.get("content-type") !== "text/event-stream") {
+      throw new Error(`Server does not support event stream content type`);
+    }
+    const reader = response.body.getReader();
+    const events = [];
+    const onEvent = (event) => {
+      events.push(event);
+    };
+    const onChunk = getLines(
+      getMessages(
+        () => {
+        },
+        () => {
+        },
+        onEvent
+      )
+    );
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done)
+          return;
+        onChunk(value);
+        while (events.length > 0) {
+          const event = events.shift();
+          if (event.data.length > 0) {
+            yield JSON.parse(event.data);
+          }
+        }
+      }
+    } finally {
+      reader.releaseLock();
+    }
+  }
 };
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
-  HfInference
+  HfInference,
+  TextGenerationStreamFinishReason
 });
```
package/dist/index.mjs
CHANGED
```diff
@@ -6,7 +6,113 @@ function toArray(obj) {
   return [obj];
 }
 
+// src/vendor/fetch-event-source/parse.ts
+function getLines(onLine) {
+  let buffer;
+  let position;
+  let fieldLength;
+  let discardTrailingNewline = false;
+  return function onChunk(arr) {
+    if (buffer === void 0) {
+      buffer = arr;
+      position = 0;
+      fieldLength = -1;
+    } else {
+      buffer = concat(buffer, arr);
+    }
+    const bufLength = buffer.length;
+    let lineStart = 0;
+    while (position < bufLength) {
+      if (discardTrailingNewline) {
+        if (buffer[position] === 10 /* NewLine */) {
+          lineStart = ++position;
+        }
+        discardTrailingNewline = false;
+      }
+      let lineEnd = -1;
+      for (; position < bufLength && lineEnd === -1; ++position) {
+        switch (buffer[position]) {
+          case 58 /* Colon */:
+            if (fieldLength === -1) {
+              fieldLength = position - lineStart;
+            }
+            break;
+          case 13 /* CarriageReturn */:
+            discardTrailingNewline = true;
+          case 10 /* NewLine */:
+            lineEnd = position;
+            break;
+        }
+      }
+      if (lineEnd === -1) {
+        break;
+      }
+      onLine(buffer.subarray(lineStart, lineEnd), fieldLength);
+      lineStart = position;
+      fieldLength = -1;
+    }
+    if (lineStart === bufLength) {
+      buffer = void 0;
+    } else if (lineStart !== 0) {
+      buffer = buffer.subarray(lineStart);
+      position -= lineStart;
+    }
+  };
+}
+function getMessages(onId, onRetry, onMessage) {
+  let message = newMessage();
+  const decoder = new TextDecoder();
+  return function onLine(line, fieldLength) {
+    if (line.length === 0) {
+      onMessage?.(message);
+      message = newMessage();
+    } else if (fieldLength > 0) {
+      const field = decoder.decode(line.subarray(0, fieldLength));
+      const valueOffset = fieldLength + (line[fieldLength + 1] === 32 /* Space */ ? 2 : 1);
+      const value = decoder.decode(line.subarray(valueOffset));
+      switch (field) {
+        case "data":
+          message.data = message.data ? message.data + "\n" + value : value;
+          break;
+        case "event":
+          message.event = value;
+          break;
+        case "id":
+          onId(message.id = value);
+          break;
+        case "retry":
+          const retry = parseInt(value, 10);
+          if (!isNaN(retry)) {
+            onRetry(message.retry = retry);
+          }
+          break;
+      }
+    }
+  };
+}
+function concat(a, b) {
+  const res = new Uint8Array(a.length + b.length);
+  res.set(a);
+  res.set(b, a.length);
+  return res;
+}
+function newMessage() {
+  return {
+    data: "",
+    event: "",
+    id: "",
+    retry: void 0
+  };
+}
+
 // src/HfInference.ts
+var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/";
+var TextGenerationStreamFinishReason = /* @__PURE__ */ ((TextGenerationStreamFinishReason2) => {
+  TextGenerationStreamFinishReason2["Length"] = "length";
+  TextGenerationStreamFinishReason2["EndOfSequenceToken"] = "eos_token";
+  TextGenerationStreamFinishReason2["StopSequence"] = "stop_sequence";
+  return TextGenerationStreamFinishReason2;
+})(TextGenerationStreamFinishReason || {});
 var HfInference = class {
   apiKey;
   defaultOptions;
@@ -88,6 +194,12 @@ var HfInference = class {
     }
     return res?.[0];
   }
+  /**
+   * Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time
+   */
+  async *textGenerationStream(args, options) {
+    yield* this.streamingRequest(args, options);
+  }
   /**
    * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
    */
@@ -248,7 +360,10 @@ var HfInference = class {
     }
     return res;
   }
-  async request(args, options) {
+  /**
+   * Helper that prepares request arguments
+   */
+  makeRequestOptions(args, options) {
     const mergedOptions = { ...this.defaultOptions, ...options };
     const { model, ...otherArgs } = args;
     const headers = {};
@@ -269,7 +384,8 @@ var HfInference = class {
        headers["X-Load-Model"] = "0";
      }
    }
-    const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
+    const url = `${HF_INFERENCE_API_BASE_URL}${model}`;
+    const info = {
      headers,
      method: "POST",
      body: options?.binary ? args.data : JSON.stringify({
@@ -277,7 +393,12 @@ var HfInference = class {
        options: mergedOptions
      }),
      credentials: options?.includeCredentials ? "include" : "same-origin"
-    });
+    };
+    return { url, info, mergedOptions };
+  }
+  async request(args, options) {
+    const { url, info, mergedOptions } = this.makeRequestOptions(args, options);
+    const response = await fetch(url, info);
     if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
       return this.request(args, {
         ...mergedOptions,
@@ -296,7 +417,63 @@ var HfInference = class {
     }
     return output;
   }
+  /**
+   * Make request that uses server-sent events and returns response as a generator
+   */
+  async *streamingRequest(args, options) {
+    const { url, info, mergedOptions } = this.makeRequestOptions({ ...args, stream: true }, options);
+    const response = await fetch(url, info);
+    if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
+      return this.streamingRequest(args, {
+        ...mergedOptions,
+        wait_for_model: true
+      });
+    }
+    if (!response.ok) {
+      if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+        const output = await response.json();
+        if (output.error) {
+          throw new Error(output.error);
+        }
+      }
+      throw new Error(`Server response contains error: ${response.status}`);
+    }
+    if (response.headers.get("content-type") !== "text/event-stream") {
+      throw new Error(`Server does not support event stream content type`);
+    }
+    const reader = response.body.getReader();
+    const events = [];
+    const onEvent = (event) => {
+      events.push(event);
+    };
+    const onChunk = getLines(
+      getMessages(
+        () => {
+        },
+        () => {
+        },
+        onEvent
+      )
+    );
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done)
+          return;
+        onChunk(value);
+        while (events.length > 0) {
+          const event = events.shift();
+          if (event.data.length > 0) {
+            yield JSON.parse(event.data);
+          }
+        }
+      }
+    } finally {
+      reader.releaseLock();
+    }
+  }
 };
 export {
-  HfInference
+  HfInference,
+  TextGenerationStreamFinishReason
 };
```
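The `.mjs` bundle mirrors the CommonJS build above; the practical upshot of the shared `makeRequestOptions` refactor is that `streamingRequest` POSTs the same payload as `request` plus `stream: true`, and requires the server to answer with `text/event-stream`. A rough sketch of the equivalent raw request, for illustration only (assumptions: the header and body shapes below match what `makeRequestOptions` produces with no default options set, and `HF_TOKEN` is a hypothetical variable holding your own token):

```ts
const HF_TOKEN = process.env.HF_TOKEN; // assumption: token supplied via env (Node)

const response = await fetch("https://api-inference.huggingface.co/models/google/flan-t5-xxl", {
  method: "POST",
  headers: { Authorization: `Bearer ${HF_TOKEN}` },
  body: JSON.stringify({
    inputs: 'repeat "one two three four"',
    parameters: { max_new_tokens: 250 },
    options: {}, // mergedOptions is empty when no options are passed
    stream: true, // injected by streamingRequest before the request is made
  }),
});
console.log(response.headers.get("content-type")); // "text/event-stream" on success
```

Note that `streamingRequest` retries a 503 recursively with `wait_for_model: true`, matching the behavior `request` already had for models that are still loading.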