@huggingface/inference 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,8 +4,6 @@ A Typescript powered wrapper for the Hugging Face Inference API. Learn more abou
  
  Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README) or try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference).
  
- 
- 
  ## Install
  
  ```console
@@ -18,7 +16,7 @@ pnpm add @huggingface/inference
  
  ## Usage
  
- ❗**Important note:** Using an API key is optional to get started, however you will be rate limited eventually. Join [Hugging Face](https://huggingface.co/join) and then visit [access tokens](https://huggingface.co/settings/tokens) to generate your API key for **free**.
+ ❗**Important note:** Using an API key is optional to get started, however you will be rate limited eventually. Join [Hugging Face](https://huggingface.co/join) and then visit [access tokens](https://huggingface.co/settings/tokens) to generate your API key for **free**.
  
  Your API key should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the API key.
  
@@ -76,9 +74,10 @@ await hf.textGeneration({
    inputs: 'The answer to the universe is'
  })
  
- for await const (output of hf.textGenerationStream({
+ for await (const output of hf.textGenerationStream({
    model: "google/flan-t5-xxl",
-   inputs: 'repeat "one two three four"'
+   inputs: 'repeat "one two three four"',
+   parameters: { max_new_tokens: 250 }
  })) {
    console.log(output.token.text, output.generated_text);
  }
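
A minimal sketch of consuming the corrected streaming call end to end, assuming Node 18+ and a placeholder token; `max_new_tokens` mirrors the README change above:

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder token; optional, but avoids rate limits

let finalText = "";
for await (const output of hf.textGenerationStream({
  model: "google/flan-t5-xxl",
  inputs: 'repeat "one two three four"',
  parameters: { max_new_tokens: 250 },
})) {
  // generated_text is null until the final event (see the typing change in index.d.ts below)
  if (output.generated_text !== null) {
    finalText = output.generated_text;
  }
}
console.log(finalText);
```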
@@ -153,8 +152,15 @@ await hf.imageSegmentation({
  
  await hf.textToImage({
    inputs: 'award winning high resolution photo of a giant tortoise/((ladybird)) hybrid, [trending on artstation]',
-   negative_prompt: 'blurry',
    model: 'stabilityai/stable-diffusion-2',
+   parameters: {
+     negative_prompt: 'blurry',
+   }
+ })
+ 
+ await hf.imageToText({
+   data: readFileSync('test/cats.png'),
+   model: 'nlpconnect/vit-gpt2-image-captioning'
  })
  ```
  
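
The two new README snippets compose; a hedged sketch for Node (file paths and prompt are illustrative, and `Buffer.from(await blob.arrayBuffer())` is one way to persist the returned `Blob`):

```ts
import { readFileSync, writeFileSync } from "node:fs";
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder token

// textToImage resolves to a Blob; convert it to a Buffer to write it out
const image = await hf.textToImage({
  inputs: "a giant tortoise/((ladybird)) hybrid",
  model: "stabilityai/stable-diffusion-2",
  parameters: { negative_prompt: "blurry" },
});
writeFileSync("tortoise.png", Buffer.from(await image.arrayBuffer()));

// imageToText accepts Blob | ArrayBuffer, so raw file bytes work directly
const caption = await hf.imageToText({
  data: readFileSync("test/cats.png"),
  model: "nlpconnect/vit-gpt2-image-captioning",
});
console.log(caption.generated_text);
```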
@@ -187,6 +193,7 @@ await hf.textToImage({
  - [x] Object detection
  - [x] Image segmentation
  - [x] Text to image
+ - [x] Image to text
  
  ## Running tests
  
package/dist/index.d.ts CHANGED
@@ -273,12 +273,12 @@ interface TextGenerationStreamReturn {
     * Complete generated text
     * Only available when the generation is finished
     */
-   generated_text?: string;
+   generated_text: string | null;
    /**
     * Generation details
     * Only available when the generation is finished
     */
-   details?: TextGenerationStreamDetails;
+   details: TextGenerationStreamDetails | null;
  }
  type TokenClassificationArgs = Args & {
    /**
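
Because `generated_text` and `details` went from optional to explicitly nullable, consumers narrow with a null check rather than optional chaining; a small sketch (the handler name is hypothetical):

```ts
import type { TextGenerationStreamReturn } from "@huggingface/inference";

// Hypothetical per-event handler showing the narrowing the new types require
function onStreamEvent(output: TextGenerationStreamReturn): void {
  // Both fields are always present now, but null until generation finishes
  if (output.generated_text !== null) {
    console.log("final text:", output.generated_text);
  }
  if (output.details !== null) {
    console.log("finish reason:", output.details.finish_reason);
  }
}
```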
@@ -526,12 +526,42 @@ type TextToImageArgs = Args & {
     * The text to generate an image from
     */
    inputs: string;
+   parameters?: {
+     /**
+      * An optional negative prompt for the image generation
+      */
+     negative_prompt?: string;
+     /**
+      * The height in pixels of the generated image
+      */
+     height?: number;
+     /**
+      * The width in pixels of the generated image
+      */
+     width?: number;
+     /**
+      * The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference.
+      */
+     num_inference_steps?: number;
+     /**
+      * Guidance scale: Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, usually at the expense of lower image quality.
+      */
+     guidance_scale?: number;
+   };
+ };
+ type TextToImageReturn = Blob;
+ type ImageToTextArgs = Args & {
    /**
-    * An optional negative prompt for the image generation
+    * Binary image data
     */
-   negative_prompt?: string;
+   data: Blob | ArrayBuffer;
  };
- type TextToImageReturn = Blob;
+ interface ImageToTextReturn {
+   /**
+    * The generated caption
+    */
+   generated_text: string;
+ }
  declare class HfInference {
    private readonly apiKey;
    private readonly defaultOptions;
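
A sketch of the widened `TextToImageArgs` with every new parameter populated; the values are illustrative, not model recommendations:

```ts
import { HfInference } from "@huggingface/inference";
import type { TextToImageArgs } from "@huggingface/inference";

const args: TextToImageArgs = {
  model: "stabilityai/stable-diffusion-2",
  inputs: "watercolor lighthouse at dusk",
  parameters: {
    negative_prompt: "blurry",
    width: 768,
    height: 768,
    num_inference_steps: 30, // more steps: higher quality, slower inference
    guidance_scale: 7.5, // higher: closer prompt adherence, possibly lower image quality
  },
};

const image: Blob = await new HfInference("hf_...").textToImage(args);
```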
@@ -615,6 +645,10 @@ declare class HfInference {
     * Recommended model: stabilityai/stable-diffusion-2
     */
    textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageReturn>;
+   /**
+    * This task reads some image input and outputs the text caption.
+    */
+   imageToText(args: ImageToTextArgs, options?: Options): Promise<ImageToTextReturn>;
    /**
     * Helper that prepares request arguments
     */
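
And a browser-flavored sketch of the new `imageToText` method (the image URL is a placeholder; omitting the key gives anonymous, rate-limited access):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference(); // anonymous access

// Any Blob works, e.g. one fetched over the network
const imageBlob = await (await fetch("https://example.com/cat.png")).blob();
const { generated_text } = await hf.imageToText({
  data: imageBlob,
  model: "nlpconnect/vit-gpt2-image-captioning",
});
console.log(generated_text);
```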
@@ -640,4 +674,4 @@ declare class HfInference {
    }): AsyncGenerator<T>;
  }
  
- export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextGenerationStreamBestOfSequence, TextGenerationStreamDetails, TextGenerationStreamFinishReason, TextGenerationStreamPrefillToken, TextGenerationStreamReturn, TextGenerationStreamToken, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
+ export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ImageToTextArgs, ImageToTextReturn, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextGenerationStreamBestOfSequence, TextGenerationStreamDetails, TextGenerationStreamFinishReason, TextGenerationStreamPrefillToken, TextGenerationStreamReturn, TextGenerationStreamToken, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
package/dist/index.js CHANGED
@@ -1,3 +1,4 @@
+ "use strict";
  var __defProp = Object.defineProperty;
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
  var __getOwnPropNames = Object.getOwnPropertyNames;
@@ -24,7 +25,7 @@ __export(src_exports, {
  });
  module.exports = __toCommonJS(src_exports);
  
- // src/utils/to-array.ts
+ // src/utils/toArray.ts
  function toArray(obj) {
    if (Array.isArray(obj)) {
      return obj;
@@ -386,6 +387,15 @@ var HfInference = class {
    }
    return res;
  }
+ /**
+  * This task reads some image input and outputs the text caption.
+  */
+ async imageToText(args, options) {
+   return (await this.request(args, {
+     ...options,
+     binary: true
+   }))?.[0];
+ }
  /**
   * Helper that prepares request arguments
   */
@@ -456,13 +466,24 @@ var HfInference = class {
      });
    }
    if (!response.ok) {
+     if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+       const output = await response.json();
+       if (output.error) {
+         throw new Error(output.error);
+       }
+     }
      throw new Error(`Server response contains error: ${response.status}`);
    }
    if (response.headers.get("content-type") !== "text/event-stream") {
-     throw new Error(`Server does not support event stream content type`);
+     throw new Error(
+       `Server does not support event stream content type, it returned ` + response.headers.get("content-type")
+     );
+   }
+   if (!response.body) {
+     return;
    }
    const reader = response.body.getReader();
-   const events = [];
+   let events = [];
    const onEvent = (event) => {
      events.push(event);
    };
@@ -481,12 +502,12 @@ var HfInference = class {
        if (done)
          return;
        onChunk(value);
-       while (events.length > 0) {
-         const event = events.shift();
+       for (const event of events) {
          if (event.data.length > 0) {
            yield JSON.parse(event.data);
          }
        }
+       events = [];
      }
    } finally {
      reader.releaseLock();
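
Standalone, the upgraded error path amounts to this pattern; a minimal sketch assuming a standard `fetch` `Response` (the helper name is hypothetical):

```ts
// Prefer the API's own JSON error message (e.g. a "model is loading" notice)
// over the generic status-code error, falling back when the body isn't JSON.
async function throwOnError(response: Response): Promise<void> {
  if (response.ok) return;
  if (response.headers.get("Content-Type")?.startsWith("application/json")) {
    const output = await response.json();
    if (output.error) {
      throw new Error(output.error);
    }
  }
  throw new Error(`Server response contains error: ${response.status}`);
}
```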
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
- // src/utils/to-array.ts
+ // src/utils/toArray.ts
  function toArray(obj) {
    if (Array.isArray(obj)) {
      return obj;
@@ -360,6 +360,15 @@ var HfInference = class {
    }
    return res;
  }
+ /**
+  * This task reads some image input and outputs the text caption.
+  */
+ async imageToText(args, options) {
+   return (await this.request(args, {
+     ...options,
+     binary: true
+   }))?.[0];
+ }
  /**
   * Helper that prepares request arguments
   */
@@ -430,13 +439,24 @@ var HfInference = class {
      });
    }
    if (!response.ok) {
+     if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+       const output = await response.json();
+       if (output.error) {
+         throw new Error(output.error);
+       }
+     }
      throw new Error(`Server response contains error: ${response.status}`);
    }
    if (response.headers.get("content-type") !== "text/event-stream") {
-     throw new Error(`Server does not support event stream content type`);
+     throw new Error(
+       `Server does not support event stream content type, it returned ` + response.headers.get("content-type")
+     );
+   }
+   if (!response.body) {
+     return;
    }
    const reader = response.body.getReader();
-   const events = [];
+   let events = [];
    const onEvent = (event) => {
      events.push(event);
    };
@@ -455,12 +475,12 @@ var HfInference = class {
        if (done)
          return;
        onChunk(value);
-       while (events.length > 0) {
-         const event = events.shift();
+       for (const event of events) {
          if (event.data.length > 0) {
            yield JSON.parse(event.data);
          }
        }
+       events = [];
      }
    } finally {
      reader.releaseLock();
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@huggingface/inference",
-   "version": "1.7.0",
+   "version": "1.8.0",
    "license": "MIT",
    "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
    "description": "Typescript wrapper for the Hugging Face Inference API",
@@ -40,10 +40,9 @@
    },
    "devDependencies": {
      "@types/node": "18.13.0",
-     "tsup": "^6.6.3",
      "typescript": "4.9.5",
      "vite": "^4.1.4",
-     "vitest": "^0.29.2"
+     "vitest": "^0.29.8"
    },
    "resolutions": {},
    "scripts": {
@@ -52,8 +51,8 @@
      "lint:check": "eslint --ext .cjs,.ts .",
      "format": "prettier --write .",
      "format:check": "prettier --check .",
-     "test": "vitest run",
-     "test:browser": "vitest run --browser.name=chrome --browser.headless",
+     "test": "vitest run --config vitest.config.ts",
+     "test:browser": "vitest run --browser.name=chrome --browser.headless --config vitest.config.ts",
      "type-check": "tsc"
    }
  }
package/src/HfInference.ts CHANGED
@@ -1,4 +1,4 @@
- import { toArray } from "./utils/to-array";
+ import { toArray } from "./utils/toArray";
  import type { EventSourceMessage } from "./vendor/fetch-event-source/parse";
  import { getLines, getMessages } from "./vendor/fetch-event-source/parse";
  
@@ -299,12 +299,12 @@ export interface TextGenerationStreamReturn {
     * Complete generated text
     * Only available when the generation is finished
     */
-   generated_text?: string;
+   generated_text: string | null;
    /**
     * Generation details
     * Only available when the generation is finished
     */
-   details?: TextGenerationStreamDetails;
+   details: TextGenerationStreamDetails | null;
  }
  
  export type TokenClassificationArgs = Args & {
@@ -582,13 +582,45 @@ export type TextToImageArgs = Args & {
     */
    inputs: string;
  
+   parameters?: {
+     /**
+      * An optional negative prompt for the image generation
+      */
+     negative_prompt?: string;
+     /**
+      * The height in pixels of the generated image
+      */
+     height?: number;
+     /**
+      * The width in pixels of the generated image
+      */
+     width?: number;
+     /**
+      * The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference.
+      */
+     num_inference_steps?: number;
+     /**
+      * Guidance scale: Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, usually at the expense of lower image quality.
+      */
+     guidance_scale?: number;
+   };
+ };
+ 
+ export type TextToImageReturn = Blob;
+ 
+ export type ImageToTextArgs = Args & {
    /**
-    * An optional negative prompt for the image generation
+    * Binary image data
     */
-   negative_prompt?: string;
+   data: Blob | ArrayBuffer;
  };
  
- export type TextToImageReturn = Blob;
+ export interface ImageToTextReturn {
+   /**
+    * The generated caption
+    */
+   generated_text: string;
+ }
  
  export class HfInference {
    private readonly apiKey: string;
@@ -928,6 +960,18 @@ export class HfInference {
    return res;
  }
  
+ /**
+  * This task reads some image input and outputs the text caption.
+  */
+ public async imageToText(args: ImageToTextArgs, options?: Options): Promise<ImageToTextReturn> {
+   return (
+     await this.request<[ImageToTextReturn]>(args, {
+       ...options,
+       binary: true,
+     })
+   )?.[0];
+ }
+ 
  /**
   * Helper that prepares request arguments
   */
@@ -1038,14 +1082,27 @@ export class HfInference {
      });
    }
    if (!response.ok) {
+     if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+       const output = await response.json();
+       if (output.error) {
+         throw new Error(output.error);
+       }
+     }
+ 
      throw new Error(`Server response contains error: ${response.status}`);
    }
    if (response.headers.get("content-type") !== "text/event-stream") {
-     throw new Error(`Server does not support event stream content type`);
+     throw new Error(
+       `Server does not support event stream content type, it returned ` + response.headers.get("content-type")
+     );
+   }
+ 
+   if (!response.body) {
+     return;
    }
  
    const reader = response.body.getReader();
-   const events: EventSourceMessage[] = [];
+   let events: EventSourceMessage[] = [];
  
    const onEvent = (event: EventSourceMessage) => {
      // accumulate events in array
@@ -1065,12 +1122,12 @@ export class HfInference {
      const { done, value } = await reader.read();
      if (done) return;
      onChunk(value);
-     while (events.length > 0) {
-       const event = events.shift();
+     for (const event of events) {
        if (event.data.length > 0) {
          yield JSON.parse(event.data) as T;
        }
      }
+     events = [];
    }
  } finally {
    reader.releaseLock();
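
The loop rewrite above (drain the accumulated events with `for...of`, then reset the array) generalizes to this pattern; a sketch with hypothetical names, not the library's internals verbatim:

```ts
// Hypothetical generic drain loop: a parser may emit several complete SSE
// events per network chunk; we yield them all, then reset the buffer.
async function* drainEvents<T>(
  reader: ReadableStreamDefaultReader<Uint8Array>,
  parseChunk: (chunk: Uint8Array, onEvent: (data: string) => void) => void
): AsyncGenerator<T> {
  let pending: string[] = [];
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) return;
      parseChunk(value, (data) => pending.push(data));
      for (const data of pending) {
        if (data.length > 0) yield JSON.parse(data) as T;
      }
      // Reassigning (instead of shift() inside the loop) keeps iteration simple
      pending = [];
    }
  } finally {
    reader.releaseLock();
  }
}
```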
package/src/utils/omit.ts ADDED
@@ -0,0 +1,11 @@
+ import { pick } from "./pick";
+ import { typedInclude } from "./typedInclude";
+ 
+ /**
+  * Return copy of object, omitting blocklisted array of props
+  */
+ export function omit<T extends object, K extends keyof T>(o: T, props: K[] | K): Pick<T, Exclude<keyof T, K>> {
+   const propsArr = Array.isArray(props) ? props : [props];
+   const letsKeep = (Object.keys(o) as (keyof T)[]).filter((prop) => !typedInclude(propsArr, prop));
+   return pick(o, letsKeep);
+ }
package/src/utils/pick.ts ADDED
@@ -0,0 +1,16 @@
+ /**
+  * Return copy of object, only keeping allowlisted properties.
+  *
+  * This doesn't add {p: undefined} anymore, for props not in the o object.
+  */
+ export function pick<T, K extends keyof T>(o: T, props: K[] | ReadonlyArray<K>): Pick<T, K> {
+   // inspired by stackoverflow.com/questions/25553910/one-liner-to-take-some-properties-from-object-in-es-6
+   return Object.assign(
+     {},
+     ...props.map((prop) => {
+       if (o[prop] !== undefined) {
+         return { [prop]: o[prop] };
+       }
+     })
+   );
+ }
package/src/utils/typedInclude.ts ADDED
@@ -0,0 +1,3 @@
+ export function typedInclude<V, T extends V>(arr: readonly T[], v: V): v is T {
+   return arr.includes(v as T);
+ }
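
Taken together, the three new utilities compose like this; a hypothetical usage sketch (the relative imports assume a file alongside them in `src/utils`):

```ts
import { omit } from "./omit";
import { pick } from "./pick";
import { typedInclude } from "./typedInclude";

const args = { model: "gpt2", inputs: "hello", parameters: { top_k: 5 } };

const body = omit(args, "model"); // { inputs, parameters }
const routing = pick(args, ["model"]); // { model: "gpt2" }, with no { p: undefined } filler

// typedInclude narrows a plain string to the array's literal union
const KNOWN_KEYS = ["model", "inputs"] as const;
const key: string = process.argv[2] ?? "";
if (typedInclude(KNOWN_KEYS, key)) {
  console.log(pick(args, [key])); // key: "model" | "inputs"
}
```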
package/src/utils/env-predicates.ts DELETED
@@ -1,7 +0,0 @@
- const isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
- 
- const isWebWorker =
-   typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
- 
- export const isFrontend = isBrowser || isWebWorker;
- export const isBackend = !isBrowser && !isWebWorker;