@huggingface/inference 1.7.1 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,8 +4,6 @@ A Typescript powered wrapper for the Hugging Face Inference API. Learn more abou
 
 Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README) or try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference).
 
-
-
 ## Install
 
 ```console
@@ -18,7 +16,7 @@ pnpm add @huggingface/inference
 
 ## Usage
 
-❗**Important note:** Using an API key is optional to get started, however you will be rate limited eventually. Join [Hugging Face](https://huggingface.co/join) and then visit [access tokens](https://huggingface.co/settings/tokens) to generate your API key for **free**.
+❗**Important note:** Using an API key is optional to get started, however you will be rate limited eventually. Join [Hugging Face](https://huggingface.co/join) and then visit [access tokens](https://huggingface.co/settings/tokens) to generate your API key for **free**.
 
 Your API key should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the API key.
 
@@ -154,8 +152,15 @@ await hf.imageSegmentation({
 
 await hf.textToImage({
   inputs: 'award winning high resolution photo of a giant tortoise/((ladybird)) hybrid, [trending on artstation]',
-  negative_prompt: 'blurry',
   model: 'stabilityai/stable-diffusion-2',
+  parameters: {
+    negative_prompt: 'blurry',
+  }
+})
+
+await hf.imageToText({
+  data: readFileSync('test/cats.png'),
+  model: 'nlpconnect/vit-gpt2-image-captioning'
 })
 ```
 
@@ -188,6 +193,7 @@ await hf.textToImage({
 - [x] Object detection
 - [x] Image segmentation
 - [x] Text to image
+- [x] Image to text
 
 ## Running tests
 
package/dist/index.d.ts CHANGED
@@ -273,12 +273,12 @@ interface TextGenerationStreamReturn {
      * Complete generated text
      * Only available when the generation is finished
      */
-    generated_text?: string;
+    generated_text: string | null;
     /**
      * Generation details
      * Only available when the generation is finished
      */
-    details?: TextGenerationStreamDetails;
+    details: TextGenerationStreamDetails | null;
 }
 type TokenClassificationArgs = Args & {
     /**
@@ -526,12 +526,42 @@ type TextToImageArgs = Args & {
      * The text to generate an image from
      */
     inputs: string;
+    parameters?: {
+        /**
+         * An optional negative prompt for the image generation
+         */
+        negative_prompt?: string;
+        /**
+         * The height in pixels of the generated image
+         */
+        height?: number;
+        /**
+         * The width in pixels of the generated image
+         */
+        width?: number;
+        /**
+         * The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference.
+         */
+        num_inference_steps?: number;
+        /**
+         * Guidance scale: Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, usually at the expense of lower image quality.
+         */
+        guidance_scale?: number;
+    };
+};
+type TextToImageReturn = Blob;
+type ImageToTextArgs = Args & {
     /**
-     * An optional negative prompt for the image generation
+     * Binary image data
     */
-    negative_prompt?: string;
+    data: Blob | ArrayBuffer;
 };
-type TextToImageReturn = Blob;
+interface ImageToTextReturn {
+    /**
+     * The generated caption
+     */
+    generated_text: string;
+}
 declare class HfInference {
     private readonly apiKey;
     private readonly defaultOptions;
@@ -615,6 +645,10 @@ declare class HfInference {
      * Recommended model: stabilityai/stable-diffusion-2
      */
     textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageReturn>;
+    /**
+     * This task reads some image input and outputs the text caption.
+     */
+    imageToText(args: ImageToTextArgs, options?: Options): Promise<ImageToTextReturn>;
     /**
      * Helper that prepares request arguments
      */
@@ -640,4 +674,4 @@ declare class HfInference {
     }): AsyncGenerator<T>;
 }
 
-export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextGenerationStreamBestOfSequence, TextGenerationStreamDetails, TextGenerationStreamFinishReason, TextGenerationStreamPrefillToken, TextGenerationStreamReturn, TextGenerationStreamToken, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
+export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ImageToTextArgs, ImageToTextReturn, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextGenerationStreamBestOfSequence, TextGenerationStreamDetails, TextGenerationStreamFinishReason, TextGenerationStreamPrefillToken, TextGenerationStreamReturn, TextGenerationStreamToken, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
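Note the breaking edge in `TextGenerationStreamReturn`: `generated_text` and `details` went from optional to always present but nullable, so consumers should now compare against `null` rather than `undefined`. A sketch of the intended narrowing, assuming the package's documented `textGenerationStream` method (the token and model choice are illustrative):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder access token

for await (const output of hf.textGenerationStream({
  model: "google/flan-t5-xxl", // illustrative model choice
  inputs: "repeat: one two three",
})) {
  process.stdout.write(output.token.text);
  // generated_text is non-null only on the final streamed event.
  if (output.generated_text !== null) {
    console.log(`\ncomplete text: ${output.generated_text}`);
  }
}
```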
package/dist/index.js CHANGED
@@ -1,3 +1,4 @@
+"use strict";
 var __defProp = Object.defineProperty;
 var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
 var __getOwnPropNames = Object.getOwnPropertyNames;
@@ -24,7 +25,7 @@ __export(src_exports, {
 });
 module.exports = __toCommonJS(src_exports);
 
-// src/utils/to-array.ts
+// src/utils/toArray.ts
 function toArray(obj) {
   if (Array.isArray(obj)) {
     return obj;
@@ -386,6 +387,15 @@ var HfInference = class {
     }
     return res;
   }
+  /**
+   * This task reads some image input and outputs the text caption.
+   */
+  async imageToText(args, options) {
+    return (await this.request(args, {
+      ...options,
+      binary: true
+    }))?.[0];
+  }
   /**
    * Helper that prepares request arguments
    */
@@ -465,10 +475,15 @@ var HfInference = class {
       throw new Error(`Server response contains error: ${response.status}`);
     }
     if (response.headers.get("content-type") !== "text/event-stream") {
-      throw new Error(`Server does not support event stream content type`);
+      throw new Error(
+        `Server does not support event stream content type, it returned ` + response.headers.get("content-type")
+      );
+    }
+    if (!response.body) {
+      return;
     }
     const reader = response.body.getReader();
-    const events = [];
+    let events = [];
     const onEvent = (event) => {
       events.push(event);
     };
@@ -487,12 +502,12 @@ var HfInference = class {
         if (done)
           return;
         onChunk(value);
-        while (events.length > 0) {
-          const event = events.shift();
+        for (const event of events) {
           if (event.data.length > 0) {
             yield JSON.parse(event.data);
           }
         }
+        events = [];
       }
     } finally {
       reader.releaseLock();
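Two of the bundled changes are guards (a content-type error that now reports the actual header, and an early return when `response.body` is missing); the loop rewrite is subtler. Draining with `for...of` plus a reset avoids `Array.prototype.shift`, whose TypeScript return type includes `undefined` and which mutates the buffer while it is being consumed. A standalone sketch of the accumulate-then-drain pattern (names are illustrative, not the package's internals):

```ts
interface Message {
  data: string;
}

let buffer: Message[] = [];

// The SSE parser pushes each completed event here as network chunks arrive.
const onEvent = (event: Message) => {
  buffer.push(event);
};

// After each chunk, yield everything buffered so far, then reset in one step.
function* drain(): Generator<unknown> {
  for (const event of buffer) {
    if (event.data.length > 0) {
      yield JSON.parse(event.data);
    }
  }
  buffer = []; // equivalent to repeated shift(), without undefined-typed reads
}

onEvent({ data: '{"token": 1}' });
onEvent({ data: '{"token": 2}' });
for (const value of drain()) {
  console.log(value); // { token: 1 }, then { token: 2 }
}
```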
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
-// src/utils/to-array.ts
+// src/utils/toArray.ts
 function toArray(obj) {
   if (Array.isArray(obj)) {
     return obj;
@@ -360,6 +360,15 @@ var HfInference = class {
     }
     return res;
   }
+  /**
+   * This task reads some image input and outputs the text caption.
+   */
+  async imageToText(args, options) {
+    return (await this.request(args, {
+      ...options,
+      binary: true
+    }))?.[0];
+  }
   /**
    * Helper that prepares request arguments
    */
@@ -439,10 +448,15 @@ var HfInference = class {
       throw new Error(`Server response contains error: ${response.status}`);
     }
     if (response.headers.get("content-type") !== "text/event-stream") {
-      throw new Error(`Server does not support event stream content type`);
+      throw new Error(
+        `Server does not support event stream content type, it returned ` + response.headers.get("content-type")
+      );
+    }
+    if (!response.body) {
+      return;
     }
     const reader = response.body.getReader();
-    const events = [];
+    let events = [];
     const onEvent = (event) => {
       events.push(event);
     };
@@ -461,12 +475,12 @@ var HfInference = class {
         if (done)
           return;
         onChunk(value);
-        while (events.length > 0) {
-          const event = events.shift();
+        for (const event of events) {
           if (event.data.length > 0) {
             yield JSON.parse(event.data);
           }
         }
+        events = [];
       }
     } finally {
       reader.releaseLock();
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@huggingface/inference",
-  "version": "1.7.1",
+  "version": "1.8.0",
   "license": "MIT",
   "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
   "description": "Typescript wrapper for the Hugging Face Inference API",
@@ -40,10 +40,9 @@
   },
   "devDependencies": {
     "@types/node": "18.13.0",
-    "tsup": "^6.6.3",
     "typescript": "4.9.5",
     "vite": "^4.1.4",
-    "vitest": "^0.29.2"
+    "vitest": "^0.29.8"
   },
   "resolutions": {},
   "scripts": {
@@ -52,8 +51,8 @@
     "lint:check": "eslint --ext .cjs,.ts .",
     "format": "prettier --write .",
     "format:check": "prettier --check .",
-    "test": "vitest run",
-    "test:browser": "vitest run --browser.name=chrome --browser.headless",
+    "test": "vitest run --config vitest.config.ts",
+    "test:browser": "vitest run --browser.name=chrome --browser.headless --config vitest.config.ts",
     "type-check": "tsc"
   }
 }
@@ -1,4 +1,4 @@
-import { toArray } from "./utils/to-array";
+import { toArray } from "./utils/toArray";
 import type { EventSourceMessage } from "./vendor/fetch-event-source/parse";
 import { getLines, getMessages } from "./vendor/fetch-event-source/parse";
 
@@ -299,12 +299,12 @@ export interface TextGenerationStreamReturn {
    * Complete generated text
    * Only available when the generation is finished
    */
-  generated_text?: string;
+  generated_text: string | null;
   /**
    * Generation details
    * Only available when the generation is finished
    */
-  details?: TextGenerationStreamDetails;
+  details: TextGenerationStreamDetails | null;
 }
 
 export type TokenClassificationArgs = Args & {
@@ -582,13 +582,45 @@ export type TextToImageArgs = Args & {
    */
   inputs: string;
 
+  parameters?: {
+    /**
+     * An optional negative prompt for the image generation
+     */
+    negative_prompt?: string;
+    /**
+     * The height in pixels of the generated image
+     */
+    height?: number;
+    /**
+     * The width in pixels of the generated image
+     */
+    width?: number;
+    /**
+     * The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference.
+     */
+    num_inference_steps?: number;
+    /**
+     * Guidance scale: Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, usually at the expense of lower image quality.
+     */
+    guidance_scale?: number;
+  };
+};
+
+export type TextToImageReturn = Blob;
+
+export type ImageToTextArgs = Args & {
   /**
-   * An optional negative prompt for the image generation
+   * Binary image data
   */
-  negative_prompt?: string;
+  data: Blob | ArrayBuffer;
 };
 
-export type TextToImageReturn = Blob;
+export interface ImageToTextReturn {
+  /**
+   * The generated caption
+   */
+  generated_text: string;
+}
 
 export class HfInference {
   private readonly apiKey: string;
@@ -928,6 +960,18 @@ export class HfInference {
     return res;
   }
 
+  /**
+   * This task reads some image input and outputs the text caption.
+   */
+  public async imageToText(args: ImageToTextArgs, options?: Options): Promise<ImageToTextReturn> {
+    return (
+      await this.request<[ImageToTextReturn]>(args, {
+        ...options,
+        binary: true,
+      })
+    )?.[0];
+  }
+
   /**
    * Helper that prepares request arguments
    */
@@ -1048,11 +1092,17 @@ export class HfInference {
       throw new Error(`Server response contains error: ${response.status}`);
     }
     if (response.headers.get("content-type") !== "text/event-stream") {
-      throw new Error(`Server does not support event stream content type`);
+      throw new Error(
+        `Server does not support event stream content type, it returned ` + response.headers.get("content-type")
+      );
+    }
+
+    if (!response.body) {
+      return;
     }
 
     const reader = response.body.getReader();
-    const events: EventSourceMessage[] = [];
+    let events: EventSourceMessage[] = [];
 
     const onEvent = (event: EventSourceMessage) => {
       // accumulate events in array
@@ -1072,12 +1122,12 @@ export class HfInference {
       const { done, value } = await reader.read();
       if (done) return;
       onChunk(value);
-      while (events.length > 0) {
-        const event = events.shift();
+      for (const event of events) {
        if (event.data.length > 0) {
          yield JSON.parse(event.data) as T;
        }
      }
+      events = [];
     }
   } finally {
     reader.releaseLock();
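The new `imageToText` wrapper posts the binary payload (`binary: true`) and unwraps the first element of the array the task returns. For reference, a sketch of the roughly equivalent raw HTTP call it abstracts away, using the hosted Inference API endpoint; the token and file path are placeholders, and the response shape shown is the documented array-of-candidates form:

```ts
import { readFileSync } from "node:fs";

const model = "nlpconnect/vit-gpt2-image-captioning";
const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
  method: "POST",
  headers: { Authorization: "Bearer hf_..." }, // placeholder access token
  body: readFileSync("cats.png"), // sent as-is, mirroring binary: true
});

// The task answers with an array of candidates, e.g.
// [{ "generated_text": "a cat laying on top of a couch" }]
// imageToText() returns only the first element.
const [first] = (await response.json()) as Array<{ generated_text: string }>;
console.log(first.generated_text);
```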
@@ -0,0 +1,11 @@
+import { pick } from "./pick";
+import { typedInclude } from "./typedInclude";
+
+/**
+ * Return copy of object, omitting blocklisted array of props
+ */
+export function omit<T extends object, K extends keyof T>(o: T, props: K[] | K): Pick<T, Exclude<keyof T, K>> {
+  const propsArr = Array.isArray(props) ? props : [props];
+  const letsKeep = (Object.keys(o) as (keyof T)[]).filter((prop) => !typedInclude(propsArr, prop));
+  return pick(o, letsKeep);
+}
@@ -0,0 +1,16 @@
+/**
+ * Return copy of object, only keeping allowlisted properties.
+ *
+ * This doesn't add {p: undefined} anymore, for props not in the o object.
+ */
+export function pick<T, K extends keyof T>(o: T, props: K[] | ReadonlyArray<K>): Pick<T, K> {
+  // inspired by stackoverflow.com/questions/25553910/one-liner-to-take-some-properties-from-object-in-es-6
+  return Object.assign(
+    {},
+    ...props.map((prop) => {
+      if (o[prop] !== undefined) {
+        return { [prop]: o[prop] };
+      }
+    })
+  );
+}
@@ -0,0 +1,3 @@
+export function typedInclude<V, T extends V>(arr: readonly T[], v: V): v is T {
+  return arr.includes(v as T);
+}
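These three new utilities compose: `typedInclude` is a type-narrowing `Array.prototype.includes`, `pick` keeps an allowlist of keys (skipping `undefined` values entirely), and `omit` inverts `pick` via `typedInclude`. A quick illustration of the types they produce (a hypothetical call site, not code from the package):

```ts
import { omit } from "./omit";
import { pick } from "./pick";
import { typedInclude } from "./typedInclude";

const args = { model: "stabilityai/stable-diffusion-2", inputs: "a tortoise", extra: undefined };

const payload = pick(args, ["model", "inputs"]);
// => { model: "stabilityai/stable-diffusion-2", inputs: "a tortoise" }
// An undefined prop would be dropped, not set to { extra: undefined }.

const rest = omit(args, "model");
// => typed as Pick<typeof args, "inputs" | "extra">

const tasks = ["text-to-image", "image-to-text"] as const;
const candidate: string = "image-to-text";
if (typedInclude(tasks, candidate)) {
  // candidate is narrowed to "text-to-image" | "image-to-text" here
  console.log(candidate);
}
```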
@@ -1,7 +0,0 @@
-const isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
-
-const isWebWorker =
-  typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
-
-export const isFrontend = isBrowser || isWebWorker;
-export const isBackend = !isBrowser && !isWebWorker;