@huggingface/inference 3.0.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/dist/index.cjs +162 -69
  2. package/dist/index.js +162 -69
  3. package/dist/src/providers/fal-ai.d.ts.map +1 -1
  4. package/dist/src/providers/replicate.d.ts.map +1 -1
  5. package/dist/src/tasks/audio/audioClassification.d.ts +4 -18
  6. package/dist/src/tasks/audio/audioClassification.d.ts.map +1 -1
  7. package/dist/src/tasks/audio/audioToAudio.d.ts +10 -9
  8. package/dist/src/tasks/audio/audioToAudio.d.ts.map +1 -1
  9. package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts +3 -12
  10. package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts.map +1 -1
  11. package/dist/src/tasks/audio/textToSpeech.d.ts +4 -8
  12. package/dist/src/tasks/audio/textToSpeech.d.ts.map +1 -1
  13. package/dist/src/tasks/audio/utils.d.ts +11 -0
  14. package/dist/src/tasks/audio/utils.d.ts.map +1 -0
  15. package/dist/src/tasks/cv/imageClassification.d.ts +3 -17
  16. package/dist/src/tasks/cv/imageClassification.d.ts.map +1 -1
  17. package/dist/src/tasks/cv/imageSegmentation.d.ts +3 -21
  18. package/dist/src/tasks/cv/imageSegmentation.d.ts.map +1 -1
  19. package/dist/src/tasks/cv/imageToImage.d.ts +3 -49
  20. package/dist/src/tasks/cv/imageToImage.d.ts.map +1 -1
  21. package/dist/src/tasks/cv/imageToText.d.ts +3 -12
  22. package/dist/src/tasks/cv/imageToText.d.ts.map +1 -1
  23. package/dist/src/tasks/cv/objectDetection.d.ts +3 -26
  24. package/dist/src/tasks/cv/objectDetection.d.ts.map +1 -1
  25. package/dist/src/tasks/cv/textToImage.d.ts +3 -38
  26. package/dist/src/tasks/cv/textToImage.d.ts.map +1 -1
  27. package/dist/src/tasks/cv/textToVideo.d.ts +6 -0
  28. package/dist/src/tasks/cv/textToVideo.d.ts.map +1 -0
  29. package/dist/src/tasks/cv/utils.d.ts +11 -0
  30. package/dist/src/tasks/cv/utils.d.ts.map +1 -0
  31. package/dist/src/tasks/cv/zeroShotImageClassification.d.ts +7 -15
  32. package/dist/src/tasks/cv/zeroShotImageClassification.d.ts.map +1 -1
  33. package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts +5 -28
  34. package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts.map +1 -1
  35. package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts +5 -20
  36. package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts.map +1 -1
  37. package/dist/src/tasks/nlp/fillMask.d.ts +2 -21
  38. package/dist/src/tasks/nlp/fillMask.d.ts.map +1 -1
  39. package/dist/src/tasks/nlp/questionAnswering.d.ts +3 -25
  40. package/dist/src/tasks/nlp/questionAnswering.d.ts.map +1 -1
  41. package/dist/src/tasks/nlp/sentenceSimilarity.d.ts +2 -13
  42. package/dist/src/tasks/nlp/sentenceSimilarity.d.ts.map +1 -1
  43. package/dist/src/tasks/nlp/summarization.d.ts +2 -42
  44. package/dist/src/tasks/nlp/summarization.d.ts.map +1 -1
  45. package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts +3 -31
  46. package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts.map +1 -1
  47. package/dist/src/tasks/nlp/textClassification.d.ts +2 -16
  48. package/dist/src/tasks/nlp/textClassification.d.ts.map +1 -1
  49. package/dist/src/tasks/nlp/tokenClassification.d.ts +2 -45
  50. package/dist/src/tasks/nlp/tokenClassification.d.ts.map +1 -1
  51. package/dist/src/tasks/nlp/translation.d.ts +2 -13
  52. package/dist/src/tasks/nlp/translation.d.ts.map +1 -1
  53. package/dist/src/tasks/nlp/zeroShotClassification.d.ts +2 -22
  54. package/dist/src/tasks/nlp/zeroShotClassification.d.ts.map +1 -1
  55. package/dist/src/types.d.ts +4 -0
  56. package/dist/src/types.d.ts.map +1 -1
  57. package/package.json +2 -2
  58. package/src/providers/fal-ai.ts +4 -0
  59. package/src/providers/replicate.ts +3 -0
  60. package/src/tasks/audio/audioClassification.ts +7 -22
  61. package/src/tasks/audio/audioToAudio.ts +43 -23
  62. package/src/tasks/audio/automaticSpeechRecognition.ts +35 -23
  63. package/src/tasks/audio/textToSpeech.ts +8 -14
  64. package/src/tasks/audio/utils.ts +18 -0
  65. package/src/tasks/cv/imageClassification.ts +5 -20
  66. package/src/tasks/cv/imageSegmentation.ts +5 -24
  67. package/src/tasks/cv/imageToImage.ts +4 -52
  68. package/src/tasks/cv/imageToText.ts +6 -15
  69. package/src/tasks/cv/objectDetection.ts +5 -30
  70. package/src/tasks/cv/textToImage.ts +14 -50
  71. package/src/tasks/cv/textToVideo.ts +67 -0
  72. package/src/tasks/cv/utils.ts +13 -0
  73. package/src/tasks/cv/zeroShotImageClassification.ts +32 -31
  74. package/src/tasks/multimodal/documentQuestionAnswering.ts +25 -43
  75. package/src/tasks/multimodal/visualQuestionAnswering.ts +20 -36
  76. package/src/tasks/nlp/fillMask.ts +2 -22
  77. package/src/tasks/nlp/questionAnswering.ts +22 -36
  78. package/src/tasks/nlp/sentenceSimilarity.ts +12 -15
  79. package/src/tasks/nlp/summarization.ts +2 -43
  80. package/src/tasks/nlp/tableQuestionAnswering.ts +25 -41
  81. package/src/tasks/nlp/textClassification.ts +3 -18
  82. package/src/tasks/nlp/tokenClassification.ts +2 -47
  83. package/src/tasks/nlp/translation.ts +3 -17
  84. package/src/tasks/nlp/zeroShotClassification.ts +2 -24
  85. package/src/types.ts +7 -1
package/dist/index.js CHANGED
@@ -61,6 +61,10 @@ var FAL_AI_SUPPORTED_MODEL_IDS = {
61
61
  },
62
62
  "automatic-speech-recognition": {
63
63
  "openai/whisper-large-v3": "fal-ai/whisper"
64
+ },
65
+ "text-to-video": {
66
+ "genmo/mochi-1-preview": "fal-ai/mochi-v1",
67
+ "tencent/HunyuanVideo": "fal-ai/hunyuan-video"
64
68
  }
65
69
  };
66
70
 
@@ -73,6 +77,9 @@ var REPLICATE_SUPPORTED_MODEL_IDS = {
73
77
  },
74
78
  "text-to-speech": {
75
79
  "OuteAI/OuteTTS-0.3-500M": "jbilcke/oute-tts:39a59319327b27327fa3095149c5a746e7f2aee18c75055c3368237a6503cd26"
80
+ },
81
+ "text-to-video": {
82
+ "genmo/mochi-1-preview": "genmoai/mochi-1:1944af04d098ef69bed7f9d335d102e652203f268ec4aaa2d836f6217217e460"
76
83
  }
77
84
  };
78
85
 
@@ -538,9 +545,42 @@ var InferenceOutputError = class extends TypeError {
538
545
  }
539
546
  };
540
547
 
548
+ // src/utils/pick.ts
549
+ function pick(o, props) {
550
+ return Object.assign(
551
+ {},
552
+ ...props.map((prop) => {
553
+ if (o[prop] !== void 0) {
554
+ return { [prop]: o[prop] };
555
+ }
556
+ })
557
+ );
558
+ }
559
+
560
+ // src/utils/typedInclude.ts
561
+ function typedInclude(arr, v) {
562
+ return arr.includes(v);
563
+ }
564
+
565
+ // src/utils/omit.ts
566
+ function omit(o, props) {
567
+ const propsArr = Array.isArray(props) ? props : [props];
568
+ const letsKeep = Object.keys(o).filter((prop) => !typedInclude(propsArr, prop));
569
+ return pick(o, letsKeep);
570
+ }
571
+
572
+ // src/tasks/audio/utils.ts
573
+ function preparePayload(args) {
574
+ return "data" in args ? args : {
575
+ ...omit(args, "inputs"),
576
+ data: args.inputs
577
+ };
578
+ }
579
+
541
580
  // src/tasks/audio/audioClassification.ts
542
581
  async function audioClassification(args, options) {
543
- const res = await request(args, {
582
+ const payload = preparePayload(args);
583
+ const res = await request(payload, {
544
584
  ...options,
545
585
  taskHint: "audio-classification"
546
586
  });
@@ -566,15 +606,8 @@ function base64FromBytes(arr) {
566
606
 
567
607
  // src/tasks/audio/automaticSpeechRecognition.ts
568
608
  async function automaticSpeechRecognition(args, options) {
569
- if (args.provider === "fal-ai") {
570
- const contentType = args.data instanceof Blob ? args.data.type : "audio/mpeg";
571
- const base64audio = base64FromBytes(
572
- new Uint8Array(args.data instanceof ArrayBuffer ? args.data : await args.data.arrayBuffer())
573
- );
574
- args.audio_url = `data:${contentType};base64,${base64audio}`;
575
- delete args.data;
576
- }
577
- const res = await request(args, {
609
+ const payload = await buildPayload(args);
610
+ const res = await request(payload, {
578
611
  ...options,
579
612
  taskHint: "automatic-speech-recognition"
580
613
  });
@@ -584,6 +617,32 @@ async function automaticSpeechRecognition(args, options) {
584
617
  }
585
618
  return res;
586
619
  }
620
+ var FAL_AI_SUPPORTED_BLOB_TYPES = ["audio/mpeg", "audio/mp4", "audio/wav", "audio/x-wav"];
621
+ async function buildPayload(args) {
622
+ if (args.provider === "fal-ai") {
623
+ const blob = "data" in args && args.data instanceof Blob ? args.data : "inputs" in args ? args.inputs : void 0;
624
+ const contentType = blob?.type;
625
+ if (!contentType) {
626
+ throw new Error(
627
+ `Unable to determine the input's content-type. Make sure your are passing a Blob when using provider fal-ai.`
628
+ );
629
+ }
630
+ if (!FAL_AI_SUPPORTED_BLOB_TYPES.includes(contentType)) {
631
+ throw new Error(
632
+ `Provider fal-ai does not support blob type ${contentType} - supported content types are: ${FAL_AI_SUPPORTED_BLOB_TYPES.join(
633
+ ", "
634
+ )}`
635
+ );
636
+ }
637
+ const base64audio = base64FromBytes(new Uint8Array(await blob.arrayBuffer()));
638
+ return {
639
+ ..."data" in args ? omit(args, "data") : omit(args, "inputs"),
640
+ audio_url: `data:${contentType};base64,${base64audio}`
641
+ };
642
+ } else {
643
+ return preparePayload(args);
644
+ }
645
+ }
587
646
 
588
647
  // src/tasks/audio/textToSpeech.ts
589
648
  async function textToSpeech(args, options) {
@@ -591,6 +650,9 @@ async function textToSpeech(args, options) {
591
650
  ...options,
592
651
  taskHint: "text-to-speech"
593
652
  });
653
+ if (res instanceof Blob) {
654
+ return res;
655
+ }
594
656
  if (res && typeof res === "object") {
595
657
  if ("output" in res) {
596
658
  if (typeof res.output === "string") {
@@ -604,31 +666,39 @@ async function textToSpeech(args, options) {
604
666
  }
605
667
  }
606
668
  }
607
- const isValidOutput = res && res instanceof Blob;
608
- if (!isValidOutput) {
609
- throw new InferenceOutputError("Expected Blob");
610
- }
611
- return res;
669
+ throw new InferenceOutputError("Expected Blob or object with output");
612
670
  }
613
671
 
614
672
  // src/tasks/audio/audioToAudio.ts
615
673
  async function audioToAudio(args, options) {
616
- const res = await request(args, {
674
+ const payload = preparePayload(args);
675
+ const res = await request(payload, {
617
676
  ...options,
618
677
  taskHint: "audio-to-audio"
619
678
  });
620
- const isValidOutput = Array.isArray(res) && res.every(
621
- (x) => typeof x.label === "string" && typeof x.blob === "string" && typeof x["content-type"] === "string"
622
- );
623
- if (!isValidOutput) {
624
- throw new InferenceOutputError("Expected Array<{label: string, blob: string, content-type: string}>");
679
+ return validateOutput(res);
680
+ }
681
+ function validateOutput(output) {
682
+ if (!Array.isArray(output)) {
683
+ throw new InferenceOutputError("Expected Array");
625
684
  }
626
- return res;
685
+ if (!output.every((elem) => {
686
+ return typeof elem === "object" && elem && "label" in elem && typeof elem.label === "string" && "content-type" in elem && typeof elem["content-type"] === "string" && "blob" in elem && typeof elem.blob === "string";
687
+ })) {
688
+ throw new InferenceOutputError("Expected Array<{label: string, audio: Blob}>");
689
+ }
690
+ return output;
691
+ }
692
+
693
+ // src/tasks/cv/utils.ts
694
+ function preparePayload2(args) {
695
+ return "data" in args ? args : { ...omit(args, "inputs"), data: args.inputs };
627
696
  }
628
697
 
629
698
  // src/tasks/cv/imageClassification.ts
630
699
  async function imageClassification(args, options) {
631
- const res = await request(args, {
700
+ const payload = preparePayload2(args);
701
+ const res = await request(payload, {
632
702
  ...options,
633
703
  taskHint: "image-classification"
634
704
  });
@@ -641,7 +711,8 @@ async function imageClassification(args, options) {
641
711
 
642
712
  // src/tasks/cv/imageSegmentation.ts
643
713
  async function imageSegmentation(args, options) {
644
- const res = await request(args, {
714
+ const payload = preparePayload2(args);
715
+ const res = await request(payload, {
645
716
  ...options,
646
717
  taskHint: "image-segmentation"
647
718
  });
@@ -654,7 +725,8 @@ async function imageSegmentation(args, options) {
654
725
 
655
726
  // src/tasks/cv/imageToText.ts
656
727
  async function imageToText(args, options) {
657
- const res = (await request(args, {
728
+ const payload = preparePayload2(args);
729
+ const res = (await request(payload, {
658
730
  ...options,
659
731
  taskHint: "image-to-text"
660
732
  }))?.[0];
@@ -666,7 +738,8 @@ async function imageToText(args, options) {
666
738
 
667
739
  // src/tasks/cv/objectDetection.ts
668
740
  async function objectDetection(args, options) {
669
- const res = await request(args, {
741
+ const payload = preparePayload2(args);
742
+ const res = await request(payload, {
670
743
  ...options,
671
744
  taskHint: "object-detection"
672
745
  });
@@ -683,15 +756,13 @@ async function objectDetection(args, options) {
683
756
 
684
757
  // src/tasks/cv/textToImage.ts
685
758
  async function textToImage(args, options) {
686
- if (args.provider === "together" || args.provider === "fal-ai") {
687
- args.prompt = args.inputs;
688
- delete args.inputs;
689
- args.response_format = "base64";
690
- } else if (args.provider === "replicate") {
691
- args.prompt = args.inputs;
692
- delete args.inputs;
693
- }
694
- const res = await request(args, {
759
+ const payload = args.provider === "together" || args.provider === "fal-ai" || args.provider === "replicate" ? {
760
+ ...omit(args, ["inputs", "parameters"]),
761
+ ...args.parameters,
762
+ ...args.provider !== "replicate" ? { response_format: "base64" } : void 0,
763
+ prompt: args.inputs
764
+ } : args;
765
+ const res = await request(payload, {
695
766
  ...options,
696
767
  taskHint: "text-to-image"
697
768
  });
@@ -748,18 +819,30 @@ async function imageToImage(args, options) {
748
819
  }
749
820
 
750
821
  // src/tasks/cv/zeroShotImageClassification.ts
751
- async function zeroShotImageClassification(args, options) {
752
- const reqArgs = {
753
- ...args,
754
- inputs: {
755
- image: base64FromBytes(
756
- new Uint8Array(
757
- args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer()
822
+ async function preparePayload3(args) {
823
+ if (args.inputs instanceof Blob) {
824
+ return {
825
+ ...args,
826
+ inputs: {
827
+ image: base64FromBytes(new Uint8Array(await args.inputs.arrayBuffer()))
828
+ }
829
+ };
830
+ } else {
831
+ return {
832
+ ...args,
833
+ inputs: {
834
+ image: base64FromBytes(
835
+ new Uint8Array(
836
+ args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer()
837
+ )
758
838
  )
759
- )
760
- }
761
- };
762
- const res = await request(reqArgs, {
839
+ }
840
+ };
841
+ }
842
+ }
843
+ async function zeroShotImageClassification(args, options) {
844
+ const payload = await preparePayload3(args);
845
+ const res = await request(payload, {
763
846
  ...options,
764
847
  taskHint: "zero-shot-image-classification"
765
848
  });
@@ -848,17 +931,19 @@ async function questionAnswering(args, options) {
848
931
  ...options,
849
932
  taskHint: "question-answering"
850
933
  });
851
- const isValidOutput = typeof res === "object" && !!res && typeof res.answer === "string" && typeof res.end === "number" && typeof res.score === "number" && typeof res.start === "number";
934
+ const isValidOutput = Array.isArray(res) ? res.every(
935
+ (elem) => typeof elem === "object" && !!elem && typeof elem.answer === "string" && typeof elem.end === "number" && typeof elem.score === "number" && typeof elem.start === "number"
936
+ ) : typeof res === "object" && !!res && typeof res.answer === "string" && typeof res.end === "number" && typeof res.score === "number" && typeof res.start === "number";
852
937
  if (!isValidOutput) {
853
- throw new InferenceOutputError("Expected {answer: string, end: number, score: number, start: number}");
938
+ throw new InferenceOutputError("Expected Array<{answer: string, end: number, score: number, start: number}>");
854
939
  }
855
- return res;
940
+ return Array.isArray(res) ? res[0] : res;
856
941
  }
857
942
 
858
943
  // src/tasks/nlp/sentenceSimilarity.ts
859
944
  async function sentenceSimilarity(args, options) {
860
945
  const defaultTask = args.model ? await getDefaultTask(args.model, args.accessToken, options) : void 0;
861
- const res = await request(args, {
946
+ const res = await request(prepareInput(args), {
862
947
  ...options,
863
948
  taskHint: "sentence-similarity",
864
949
  ...defaultTask === "feature-extraction" && { forceTask: "sentence-similarity" }
@@ -869,6 +954,13 @@ async function sentenceSimilarity(args, options) {
869
954
  }
870
955
  return res;
871
956
  }
957
+ function prepareInput(args) {
958
+ return {
959
+ ...omit(args, ["inputs", "parameters"]),
960
+ inputs: { ...omit(args.inputs, "sourceSentence") },
961
+ parameters: { source_sentence: args.inputs.sourceSentence, ...args.parameters }
962
+ };
963
+ }
872
964
 
873
965
  // src/tasks/nlp/summarization.ts
874
966
  async function summarization(args, options) {
@@ -889,13 +981,18 @@ async function tableQuestionAnswering(args, options) {
889
981
  ...options,
890
982
  taskHint: "table-question-answering"
891
983
  });
892
- const isValidOutput = typeof res?.aggregator === "string" && typeof res.answer === "string" && Array.isArray(res.cells) && res.cells.every((x) => typeof x === "string") && Array.isArray(res.coordinates) && res.coordinates.every((coord) => Array.isArray(coord) && coord.every((x) => typeof x === "number"));
984
+ const isValidOutput = Array.isArray(res) ? res.every((elem) => validate(elem)) : validate(res);
893
985
  if (!isValidOutput) {
894
986
  throw new InferenceOutputError(
895
987
  "Expected {aggregator: string, answer: string, cells: string[], coordinates: number[][]}"
896
988
  );
897
989
  }
898
- return res;
990
+ return Array.isArray(res) ? res[0] : res;
991
+ }
992
+ function validate(elem) {
993
+ return typeof elem === "object" && !!elem && "aggregator" in elem && typeof elem.aggregator === "string" && "answer" in elem && typeof elem.answer === "string" && "cells" in elem && Array.isArray(elem.cells) && elem.cells.every((x) => typeof x === "string") && "coordinates" in elem && Array.isArray(elem.coordinates) && elem.coordinates.every(
994
+ (coord) => Array.isArray(coord) && coord.every((x) => typeof x === "number")
995
+ );
899
996
  }
900
997
 
901
998
  // src/tasks/nlp/textClassification.ts
@@ -1038,11 +1135,7 @@ async function documentQuestionAnswering(args, options) {
1038
1135
  inputs: {
1039
1136
  question: args.inputs.question,
1040
1137
  // convert Blob or ArrayBuffer to base64
1041
- image: base64FromBytes(
1042
- new Uint8Array(
1043
- args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer()
1044
- )
1045
- )
1138
+ image: base64FromBytes(new Uint8Array(await args.inputs.image.arrayBuffer()))
1046
1139
  }
1047
1140
  };
1048
1141
  const res = toArray(
@@ -1050,12 +1143,14 @@ async function documentQuestionAnswering(args, options) {
1050
1143
  ...options,
1051
1144
  taskHint: "document-question-answering"
1052
1145
  })
1053
- )?.[0];
1054
- const isValidOutput = typeof res?.answer === "string" && (typeof res.end === "number" || typeof res.end === "undefined") && (typeof res.score === "number" || typeof res.score === "undefined") && (typeof res.start === "number" || typeof res.start === "undefined");
1146
+ );
1147
+ const isValidOutput = Array.isArray(res) && res.every(
1148
+ (elem) => typeof elem === "object" && !!elem && typeof elem?.answer === "string" && (typeof elem.end === "number" || typeof elem.end === "undefined") && (typeof elem.score === "number" || typeof elem.score === "undefined") && (typeof elem.start === "number" || typeof elem.start === "undefined")
1149
+ );
1055
1150
  if (!isValidOutput) {
1056
1151
  throw new InferenceOutputError("Expected Array<{answer: string, end?: number, score?: number, start?: number}>");
1057
1152
  }
1058
- return res;
1153
+ return res[0];
1059
1154
  }
1060
1155
 
1061
1156
  // src/tasks/multimodal/visualQuestionAnswering.ts
@@ -1065,22 +1160,20 @@ async function visualQuestionAnswering(args, options) {
1065
1160
  inputs: {
1066
1161
  question: args.inputs.question,
1067
1162
  // convert Blob or ArrayBuffer to base64
1068
- image: base64FromBytes(
1069
- new Uint8Array(
1070
- args.inputs.image instanceof ArrayBuffer ? args.inputs.image : await args.inputs.image.arrayBuffer()
1071
- )
1072
- )
1163
+ image: base64FromBytes(new Uint8Array(await args.inputs.image.arrayBuffer()))
1073
1164
  }
1074
1165
  };
1075
- const res = (await request(reqArgs, {
1166
+ const res = await request(reqArgs, {
1076
1167
  ...options,
1077
1168
  taskHint: "visual-question-answering"
1078
- }))?.[0];
1079
- const isValidOutput = typeof res?.answer === "string" && typeof res.score === "number";
1169
+ });
1170
+ const isValidOutput = Array.isArray(res) && res.every(
1171
+ (elem) => typeof elem === "object" && !!elem && typeof elem?.answer === "string" && typeof elem.score === "number"
1172
+ );
1080
1173
  if (!isValidOutput) {
1081
1174
  throw new InferenceOutputError("Expected Array<{answer: string, score: number}>");
1082
1175
  }
1083
- return res;
1176
+ return res[0];
1084
1177
  }
1085
1178
 
1086
1179
  // src/tasks/tabular/tabularRegression.ts
@@ -1 +1 @@
1
- {"version":3,"file":"fal-ai.d.ts","sourceRoot":"","sources":["../../../src/providers/fal-ai.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,mBAAmB,oBAAoB,CAAC;AAErD,KAAK,OAAO,GAAG,MAAM,CAAC;AAEtB,eAAO,MAAM,0BAA0B,EAAE,eAAe,CAAC,OAAO,CAgB/D,CAAC"}
1
+ {"version":3,"file":"fal-ai.d.ts","sourceRoot":"","sources":["../../../src/providers/fal-ai.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,mBAAmB,oBAAoB,CAAC;AAErD,KAAK,OAAO,GAAG,MAAM,CAAC;AAEtB,eAAO,MAAM,0BAA0B,EAAE,eAAe,CAAC,OAAO,CAoB/D,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"replicate.d.ts","sourceRoot":"","sources":["../../../src/providers/replicate.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,sBAAsB,8BAA8B,CAAC;AAElE,KAAK,WAAW,GAAG,MAAM,CAAC;AAE1B,eAAO,MAAM,6BAA6B,EAAE,eAAe,CAAC,WAAW,CAStE,CAAC"}
1
+ {"version":3,"file":"replicate.d.ts","sourceRoot":"","sources":["../../../src/providers/replicate.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,sBAAsB,8BAA8B,CAAC;AAElE,KAAK,WAAW,GAAG,MAAM,CAAC;AAE1B,eAAO,MAAM,6BAA6B,EAAE,eAAe,CAAC,WAAW,CAYtE,CAAC"}
@@ -1,24 +1,10 @@
1
+ import type { AudioClassificationInput, AudioClassificationOutput } from "@huggingface/tasks";
1
2
  import type { BaseArgs, Options } from "../../types";
2
- export type AudioClassificationArgs = BaseArgs & {
3
- /**
4
- * Binary audio data
5
- */
6
- data: Blob | ArrayBuffer;
7
- };
8
- export interface AudioClassificationOutputValue {
9
- /**
10
- * The label for the class (model specific)
11
- */
12
- label: string;
13
- /**
14
- * A float that represents how likely it is that the audio file belongs to this class.
15
- */
16
- score: number;
17
- }
18
- export type AudioClassificationReturn = AudioClassificationOutputValue[];
3
+ import type { LegacyAudioInput } from "./utils";
4
+ export type AudioClassificationArgs = BaseArgs & (AudioClassificationInput | LegacyAudioInput);
19
5
  /**
20
6
  * This task reads some audio input and outputs the likelihood of classes.
21
7
  * Recommended model: superb/hubert-large-superb-er
22
8
  */
23
- export declare function audioClassification(args: AudioClassificationArgs, options?: Options): Promise<AudioClassificationReturn>;
9
+ export declare function audioClassification(args: AudioClassificationArgs, options?: Options): Promise<AudioClassificationOutput>;
24
10
  //# sourceMappingURL=audioClassification.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"audioClassification.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/audioClassification.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAGrD,MAAM,MAAM,uBAAuB,GAAG,QAAQ,GAAG;IAChD;;OAEG;IACH,IAAI,EAAE,IAAI,GAAG,WAAW,CAAC;CACzB,CAAC;AAEF,MAAM,WAAW,8BAA8B;IAC9C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;CACd;AAED,MAAM,MAAM,yBAAyB,GAAG,8BAA8B,EAAE,CAAC;AAEzE;;;GAGG;AACH,wBAAsB,mBAAmB,CACxC,IAAI,EAAE,uBAAuB,EAC7B,OAAO,CAAC,EAAE,OAAO,GACf,OAAO,CAAC,yBAAyB,CAAC,CAWpC"}
1
+ {"version":3,"file":"audioClassification.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/audioClassification.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,wBAAwB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAE9F,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAErD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAGhD,MAAM,MAAM,uBAAuB,GAAG,QAAQ,GAAG,CAAC,wBAAwB,GAAG,gBAAgB,CAAC,CAAC;AAE/F;;;GAGG;AACH,wBAAsB,mBAAmB,CACxC,IAAI,EAAE,uBAAuB,EAC7B,OAAO,CAAC,EAAE,OAAO,GACf,OAAO,CAAC,yBAAyB,CAAC,CAYpC"}
@@ -1,11 +1,12 @@
1
1
  import type { BaseArgs, Options } from "../../types";
2
- export type AudioToAudioArgs = BaseArgs & {
2
+ import type { LegacyAudioInput } from "./utils";
3
+ export type AudioToAudioArgs = (BaseArgs & {
3
4
  /**
4
5
  * Binary audio data
5
6
  */
6
- data: Blob | ArrayBuffer;
7
- };
8
- export interface AudioToAudioOutputValue {
7
+ inputs: Blob;
8
+ }) | LegacyAudioInput;
9
+ export interface AudioToAudioOutputElem {
9
10
  /**
10
11
  * The label for the audio output (model specific)
11
12
  */
@@ -13,16 +14,16 @@ export interface AudioToAudioOutputValue {
13
14
  /**
14
15
  * Base64 encoded audio output.
15
16
  */
17
+ audio: Blob;
18
+ }
19
+ export interface AudioToAudioOutput {
16
20
  blob: string;
17
- /**
18
- * Content-type for blob, e.g. audio/flac
19
- */
20
21
  "content-type": string;
22
+ label: string;
21
23
  }
22
- export type AudioToAudioReturn = AudioToAudioOutputValue[];
23
24
  /**
24
25
  * This task reads some audio input and outputs one or multiple audio files.
25
26
  * Example model: speechbrain/sepformer-wham does audio source separation.
26
27
  */
27
- export declare function audioToAudio(args: AudioToAudioArgs, options?: Options): Promise<AudioToAudioReturn>;
28
+ export declare function audioToAudio(args: AudioToAudioArgs, options?: Options): Promise<AudioToAudioOutput[]>;
28
29
  //# sourceMappingURL=audioToAudio.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"audioToAudio.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/audioToAudio.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAGrD,MAAM,MAAM,gBAAgB,GAAG,QAAQ,GAAG;IACzC;;OAEG;IACH,IAAI,EAAE,IAAI,GAAG,WAAW,CAAC;CACzB,CAAC;AAEF,MAAM,WAAW,uBAAuB;IACvC;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IAEb;;OAEG;IACH,cAAc,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,MAAM,kBAAkB,GAAG,uBAAuB,EAAE,CAAC;AAE3D;;;GAGG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAczG"}
1
+ {"version":3,"file":"audioToAudio.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/audioToAudio.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAErD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAGhD,MAAM,MAAM,gBAAgB,GACzB,CAAC,QAAQ,GAAG;IACZ;;OAEG;IACH,MAAM,EAAE,IAAI,CAAC;CACZ,CAAC,GACF,gBAAgB,CAAC;AAEpB,MAAM,WAAW,sBAAsB;IACtC;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;OAEG;IACH,KAAK,EAAE,IAAI,CAAC;CACZ;AAED,MAAM,WAAW,kBAAkB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,MAAM,CAAC;IACvB,KAAK,EAAE,MAAM,CAAC;CACd;AAED;;;GAGG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,kBAAkB,EAAE,CAAC,CAQ3G"}
@@ -1,16 +1,7 @@
1
+ import type { AutomaticSpeechRecognitionInput, AutomaticSpeechRecognitionOutput } from "@huggingface/tasks";
1
2
  import type { BaseArgs, Options } from "../../types";
2
- export type AutomaticSpeechRecognitionArgs = BaseArgs & {
3
- /**
4
- * Binary audio data
5
- */
6
- data: Blob | ArrayBuffer;
7
- };
8
- export interface AutomaticSpeechRecognitionOutput {
9
- /**
10
- * The text that was recognized from the audio
11
- */
12
- text: string;
13
- }
3
+ import type { LegacyAudioInput } from "./utils";
4
+ export type AutomaticSpeechRecognitionArgs = BaseArgs & (AutomaticSpeechRecognitionInput | LegacyAudioInput);
14
5
  /**
15
6
  * This task reads some audio input and outputs the said words within the audio files.
16
7
  * Recommended model (english language): facebook/wav2vec2-large-960h-lv60-self
@@ -1 +1 @@
1
- {"version":3,"file":"automaticSpeechRecognition.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/automaticSpeechRecognition.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAe,MAAM,aAAa,CAAC;AAIlE,MAAM,MAAM,8BAA8B,GAAG,QAAQ,GAAG;IACvD;;OAEG;IACH,IAAI,EAAE,IAAI,GAAG,WAAW,CAAC;CACzB,CAAC;AAEF,MAAM,WAAW,gCAAgC;IAChD;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;CACb;AAED;;;GAGG;AACH,wBAAsB,0BAA0B,CAC/C,IAAI,EAAE,8BAA8B,EACpC,OAAO,CAAC,EAAE,OAAO,GACf,OAAO,CAAC,gCAAgC,CAAC,CAkB3C"}
1
+ {"version":3,"file":"automaticSpeechRecognition.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/automaticSpeechRecognition.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,+BAA+B,EAAE,gCAAgC,EAAE,MAAM,oBAAoB,CAAC;AAE5G,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAe,MAAM,aAAa,CAAC;AAGlE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAIhD,MAAM,MAAM,8BAA8B,GAAG,QAAQ,GAAG,CAAC,+BAA+B,GAAG,gBAAgB,CAAC,CAAC;AAC7G;;;GAGG;AACH,wBAAsB,0BAA0B,CAC/C,IAAI,EAAE,8BAA8B,EACpC,OAAO,CAAC,EAAE,OAAO,GACf,OAAO,CAAC,gCAAgC,CAAC,CAW3C"}
@@ -1,14 +1,10 @@
1
+ import type { TextToSpeechInput } from "@huggingface/tasks";
1
2
  import type { BaseArgs, Options } from "../../types";
2
- export type TextToSpeechArgs = BaseArgs & {
3
- /**
4
- * The text to generate an audio from
5
- */
6
- inputs: string;
7
- };
8
- export type TextToSpeechOutput = Blob;
3
+ type TextToSpeechArgs = BaseArgs & TextToSpeechInput;
9
4
  /**
10
5
  * This task synthesize an audio of a voice pronouncing a given text.
11
6
  * Recommended model: espnet/kan-bayashi_ljspeech_vits
12
7
  */
13
- export declare function textToSpeech(args: TextToSpeechArgs, options?: Options): Promise<TextToSpeechOutput>;
8
+ export declare function textToSpeech(args: TextToSpeechArgs, options?: Options): Promise<Blob>;
9
+ export {};
14
10
  //# sourceMappingURL=textToSpeech.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"textToSpeech.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/textToSpeech.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAGrD,MAAM,MAAM,gBAAgB,GAAG,QAAQ,GAAG;IACzC;;OAEG;IACH,MAAM,EAAE,MAAM,CAAC;CACf,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG,IAAI,CAAC;AAItC;;;GAGG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAuBzG"}
1
+ {"version":3,"file":"textToSpeech.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/textToSpeech.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAE5D,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAGrD,KAAK,gBAAgB,GAAG,QAAQ,GAAG,iBAAiB,CAAC;AAKrD;;;GAGG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,gBAAgB,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAsB3F"}
@@ -0,0 +1,11 @@
1
+ import type { BaseArgs, RequestArgs } from "../../types";
2
+ /**
3
+ * @deprecated
4
+ */
5
+ export interface LegacyAudioInput {
6
+ data: Blob | ArrayBuffer;
7
+ }
8
+ export declare function preparePayload(args: BaseArgs & ({
9
+ inputs: Blob;
10
+ } | LegacyAudioInput)): RequestArgs;
11
+ //# sourceMappingURL=utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../../src/tasks/audio/utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAGzD;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,IAAI,EAAE,IAAI,GAAG,WAAW,CAAC;CACzB;AAED,wBAAgB,cAAc,CAAC,IAAI,EAAE,QAAQ,GAAG,CAAC;IAAE,MAAM,EAAE,IAAI,CAAA;CAAE,GAAG,gBAAgB,CAAC,GAAG,WAAW,CAOlG"}
@@ -1,21 +1,7 @@
1
+ import type { ImageClassificationInput, ImageClassificationOutput } from "@huggingface/tasks";
1
2
  import type { BaseArgs, Options } from "../../types";
2
- export type ImageClassificationArgs = BaseArgs & {
3
- /**
4
- * Binary image data
5
- */
6
- data: Blob | ArrayBuffer;
7
- };
8
- export interface ImageClassificationOutputValue {
9
- /**
10
- * The label for the class (model specific)
11
- */
12
- label: string;
13
- /**
14
- * A float that represents how likely it is that the image file belongs to this class.
15
- */
16
- score: number;
17
- }
18
- export type ImageClassificationOutput = ImageClassificationOutputValue[];
3
+ import { type LegacyImageInput } from "./utils";
4
+ export type ImageClassificationArgs = BaseArgs & (ImageClassificationInput | LegacyImageInput);
19
5
  /**
20
6
  * This task reads some image input and outputs the likelihood of classes.
21
7
  * Recommended model: google/vit-base-patch16-224
@@ -1 +1 @@
1
- {"version":3,"file":"imageClassification.d.ts","sourceRoot":"","sources":["../../../../src/tasks/cv/imageClassification.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAGrD,MAAM,MAAM,uBAAuB,GAAG,QAAQ,GAAG;IAChD;;OAEG;IACH,IAAI,EAAE,IAAI,GAAG,WAAW,CAAC;CACzB,CAAC;AAEF,MAAM,WAAW,8BAA8B;IAC9C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;CACd;AAED,MAAM,MAAM,yBAAyB,GAAG,8BAA8B,EAAE,CAAC;AAEzE;;;GAGG;AACH,wBAAsB,mBAAmB,CACxC,IAAI,EAAE,uBAAuB,EAC7B,OAAO,CAAC,EAAE,OAAO,GACf,OAAO,CAAC,yBAAyB,CAAC,CAWpC"}
1
+ {"version":3,"file":"imageClassification.d.ts","sourceRoot":"","sources":["../../../../src/tasks/cv/imageClassification.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,wBAAwB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAE9F,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAErD,OAAO,EAAkB,KAAK,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAEhE,MAAM,MAAM,uBAAuB,GAAG,QAAQ,GAAG,CAAC,wBAAwB,GAAG,gBAAgB,CAAC,CAAC;AAE/F;;;GAGG;AACH,wBAAsB,mBAAmB,CACxC,IAAI,EAAE,uBAAuB,EAC7B,OAAO,CAAC,EAAE,OAAO,GACf,OAAO,CAAC,yBAAyB,CAAC,CAYpC"}
@@ -1,25 +1,7 @@
1
+ import type { ImageSegmentationInput, ImageSegmentationOutput } from "@huggingface/tasks";
1
2
  import type { BaseArgs, Options } from "../../types";
2
- export type ImageSegmentationArgs = BaseArgs & {
3
- /**
4
- * Binary image data
5
- */
6
- data: Blob | ArrayBuffer;
7
- };
8
- export interface ImageSegmentationOutputValue {
9
- /**
10
- * The label for the class (model specific) of a segment.
11
- */
12
- label: string;
13
- /**
14
- * A str (base64 str of a single channel black-and-white img) representing the mask of a segment.
15
- */
16
- mask: string;
17
- /**
18
- * A float that represents how likely it is that the detected object belongs to the given class.
19
- */
20
- score: number;
21
- }
22
- export type ImageSegmentationOutput = ImageSegmentationOutputValue[];
3
+ import { type LegacyImageInput } from "./utils";
4
+ export type ImageSegmentationArgs = BaseArgs & (ImageSegmentationInput | LegacyImageInput);
23
5
  /**
24
6
  * This task reads some image input and outputs the likelihood of classes & bounding boxes of detected objects.
25
7
  * Recommended model: facebook/detr-resnet-50-panoptic
@@ -1 +1 @@
1
- {"version":3,"file":"imageSegmentation.d.ts","sourceRoot":"","sources":["../../../../src/tasks/cv/imageSegmentation.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAGrD,MAAM,MAAM,qBAAqB,GAAG,QAAQ,GAAG;IAC9C;;OAEG;IACH,IAAI,EAAE,IAAI,GAAG,WAAW,CAAC;CACzB,CAAC;AAEF,MAAM,WAAW,4BAA4B;IAC5C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;CACd;AAED,MAAM,MAAM,uBAAuB,GAAG,4BAA4B,EAAE,CAAC;AAErE;;;GAGG;AACH,wBAAsB,iBAAiB,CACtC,IAAI,EAAE,qBAAqB,EAC3B,OAAO,CAAC,EAAE,OAAO,GACf,OAAO,CAAC,uBAAuB,CAAC,CAYlC"}
1
+ {"version":3,"file":"imageSegmentation.d.ts","sourceRoot":"","sources":["../../../../src/tasks/cv/imageSegmentation.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAE1F,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAErD,OAAO,EAAkB,KAAK,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAEhE,MAAM,MAAM,qBAAqB,GAAG,QAAQ,GAAG,CAAC,sBAAsB,GAAG,gBAAgB,CAAC,CAAC;AAE3F;;;GAGG;AACH,wBAAsB,iBAAiB,CACtC,IAAI,EAAE,qBAAqB,EAC3B,OAAO,CAAC,EAAE,OAAO,GACf,OAAO,CAAC,uBAAuB,CAAC,CAalC"}