@huggingface/tasks 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +25 -2
- package/dist/index.js +89 -14
- package/dist/index.mjs +89 -14
- package/package.json +2 -4
- package/src/pipelines.ts +24 -0
- package/src/snippets/inputs.ts +15 -0
- package/src/snippets/python.ts +43 -2
- package/src/tasks/index.ts +12 -8
- package/src/tasks/text-generation/about.md +10 -0
package/dist/index.d.ts
CHANGED
@@ -276,6 +276,10 @@ declare const PIPELINE_DATA: {
     };
     "image-to-image": {
         name: string;
+        subtasks: {
+            type: string;
+            name: string;
+        }[];
         modality: "cv";
         color: "indigo";
     };
@@ -416,6 +420,16 @@ declare const PIPELINE_DATA: {
         modality: "cv";
         color: "yellow";
     };
+    "text-to-3d": {
+        name: string;
+        modality: "multimodal";
+        color: "yellow";
+    };
+    "image-to-3d": {
+        name: string;
+        modality: "multimodal";
+        color: "green";
+    };
     other: {
         name: string;
         modality: "other";
@@ -425,9 +439,9 @@ declare const PIPELINE_DATA: {
     };
 };
 type PipelineType = keyof typeof PIPELINE_DATA;
-declare const PIPELINE_TYPES: ("other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection")[];
+declare const PIPELINE_TYPES: ("other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d")[];
 declare const SUBTASK_TYPES: string[];
-declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection">;
+declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d">;
 
 /**
  * Mapping from library name (excluding Transformers) to its supported tasks.
@@ -758,10 +772,13 @@ declare namespace curl {
 }
 
 declare const snippetZeroShotClassification$1: (model: ModelData) => string;
+declare const snippetZeroShotImageClassification: (model: ModelData) => string;
 declare const snippetBasic$1: (model: ModelData) => string;
 declare const snippetFile$1: (model: ModelData) => string;
 declare const snippetTextToImage$1: (model: ModelData) => string;
+declare const snippetTabular: (model: ModelData) => string;
 declare const snippetTextToAudio$1: (model: ModelData) => string;
+declare const snippetDocumentQuestionAnswering: (model: ModelData) => string;
 declare const pythonSnippets: Partial<Record<PipelineType, (model: ModelData) => string>>;
 declare function getPythonInferenceSnippet(model: ModelData, accessToken: string): string;
 declare function hasPythonInferenceSnippet(model: ModelData): boolean;
@@ -769,16 +786,22 @@ declare function hasPythonInferenceSnippet(model: ModelData): boolean;
 declare const python_getPythonInferenceSnippet: typeof getPythonInferenceSnippet;
 declare const python_hasPythonInferenceSnippet: typeof hasPythonInferenceSnippet;
 declare const python_pythonSnippets: typeof pythonSnippets;
+declare const python_snippetDocumentQuestionAnswering: typeof snippetDocumentQuestionAnswering;
+declare const python_snippetTabular: typeof snippetTabular;
+declare const python_snippetZeroShotImageClassification: typeof snippetZeroShotImageClassification;
 declare namespace python {
   export {
     python_getPythonInferenceSnippet as getPythonInferenceSnippet,
     python_hasPythonInferenceSnippet as hasPythonInferenceSnippet,
     python_pythonSnippets as pythonSnippets,
     snippetBasic$1 as snippetBasic,
+    python_snippetDocumentQuestionAnswering as snippetDocumentQuestionAnswering,
     snippetFile$1 as snippetFile,
+    python_snippetTabular as snippetTabular,
     snippetTextToAudio$1 as snippetTextToAudio,
     snippetTextToImage$1 as snippetTextToImage,
     snippetZeroShotClassification$1 as snippetZeroShotClassification,
+    python_snippetZeroShotImageClassification as snippetZeroShotImageClassification,
   };
 }
 
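Taken together, the typings above widen `PipelineType` with `"text-to-3d"` and `"image-to-3d"` and declare three new Python snippet builders. A minimal consumer-side sketch of the widened union, assuming `PIPELINE_TYPES_SET` and `PipelineType` are re-exported from the package root (the export statement itself is not part of this diff):

```ts
import { PIPELINE_TYPES_SET, type PipelineType } from "@huggingface/tasks";

// Narrow an arbitrary string (e.g. a tag read from a model card) to the widened union.
// The import path above is assumed; only the declarations are shown in dist/index.d.ts.
function asPipelineType(tag: string): PipelineType | undefined {
  return PIPELINE_TYPES_SET.has(tag as PipelineType) ? (tag as PipelineType) : undefined;
}

console.log(asPipelineType("text-to-3d")); // "text-to-3d" on 0.0.8, undefined on 0.0.7
console.log(asPipelineType("not-a-task")); // undefined
```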
package/dist/index.js
CHANGED
@@ -1801,6 +1801,20 @@ var PIPELINE_DATA = {
   },
   "image-to-image": {
     name: "Image-to-Image",
+    subtasks: [
+      {
+        type: "image-inpainting",
+        name: "Image Inpainting"
+      },
+      {
+        type: "image-colorization",
+        name: "Image Colorization"
+      },
+      {
+        type: "super-resolution",
+        name: "Super Resolution"
+      }
+    ],
     modality: "cv",
     color: "indigo"
   },
@@ -1987,6 +2001,16 @@ var PIPELINE_DATA = {
     modality: "cv",
     color: "yellow"
   },
+  "text-to-3d": {
+    name: "Text-to-3D",
+    modality: "multimodal",
+    color: "yellow"
+  },
+  "image-to-3d": {
+    name: "Image-to-3D",
+    modality: "multimodal",
+    color: "green"
+  },
   other: {
     name: "Other",
     modality: "other",
@@ -4406,18 +4430,18 @@ var data_default33 = taskData33;
 
 // src/tasks/index.ts
 var TASKS_MODEL_LIBRARIES = {
-  "audio-classification": ["speechbrain", "transformers"],
+  "audio-classification": ["speechbrain", "transformers", "transformers.js"],
   "audio-to-audio": ["asteroid", "speechbrain"],
   "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
   conversational: ["transformers"],
-  "depth-estimation": ["transformers"],
-  "document-question-answering": ["transformers"],
+  "depth-estimation": ["transformers", "transformers.js"],
+  "document-question-answering": ["transformers", "transformers.js"],
   "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
   "fill-mask": ["transformers", "transformers.js"],
   "graph-ml": ["transformers"],
   "image-classification": ["keras", "timm", "transformers", "transformers.js"],
   "image-segmentation": ["transformers", "transformers.js"],
-  "image-to-image": ["diffusers"],
+  "image-to-image": ["diffusers", "transformers.js"],
   "image-to-text": ["transformers.js"],
   "image-to-video": ["diffusers"],
   "video-classification": ["transformers"],
@@ -4439,8 +4463,8 @@ var TASKS_MODEL_LIBRARIES = {
   "text-generation": ["transformers", "transformers.js"],
   "text-retrieval": [],
   "text-to-image": ["diffusers"],
-  "text-to-speech": ["espnet", "tensorflowtts", "transformers"],
-  "text-to-audio": ["transformers"],
+  "text-to-speech": ["espnet", "tensorflowtts", "transformers", "transformers.js"],
+  "text-to-audio": ["transformers", "transformers.js"],
   "text-to-video": ["diffusers"],
   "text2text-generation": ["transformers", "transformers.js"],
   "time-series-forecasting": [],
@@ -4455,11 +4479,13 @@ var TASKS_MODEL_LIBRARIES = {
   ],
   translation: ["transformers", "transformers.js"],
   "unconditional-image-generation": ["diffusers"],
-  "visual-question-answering": ["transformers"],
+  "visual-question-answering": ["transformers", "transformers.js"],
   "voice-activity-detection": [],
   "zero-shot-classification": ["transformers", "transformers.js"],
   "zero-shot-image-classification": ["transformers", "transformers.js"],
-  "zero-shot-object-detection": ["transformers"]
+  "zero-shot-object-detection": ["transformers", "transformers.js"],
+  "text-to-3d": [],
+  "image-to-3d": []
 };
 function getData(type, partialTaskData = data_default14) {
   return {
@@ -4515,7 +4541,9 @@ var TASKS_DATA = {
   "voice-activity-detection": void 0,
   "zero-shot-classification": getData("zero-shot-classification", data_default32),
   "zero-shot-image-classification": getData("zero-shot-image-classification", data_default33),
-  "zero-shot-object-detection": getData("zero-shot-object-detection", data_default14)
+  "zero-shot-object-detection": getData("zero-shot-object-detection", data_default14),
+  "text-to-3d": getData("text-to-3d", data_default14),
+  "image-to-3d": getData("image-to-3d", data_default14)
 };
 
 // src/model-libraries.ts
@@ -4622,6 +4650,10 @@ var inputsTableQuestionAnswering = () => `{
         ]
     }
 }`;
+var inputsVisualQuestionAnswering = () => `{
+    "image": "cat.png",
+    "question": "What is in this image?"
+}`;
 var inputsQuestionAnswering = () => `{
     "question": "What is my name?",
     "context": "My name is Clara and I live in Berkeley."
@@ -4650,11 +4682,14 @@ var inputsTextToImage = () => `"Astronaut riding a horse"`;
 var inputsTextToSpeech = () => `"The answer to the universe is 42"`;
 var inputsTextToAudio = () => `"liquid drum and bass, atmospheric synths, airy sounds"`;
 var inputsAutomaticSpeechRecognition = () => `"sample1.flac"`;
+var inputsTabularPrediction = () => `'{"Height":[11.52,12.48],"Length1":[23.2,24.0],"Length2":[25.4,26.3],"Species": ["Bream","Bream"]}'`;
+var inputsZeroShotImageClassification = () => `"cats.jpg"`;
 var modelInputSnippets = {
   "audio-to-audio": inputsAudioToAudio,
   "audio-classification": inputsAudioClassification,
   "automatic-speech-recognition": inputsAutomaticSpeechRecognition,
   conversational: inputsConversational,
+  "document-question-answering": inputsVisualQuestionAnswering,
   "feature-extraction": inputsFeatureExtraction,
   "fill-mask": inputsFillMask,
   "image-classification": inputsImageClassification,
@@ -4665,6 +4700,8 @@ var modelInputSnippets = {
   "sentence-similarity": inputsSentenceSimilarity,
   summarization: inputsSummarization,
   "table-question-answering": inputsTableQuestionAnswering,
+  "tabular-regression": inputsTabularPrediction,
+  "tabular-classification": inputsTabularPrediction,
   "text-classification": inputsTextClassification,
   "text-generation": inputsTextGeneration,
   "text-to-image": inputsTextToImage,
@@ -4673,7 +4710,8 @@ var modelInputSnippets = {
   "text2text-generation": inputsText2TextGeneration,
   "token-classification": inputsTokenClassification,
   translation: inputsTranslation,
-  "zero-shot-classification": inputsZeroShotClassification
+  "zero-shot-classification": inputsZeroShotClassification,
+  "zero-shot-image-classification": inputsZeroShotImageClassification
 };
 function getModelInputSnippet(model, noWrap = false, noQuotes = false) {
   if (model.pipeline_tag) {
@@ -4761,10 +4799,13 @@ __export(python_exports, {
   hasPythonInferenceSnippet: () => hasPythonInferenceSnippet,
   pythonSnippets: () => pythonSnippets,
   snippetBasic: () => snippetBasic2,
+  snippetDocumentQuestionAnswering: () => snippetDocumentQuestionAnswering,
   snippetFile: () => snippetFile2,
+  snippetTabular: () => snippetTabular,
   snippetTextToAudio: () => snippetTextToAudio,
   snippetTextToImage: () => snippetTextToImage,
-  snippetZeroShotClassification: () => snippetZeroShotClassification2
+  snippetZeroShotClassification: () => snippetZeroShotClassification2,
+  snippetZeroShotImageClassification: () => snippetZeroShotImageClassification
 });
 var snippetZeroShotClassification2 = (model) => `def query(payload):
     response = requests.post(API_URL, headers=headers, json=payload)
@@ -4774,6 +4815,20 @@ output = query({
     "inputs": ${getModelInputSnippet(model)},
     "parameters": {"candidate_labels": ["refund", "legal", "faq"]},
 })`;
+var snippetZeroShotImageClassification = (model) => `def query(data):
+    with open(data["image_path"], "rb") as f:
+        img = f.read()
+    payload={
+        "parameters": data["parameters"],
+        "inputs": base64.b64encode(img).decode("utf-8")
+    }
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+output = query({
+    "image_path": ${getModelInputSnippet(model)},
+    "parameters": {"candidate_labels": ["cat", "dog", "llama"]},
+})`;
 var snippetBasic2 = (model) => `def query(payload):
     response = requests.post(API_URL, headers=headers, json=payload)
     return response.json()
@@ -4798,6 +4853,12 @@ image_bytes = query({
 import io
 from PIL import Image
 image = Image.open(io.BytesIO(image_bytes))`;
+var snippetTabular = (model) => `def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.content
+response = query({
+    "inputs": {"data": ${getModelInputSnippet(model)}},
+})`;
 var snippetTextToAudio = (model) => {
   if (model.library_name === "transformers") {
     return `def query(payload):
@@ -4823,8 +4884,18 @@ from IPython.display import Audio
 Audio(audio, rate=sampling_rate)`;
   }
 };
+var snippetDocumentQuestionAnswering = (model) => `def query(payload):
+    with open(payload["image"], "rb") as f:
+        img = f.read()
+    payload["image"] = base64.b64encode(img).decode("utf-8")
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+output = query({
+    "inputs": ${getModelInputSnippet(model)},
+})`;
 var pythonSnippets = {
-  // Same order as in
+  // Same order as in tasks/src/pipelines.ts
   "text-classification": snippetBasic2,
   "token-classification": snippetBasic2,
   "table-question-answering": snippetBasic2,
@@ -4845,9 +4916,13 @@ var pythonSnippets = {
   "audio-to-audio": snippetFile2,
   "audio-classification": snippetFile2,
   "image-classification": snippetFile2,
-  "
+  "tabular-regression": snippetTabular,
+  "tabular-classification": snippetTabular,
   "object-detection": snippetFile2,
-  "image-segmentation": snippetFile2
+  "image-segmentation": snippetFile2,
+  "document-question-answering": snippetDocumentQuestionAnswering,
+  "image-to-text": snippetFile2,
+  "zero-shot-image-classification": snippetZeroShotImageClassification
 };
 function getPythonInferenceSnippet(model, accessToken) {
   const body = model.pipeline_tag && model.pipeline_tag in pythonSnippets ? pythonSnippets[model.pipeline_tag]?.(model) ?? "" : "";
package/dist/index.mjs
CHANGED
@@ -1763,6 +1763,20 @@ var PIPELINE_DATA = {
   },
   "image-to-image": {
     name: "Image-to-Image",
+    subtasks: [
+      {
+        type: "image-inpainting",
+        name: "Image Inpainting"
+      },
+      {
+        type: "image-colorization",
+        name: "Image Colorization"
+      },
+      {
+        type: "super-resolution",
+        name: "Super Resolution"
+      }
+    ],
     modality: "cv",
     color: "indigo"
   },
@@ -1949,6 +1963,16 @@ var PIPELINE_DATA = {
     modality: "cv",
     color: "yellow"
   },
+  "text-to-3d": {
+    name: "Text-to-3D",
+    modality: "multimodal",
+    color: "yellow"
+  },
+  "image-to-3d": {
+    name: "Image-to-3D",
+    modality: "multimodal",
+    color: "green"
+  },
   other: {
     name: "Other",
     modality: "other",
@@ -4368,18 +4392,18 @@ var data_default33 = taskData33;
 
 // src/tasks/index.ts
 var TASKS_MODEL_LIBRARIES = {
-  "audio-classification": ["speechbrain", "transformers"],
+  "audio-classification": ["speechbrain", "transformers", "transformers.js"],
   "audio-to-audio": ["asteroid", "speechbrain"],
   "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
   conversational: ["transformers"],
-  "depth-estimation": ["transformers"],
-  "document-question-answering": ["transformers"],
+  "depth-estimation": ["transformers", "transformers.js"],
+  "document-question-answering": ["transformers", "transformers.js"],
   "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
   "fill-mask": ["transformers", "transformers.js"],
   "graph-ml": ["transformers"],
   "image-classification": ["keras", "timm", "transformers", "transformers.js"],
   "image-segmentation": ["transformers", "transformers.js"],
-  "image-to-image": ["diffusers"],
+  "image-to-image": ["diffusers", "transformers.js"],
   "image-to-text": ["transformers.js"],
   "image-to-video": ["diffusers"],
   "video-classification": ["transformers"],
@@ -4401,8 +4425,8 @@ var TASKS_MODEL_LIBRARIES = {
   "text-generation": ["transformers", "transformers.js"],
   "text-retrieval": [],
   "text-to-image": ["diffusers"],
-  "text-to-speech": ["espnet", "tensorflowtts", "transformers"],
-  "text-to-audio": ["transformers"],
+  "text-to-speech": ["espnet", "tensorflowtts", "transformers", "transformers.js"],
+  "text-to-audio": ["transformers", "transformers.js"],
   "text-to-video": ["diffusers"],
   "text2text-generation": ["transformers", "transformers.js"],
   "time-series-forecasting": [],
@@ -4417,11 +4441,13 @@ var TASKS_MODEL_LIBRARIES = {
   ],
   translation: ["transformers", "transformers.js"],
   "unconditional-image-generation": ["diffusers"],
-  "visual-question-answering": ["transformers"],
+  "visual-question-answering": ["transformers", "transformers.js"],
   "voice-activity-detection": [],
   "zero-shot-classification": ["transformers", "transformers.js"],
   "zero-shot-image-classification": ["transformers", "transformers.js"],
-  "zero-shot-object-detection": ["transformers"]
+  "zero-shot-object-detection": ["transformers", "transformers.js"],
+  "text-to-3d": [],
+  "image-to-3d": []
 };
 function getData(type, partialTaskData = data_default14) {
   return {
@@ -4477,7 +4503,9 @@ var TASKS_DATA = {
   "voice-activity-detection": void 0,
   "zero-shot-classification": getData("zero-shot-classification", data_default32),
   "zero-shot-image-classification": getData("zero-shot-image-classification", data_default33),
-  "zero-shot-object-detection": getData("zero-shot-object-detection", data_default14)
+  "zero-shot-object-detection": getData("zero-shot-object-detection", data_default14),
+  "text-to-3d": getData("text-to-3d", data_default14),
+  "image-to-3d": getData("image-to-3d", data_default14)
 };
 
 // src/model-libraries.ts
@@ -4584,6 +4612,10 @@ var inputsTableQuestionAnswering = () => `{
         ]
     }
 }`;
+var inputsVisualQuestionAnswering = () => `{
+    "image": "cat.png",
+    "question": "What is in this image?"
+}`;
 var inputsQuestionAnswering = () => `{
     "question": "What is my name?",
     "context": "My name is Clara and I live in Berkeley."
@@ -4612,11 +4644,14 @@ var inputsTextToImage = () => `"Astronaut riding a horse"`;
 var inputsTextToSpeech = () => `"The answer to the universe is 42"`;
 var inputsTextToAudio = () => `"liquid drum and bass, atmospheric synths, airy sounds"`;
 var inputsAutomaticSpeechRecognition = () => `"sample1.flac"`;
+var inputsTabularPrediction = () => `'{"Height":[11.52,12.48],"Length1":[23.2,24.0],"Length2":[25.4,26.3],"Species": ["Bream","Bream"]}'`;
+var inputsZeroShotImageClassification = () => `"cats.jpg"`;
 var modelInputSnippets = {
   "audio-to-audio": inputsAudioToAudio,
   "audio-classification": inputsAudioClassification,
   "automatic-speech-recognition": inputsAutomaticSpeechRecognition,
   conversational: inputsConversational,
+  "document-question-answering": inputsVisualQuestionAnswering,
   "feature-extraction": inputsFeatureExtraction,
   "fill-mask": inputsFillMask,
   "image-classification": inputsImageClassification,
@@ -4627,6 +4662,8 @@ var modelInputSnippets = {
   "sentence-similarity": inputsSentenceSimilarity,
   summarization: inputsSummarization,
   "table-question-answering": inputsTableQuestionAnswering,
+  "tabular-regression": inputsTabularPrediction,
+  "tabular-classification": inputsTabularPrediction,
   "text-classification": inputsTextClassification,
   "text-generation": inputsTextGeneration,
   "text-to-image": inputsTextToImage,
@@ -4635,7 +4672,8 @@ var modelInputSnippets = {
   "text2text-generation": inputsText2TextGeneration,
   "token-classification": inputsTokenClassification,
   translation: inputsTranslation,
-  "zero-shot-classification": inputsZeroShotClassification
+  "zero-shot-classification": inputsZeroShotClassification,
+  "zero-shot-image-classification": inputsZeroShotImageClassification
 };
 function getModelInputSnippet(model, noWrap = false, noQuotes = false) {
   if (model.pipeline_tag) {
@@ -4723,10 +4761,13 @@ __export(python_exports, {
   hasPythonInferenceSnippet: () => hasPythonInferenceSnippet,
   pythonSnippets: () => pythonSnippets,
   snippetBasic: () => snippetBasic2,
+  snippetDocumentQuestionAnswering: () => snippetDocumentQuestionAnswering,
   snippetFile: () => snippetFile2,
+  snippetTabular: () => snippetTabular,
   snippetTextToAudio: () => snippetTextToAudio,
   snippetTextToImage: () => snippetTextToImage,
-  snippetZeroShotClassification: () => snippetZeroShotClassification2
+  snippetZeroShotClassification: () => snippetZeroShotClassification2,
+  snippetZeroShotImageClassification: () => snippetZeroShotImageClassification
 });
 var snippetZeroShotClassification2 = (model) => `def query(payload):
     response = requests.post(API_URL, headers=headers, json=payload)
@@ -4736,6 +4777,20 @@ output = query({
     "inputs": ${getModelInputSnippet(model)},
     "parameters": {"candidate_labels": ["refund", "legal", "faq"]},
 })`;
+var snippetZeroShotImageClassification = (model) => `def query(data):
+    with open(data["image_path"], "rb") as f:
+        img = f.read()
+    payload={
+        "parameters": data["parameters"],
+        "inputs": base64.b64encode(img).decode("utf-8")
+    }
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+output = query({
+    "image_path": ${getModelInputSnippet(model)},
+    "parameters": {"candidate_labels": ["cat", "dog", "llama"]},
+})`;
 var snippetBasic2 = (model) => `def query(payload):
     response = requests.post(API_URL, headers=headers, json=payload)
     return response.json()
@@ -4760,6 +4815,12 @@ image_bytes = query({
 import io
 from PIL import Image
 image = Image.open(io.BytesIO(image_bytes))`;
+var snippetTabular = (model) => `def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.content
+response = query({
+    "inputs": {"data": ${getModelInputSnippet(model)}},
+})`;
 var snippetTextToAudio = (model) => {
   if (model.library_name === "transformers") {
     return `def query(payload):
@@ -4785,8 +4846,18 @@ from IPython.display import Audio
 Audio(audio, rate=sampling_rate)`;
   }
 };
+var snippetDocumentQuestionAnswering = (model) => `def query(payload):
+    with open(payload["image"], "rb") as f:
+        img = f.read()
+    payload["image"] = base64.b64encode(img).decode("utf-8")
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+output = query({
+    "inputs": ${getModelInputSnippet(model)},
+})`;
 var pythonSnippets = {
-  // Same order as in
+  // Same order as in tasks/src/pipelines.ts
   "text-classification": snippetBasic2,
   "token-classification": snippetBasic2,
   "table-question-answering": snippetBasic2,
@@ -4807,9 +4878,13 @@ var pythonSnippets = {
   "audio-to-audio": snippetFile2,
   "audio-classification": snippetFile2,
   "image-classification": snippetFile2,
-  "
+  "tabular-regression": snippetTabular,
+  "tabular-classification": snippetTabular,
   "object-detection": snippetFile2,
-  "image-segmentation": snippetFile2
+  "image-segmentation": snippetFile2,
+  "document-question-answering": snippetDocumentQuestionAnswering,
+  "image-to-text": snippetFile2,
+  "zero-shot-image-classification": snippetZeroShotImageClassification
 };
 function getPythonInferenceSnippet(model, accessToken) {
   const body = model.pipeline_tag && model.pipeline_tag in pythonSnippets ? pythonSnippets[model.pipeline_tag]?.(model) ?? "" : "";
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@huggingface/tasks",
   "packageManager": "pnpm@8.10.5",
-  "version": "0.0.7",
+  "version": "0.0.8",
   "description": "List of ML tasks for huggingface.co/tasks",
   "repository": "https://github.com/huggingface/huggingface.js.git",
   "publishConfig": {
@@ -30,9 +30,7 @@
   ],
   "author": "Hugging Face",
   "license": "MIT",
-  "devDependencies": {
-    "typescript": "^5.0.4"
-  },
+  "devDependencies": {},
   "scripts": {
     "lint": "eslint --quiet --fix --ext .cjs,.ts .",
     "lint:check": "eslint --ext .cjs,.ts .",
package/src/pipelines.ts
CHANGED
@@ -435,6 +435,20 @@ export const PIPELINE_DATA = {
   },
   "image-to-image": {
     name: "Image-to-Image",
+    subtasks: [
+      {
+        type: "image-inpainting",
+        name: "Image Inpainting",
+      },
+      {
+        type: "image-colorization",
+        name: "Image Colorization",
+      },
+      {
+        type: "super-resolution",
+        name: "Super Resolution",
+      },
+    ],
     modality: "cv",
     color: "indigo",
   },
@@ -621,6 +635,16 @@ export const PIPELINE_DATA = {
     modality: "cv",
     color: "yellow",
   },
+  "text-to-3d": {
+    name: "Text-to-3D",
+    modality: "multimodal",
+    color: "yellow",
+  },
+  "image-to-3d": {
+    name: "Image-to-3D",
+    modality: "multimodal",
+    color: "green",
+  },
   other: {
     name: "Other",
     modality: "other",
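The new `subtasks` field is plain data on the `image-to-image` entry; a short sketch of reading it (the import from the package root is assumed, since this file only defines `PIPELINE_DATA`):

```ts
import { PIPELINE_DATA } from "@huggingface/tasks";

// List the image-to-image subtasks added in this release as { type, name } pairs.
for (const subtask of PIPELINE_DATA["image-to-image"].subtasks) {
  console.log(`${subtask.type} -> ${subtask.name}`);
}
// image-inpainting -> Image Inpainting
// image-colorization -> Image Colorization
// super-resolution -> Super Resolution
```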
package/src/snippets/inputs.ts
CHANGED
@@ -31,6 +31,12 @@ const inputsTableQuestionAnswering = () =>
     }
 }`;
 
+const inputsVisualQuestionAnswering = () =>
+	`{
+    "image": "cat.png",
+    "question": "What is in this image?"
+}`;
+
 const inputsQuestionAnswering = () =>
 	`{
     "question": "What is my name?",
@@ -79,6 +85,11 @@ const inputsTextToAudio = () => `"liquid drum and bass, atmospheric synths, airy
 
 const inputsAutomaticSpeechRecognition = () => `"sample1.flac"`;
 
+const inputsTabularPrediction = () =>
+	`'{"Height":[11.52,12.48],"Length1":[23.2,24.0],"Length2":[25.4,26.3],"Species": ["Bream","Bream"]}'`;
+
+const inputsZeroShotImageClassification = () => `"cats.jpg"`;
+
 const modelInputSnippets: {
 	[key in PipelineType]?: (model: ModelData) => string;
 } = {
@@ -86,6 +97,7 @@ const modelInputSnippets: {
 	"audio-classification": inputsAudioClassification,
 	"automatic-speech-recognition": inputsAutomaticSpeechRecognition,
 	conversational: inputsConversational,
+	"document-question-answering": inputsVisualQuestionAnswering,
 	"feature-extraction": inputsFeatureExtraction,
 	"fill-mask": inputsFillMask,
 	"image-classification": inputsImageClassification,
@@ -96,6 +108,8 @@ const modelInputSnippets: {
 	"sentence-similarity": inputsSentenceSimilarity,
 	summarization: inputsSummarization,
 	"table-question-answering": inputsTableQuestionAnswering,
+	"tabular-regression": inputsTabularPrediction,
+	"tabular-classification": inputsTabularPrediction,
 	"text-classification": inputsTextClassification,
 	"text-generation": inputsTextGeneration,
 	"text-to-image": inputsTextToImage,
@@ -105,6 +119,7 @@ const modelInputSnippets: {
 	"token-classification": inputsTokenClassification,
 	translation: inputsTranslation,
 	"zero-shot-classification": inputsZeroShotClassification,
+	"zero-shot-image-classification": inputsZeroShotImageClassification,
 };
 
 // Use noWrap to put the whole snippet on a single line (removing new lines and tabulations)
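These placeholder inputs are what `getModelInputSnippet` hands back for a model whose `pipeline_tag` matches. A hedged sketch of the lookup (the model object is hypothetical, and the root re-export of `getModelInputSnippet` is assumed; in the source it lives in src/snippets/inputs.ts):

```ts
import { getModelInputSnippet, type ModelData } from "@huggingface/tasks";

// Hypothetical tabular-classification model entry; only the fields used here are filled in.
const model = { id: "user/fish-species-classifier", pipeline_tag: "tabular-classification" } as ModelData;

// Returns the inputsTabularPrediction placeholder string added above.
console.log(getModelInputSnippet(model));
```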
package/src/snippets/python.ts
CHANGED
@@ -12,6 +12,22 @@ output = query({
 	"parameters": {"candidate_labels": ["refund", "legal", "faq"]},
 })`;
 
+export const snippetZeroShotImageClassification = (model: ModelData): string =>
+	`def query(data):
+	with open(data["image_path"], "rb") as f:
+		img = f.read()
+	payload={
+		"parameters": data["parameters"],
+		"inputs": base64.b64encode(img).decode("utf-8")
+	}
+	response = requests.post(API_URL, headers=headers, json=payload)
+	return response.json()
+
+output = query({
+	"image_path": ${getModelInputSnippet(model)},
+	"parameters": {"candidate_labels": ["cat", "dog", "llama"]},
+})`;
+
 export const snippetBasic = (model: ModelData): string =>
 	`def query(payload):
 	response = requests.post(API_URL, headers=headers, json=payload)
@@ -42,6 +58,14 @@ import io
 from PIL import Image
 image = Image.open(io.BytesIO(image_bytes))`;
 
+export const snippetTabular = (model: ModelData): string =>
+	`def query(payload):
+	response = requests.post(API_URL, headers=headers, json=payload)
+	return response.content
+response = query({
+	"inputs": {"data": ${getModelInputSnippet(model)}},
+})`;
+
 export const snippetTextToAudio = (model: ModelData): string => {
 	// Transformers TTS pipeline and api-inference-community (AIC) pipeline outputs are diverged
 	// with the latest update to inference-api (IA).
@@ -70,8 +94,21 @@ from IPython.display import Audio
 Audio(audio, rate=sampling_rate)`;
 	}
 };
+
+export const snippetDocumentQuestionAnswering = (model: ModelData): string =>
+	`def query(payload):
+	with open(payload["image"], "rb") as f:
+		img = f.read()
+	payload["image"] = base64.b64encode(img).decode("utf-8")
+	response = requests.post(API_URL, headers=headers, json=payload)
+	return response.json()
+
+output = query({
+	"inputs": ${getModelInputSnippet(model)},
+})`;
+
 export const pythonSnippets: Partial<Record<PipelineType, (model: ModelData) => string>> = {
-	// Same order as in
+	// Same order as in tasks/src/pipelines.ts
 	"text-classification": snippetBasic,
 	"token-classification": snippetBasic,
 	"table-question-answering": snippetBasic,
@@ -92,9 +129,13 @@ export const pythonSnippets: Partial<Record<PipelineType, (model: ModelData) =>
 	"audio-to-audio": snippetFile,
 	"audio-classification": snippetFile,
 	"image-classification": snippetFile,
-	"
+	"tabular-regression": snippetTabular,
+	"tabular-classification": snippetTabular,
 	"object-detection": snippetFile,
 	"image-segmentation": snippetFile,
+	"document-question-answering": snippetDocumentQuestionAnswering,
+	"image-to-text": snippetFile,
+	"zero-shot-image-classification": snippetZeroShotImageClassification,
 };
 
 export function getPythonInferenceSnippet(model: ModelData, accessToken: string): string {
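A sketch of how these new templates are rendered end to end through `getPythonInferenceSnippet` (the model id is only an example with the right `pipeline_tag`, `hf_xxx` stands in for a real access token, and the package-root import is assumed):

```ts
import { getPythonInferenceSnippet, type ModelData } from "@huggingface/tasks";

// Example document-question-answering model; any Hub id with this pipeline_tag would do.
const model = { id: "impira/layoutlm-document-qa", pipeline_tag: "document-question-answering" } as ModelData;

// Renders snippetDocumentQuestionAnswering, splicing in the visual-question-answering
// placeholder input ({"image": "cat.png", "question": ...}) via getModelInputSnippet.
console.log(getPythonInferenceSnippet(model, "hf_xxx" /* placeholder token */));
```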
package/src/tasks/index.ts
CHANGED
@@ -40,18 +40,18 @@ import type { ModelLibraryKey } from "../model-libraries";
  * Model libraries compatible with each ML task
  */
 export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
-	"audio-classification": ["speechbrain", "transformers"],
+	"audio-classification": ["speechbrain", "transformers", "transformers.js"],
 	"audio-to-audio": ["asteroid", "speechbrain"],
 	"automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
 	conversational: ["transformers"],
-	"depth-estimation": ["transformers"],
-	"document-question-answering": ["transformers"],
+	"depth-estimation": ["transformers", "transformers.js"],
+	"document-question-answering": ["transformers", "transformers.js"],
 	"feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
 	"fill-mask": ["transformers", "transformers.js"],
 	"graph-ml": ["transformers"],
 	"image-classification": ["keras", "timm", "transformers", "transformers.js"],
 	"image-segmentation": ["transformers", "transformers.js"],
-	"image-to-image": ["diffusers"],
+	"image-to-image": ["diffusers", "transformers.js"],
 	"image-to-text": ["transformers.js"],
 	"image-to-video": ["diffusers"],
 	"video-classification": ["transformers"],
@@ -73,8 +73,8 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
 	"text-generation": ["transformers", "transformers.js"],
 	"text-retrieval": [],
 	"text-to-image": ["diffusers"],
-	"text-to-speech": ["espnet", "tensorflowtts", "transformers"],
-	"text-to-audio": ["transformers"],
+	"text-to-speech": ["espnet", "tensorflowtts", "transformers", "transformers.js"],
+	"text-to-audio": ["transformers", "transformers.js"],
 	"text-to-video": ["diffusers"],
 	"text2text-generation": ["transformers", "transformers.js"],
 	"time-series-forecasting": [],
@@ -89,11 +89,13 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
 	],
 	translation: ["transformers", "transformers.js"],
 	"unconditional-image-generation": ["diffusers"],
-	"visual-question-answering": ["transformers"],
+	"visual-question-answering": ["transformers", "transformers.js"],
 	"voice-activity-detection": [],
 	"zero-shot-classification": ["transformers", "transformers.js"],
 	"zero-shot-image-classification": ["transformers", "transformers.js"],
-	"zero-shot-object-detection": ["transformers"],
+	"zero-shot-object-detection": ["transformers", "transformers.js"],
+	"text-to-3d": [],
+	"image-to-3d": [],
 };
 
 /**
@@ -161,6 +163,8 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
 	"zero-shot-classification": getData("zero-shot-classification", zeroShotClassification),
 	"zero-shot-image-classification": getData("zero-shot-image-classification", zeroShotImageClassification),
 	"zero-shot-object-detection": getData("zero-shot-object-detection", placeholder),
+	"text-to-3d": getData("text-to-3d", placeholder),
+	"image-to-3d": getData("image-to-3d", placeholder),
 } as const;
 
 export interface ExampleRepo {
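The practical effect of the `transformers.js` additions is easiest to see by filtering the table; a sketch, assuming the usual root re-exports of `TASKS_MODEL_LIBRARIES` and `PIPELINE_TYPES`:

```ts
import { TASKS_MODEL_LIBRARIES, PIPELINE_TYPES } from "@huggingface/tasks";

// Tasks that list transformers.js among their compatible libraries after this release.
const browserTasks = PIPELINE_TYPES.filter((task) => TASKS_MODEL_LIBRARIES[task].includes("transformers.js"));
console.log(browserTasks); // now includes "depth-estimation", "document-question-answering", "image-to-image", ...
```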
package/src/tasks/text-generation/about.md
CHANGED
@@ -32,6 +32,16 @@ The most popular models for this task are GPT-based models, [Mistral](mistralai/
 
 These models are trained to learn the mapping between a pair of texts (e.g. translation from one language to another). The most popular variants of these models are [NLLB](facebook/nllb-200-distilled-600M), [FLAN-T5](https://huggingface.co/google/flan-t5-xxl), and [BART](https://huggingface.co/docs/transformers/model_doc/bart). Text-to-Text models are trained with multi-tasking capabilities, they can accomplish a wide range of tasks, including summarization, translation, and text classification.
 
+## Language Model Variants
+
+When it comes to text generation, the underlying language model can come in several types:
+
+- **Base models:** refers to plain language models like [Mistral 7B](mistralai/Mistral-7B-v0.1) and [Llama-2-70b](https://huggingface.co/meta-llama/Llama-2-70b-hf). These models are good for fine-tuning and few-shot prompting.
+
+- **Instruction-trained models:** these models are trained in a multi-task manner to follow a broad range of instructions like "Write me a recipe for chocolate cake". Models like [Flan-T5](https://huggingface.co/google/flan-t5-xl), [Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1), and [falcon-40b-instruct](https://huggingface.co/tiiuae/falcon-40b-instruct) are examples of instruction-trained models. In general, instruction-trained models will produce better responses to instructions than base models.
+
+- **Human feedback models:** these models extend base and instruction-trained models by incorporating human feedback that rates the quality of the generated text according to criteria like [helpfulness, honesty, and harmlessness](https://arxiv.org/abs/2112.00861). The human feedback is then combined with an optimization technique like reinforcement learning to align the original model to be closer with human preferences. The overall methodology is often called [Reinforcement Learning from Human Feedback](https://huggingface.co/blog/rlhf), or RLHF for short. [Llama2-Chat](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) is an open-source model aligned through human feedback.
+
 ## Inference
 
 You can use the 🤗 Transformers library `text-generation` pipeline to do inference with Text Generation models. It takes an incomplete text and returns multiple outputs with which the text can be completed.
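Since `transformers.js` support is extended throughout this release, the JavaScript-side equivalent of that pipeline call can be sketched with transformers.js rather than the Python library the task page names; the checkpoint below is an arbitrary example, not something this diff pins down:

```ts
import { pipeline } from "@xenova/transformers";

// Text completion with the transformers.js text-generation pipeline.
// Xenova/distilgpt2 is just an example checkpoint with this pipeline_tag.
const generator = await pipeline("text-generation", "Xenova/distilgpt2");
const output = await generator("The answer to the universe is", { max_new_tokens: 30 });
console.log(output); // [{ generated_text: "The answer to the universe is ..." }]
```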
|