@huggingface/tasks 0.12.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -6768,6 +6768,24 @@ var SKUS = {
6768
6768
  };
6769
6769
 
6770
6770
  // src/local-apps.ts
6771
+ function isGgufModel(model) {
6772
+ return model.tags.includes("gguf");
6773
+ }
6774
+ function isAwqModel(model) {
6775
+ return model.config?.quantization_config?.quant_method === "awq";
6776
+ }
6777
+ function isGptqModel(model) {
6778
+ return model.config?.quantization_config?.quant_method === "gptq";
6779
+ }
6780
+ function isAqlmModel(model) {
6781
+ return model.config?.quantization_config?.quant_method === "aqlm";
6782
+ }
6783
+ function isMarlinModel(model) {
6784
+ return model.config?.quantization_config?.quant_method === "marlin";
6785
+ }
6786
+ function isTransformersModel(model) {
6787
+ return model.tags.includes("transformers");
6788
+ }
6771
6789
  function isLlamaCppGgufModel(model) {
6772
6790
  return !!model.gguf?.context_length;
6773
6791
  }
@@ -6827,6 +6845,46 @@ var snippetLocalAI = (model, filepath) => {
6827
6845
  }
6828
6846
  ];
6829
6847
  };
6848
+ var snippetVllm = (model) => {
6849
+ const runCommand = [
6850
+ "",
6851
+ "# Call the server using curl:",
6852
+ `curl -X POST "http://localhost:8000/v1/chat/completions" \\ `,
6853
+ ` -H "Content-Type: application/json" \\ `,
6854
+ ` --data '{`,
6855
+ ` "model": "${model.id}"`,
6856
+ ` "messages": [`,
6857
+ ` {"role": "user", "content": "Hello!"}`,
6858
+ ` ]`,
6859
+ ` }'`
6860
+ ];
6861
+ return [
6862
+ {
6863
+ title: "Install from pip",
6864
+ setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"),
6865
+ content: ["# Load and run the model:", `vllm serve "${model.id}"`, ...runCommand].join("\n")
6866
+ },
6867
+ {
6868
+ title: "Use Docker images",
6869
+ setup: [
6870
+ "# Deploy with docker on Linux:",
6871
+ `docker run --runtime nvidia --gpus all \\`,
6872
+ ` --name my_vllm_container \\`,
6873
+ ` -v ~/.cache/huggingface:/root/.cache/huggingface \\`,
6874
+ ` --env "HUGGING_FACE_HUB_TOKEN=<secret>" \\`,
6875
+ ` -p 8000:8000 \\`,
6876
+ ` --ipc=host \\`,
6877
+ ` vllm/vllm-openai:latest \\`,
6878
+ ` --model ${model.id}`
6879
+ ].join("\n"),
6880
+ content: [
6881
+ "# Load and run the model:",
6882
+ `docker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`,
6883
+ ...runCommand
6884
+ ].join("\n")
6885
+ }
6886
+ ];
6887
+ };
6830
6888
  var LOCAL_APPS = {
6831
6889
  "llama.cpp": {
6832
6890
  prettyLabel: "llama.cpp",
@@ -6835,6 +6893,13 @@ var LOCAL_APPS = {
6835
6893
  displayOnModelPage: isLlamaCppGgufModel,
6836
6894
  snippet: snippetLlamacpp
6837
6895
  },
6896
+ vllm: {
6897
+ prettyLabel: "vLLM",
6898
+ docsUrl: "https://docs.vllm.ai",
6899
+ mainTask: "text-generation",
6900
+ displayOnModelPage: (model) => isAwqModel(model) || isGptqModel(model) || isAqlmModel(model) || isMarlinModel(model) || isGgufModel(model) || isTransformersModel(model),
6901
+ snippet: snippetVllm
6902
+ },
6838
6903
  lmstudio: {
6839
6904
  prettyLabel: "LM Studio",
6840
6905
  docsUrl: "https://lmstudio.ai",
package/dist/index.js CHANGED
@@ -6730,6 +6730,24 @@ var SKUS = {
6730
6730
  };
6731
6731
 
6732
6732
  // src/local-apps.ts
6733
+ function isGgufModel(model) {
6734
+ return model.tags.includes("gguf");
6735
+ }
6736
+ function isAwqModel(model) {
6737
+ return model.config?.quantization_config?.quant_method === "awq";
6738
+ }
6739
+ function isGptqModel(model) {
6740
+ return model.config?.quantization_config?.quant_method === "gptq";
6741
+ }
6742
+ function isAqlmModel(model) {
6743
+ return model.config?.quantization_config?.quant_method === "aqlm";
6744
+ }
6745
+ function isMarlinModel(model) {
6746
+ return model.config?.quantization_config?.quant_method === "marlin";
6747
+ }
6748
+ function isTransformersModel(model) {
6749
+ return model.tags.includes("transformers");
6750
+ }
6733
6751
  function isLlamaCppGgufModel(model) {
6734
6752
  return !!model.gguf?.context_length;
6735
6753
  }
@@ -6789,6 +6807,46 @@ var snippetLocalAI = (model, filepath) => {
6789
6807
  }
6790
6808
  ];
6791
6809
  };
6810
+ var snippetVllm = (model) => {
6811
+ const runCommand = [
6812
+ "",
6813
+ "# Call the server using curl:",
6814
+ `curl -X POST "http://localhost:8000/v1/chat/completions" \\ `,
6815
+ ` -H "Content-Type: application/json" \\ `,
6816
+ ` --data '{`,
6817
+ ` "model": "${model.id}"`,
6818
+ ` "messages": [`,
6819
+ ` {"role": "user", "content": "Hello!"}`,
6820
+ ` ]`,
6821
+ ` }'`
6822
+ ];
6823
+ return [
6824
+ {
6825
+ title: "Install from pip",
6826
+ setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"),
6827
+ content: ["# Load and run the model:", `vllm serve "${model.id}"`, ...runCommand].join("\n")
6828
+ },
6829
+ {
6830
+ title: "Use Docker images",
6831
+ setup: [
6832
+ "# Deploy with docker on Linux:",
6833
+ `docker run --runtime nvidia --gpus all \\`,
6834
+ ` --name my_vllm_container \\`,
6835
+ ` -v ~/.cache/huggingface:/root/.cache/huggingface \\`,
6836
+ ` --env "HUGGING_FACE_HUB_TOKEN=<secret>" \\`,
6837
+ ` -p 8000:8000 \\`,
6838
+ ` --ipc=host \\`,
6839
+ ` vllm/vllm-openai:latest \\`,
6840
+ ` --model ${model.id}`
6841
+ ].join("\n"),
6842
+ content: [
6843
+ "# Load and run the model:",
6844
+ `docker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`,
6845
+ ...runCommand
6846
+ ].join("\n")
6847
+ }
6848
+ ];
6849
+ };
6792
6850
  var LOCAL_APPS = {
6793
6851
  "llama.cpp": {
6794
6852
  prettyLabel: "llama.cpp",
@@ -6797,6 +6855,13 @@ var LOCAL_APPS = {
6797
6855
  displayOnModelPage: isLlamaCppGgufModel,
6798
6856
  snippet: snippetLlamacpp
6799
6857
  },
6858
+ vllm: {
6859
+ prettyLabel: "vLLM",
6860
+ docsUrl: "https://docs.vllm.ai",
6861
+ mainTask: "text-generation",
6862
+ displayOnModelPage: (model) => isAwqModel(model) || isGptqModel(model) || isAqlmModel(model) || isMarlinModel(model) || isGgufModel(model) || isTransformersModel(model),
6863
+ snippet: snippetVllm
6864
+ },
6800
6865
  lmstudio: {
6801
6866
  prettyLabel: "LM Studio",
6802
6867
  docsUrl: "https://lmstudio.ai",
@@ -71,6 +71,13 @@ export declare const LOCAL_APPS: {
71
71
  displayOnModelPage: typeof isLlamaCppGgufModel;
72
72
  snippet: (model: ModelData, filepath?: string) => LocalAppSnippet[];
73
73
  };
74
+ vllm: {
75
+ prettyLabel: string;
76
+ docsUrl: string;
77
+ mainTask: "text-generation";
78
+ displayOnModelPage: (model: ModelData) => boolean;
79
+ snippet: (model: ModelData) => LocalAppSnippet[];
80
+ };
74
81
  lmstudio: {
75
82
  prettyLabel: string;
76
83
  docsUrl: string;
@@ -1 +1 @@
1
- {"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD,MAAM,WAAW,eAAe;IAC/B;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,GAAG,CAAC;CACtD,GACD;IACA;;;OAGG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,MAAM,GAAG,MAAM,EAAE,GAAG,eAAe,GAAG,eAAe,EAAE,CAAC;CACzG,CACH,CAAC;AAOF,iBAAS,mBAAmB,CAAC,KAAK,EAAE,SAAS,WAE5C;AA8DD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBAvES,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;;;;;;;;yBAqCjD,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAuJ3C,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
1
+ {"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD,MAAM,WAAW,eAAe;IAC/B;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,GAAG,CAAC;CACtD,GACD;IACA;;;OAGG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,MAAM,GAAG,MAAM,EAAE,GAAG,eAAe,GAAG,eAAe,EAAE,CAAC;CACzG,CACH,CAAC;AA0BF,iBAAS,mBAAmB,CAAC,KAAK,EAAE,SAAS,WAE5C;AAuGD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBAhHS,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;oCA4HlD,SAAS;yBAhEX,SAAS,KAAG,eAAe,EAAE;;;;;;;;;;;;;;yBAvB1B,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA6M3C,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
@@ -37,6 +37,10 @@ export interface ModelData {
37
37
  bits?: number;
38
38
  load_in_4bit?: boolean;
39
39
  load_in_8bit?: boolean;
40
+ /**
41
+ * awq, gptq, aqlm, marlin, … Used by vLLM
42
+ */
43
+ quant_method?: string;
40
44
  };
41
45
  tokenizer_config?: TokenizerConfig;
42
46
  adapter_transformers?: {
@@ -1 +1 @@
1
- {"version":3,"file":"model-data.d.ts","sourceRoot":"","sources":["../../src/model-data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAExD;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB;;OAEG;IACH,EAAE,EAAE,MAAM,CAAC;IACX;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;OAEG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;OAEG;IACH,MAAM,CAAC,EAAE;QACR,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;QACzB;;WAEG;QACH,QAAQ,CAAC,EAAE;YACV;;eAEG;YACH,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;SACpB,CAAC;QACF,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,mBAAmB,CAAC,EAAE;YACrB,IAAI,CAAC,EAAE,MAAM,CAAC;YACd,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB,YAAY,CAAC,EAAE,OAAO,CAAC;SACvB,CAAC;QACF,gBAAgB,CAAC,EAAE,eAAe,CAAC;QACnC,oBAAoB,CAAC,EAAE;YACtB,UAAU,CAAC,EAAE,MAAM,CAAC;YACpB,WAAW,CAAC,EAAE,MAAM,CAAC;SACrB,CAAC;QACF,SAAS,CAAC,EAAE;YACX,WAAW,CAAC,EAAE,MAAM,CAAC;SACrB,CAAC;QACF,OAAO,CAAC,EAAE;YACT,KAAK,CAAC,EAAE;gBACP,IAAI,CAAC,EAAE,MAAM,CAAC;aACd,CAAC;YACF,YAAY,CAAC,EAAE,MAAM,CAAC;SACtB,CAAC;QACF,WAAW,CAAC,EAAE;YACb,qBAAqB,CAAC,EAAE,MAAM,CAAC;YAC/B,iBAAiB,CAAC,EAAE,MAAM,CAAC;YAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;SAC1B,CAAC;QACF,IAAI,CAAC,EAAE;YACN,uBAAuB,CAAC,EAAE,MAAM,CAAC;YACjC,SAAS,CAAC,EAAE,MAAM,CAAC;SACnB,CAAC;KACF,CAAC;IACF;;OAEG;IACH,IAAI,EAAE,MAAM,EAAE,CAAC;IACf;;OAEG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IACpC;;OAEG;IACH,YAAY,CAAC,EAAE,YAAY,GAAG,SAAS,CAAC;IACxC;;OAEG;IACH,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAChC;;;;;OAKG;IACH,UAAU,CAAC,EAAE,aAAa,EAAE,GAAG,SAAS,CAAC;IACzC;;;;;;;;;OASG;IACH,QAAQ,CAAC,EAAE;QACV,SAAS,CAAC,EACP,OAAO,GACP;YACA,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;SACpC,CAAC;QACL,UAAU,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;KAC/B,CAAC;IACF;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE;QACb,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACnC,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,OAAO,CAAC;KACjB,CAAC;IACF,IAAI,CAAC,EAAE;QACN,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,cAAc,CAAC,EAAE,MAAM,CAAC;KACxB,CAAC;CACF;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;OAEG;IACH,UAAU,EAAE,MAAM,CAAC;IACnB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;OAEG;IACH,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB"}
1
+ {"version":3,"file":"model-data.d.ts","sourceRoot":"","sources":["../../src/model-data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAExD;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB;;OAEG;IACH,EAAE,EAAE,MAAM,CAAC;IACX;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;OAEG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;OAEG;IACH,MAAM,CAAC,EAAE;QACR,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;QACzB;;WAEG;QACH,QAAQ,CAAC,EAAE;YACV;;eAEG;YACH,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;SACpB,CAAC;QACF,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,mBAAmB,CAAC,EAAE;YACrB,IAAI,CAAC,EAAE,MAAM,CAAC;YACd,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB;;eAEG;YACH,YAAY,CAAC,EAAE,MAAM,CAAC;SACtB,CAAC;QACF,gBAAgB,CAAC,EAAE,eAAe,CAAC;QACnC,oBAAoB,CAAC,EAAE;YACtB,UAAU,CAAC,EAAE,MAAM,CAAC;YACpB,WAAW,CAAC,EAAE,MAAM,CAAC;SACrB,CAAC;QACF,SAAS,CAAC,EAAE;YACX,WAAW,CAAC,EAAE,MAAM,CAAC;SACrB,CAAC;QACF,OAAO,CAAC,EAAE;YACT,KAAK,CAAC,EAAE;gBACP,IAAI,CAAC,EAAE,MAAM,CAAC;aACd,CAAC;YACF,YAAY,CAAC,EAAE,MAAM,CAAC;SACtB,CAAC;QACF,WAAW,CAAC,EAAE;YACb,qBAAqB,CAAC,EAAE,MAAM,CAAC;YAC/B,iBAAiB,CAAC,EAAE,MAAM,CAAC;YAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;SAC1B,CAAC;QACF,IAAI,CAAC,EAAE;YACN,uBAAuB,CAAC,EAAE,MAAM,CAAC;YACjC,SAAS,CAAC,EAAE,MAAM,CAAC;SACnB,CAAC;KACF,CAAC;IACF;;OAEG;IACH,IAAI,EAAE,MAAM,EAAE,CAAC;IACf;;OAEG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IACpC;;OAEG;IACH,YAAY,CAAC,EAAE,YAAY,GAAG,SAAS,CAAC;IACxC;;OAEG;IACH,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAChC;;;;;OAKG;IACH,UAAU,CAAC,EAAE,aAAa,EAAE,GAAG,SAAS,CAAC;IACzC;;;;;;;;;OASG;IACH,QAAQ,CAAC,EAAE;QACV,SAAS,CAAC,EACP,OAAO,GACP;YACA,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;SACpC,CAAC;QACL,UAAU,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;KAC/B,CAAC;IACF;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE;QACb,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACnC,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,OAAO,CAAC;KACjB,CAAC;IACF,IAAI,CAAC,EAAE;QACN,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,cAAc,CAAC,EAAE,MAAM,CAAC;KACxB,CAAC;CACF;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;OAEG;IACH,UAAU,EAAE,MAAM,CAAC;IACnB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;OAEG;IACH,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB"}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@huggingface/tasks",
3
3
  "packageManager": "pnpm@8.10.5",
4
- "version": "0.12.0",
4
+ "version": "0.12.1",
5
5
  "description": "List of ML tasks for huggingface.co/tasks",
6
6
  "repository": "https://github.com/huggingface/huggingface.js.git",
7
7
  "publishConfig": {
package/src/local-apps.ts CHANGED
@@ -58,11 +58,30 @@ export type LocalApp = {
58
58
  }
59
59
  );
60
60
 
61
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
62
- function isGgufModel(model: ModelData) {
61
+ function isGgufModel(model: ModelData): boolean {
63
62
  return model.tags.includes("gguf");
64
63
  }
65
64
 
65
+ function isAwqModel(model: ModelData): boolean {
66
+ return model.config?.quantization_config?.quant_method === "awq";
67
+ }
68
+
69
+ function isGptqModel(model: ModelData): boolean {
70
+ return model.config?.quantization_config?.quant_method === "gptq";
71
+ }
72
+
73
+ function isAqlmModel(model: ModelData): boolean {
74
+ return model.config?.quantization_config?.quant_method === "aqlm";
75
+ }
76
+
77
+ function isMarlinModel(model: ModelData): boolean {
78
+ return model.config?.quantization_config?.quant_method === "marlin";
79
+ }
80
+
81
+ function isTransformersModel(model: ModelData): boolean {
82
+ return model.tags.includes("transformers");
83
+ }
84
+
66
85
  function isLlamaCppGgufModel(model: ModelData) {
67
86
  return !!model.gguf?.context_length;
68
87
  }
@@ -127,6 +146,47 @@ const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[]
127
146
  ];
128
147
  };
129
148
 
149
+ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
150
+ const runCommand = [
151
+ "",
152
+ "# Call the server using curl:",
153
+ `curl -X POST "http://localhost:8000/v1/chat/completions" \\ `,
154
+ ` -H "Content-Type: application/json" \\ `,
155
+ ` --data '{`,
156
+ ` "model": "${model.id}"`,
157
+ ` "messages": [`,
158
+ ` {"role": "user", "content": "Hello!"}`,
159
+ ` ]`,
160
+ ` }'`,
161
+ ];
162
+ return [
163
+ {
164
+ title: "Install from pip",
165
+ setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"),
166
+ content: ["# Load and run the model:", `vllm serve "${model.id}"`, ...runCommand].join("\n"),
167
+ },
168
+ {
169
+ title: "Use Docker images",
170
+ setup: [
171
+ "# Deploy with docker on Linux:",
172
+ `docker run --runtime nvidia --gpus all \\`,
173
+ ` --name my_vllm_container \\`,
174
+ ` -v ~/.cache/huggingface:/root/.cache/huggingface \\`,
175
+ ` --env "HUGGING_FACE_HUB_TOKEN=<secret>" \\`,
176
+ ` -p 8000:8000 \\`,
177
+ ` --ipc=host \\`,
178
+ ` vllm/vllm-openai:latest \\`,
179
+ ` --model ${model.id}`,
180
+ ].join("\n"),
181
+ content: [
182
+ "# Load and run the model:",
183
+ `docker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`,
184
+ ...runCommand,
185
+ ].join("\n"),
186
+ },
187
+ ];
188
+ };
189
+
130
190
  /**
131
191
  * Add your new local app here.
132
192
  *
@@ -146,6 +206,19 @@ export const LOCAL_APPS = {
146
206
  displayOnModelPage: isLlamaCppGgufModel,
147
207
  snippet: snippetLlamacpp,
148
208
  },
209
+ vllm: {
210
+ prettyLabel: "vLLM",
211
+ docsUrl: "https://docs.vllm.ai",
212
+ mainTask: "text-generation",
213
+ displayOnModelPage: (model: ModelData) =>
214
+ isAwqModel(model) ||
215
+ isGptqModel(model) ||
216
+ isAqlmModel(model) ||
217
+ isMarlinModel(model) ||
218
+ isGgufModel(model) ||
219
+ isTransformersModel(model),
220
+ snippet: snippetVllm,
221
+ },
149
222
  lmstudio: {
150
223
  prettyLabel: "LM Studio",
151
224
  docsUrl: "https://lmstudio.ai",
package/src/model-data.ts CHANGED
@@ -38,6 +38,10 @@ export interface ModelData {
38
38
  bits?: number;
39
39
  load_in_4bit?: boolean;
40
40
  load_in_8bit?: boolean;
41
+ /**
42
+ * awq, gptq, aqlm, marlin, … Used by vLLM
43
+ */
44
+ quant_method?: string;
41
45
  };
42
46
  tokenizer_config?: TokenizerConfig;
43
47
  adapter_transformers?: {