@huggingface/tasks 0.12.0 → 0.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +65 -0
- package/dist/index.js +65 -0
- package/dist/src/local-apps.d.ts +7 -0
- package/dist/src/local-apps.d.ts.map +1 -1
- package/dist/src/model-data.d.ts +4 -0
- package/dist/src/model-data.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/local-apps.ts +75 -2
- package/src/model-data.ts +4 -0
package/dist/index.cjs
CHANGED
|
@@ -6768,6 +6768,24 @@ var SKUS = {
|
|
|
6768
6768
|
};
|
|
6769
6769
|
|
|
6770
6770
|
// src/local-apps.ts
|
|
6771
|
+
function isGgufModel(model) {
|
|
6772
|
+
return model.tags.includes("gguf");
|
|
6773
|
+
}
|
|
6774
|
+
function isAwqModel(model) {
|
|
6775
|
+
return model.config?.quantization_config?.quant_method === "awq";
|
|
6776
|
+
}
|
|
6777
|
+
function isGptqModel(model) {
|
|
6778
|
+
return model.config?.quantization_config?.quant_method === "gptq";
|
|
6779
|
+
}
|
|
6780
|
+
function isAqlmModel(model) {
|
|
6781
|
+
return model.config?.quantization_config?.quant_method === "aqlm";
|
|
6782
|
+
}
|
|
6783
|
+
function isMarlinModel(model) {
|
|
6784
|
+
return model.config?.quantization_config?.quant_method === "marlin";
|
|
6785
|
+
}
|
|
6786
|
+
function isTransformersModel(model) {
|
|
6787
|
+
return model.tags.includes("transformers");
|
|
6788
|
+
}
|
|
6771
6789
|
function isLlamaCppGgufModel(model) {
|
|
6772
6790
|
return !!model.gguf?.context_length;
|
|
6773
6791
|
}
|
|
@@ -6827,6 +6845,46 @@ var snippetLocalAI = (model, filepath) => {
|
|
|
6827
6845
|
}
|
|
6828
6846
|
];
|
|
6829
6847
|
};
|
|
6848
|
+
// Build the two vLLM local-app snippets (pip install + official Docker image).
// Both start an OpenAI-compatible server for `model.id` and append a shared
// curl example showing how to query it.
var snippetVllm = (model) => {
  // Shared curl example appended to both snippets.
  // Fix: no space after "\" — a trailing space after the backslash breaks
  // shell line continuation. Also: the JSON body needs a comma after the
  // "model" field or the payload is invalid JSON.
  const runCommand = [
    "",
    "# Call the server using curl:",
    `curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
    `	-H "Content-Type: application/json" \\`,
    `	--data '{`,
    `		"model": "${model.id}",`,
    `		"messages": [`,
    `			{"role": "user", "content": "Hello!"}`,
    `		]`,
    `	}'`
  ];
  return [
    {
      title: "Install from pip",
      setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"),
      content: ["# Load and run the model:", `vllm serve "${model.id}"`, ...runCommand].join("\n")
    },
    {
      title: "Use Docker images",
      setup: [
        "# Deploy with docker on Linux:",
        `docker run --runtime nvidia --gpus all \\`,
        `	--name my_vllm_container \\`,
        `	-v ~/.cache/huggingface:/root/.cache/huggingface \\`,
        `	--env "HUGGING_FACE_HUB_TOKEN=<secret>" \\`,
        `	-p 8000:8000 \\`,
        `	--ipc=host \\`,
        `	vllm/vllm-openai:latest \\`,
        `	--model ${model.id}`
      ].join("\n"),
      content: [
        "# Load and run the model:",
        `docker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`,
        ...runCommand
      ].join("\n")
    }
  ];
};
|
|
6830
6888
|
var LOCAL_APPS = {
|
|
6831
6889
|
"llama.cpp": {
|
|
6832
6890
|
prettyLabel: "llama.cpp",
|
|
@@ -6835,6 +6893,13 @@ var LOCAL_APPS = {
|
|
|
6835
6893
|
displayOnModelPage: isLlamaCppGgufModel,
|
|
6836
6894
|
snippet: snippetLlamacpp
|
|
6837
6895
|
},
|
|
6896
|
+
vllm: {
|
|
6897
|
+
prettyLabel: "vLLM",
|
|
6898
|
+
docsUrl: "https://docs.vllm.ai",
|
|
6899
|
+
mainTask: "text-generation",
|
|
6900
|
+
displayOnModelPage: (model) => isAwqModel(model) || isGptqModel(model) || isAqlmModel(model) || isMarlinModel(model) || isGgufModel(model) || isTransformersModel(model),
|
|
6901
|
+
snippet: snippetVllm
|
|
6902
|
+
},
|
|
6838
6903
|
lmstudio: {
|
|
6839
6904
|
prettyLabel: "LM Studio",
|
|
6840
6905
|
docsUrl: "https://lmstudio.ai",
|
package/dist/index.js
CHANGED
|
@@ -6730,6 +6730,24 @@ var SKUS = {
|
|
|
6730
6730
|
};
|
|
6731
6731
|
|
|
6732
6732
|
// src/local-apps.ts
|
|
6733
|
+
function isGgufModel(model) {
|
|
6734
|
+
return model.tags.includes("gguf");
|
|
6735
|
+
}
|
|
6736
|
+
function isAwqModel(model) {
|
|
6737
|
+
return model.config?.quantization_config?.quant_method === "awq";
|
|
6738
|
+
}
|
|
6739
|
+
function isGptqModel(model) {
|
|
6740
|
+
return model.config?.quantization_config?.quant_method === "gptq";
|
|
6741
|
+
}
|
|
6742
|
+
function isAqlmModel(model) {
|
|
6743
|
+
return model.config?.quantization_config?.quant_method === "aqlm";
|
|
6744
|
+
}
|
|
6745
|
+
function isMarlinModel(model) {
|
|
6746
|
+
return model.config?.quantization_config?.quant_method === "marlin";
|
|
6747
|
+
}
|
|
6748
|
+
function isTransformersModel(model) {
|
|
6749
|
+
return model.tags.includes("transformers");
|
|
6750
|
+
}
|
|
6733
6751
|
function isLlamaCppGgufModel(model) {
|
|
6734
6752
|
return !!model.gguf?.context_length;
|
|
6735
6753
|
}
|
|
@@ -6789,6 +6807,46 @@ var snippetLocalAI = (model, filepath) => {
|
|
|
6789
6807
|
}
|
|
6790
6808
|
];
|
|
6791
6809
|
};
|
|
6810
|
+
// Build the two vLLM local-app snippets (pip install + official Docker image).
// Both start an OpenAI-compatible server for `model.id` and append a shared
// curl example showing how to query it.
var snippetVllm = (model) => {
  // Shared curl example appended to both snippets.
  // Fix: no space after "\" — a trailing space after the backslash breaks
  // shell line continuation. Also: the JSON body needs a comma after the
  // "model" field or the payload is invalid JSON.
  const runCommand = [
    "",
    "# Call the server using curl:",
    `curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
    `	-H "Content-Type: application/json" \\`,
    `	--data '{`,
    `		"model": "${model.id}",`,
    `		"messages": [`,
    `			{"role": "user", "content": "Hello!"}`,
    `		]`,
    `	}'`
  ];
  return [
    {
      title: "Install from pip",
      setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"),
      content: ["# Load and run the model:", `vllm serve "${model.id}"`, ...runCommand].join("\n")
    },
    {
      title: "Use Docker images",
      setup: [
        "# Deploy with docker on Linux:",
        `docker run --runtime nvidia --gpus all \\`,
        `	--name my_vllm_container \\`,
        `	-v ~/.cache/huggingface:/root/.cache/huggingface \\`,
        `	--env "HUGGING_FACE_HUB_TOKEN=<secret>" \\`,
        `	-p 8000:8000 \\`,
        `	--ipc=host \\`,
        `	vllm/vllm-openai:latest \\`,
        `	--model ${model.id}`
      ].join("\n"),
      content: [
        "# Load and run the model:",
        `docker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`,
        ...runCommand
      ].join("\n")
    }
  ];
};
|
|
6792
6850
|
var LOCAL_APPS = {
|
|
6793
6851
|
"llama.cpp": {
|
|
6794
6852
|
prettyLabel: "llama.cpp",
|
|
@@ -6797,6 +6855,13 @@ var LOCAL_APPS = {
|
|
|
6797
6855
|
displayOnModelPage: isLlamaCppGgufModel,
|
|
6798
6856
|
snippet: snippetLlamacpp
|
|
6799
6857
|
},
|
|
6858
|
+
vllm: {
|
|
6859
|
+
prettyLabel: "vLLM",
|
|
6860
|
+
docsUrl: "https://docs.vllm.ai",
|
|
6861
|
+
mainTask: "text-generation",
|
|
6862
|
+
displayOnModelPage: (model) => isAwqModel(model) || isGptqModel(model) || isAqlmModel(model) || isMarlinModel(model) || isGgufModel(model) || isTransformersModel(model),
|
|
6863
|
+
snippet: snippetVllm
|
|
6864
|
+
},
|
|
6800
6865
|
lmstudio: {
|
|
6801
6866
|
prettyLabel: "LM Studio",
|
|
6802
6867
|
docsUrl: "https://lmstudio.ai",
|
package/dist/src/local-apps.d.ts
CHANGED
|
@@ -71,6 +71,13 @@ export declare const LOCAL_APPS: {
|
|
|
71
71
|
displayOnModelPage: typeof isLlamaCppGgufModel;
|
|
72
72
|
snippet: (model: ModelData, filepath?: string) => LocalAppSnippet[];
|
|
73
73
|
};
|
|
74
|
+
vllm: {
|
|
75
|
+
prettyLabel: string;
|
|
76
|
+
docsUrl: string;
|
|
77
|
+
mainTask: "text-generation";
|
|
78
|
+
displayOnModelPage: (model: ModelData) => boolean;
|
|
79
|
+
snippet: (model: ModelData) => LocalAppSnippet[];
|
|
80
|
+
};
|
|
74
81
|
lmstudio: {
|
|
75
82
|
prettyLabel: string;
|
|
76
83
|
docsUrl: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD,MAAM,WAAW,eAAe;IAC/B;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,GAAG,CAAC;CACtD,GACD;IACA;;;OAGG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,MAAM,GAAG,MAAM,EAAE,GAAG,eAAe,GAAG,eAAe,EAAE,CAAC;CACzG,CACH,CAAC;
|
|
1
|
+
{"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD,MAAM,WAAW,eAAe;IAC/B;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,GAAG,CAAC;CACtD,GACD;IACA;;;OAGG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,MAAM,GAAG,MAAM,EAAE,GAAG,eAAe,GAAG,eAAe,EAAE,CAAC;CACzG,CACH,CAAC;AA0BF,iBAAS,mBAAmB,CAAC,KAAK,EAAE,SAAS,WAE5C;AAuGD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBAhHS,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;oCA4HlD,SAAS;yBAhEX,SAAS,KAAG,eAAe,EAAE;;;;;;;;;;;;;;yBAvB1B,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA6M3C,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
|
package/dist/src/model-data.d.ts
CHANGED
|
@@ -37,6 +37,10 @@ export interface ModelData {
|
|
|
37
37
|
bits?: number;
|
|
38
38
|
load_in_4bit?: boolean;
|
|
39
39
|
load_in_8bit?: boolean;
|
|
40
|
+
/**
|
|
41
|
+
* awq, gptq, aqlm, marlin, … Used by vLLM
|
|
42
|
+
*/
|
|
43
|
+
quant_method?: string;
|
|
40
44
|
};
|
|
41
45
|
tokenizer_config?: TokenizerConfig;
|
|
42
46
|
adapter_transformers?: {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"model-data.d.ts","sourceRoot":"","sources":["../../src/model-data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAExD;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB;;OAEG;IACH,EAAE,EAAE,MAAM,CAAC;IACX;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;OAEG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;OAEG;IACH,MAAM,CAAC,EAAE;QACR,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;QACzB;;WAEG;QACH,QAAQ,CAAC,EAAE;YACV;;eAEG;YACH,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;SACpB,CAAC;QACF,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,mBAAmB,CAAC,EAAE;YACrB,IAAI,CAAC,EAAE,MAAM,CAAC;YACd,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB,YAAY,CAAC,EAAE,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"model-data.d.ts","sourceRoot":"","sources":["../../src/model-data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAExD;;GAEG;AACH,MAAM,WAAW,SAAS;IACzB;;OAEG;IACH,EAAE,EAAE,MAAM,CAAC;IACX;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;OAEG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;OAEG;IACH,MAAM,CAAC,EAAE;QACR,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;QACzB;;WAEG;QACH,QAAQ,CAAC,EAAE;YACV;;eAEG;YACH,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;SACpB,CAAC;QACF,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,mBAAmB,CAAC,EAAE;YACrB,IAAI,CAAC,EAAE,MAAM,CAAC;YACd,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB;;eAEG;YACH,YAAY,CAAC,EAAE,MAAM,CAAC;SACtB,CAAC;QACF,gBAAgB,CAAC,EAAE,eAAe,CAAC;QACnC,oBAAoB,CAAC,EAAE;YACtB,UAAU,CAAC,EAAE,MAAM,CAAC;YACpB,WAAW,CAAC,EAAE,MAAM,CAAC;SACrB,CAAC;QACF,SAAS,CAAC,EAAE;YACX,WAAW,CAAC,EAAE,MAAM,CAAC;SACrB,CAAC;QACF,OAAO,CAAC,EAAE;YACT,KAAK,CAAC,EAAE;gBACP,IAAI,CAAC,EAAE,MAAM,CAAC;aACd,CAAC;YACF,YAAY,CAAC,EAAE,MAAM,CAAC;SACtB,CAAC;QACF,WAAW,CAAC,EAAE;YACb,qBAAqB,CAAC,EAAE,MAAM,CAAC;YAC/B,iBAAiB,CAAC,EAAE,MAAM,CAAC;YAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;SAC1B,CAAC;QACF,IAAI,CAAC,EAAE;YACN,uBAAuB,CAAC,EAAE,MAAM,CAAC;YACjC,SAAS,CAAC,EAAE,MAAM,CAAC;SACnB,CAAC;KACF,CAAC;IACF;;OAEG;IACH,IAAI,EAAE,MAAM,EAAE,CAAC;IACf;;OAEG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IACpC;;OAEG;IACH,YAAY,CAAC,EAAE,YAAY,GAAG,SAAS,CAAC;IACxC;;OAEG;IACH,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAChC;;;;;OAKG;IACH,UAAU,CAAC,EAAE,aAAa,EAAE,GAAG,SAAS,CAAC;IACzC;;;;;;;;;OASG;IACH,QAAQ,CAAC,EAAE;QACV,SAAS,CAAC,EACP,OAAO,GACP;YACA,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;SACpC,CAAC;QACL,UAAU,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;KAC/B,CAAC;IACF;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE;QACb,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACnC,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,OAAO,CAAC;KACjB,CAAC;IACF,IAAI,CAAC,EAAE;QACN,KAAK,EAAE,MAAM,CAAC;QACd,YAAY,CAAC,EAAE,MAAM,CAAC;QA
CtB,cAAc,CAAC,EAAE,MAAM,CAAC;KACxB,CAAC;CACF;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;OAEG;IACH,UAAU,EAAE,MAAM,CAAC;IACnB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;OAEG;IACH,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@huggingface/tasks",
|
|
3
3
|
"packageManager": "pnpm@8.10.5",
|
|
4
|
-
"version": "0.12.
|
|
4
|
+
"version": "0.12.1",
|
|
5
5
|
"description": "List of ML tasks for huggingface.co/tasks",
|
|
6
6
|
"repository": "https://github.com/huggingface/huggingface.js.git",
|
|
7
7
|
"publishConfig": {
|
package/src/local-apps.ts
CHANGED
|
@@ -58,11 +58,30 @@ export type LocalApp = {
|
|
|
58
58
|
}
|
|
59
59
|
);
|
|
60
60
|
|
|
61
|
-
|
|
62
|
-
function isGgufModel(model: ModelData) {
|
|
61
|
+
function isGgufModel(model: ModelData): boolean {
|
|
63
62
|
return model.tags.includes("gguf");
|
|
64
63
|
}
|
|
65
64
|
|
|
65
|
+
function isAwqModel(model: ModelData): boolean {
|
|
66
|
+
return model.config?.quantization_config?.quant_method === "awq";
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
function isGptqModel(model: ModelData): boolean {
|
|
70
|
+
return model.config?.quantization_config?.quant_method === "gptq";
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
function isAqlmModel(model: ModelData): boolean {
|
|
74
|
+
return model.config?.quantization_config?.quant_method === "aqlm";
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
function isMarlinModel(model: ModelData): boolean {
|
|
78
|
+
return model.config?.quantization_config?.quant_method === "marlin";
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
function isTransformersModel(model: ModelData): boolean {
|
|
82
|
+
return model.tags.includes("transformers");
|
|
83
|
+
}
|
|
84
|
+
|
|
66
85
|
function isLlamaCppGgufModel(model: ModelData) {
|
|
67
86
|
return !!model.gguf?.context_length;
|
|
68
87
|
}
|
|
@@ -127,6 +146,47 @@ const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[]
|
|
|
127
146
|
];
|
|
128
147
|
};
|
|
129
148
|
|
|
149
|
+
const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
|
|
150
|
+
const runCommand = [
|
|
151
|
+
"",
|
|
152
|
+
"# Call the server using curl:",
|
|
153
|
+
`curl -X POST "http://localhost:8000/v1/chat/completions" \\ `,
|
|
154
|
+
` -H "Content-Type: application/json" \\ `,
|
|
155
|
+
` --data '{`,
|
|
156
|
+
` "model": "${model.id}"`,
|
|
157
|
+
` "messages": [`,
|
|
158
|
+
` {"role": "user", "content": "Hello!"}`,
|
|
159
|
+
` ]`,
|
|
160
|
+
` }'`,
|
|
161
|
+
];
|
|
162
|
+
return [
|
|
163
|
+
{
|
|
164
|
+
title: "Install from pip",
|
|
165
|
+
setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"),
|
|
166
|
+
content: ["# Load and run the model:", `vllm serve "${model.id}"`, ...runCommand].join("\n"),
|
|
167
|
+
},
|
|
168
|
+
{
|
|
169
|
+
title: "Use Docker images",
|
|
170
|
+
setup: [
|
|
171
|
+
"# Deploy with docker on Linux:",
|
|
172
|
+
`docker run --runtime nvidia --gpus all \\`,
|
|
173
|
+
` --name my_vllm_container \\`,
|
|
174
|
+
` -v ~/.cache/huggingface:/root/.cache/huggingface \\`,
|
|
175
|
+
` --env "HUGGING_FACE_HUB_TOKEN=<secret>" \\`,
|
|
176
|
+
` -p 8000:8000 \\`,
|
|
177
|
+
` --ipc=host \\`,
|
|
178
|
+
` vllm/vllm-openai:latest \\`,
|
|
179
|
+
` --model ${model.id}`,
|
|
180
|
+
].join("\n"),
|
|
181
|
+
content: [
|
|
182
|
+
"# Load and run the model:",
|
|
183
|
+
`docker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`,
|
|
184
|
+
...runCommand,
|
|
185
|
+
].join("\n"),
|
|
186
|
+
},
|
|
187
|
+
];
|
|
188
|
+
};
|
|
189
|
+
|
|
130
190
|
/**
|
|
131
191
|
* Add your new local app here.
|
|
132
192
|
*
|
|
@@ -146,6 +206,19 @@ export const LOCAL_APPS = {
|
|
|
146
206
|
displayOnModelPage: isLlamaCppGgufModel,
|
|
147
207
|
snippet: snippetLlamacpp,
|
|
148
208
|
},
|
|
209
|
+
vllm: {
|
|
210
|
+
prettyLabel: "vLLM",
|
|
211
|
+
docsUrl: "https://docs.vllm.ai",
|
|
212
|
+
mainTask: "text-generation",
|
|
213
|
+
displayOnModelPage: (model: ModelData) =>
|
|
214
|
+
isAwqModel(model) ||
|
|
215
|
+
isGptqModel(model) ||
|
|
216
|
+
isAqlmModel(model) ||
|
|
217
|
+
isMarlinModel(model) ||
|
|
218
|
+
isGgufModel(model) ||
|
|
219
|
+
isTransformersModel(model),
|
|
220
|
+
snippet: snippetVllm,
|
|
221
|
+
},
|
|
149
222
|
lmstudio: {
|
|
150
223
|
prettyLabel: "LM Studio",
|
|
151
224
|
docsUrl: "https://lmstudio.ai",
|
package/src/model-data.ts
CHANGED
|
@@ -38,6 +38,10 @@ export interface ModelData {
|
|
|
38
38
|
bits?: number;
|
|
39
39
|
load_in_4bit?: boolean;
|
|
40
40
|
load_in_8bit?: boolean;
|
|
41
|
+
/**
|
|
42
|
+
* awq, gptq, aqlm, marlin, … Used by vLLM
|
|
43
|
+
*/
|
|
44
|
+
quant_method?: string;
|
|
41
45
|
};
|
|
42
46
|
tokenizer_config?: TokenizerConfig;
|
|
43
47
|
adapter_transformers?: {
|