@huggingface/tasks 0.20.3 → 0.20.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commonjs/gguf.d.ts +3 -1
- package/dist/commonjs/gguf.d.ts.map +1 -1
- package/dist/commonjs/gguf.js +3 -0
- package/dist/commonjs/local-apps.d.ts +8 -0
- package/dist/commonjs/local-apps.d.ts.map +1 -1
- package/dist/commonjs/local-apps.js +82 -10
- package/dist/commonjs/local-apps.spec.js +72 -0
- package/dist/esm/gguf.d.ts +3 -1
- package/dist/esm/gguf.d.ts.map +1 -1
- package/dist/esm/gguf.js +3 -0
- package/dist/esm/local-apps.d.ts +8 -0
- package/dist/esm/local-apps.d.ts.map +1 -1
- package/dist/esm/local-apps.js +82 -10
- package/dist/esm/local-apps.spec.js +72 -0
- package/package.json +1 -1
- package/src/gguf.ts +3 -0
- package/src/local-apps.spec.ts +92 -0
- package/src/local-apps.ts +92 -10
package/dist/commonjs/gguf.d.ts
CHANGED
@@ -38,6 +38,7 @@ export declare enum GGMLFileQuantizationType {
     TQ1_0 = 36,
     TQ2_0 = 37,
     MXFP4_MOE = 38,
+    NVFP4 = 39,
     Q2_K_XL = 1000,
     Q3_K_XL = 1001,
     Q4_K_XL = 1002,
@@ -82,6 +83,7 @@ export declare enum GGMLQuantizationType {
     BF16 = 30,
     TQ1_0 = 34,
     TQ2_0 = 35,
-    MXFP4 = 39
+    MXFP4 = 39,
+    NVFP4 = 40
 }
 //# sourceMappingURL=gguf.d.ts.map
package/dist/commonjs/gguf.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"gguf.d.ts","sourceRoot":"","sources":["../../src/gguf.ts"],"names":[],"mappings":"AAGA,oBAAY,wBAAwB;IACnC,GAAG,IAAI;IACP,GAAG,IAAI;IACP,IAAI,IAAI;IACR,IAAI,IAAI;IACR,aAAa,IAAI;IACjB,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,KAAK;IACT,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,IAAI,KAAK;IACT,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,OAAO,KAAK;IACZ,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,IAAI,KAAK;IACT,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,KAAK,KAAK;IACV,KAAK,KAAK;IACV,SAAS,KAAK;
+{"version":3,"file":"gguf.d.ts","sourceRoot":"","sources":["../../src/gguf.ts"],"names":[],"mappings":"AAGA,oBAAY,wBAAwB;IACnC,GAAG,IAAI;IACP,GAAG,IAAI;IACP,IAAI,IAAI;IACR,IAAI,IAAI;IACR,aAAa,IAAI;IACjB,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,KAAK;IACT,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,IAAI,KAAK;IACT,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,OAAO,KAAK;IACZ,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,IAAI,KAAK;IACT,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,KAAK,KAAK;IACV,KAAK,KAAK;IACV,SAAS,KAAK;IACd,KAAK,KAAK;IAIV,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;CACd;AAGD,eAAO,MAAM,aAAa,QAEzB,CAAC;AACF,eAAO,MAAM,oBAAoB,QAAiC,CAAC;AAEnE,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAGrE;AAKD,eAAO,MAAM,gBAAgB,EAAE,wBAAwB,EA2DtD,CAAC;AAIF,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,wBAAwB,EAC/B,eAAe,EAAE,wBAAwB,EAAE,GACzC,wBAAwB,GAAG,SAAS,CAmCtC;AAGD,oBAAY,oBAAoB;IAC/B,GAAG,IAAI;IACP,GAAG,IAAI;IACP,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,OAAO,KAAK;IACZ,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,KAAK,KAAK;IACV,MAAM,KAAK;IACX,EAAE,KAAK;IACP,GAAG,KAAK;IACR,GAAG,KAAK;IACR,GAAG,KAAK;IACR,GAAG,KAAK;IACR,KAAK,KAAK;IACV,IAAI,KAAK;IACT,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;CACV"}
package/dist/commonjs/gguf.js
CHANGED
@@ -47,6 +47,7 @@ var GGMLFileQuantizationType;
     GGMLFileQuantizationType[GGMLFileQuantizationType["TQ1_0"] = 36] = "TQ1_0";
     GGMLFileQuantizationType[GGMLFileQuantizationType["TQ2_0"] = 37] = "TQ2_0";
     GGMLFileQuantizationType[GGMLFileQuantizationType["MXFP4_MOE"] = 38] = "MXFP4_MOE";
+    GGMLFileQuantizationType[GGMLFileQuantizationType["NVFP4"] = 39] = "NVFP4";
     // custom quants used by unsloth
     // they are not officially a scheme enum value in GGUF, but only here for naming
     GGMLFileQuantizationType[GGMLFileQuantizationType["Q2_K_XL"] = 1000] = "Q2_K_XL";
@@ -96,6 +97,7 @@ exports.GGUF_QUANT_ORDER = [
     GGMLFileQuantizationType.Q4_2,
     GGMLFileQuantizationType.Q4_3,
     GGMLFileQuantizationType.MXFP4_MOE,
+    GGMLFileQuantizationType.NVFP4,
     // 3-bit quantizations
     GGMLFileQuantizationType.Q3_K_XL,
     GGMLFileQuantizationType.Q3_K_L,
@@ -187,4 +189,5 @@ var GGMLQuantizationType;
     GGMLQuantizationType[GGMLQuantizationType["TQ1_0"] = 34] = "TQ1_0";
     GGMLQuantizationType[GGMLQuantizationType["TQ2_0"] = 35] = "TQ2_0";
     GGMLQuantizationType[GGMLQuantizationType["MXFP4"] = 39] = "MXFP4";
+    GGMLQuantizationType[GGMLQuantizationType["NVFP4"] = 40] = "NVFP4";
 })(GGMLQuantizationType || (exports.GGMLQuantizationType = GGMLQuantizationType = {}));
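Note that the two enums above are distinct: GGMLFileQuantizationType names the file-level quantization label (NVFP4 = 39, right after MXFP4_MOE = 38), while GGMLQuantizationType names the tensor-level GGML dtype (NVFP4 = 40). A minimal sketch of what the addition exposes, assuming these gguf exports are re-exported from the package root:

import { GGMLFileQuantizationType, GGMLQuantizationType, GGUF_QUANT_ORDER } from "@huggingface/tasks";

// File-level label and tensor-level dtype carry different numeric codes.
console.log(GGMLFileQuantizationType.NVFP4); // 39
console.log(GGMLQuantizationType.NVFP4); // 40

// NVFP4 is slotted into the quant ordering directly after MXFP4_MOE,
// i.e. it is grouped with the other 4-bit quantizations.
const i = GGUF_QUANT_ORDER.indexOf(GGMLFileQuantizationType.NVFP4);
console.log(GGUF_QUANT_ORDER[i - 1] === GGMLFileQuantizationType.MXFP4_MOE); // true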
package/dist/commonjs/local-apps.d.ts
CHANGED
@@ -56,6 +56,7 @@ declare function isTgiModel(model: ModelData): boolean;
 declare function isLlamaCppGgufModel(model: ModelData): boolean;
 declare function isVllmModel(model: ModelData): boolean;
 declare function isDockerModelRunnerModel(model: ModelData): boolean;
+declare function isUnslothModel(model: ModelData): boolean;
 /**
  * Add your new local app here.
  *
@@ -198,6 +199,13 @@ export declare const LOCAL_APPS: {
         displayOnModelPage: typeof isLlamaCppGgufModel;
         snippet: (model: ModelData, filepath?: string) => string;
     };
+    unsloth: {
+        prettyLabel: string;
+        docsUrl: string;
+        mainTask: "text-generation";
+        displayOnModelPage: typeof isUnslothModel;
+        snippet: (model: ModelData) => LocalAppSnippet[];
+    };
     "docker-model-runner": {
         prettyLabel: string;
         docsUrl: string;
package/dist/commonjs/local-apps.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAKnD,MAAM,WAAW,eAAe;IAC/B;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,GAAG,CAAC;CACtD,GACD;IACA;;;;OAIG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,MAAM,GAAG,MAAM,EAAE,GAAG,eAAe,GAAG,eAAe,EAAE,CAAC;CACzG,CACH,CAAC;AAsBF,iBAAS,UAAU,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE7C;AAED,iBAAS,mBAAmB,CAAC,KAAK,EAAE,SAAS,WAE5C;AAED,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAU9C;AAED,iBAAS,wBAAwB,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE3D;
+{"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAKnD,MAAM,WAAW,eAAe;IAC/B;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,GAAG,CAAC;CACtD,GACD;IACA;;;;OAIG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,MAAM,GAAG,MAAM,EAAE,GAAG,eAAe,GAAG,eAAe,EAAE,CAAC;CACzG,CACH,CAAC;AAsBF,iBAAS,UAAU,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE7C;AAED,iBAAS,mBAAmB,CAAC,KAAK,EAAE,SAAS,WAE5C;AAED,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAU9C;AAED,iBAAS,wBAAwB,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE3D;AA0BD,iBAAS,cAAc,CAAC,KAAK,EAAE,SAAS,WAEvC;AA6ZD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBA3ZS,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBAiDzC,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBAmF3D,SAAS,KAAG,eAAe,EAAE;;;;;;oCAiT3B,SAAS;yBA3PT,SAAS,KAAG,eAAe,EAAE;;;;;;;yBAoF9B,SAAS,KAAG,eAAe,EAAE;;;;;;;yBA7B/B,SAAS,KAAG,eAAe,EAAE;;;;;;;;;;;;;;yBApIzB,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;yBA9CjD,SAAS,aAAa,MAAM,KAAG,MAAM;;;;;;;yBAIpC,SAAS,KAAG,eAAe,EAAE;;;;;;;yBA6RnB,SAAS,aAAa,MAAM,KAAG,MAAM;;;;;;;yBAM9C,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBArDtD,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;CAuStC,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
package/dist/commonjs/local-apps.js
CHANGED
@@ -43,6 +43,23 @@ function isAmdRyzenModel(model) {
 function isMlxModel(model) {
     return model.tags.includes("mlx");
 }
+/**
+ * Returns the model's chat template string, coalescing across sources:
+ * GGUF metadata > chat_template_jinja file > tokenizer_config.json
+ */
+function getChatTemplate(model) {
+    const ct = model.gguf?.chat_template ?? model.config?.chat_template_jinja ?? model.config?.tokenizer_config?.chat_template;
+    if (typeof ct === "string") {
+        return ct;
+    }
+    if (Array.isArray(ct)) {
+        return ct[0]?.template;
+    }
+    return undefined;
+}
+function isUnslothModel(model) {
+    return model.tags.includes("unsloth") || isLlamaCppGgufModel(model);
+}
 function getQuantTag(filepath) {
     const defaultTag = ":{{QUANT_TAG}}";
     if (!filepath) {
@@ -115,6 +132,43 @@ const snippetNodeLlamaCppCli = (model, filepath) => {
 const snippetOllama = (model, filepath) => {
     return `ollama run hf.co/${model.id}${getQuantTag(filepath)}`;
 };
+const snippetUnsloth = (model) => {
+    const isGguf = isLlamaCppGgufModel(model);
+    const studio_instructions = {
+        title: "Open model in Unsloth Studio",
+        setup: ["pip install unsloth", "unsloth studio setup"].join("\n"),
+        content: [
+            "# Run unsloth studio",
+            "unsloth studio -H 0.0.0.0 -p 8000",
+            "# Then open http://localhost:8000/chat in your browser",
+            "# Search for " + model.id + " to start chatting",
+        ].join("\n"),
+    };
+    const hf_spaces_instructions = {
+        title: "Using HuggingFace Spaces for Unsloth",
+        setup: "# No setup required",
+        content: "# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for " +
+            model.id +
+            " to start chatting",
+    };
+    const fastmodel_instructions = {
+        title: "Load model with FastModel",
+        setup: "pip install unsloth",
+        content: [
+            "from unsloth import FastModel",
+            "model, tokenizer = FastModel.from_pretrained(",
+            '    model_name="' + model.id + '",',
+            "    max_seq_length=2048,",
+            ")",
+        ].join("\n"),
+    };
+    if (isGguf) {
+        return [studio_instructions, hf_spaces_instructions];
+    }
+    else {
+        return [studio_instructions, hf_spaces_instructions, fastmodel_instructions];
+    }
+};
 const snippetLocalAI = (model, filepath) => {
     const command = (binary) => ["# Load and run the model:", `${binary} huggingface://${model.id}/${filepath ?? "{{GGUF_FILE}}"}`].join("\n");
     return [
@@ -298,12 +352,25 @@ const snippetMlxLm = (model) => {
     ];
 };
 const snippetPi = (model, filepath) => {
-    const quantTag = getQuantTag(filepath);
     const modelName = model.id.split("/").pop() ?? model.id;
+    const isMLX = isMlxModel(model);
+    // Step 1: Server — differs by backend
+    const serverStep = isMLX
+        ? {
+            title: "Start the MLX server",
+            setup: "# Install MLX LM:\nuv tool install mlx-lm",
+            content: `# Start a local OpenAI-compatible server:\nmlx_lm.server --model "${model.id}"`,
+        }
+        : {
+            title: "Start the llama.cpp server",
+            setup: "# Install llama.cpp:\nbrew install llama.cpp",
+            content: `# Start a local OpenAI-compatible server:\nllama-server -hf ${model.id}${getQuantTag(filepath)} --jinja`,
+        };
+    // Step 2: Pi config — port and provider name differ
     const modelsJson = JSON.stringify({
         providers: {
-            "llama-cpp": {
-                baseUrl: "http://localhost:8080/v1",
+            [isMLX ? "mlx-lm" : "llama-cpp"]: {
+                baseUrl: isMLX ? "http://localhost:8000/v1" : "http://localhost:8080/v1",
                 api: "openai-completions",
                 apiKey: "none",
                 models: [{ id: modelName }],
@@ -311,11 +378,7 @@ const snippetPi = (model, filepath) => {
         },
     }, null, 2);
     return [
-        {
-            title: "Start the llama.cpp server",
-            setup: "# Install llama.cpp:\nbrew install llama.cpp",
-            content: `# Start a local OpenAI-compatible server:\nllama-server -hf ${model.id}${quantTag} --jinja`,
-        },
+        serverStep,
        {
             title: "Configure the model in Pi",
             setup: "# Install Pi:\nnpm install -g @mariozechner/pi-coding-agent",
@@ -323,7 +386,7 @@ const snippetPi = (model, filepath) => {
         },
         {
             title: "Run Pi",
-            content:
+            content: "# Start Pi in your project directory:\npi",
         },
     ];
 };
@@ -539,6 +602,13 @@ exports.LOCAL_APPS = {
         displayOnModelPage: isLlamaCppGgufModel,
         snippet: snippetOllama,
     },
+    unsloth: {
+        prettyLabel: "Unsloth",
+        docsUrl: "https://unsloth.ai/docs",
+        mainTask: "text-generation",
+        displayOnModelPage: isUnslothModel,
+        snippet: snippetUnsloth,
+    },
     "docker-model-runner": {
         prettyLabel: "Docker Model Runner",
         docsUrl: "https://docs.docker.com/ai/model-runner/",
@@ -557,7 +627,9 @@ exports.LOCAL_APPS = {
         prettyLabel: "Pi",
         docsUrl: "https://github.com/badlogic/pi-mono",
         mainTask: "text-generation",
-        displayOnModelPage: (model) => isLlamaCppGgufModel(model)
+        displayOnModelPage: (model) => (isLlamaCppGgufModel(model) || isMlxModel(model)) &&
+            model.pipeline_tag === "text-generation" &&
+            !!getChatTemplate(model)?.includes("tools"),
         snippet: snippetPi,
     },
 };
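The getChatTemplate helper added above accepts three shapes for the template and coalesces them in a fixed priority: GGUF metadata first, then config.chat_template_jinja, then tokenizer_config.chat_template (which may be an array of named templates). A self-contained sketch of that logic, with the ModelData shape trimmed to just the fields the helper reads:

type ChatTemplate = string | { template: string }[] | undefined;

interface ModelLike {
    gguf?: { chat_template?: ChatTemplate };
    config?: {
        chat_template_jinja?: string;
        tokenizer_config?: { chat_template?: ChatTemplate };
    };
}

// Same coalescing order as the helper: gguf > chat_template_jinja > tokenizer_config.
function getChatTemplate(model: ModelLike): string | undefined {
    const ct =
        model.gguf?.chat_template ?? model.config?.chat_template_jinja ?? model.config?.tokenizer_config?.chat_template;
    if (typeof ct === "string") {
        return ct;
    }
    if (Array.isArray(ct)) {
        return ct[0]?.template; // array form: the first entry's template wins
    }
    return undefined;
}

console.log(getChatTemplate({ gguf: { chat_template: "A" } })); // "A"
console.log(getChatTemplate({ config: { chat_template_jinja: "B" } })); // "B"
console.log(getChatTemplate({ config: { tokenizer_config: { chat_template: [{ template: "C" }] } } })); // "C"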
package/dist/commonjs/local-apps.spec.js
CHANGED
@@ -126,6 +126,27 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
     (0, vitest_1.expect)(snippet[1].content).toContain(`"id": "Llama-3.2-3B-Instruct-GGUF"`);
     (0, vitest_1.expect)(snippet[2].content).toContain("pi");
 });
+(0, vitest_1.it)("pi - mlx", async () => {
+    const { snippet: snippetFunc } = local_apps_js_1.LOCAL_APPS["pi"];
+    const model = {
+        id: "mlx-community/Llama-3.2-3B-Instruct-mlx",
+        tags: ["mlx", "conversational"],
+        pipeline_tag: "text-generation",
+        config: {
+            tokenizer_config: {
+                chat_template: "{% if tools %}...{% endif %}",
+            },
+        },
+        inference: "",
+    };
+    const snippet = snippetFunc(model);
+    (0, vitest_1.expect)(snippet[0].setup).toContain("uv tool install mlx-lm");
+    (0, vitest_1.expect)(snippet[0].content).toContain('mlx_lm.server --model "mlx-community/Llama-3.2-3B-Instruct-mlx"');
+    (0, vitest_1.expect)(snippet[1].setup).toContain("npm install -g @mariozechner/pi-coding-agent");
+    (0, vitest_1.expect)(snippet[1].content).toContain('"baseUrl": "http://localhost:8000/v1"');
+    (0, vitest_1.expect)(snippet[1].content).toContain('"id": "Llama-3.2-3B-Instruct-mlx"');
+    (0, vitest_1.expect)(snippet[2].content).toContain("pi");
+});
 (0, vitest_1.it)("docker model runner", async () => {
     const { snippet: snippetFunc } = local_apps_js_1.LOCAL_APPS["docker-model-runner"];
     const model = {
@@ -137,4 +158,55 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
     const snippet = snippetFunc(model);
     (0, vitest_1.expect)(snippet).toEqual(`docker model run hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:{{QUANT_TAG}}`);
 });
+(0, vitest_1.it)("unsloth tagged model", async () => {
+    const { displayOnModelPage, snippet: snippetFunc } = local_apps_js_1.LOCAL_APPS.unsloth;
+    const model = {
+        id: "some-user/my-unsloth-finetune",
+        tags: ["unsloth", "conversational"],
+        inference: "",
+    };
+    (0, vitest_1.expect)(displayOnModelPage(model)).toBe(true);
+    const snippet = snippetFunc(model);
+    (0, vitest_1.expect)(snippet[0].setup).toBe("pip install unsloth\nunsloth studio setup");
+    (0, vitest_1.expect)(snippet[0].content).toBe("# Run unsloth studio\nunsloth studio -H 0.0.0.0 -p 8000\n# Then open http://localhost:8000/chat in your browser\n# Search for some-user/my-unsloth-finetune to start chatting");
+    (0, vitest_1.expect)(snippet[1].setup).toBe("# No setup required");
+    (0, vitest_1.expect)(snippet[1].content).toBe("# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for some-user/my-unsloth-finetune to start chatting");
+    (0, vitest_1.expect)(snippet[2].setup).toBe("pip install unsloth");
+    (0, vitest_1.expect)(snippet[2].content).toBe('from unsloth import FastModel\nmodel, tokenizer = FastModel.from_pretrained(\n    model_name="some-user/my-unsloth-finetune",\n    max_seq_length=2048,\n)');
+});
+(0, vitest_1.it)("unsloth namespace gguf model", async () => {
+    const { displayOnModelPage, snippet: snippetFunc } = local_apps_js_1.LOCAL_APPS.unsloth;
+    const model = {
+        id: "unsloth/Llama-3.2-3B-Instruct-GGUF",
+        tags: ["conversational"],
+        gguf: { total: 1, context_length: 4096 },
+        inference: "",
+    };
+    (0, vitest_1.expect)(displayOnModelPage(model)).toBe(true);
+    const snippet = snippetFunc(model);
+    (0, vitest_1.expect)(snippet[0].setup).toBe("pip install unsloth\nunsloth studio setup");
+    (0, vitest_1.expect)(snippet[0].content).toBe("# Run unsloth studio\nunsloth studio -H 0.0.0.0 -p 8000\n# Then open http://localhost:8000/chat in your browser\n# Search for unsloth/Llama-3.2-3B-Instruct-GGUF to start chatting");
+    (0, vitest_1.expect)(snippet[1].setup).toBe("# No setup required");
+    (0, vitest_1.expect)(snippet[1].content).toBe("# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for unsloth/Llama-3.2-3B-Instruct-GGUF to start chatting");
+    (0, vitest_1.expect)(snippet).toHaveLength(2); // GGUF models only get 2 snippets
+});
+(0, vitest_1.it)("non unsloth namespace gguf model", async () => {
+    const { displayOnModelPage } = local_apps_js_1.LOCAL_APPS.unsloth;
+    const model = {
+        id: "dummy/Llama-3.2-3B-Instruct-GGUF",
+        tags: ["conversational"],
+        gguf: { total: 1, context_length: 4096 },
+        inference: "",
+    };
+    (0, vitest_1.expect)(displayOnModelPage(model)).toBe(true);
+});
+(0, vitest_1.it)("unsloth not shown for unrelated model", async () => {
+    const { displayOnModelPage } = local_apps_js_1.LOCAL_APPS.unsloth;
+    const model = {
+        id: "meta-llama/Llama-3.2-3B-Instruct",
+        tags: ["conversational"],
+        inference: "",
+    };
+    (0, vitest_1.expect)(displayOnModelPage(model)).toBe(false);
+});
 });
package/dist/esm/gguf.d.ts
CHANGED
@@ -38,6 +38,7 @@ export declare enum GGMLFileQuantizationType {
     TQ1_0 = 36,
     TQ2_0 = 37,
     MXFP4_MOE = 38,
+    NVFP4 = 39,
     Q2_K_XL = 1000,
     Q3_K_XL = 1001,
     Q4_K_XL = 1002,
@@ -82,6 +83,7 @@ export declare enum GGMLQuantizationType {
     BF16 = 30,
     TQ1_0 = 34,
     TQ2_0 = 35,
-    MXFP4 = 39
+    MXFP4 = 39,
+    NVFP4 = 40
 }
 //# sourceMappingURL=gguf.d.ts.map
package/dist/esm/gguf.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"gguf.d.ts","sourceRoot":"","sources":["../../src/gguf.ts"],"names":[],"mappings":"AAGA,oBAAY,wBAAwB;IACnC,GAAG,IAAI;IACP,GAAG,IAAI;IACP,IAAI,IAAI;IACR,IAAI,IAAI;IACR,aAAa,IAAI;IACjB,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,KAAK;IACT,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,IAAI,KAAK;IACT,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,OAAO,KAAK;IACZ,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,IAAI,KAAK;IACT,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,KAAK,KAAK;IACV,KAAK,KAAK;IACV,SAAS,KAAK;
+{"version":3,"file":"gguf.d.ts","sourceRoot":"","sources":["../../src/gguf.ts"],"names":[],"mappings":"AAGA,oBAAY,wBAAwB;IACnC,GAAG,IAAI;IACP,GAAG,IAAI;IACP,IAAI,IAAI;IACR,IAAI,IAAI;IACR,aAAa,IAAI;IACjB,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,KAAK;IACT,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,IAAI,KAAK;IACT,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,OAAO,KAAK;IACZ,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,IAAI,KAAK;IACT,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,KAAK,KAAK;IACV,KAAK,KAAK;IACV,SAAS,KAAK;IACd,KAAK,KAAK;IAIV,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;CACd;AAGD,eAAO,MAAM,aAAa,QAEzB,CAAC;AACF,eAAO,MAAM,oBAAoB,QAAiC,CAAC;AAEnE,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAGrE;AAKD,eAAO,MAAM,gBAAgB,EAAE,wBAAwB,EA2DtD,CAAC;AAIF,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,wBAAwB,EAC/B,eAAe,EAAE,wBAAwB,EAAE,GACzC,wBAAwB,GAAG,SAAS,CAmCtC;AAGD,oBAAY,oBAAoB;IAC/B,GAAG,IAAI;IACP,GAAG,IAAI;IACP,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,OAAO,KAAK;IACZ,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,KAAK,KAAK;IACV,MAAM,KAAK;IACX,EAAE,KAAK;IACP,GAAG,KAAK;IACR,GAAG,KAAK;IACR,GAAG,KAAK;IACR,GAAG,KAAK;IACR,KAAK,KAAK;IACV,IAAI,KAAK;IACT,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;CACV"}
package/dist/esm/gguf.js
CHANGED
@@ -42,6 +42,7 @@ export var GGMLFileQuantizationType;
     GGMLFileQuantizationType[GGMLFileQuantizationType["TQ1_0"] = 36] = "TQ1_0";
     GGMLFileQuantizationType[GGMLFileQuantizationType["TQ2_0"] = 37] = "TQ2_0";
     GGMLFileQuantizationType[GGMLFileQuantizationType["MXFP4_MOE"] = 38] = "MXFP4_MOE";
+    GGMLFileQuantizationType[GGMLFileQuantizationType["NVFP4"] = 39] = "NVFP4";
     // custom quants used by unsloth
     // they are not officially a scheme enum value in GGUF, but only here for naming
     GGMLFileQuantizationType[GGMLFileQuantizationType["Q2_K_XL"] = 1000] = "Q2_K_XL";
@@ -91,6 +92,7 @@ export const GGUF_QUANT_ORDER = [
     GGMLFileQuantizationType.Q4_2,
     GGMLFileQuantizationType.Q4_3,
     GGMLFileQuantizationType.MXFP4_MOE,
+    GGMLFileQuantizationType.NVFP4,
     // 3-bit quantizations
     GGMLFileQuantizationType.Q3_K_XL,
     GGMLFileQuantizationType.Q3_K_L,
@@ -182,4 +184,5 @@ export var GGMLQuantizationType;
     GGMLQuantizationType[GGMLQuantizationType["TQ1_0"] = 34] = "TQ1_0";
     GGMLQuantizationType[GGMLQuantizationType["TQ2_0"] = 35] = "TQ2_0";
     GGMLQuantizationType[GGMLQuantizationType["MXFP4"] = 39] = "MXFP4";
+    GGMLQuantizationType[GGMLQuantizationType["NVFP4"] = 40] = "NVFP4";
 })(GGMLQuantizationType || (GGMLQuantizationType = {}));
package/dist/esm/local-apps.d.ts
CHANGED
@@ -56,6 +56,7 @@ declare function isTgiModel(model: ModelData): boolean;
 declare function isLlamaCppGgufModel(model: ModelData): boolean;
 declare function isVllmModel(model: ModelData): boolean;
 declare function isDockerModelRunnerModel(model: ModelData): boolean;
+declare function isUnslothModel(model: ModelData): boolean;
 /**
  * Add your new local app here.
  *
@@ -198,6 +199,13 @@ export declare const LOCAL_APPS: {
         displayOnModelPage: typeof isLlamaCppGgufModel;
         snippet: (model: ModelData, filepath?: string) => string;
     };
+    unsloth: {
+        prettyLabel: string;
+        docsUrl: string;
+        mainTask: "text-generation";
+        displayOnModelPage: typeof isUnslothModel;
+        snippet: (model: ModelData) => LocalAppSnippet[];
+    };
     "docker-model-runner": {
         prettyLabel: string;
         docsUrl: string;
package/dist/esm/local-apps.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAKnD,MAAM,WAAW,eAAe;IAC/B;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,GAAG,CAAC;CACtD,GACD;IACA;;;;OAIG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,MAAM,GAAG,MAAM,EAAE,GAAG,eAAe,GAAG,eAAe,EAAE,CAAC;CACzG,CACH,CAAC;AAsBF,iBAAS,UAAU,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE7C;AAED,iBAAS,mBAAmB,CAAC,KAAK,EAAE,SAAS,WAE5C;AAED,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAU9C;AAED,iBAAS,wBAAwB,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE3D;
+{"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAKnD,MAAM,WAAW,eAAe;IAC/B;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,GAAG,CAAC;CACtD,GACD;IACA;;;;OAIG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,MAAM,GAAG,MAAM,EAAE,GAAG,eAAe,GAAG,eAAe,EAAE,CAAC;CACzG,CACH,CAAC;AAsBF,iBAAS,UAAU,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE7C;AAED,iBAAS,mBAAmB,CAAC,KAAK,EAAE,SAAS,WAE5C;AAED,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAU9C;AAED,iBAAS,wBAAwB,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE3D;AA0BD,iBAAS,cAAc,CAAC,KAAK,EAAE,SAAS,WAEvC;AA6ZD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBA3ZS,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBAiDzC,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBAmF3D,SAAS,KAAG,eAAe,EAAE;;;;;;oCAiT3B,SAAS;yBA3PT,SAAS,KAAG,eAAe,EAAE;;;;;;;yBAoF9B,SAAS,KAAG,eAAe,EAAE;;;;;;;yBA7B/B,SAAS,KAAG,eAAe,EAAE;;;;;;;;;;;;;;yBApIzB,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;yBA9CjD,SAAS,aAAa,MAAM,KAAG,MAAM;;;;;;;yBAIpC,SAAS,KAAG,eAAe,EAAE;;;;;;;yBA6RnB,SAAS,aAAa,MAAM,KAAG,MAAM;;;;;;;yBAM9C,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBArDtD,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;CAuStC,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
package/dist/esm/local-apps.js
CHANGED
@@ -40,6 +40,23 @@ function isAmdRyzenModel(model) {
 function isMlxModel(model) {
     return model.tags.includes("mlx");
 }
+/**
+ * Returns the model's chat template string, coalescing across sources:
+ * GGUF metadata > chat_template_jinja file > tokenizer_config.json
+ */
+function getChatTemplate(model) {
+    const ct = model.gguf?.chat_template ?? model.config?.chat_template_jinja ?? model.config?.tokenizer_config?.chat_template;
+    if (typeof ct === "string") {
+        return ct;
+    }
+    if (Array.isArray(ct)) {
+        return ct[0]?.template;
+    }
+    return undefined;
+}
+function isUnslothModel(model) {
+    return model.tags.includes("unsloth") || isLlamaCppGgufModel(model);
+}
 function getQuantTag(filepath) {
     const defaultTag = ":{{QUANT_TAG}}";
     if (!filepath) {
@@ -112,6 +129,43 @@ const snippetNodeLlamaCppCli = (model, filepath) => {
 const snippetOllama = (model, filepath) => {
     return `ollama run hf.co/${model.id}${getQuantTag(filepath)}`;
 };
+const snippetUnsloth = (model) => {
+    const isGguf = isLlamaCppGgufModel(model);
+    const studio_instructions = {
+        title: "Open model in Unsloth Studio",
+        setup: ["pip install unsloth", "unsloth studio setup"].join("\n"),
+        content: [
+            "# Run unsloth studio",
+            "unsloth studio -H 0.0.0.0 -p 8000",
+            "# Then open http://localhost:8000/chat in your browser",
+            "# Search for " + model.id + " to start chatting",
+        ].join("\n"),
+    };
+    const hf_spaces_instructions = {
+        title: "Using HuggingFace Spaces for Unsloth",
+        setup: "# No setup required",
+        content: "# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for " +
+            model.id +
+            " to start chatting",
+    };
+    const fastmodel_instructions = {
+        title: "Load model with FastModel",
+        setup: "pip install unsloth",
+        content: [
+            "from unsloth import FastModel",
+            "model, tokenizer = FastModel.from_pretrained(",
+            '    model_name="' + model.id + '",',
+            "    max_seq_length=2048,",
+            ")",
+        ].join("\n"),
+    };
+    if (isGguf) {
+        return [studio_instructions, hf_spaces_instructions];
+    }
+    else {
+        return [studio_instructions, hf_spaces_instructions, fastmodel_instructions];
+    }
+};
 const snippetLocalAI = (model, filepath) => {
     const command = (binary) => ["# Load and run the model:", `${binary} huggingface://${model.id}/${filepath ?? "{{GGUF_FILE}}"}`].join("\n");
     return [
@@ -295,12 +349,25 @@ const snippetMlxLm = (model) => {
     ];
 };
 const snippetPi = (model, filepath) => {
-    const quantTag = getQuantTag(filepath);
     const modelName = model.id.split("/").pop() ?? model.id;
+    const isMLX = isMlxModel(model);
+    // Step 1: Server — differs by backend
+    const serverStep = isMLX
+        ? {
+            title: "Start the MLX server",
+            setup: "# Install MLX LM:\nuv tool install mlx-lm",
+            content: `# Start a local OpenAI-compatible server:\nmlx_lm.server --model "${model.id}"`,
+        }
+        : {
+            title: "Start the llama.cpp server",
+            setup: "# Install llama.cpp:\nbrew install llama.cpp",
+            content: `# Start a local OpenAI-compatible server:\nllama-server -hf ${model.id}${getQuantTag(filepath)} --jinja`,
+        };
+    // Step 2: Pi config — port and provider name differ
     const modelsJson = JSON.stringify({
         providers: {
-            "llama-cpp": {
-                baseUrl: "http://localhost:8080/v1",
+            [isMLX ? "mlx-lm" : "llama-cpp"]: {
+                baseUrl: isMLX ? "http://localhost:8000/v1" : "http://localhost:8080/v1",
                 api: "openai-completions",
                 apiKey: "none",
                 models: [{ id: modelName }],
@@ -308,11 +375,7 @@ const snippetPi = (model, filepath) => {
         },
     }, null, 2);
     return [
-        {
-            title: "Start the llama.cpp server",
-            setup: "# Install llama.cpp:\nbrew install llama.cpp",
-            content: `# Start a local OpenAI-compatible server:\nllama-server -hf ${model.id}${quantTag} --jinja`,
-        },
+        serverStep,
         {
             title: "Configure the model in Pi",
             setup: "# Install Pi:\nnpm install -g @mariozechner/pi-coding-agent",
@@ -320,7 +383,7 @@ const snippetPi = (model, filepath) => {
         },
         {
             title: "Run Pi",
-            content:
+            content: "# Start Pi in your project directory:\npi",
         },
     ];
 };
@@ -536,6 +599,13 @@ export const LOCAL_APPS = {
         displayOnModelPage: isLlamaCppGgufModel,
         snippet: snippetOllama,
     },
+    unsloth: {
+        prettyLabel: "Unsloth",
+        docsUrl: "https://unsloth.ai/docs",
+        mainTask: "text-generation",
+        displayOnModelPage: isUnslothModel,
+        snippet: snippetUnsloth,
+    },
     "docker-model-runner": {
         prettyLabel: "Docker Model Runner",
         docsUrl: "https://docs.docker.com/ai/model-runner/",
@@ -554,7 +624,9 @@ export const LOCAL_APPS = {
         prettyLabel: "Pi",
         docsUrl: "https://github.com/badlogic/pi-mono",
         mainTask: "text-generation",
-        displayOnModelPage: (model) => isLlamaCppGgufModel(model)
+        displayOnModelPage: (model) => (isLlamaCppGgufModel(model) || isMlxModel(model)) &&
+            model.pipeline_tag === "text-generation" &&
+            !!getChatTemplate(model)?.includes("tools"),
         snippet: snippetPi,
     },
 };
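Taken together with the specs below, the new unsloth entry behaves as follows: models with a GGUF file get two snippet steps (Unsloth Studio and the Hugging Face Space), while other models get a third FastModel step. A usage sketch, assuming LOCAL_APPS and the ModelData type are re-exported from the package root, with the fixture trimmed to the fields the snippet reads (as in the specs):

import { LOCAL_APPS, type ModelData } from "@huggingface/tasks";

// Trimmed fixture, mirroring the spec's "unsloth tagged model" case.
const model: ModelData = {
    id: "some-user/my-unsloth-finetune",
    tags: ["unsloth", "conversational"],
    inference: "",
};

console.log(LOCAL_APPS.unsloth.displayOnModelPage(model)); // true (tagged "unsloth")
const steps = LOCAL_APPS.unsloth.snippet(model);
console.log(steps.length); // 3: no model.gguf, so the FastModel step is included
console.log(steps[2].content);
// from unsloth import FastModel
// model, tokenizer = FastModel.from_pretrained(
//     model_name="some-user/my-unsloth-finetune",
//     max_seq_length=2048,
// )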
package/dist/esm/local-apps.spec.js
CHANGED
@@ -124,6 +124,27 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
     expect(snippet[1].content).toContain(`"id": "Llama-3.2-3B-Instruct-GGUF"`);
     expect(snippet[2].content).toContain("pi");
 });
+it("pi - mlx", async () => {
+    const { snippet: snippetFunc } = LOCAL_APPS["pi"];
+    const model = {
+        id: "mlx-community/Llama-3.2-3B-Instruct-mlx",
+        tags: ["mlx", "conversational"],
+        pipeline_tag: "text-generation",
+        config: {
+            tokenizer_config: {
+                chat_template: "{% if tools %}...{% endif %}",
+            },
+        },
+        inference: "",
+    };
+    const snippet = snippetFunc(model);
+    expect(snippet[0].setup).toContain("uv tool install mlx-lm");
+    expect(snippet[0].content).toContain('mlx_lm.server --model "mlx-community/Llama-3.2-3B-Instruct-mlx"');
+    expect(snippet[1].setup).toContain("npm install -g @mariozechner/pi-coding-agent");
+    expect(snippet[1].content).toContain('"baseUrl": "http://localhost:8000/v1"');
+    expect(snippet[1].content).toContain('"id": "Llama-3.2-3B-Instruct-mlx"');
+    expect(snippet[2].content).toContain("pi");
+});
 it("docker model runner", async () => {
     const { snippet: snippetFunc } = LOCAL_APPS["docker-model-runner"];
     const model = {
@@ -135,4 +156,55 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
     const snippet = snippetFunc(model);
     expect(snippet).toEqual(`docker model run hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:{{QUANT_TAG}}`);
 });
+it("unsloth tagged model", async () => {
+    const { displayOnModelPage, snippet: snippetFunc } = LOCAL_APPS.unsloth;
+    const model = {
+        id: "some-user/my-unsloth-finetune",
+        tags: ["unsloth", "conversational"],
+        inference: "",
+    };
+    expect(displayOnModelPage(model)).toBe(true);
+    const snippet = snippetFunc(model);
+    expect(snippet[0].setup).toBe("pip install unsloth\nunsloth studio setup");
+    expect(snippet[0].content).toBe("# Run unsloth studio\nunsloth studio -H 0.0.0.0 -p 8000\n# Then open http://localhost:8000/chat in your browser\n# Search for some-user/my-unsloth-finetune to start chatting");
+    expect(snippet[1].setup).toBe("# No setup required");
+    expect(snippet[1].content).toBe("# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for some-user/my-unsloth-finetune to start chatting");
+    expect(snippet[2].setup).toBe("pip install unsloth");
+    expect(snippet[2].content).toBe('from unsloth import FastModel\nmodel, tokenizer = FastModel.from_pretrained(\n    model_name="some-user/my-unsloth-finetune",\n    max_seq_length=2048,\n)');
+});
+it("unsloth namespace gguf model", async () => {
+    const { displayOnModelPage, snippet: snippetFunc } = LOCAL_APPS.unsloth;
+    const model = {
+        id: "unsloth/Llama-3.2-3B-Instruct-GGUF",
+        tags: ["conversational"],
+        gguf: { total: 1, context_length: 4096 },
+        inference: "",
+    };
+    expect(displayOnModelPage(model)).toBe(true);
+    const snippet = snippetFunc(model);
+    expect(snippet[0].setup).toBe("pip install unsloth\nunsloth studio setup");
+    expect(snippet[0].content).toBe("# Run unsloth studio\nunsloth studio -H 0.0.0.0 -p 8000\n# Then open http://localhost:8000/chat in your browser\n# Search for unsloth/Llama-3.2-3B-Instruct-GGUF to start chatting");
+    expect(snippet[1].setup).toBe("# No setup required");
+    expect(snippet[1].content).toBe("# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for unsloth/Llama-3.2-3B-Instruct-GGUF to start chatting");
+    expect(snippet).toHaveLength(2); // GGUF models only get 2 snippets
+});
+it("non unsloth namespace gguf model", async () => {
+    const { displayOnModelPage } = LOCAL_APPS.unsloth;
+    const model = {
+        id: "dummy/Llama-3.2-3B-Instruct-GGUF",
+        tags: ["conversational"],
+        gguf: { total: 1, context_length: 4096 },
+        inference: "",
+    };
+    expect(displayOnModelPage(model)).toBe(true);
+});
+it("unsloth not shown for unrelated model", async () => {
+    const { displayOnModelPage } = LOCAL_APPS.unsloth;
+    const model = {
+        id: "meta-llama/Llama-3.2-3B-Instruct",
+        tags: ["conversational"],
+        inference: "",
+    };
+    expect(displayOnModelPage(model)).toBe(false);
+});
 });
package/package.json
CHANGED
package/src/gguf.ts
CHANGED
@@ -41,6 +41,7 @@ export enum GGMLFileQuantizationType {
 	TQ1_0 = 36,
 	TQ2_0 = 37,
 	MXFP4_MOE = 38,
+	NVFP4 = 39,
 
 	// custom quants used by unsloth
 	// they are not officially a scheme enum value in GGUF, but only here for naming
@@ -99,6 +100,7 @@ export const GGUF_QUANT_ORDER: GGMLFileQuantizationType[] = [
 	GGMLFileQuantizationType.Q4_2,
 	GGMLFileQuantizationType.Q4_3,
 	GGMLFileQuantizationType.MXFP4_MOE,
+	GGMLFileQuantizationType.NVFP4,
 
 	// 3-bit quantizations
 	GGMLFileQuantizationType.Q3_K_XL,
@@ -202,4 +204,5 @@ export enum GGMLQuantizationType {
 	TQ1_0 = 34,
 	TQ2_0 = 35,
 	MXFP4 = 39,
+	NVFP4 = 40,
 }
package/src/local-apps.spec.ts
CHANGED
@@ -138,6 +138,29 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
 		expect(snippet[2].content).toContain("pi");
 	});
 
+	it("pi - mlx", async () => {
+		const { snippet: snippetFunc } = LOCAL_APPS["pi"];
+		const model: ModelData = {
+			id: "mlx-community/Llama-3.2-3B-Instruct-mlx",
+			tags: ["mlx", "conversational"],
+			pipeline_tag: "text-generation",
+			config: {
+				tokenizer_config: {
+					chat_template: "{% if tools %}...{% endif %}",
+				},
+			},
+			inference: "",
+		};
+		const snippet = snippetFunc(model);
+
+		expect(snippet[0].setup).toContain("uv tool install mlx-lm");
+		expect(snippet[0].content).toContain('mlx_lm.server --model "mlx-community/Llama-3.2-3B-Instruct-mlx"');
+		expect(snippet[1].setup).toContain("npm install -g @mariozechner/pi-coding-agent");
+		expect(snippet[1].content).toContain('"baseUrl": "http://localhost:8000/v1"');
+		expect(snippet[1].content).toContain('"id": "Llama-3.2-3B-Instruct-mlx"');
+		expect(snippet[2].content).toContain("pi");
+	});
+
 	it("docker model runner", async () => {
 		const { snippet: snippetFunc } = LOCAL_APPS["docker-model-runner"];
 		const model: ModelData = {
@@ -150,4 +173,73 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
 
 		expect(snippet).toEqual(`docker model run hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:{{QUANT_TAG}}`);
 	});
+
+	it("unsloth tagged model", async () => {
+		const { displayOnModelPage, snippet: snippetFunc } = LOCAL_APPS.unsloth;
+		const model: ModelData = {
+			id: "some-user/my-unsloth-finetune",
+			tags: ["unsloth", "conversational"],
+			inference: "",
+		};
+
+		expect(displayOnModelPage(model)).toBe(true);
+		const snippet = snippetFunc(model);
+		expect(snippet[0].setup).toBe("pip install unsloth\nunsloth studio setup");
+		expect(snippet[0].content).toBe(
+			"# Run unsloth studio\nunsloth studio -H 0.0.0.0 -p 8000\n# Then open http://localhost:8000/chat in your browser\n# Search for some-user/my-unsloth-finetune to start chatting",
+		);
+		expect(snippet[1].setup).toBe("# No setup required");
+		expect(snippet[1].content).toBe(
+			"# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for some-user/my-unsloth-finetune to start chatting",
+		);
+		expect(snippet[2].setup).toBe("pip install unsloth");
+		expect(snippet[2].content).toBe(
+			'from unsloth import FastModel\nmodel, tokenizer = FastModel.from_pretrained(\n    model_name="some-user/my-unsloth-finetune",\n    max_seq_length=2048,\n)',
+		);
+	});
+
+	it("unsloth namespace gguf model", async () => {
+		const { displayOnModelPage, snippet: snippetFunc } = LOCAL_APPS.unsloth;
+		const model: ModelData = {
+			id: "unsloth/Llama-3.2-3B-Instruct-GGUF",
+			tags: ["conversational"],
+			gguf: { total: 1, context_length: 4096 },
+			inference: "",
+		};
+
+		expect(displayOnModelPage(model)).toBe(true);
+		const snippet = snippetFunc(model);
+		expect(snippet[0].setup).toBe("pip install unsloth\nunsloth studio setup");
+		expect(snippet[0].content).toBe(
+			"# Run unsloth studio\nunsloth studio -H 0.0.0.0 -p 8000\n# Then open http://localhost:8000/chat in your browser\n# Search for unsloth/Llama-3.2-3B-Instruct-GGUF to start chatting",
+		);
+		expect(snippet[1].setup).toBe("# No setup required");
+		expect(snippet[1].content).toBe(
+			"# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for unsloth/Llama-3.2-3B-Instruct-GGUF to start chatting",
+		);
+		expect(snippet).toHaveLength(2); // GGUF models only get 2 snippets
+	});
+
+	it("non unsloth namespace gguf model", async () => {
+		const { displayOnModelPage } = LOCAL_APPS.unsloth;
+		const model: ModelData = {
+			id: "dummy/Llama-3.2-3B-Instruct-GGUF",
+			tags: ["conversational"],
+			gguf: { total: 1, context_length: 4096 },
+			inference: "",
+		};
+
+		expect(displayOnModelPage(model)).toBe(true);
+	});
+
+	it("unsloth not shown for unrelated model", async () => {
+		const { displayOnModelPage } = LOCAL_APPS.unsloth;
+		const model: ModelData = {
+			id: "meta-llama/Llama-3.2-3B-Instruct",
+			tags: ["conversational"],
+			inference: "",
+		};
+
+		expect(displayOnModelPage(model)).toBe(false);
+	});
 });
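For reference, the "pi - mlx" fixture above passes Pi's tightened displayOnModelPage gate because all three conditions hold. A standalone restatement of that gate applied to the fixture (the condition is copied from the local-apps.ts diff below; the helper checks are inlined and simplified, and the specs suggest isLlamaCppGgufModel keys off model.gguf):

const model = {
    id: "mlx-community/Llama-3.2-3B-Instruct-mlx",
    tags: ["mlx", "conversational"],
    pipeline_tag: "text-generation",
    config: { tokenizer_config: { chat_template: "{% if tools %}...{% endif %}" } },
};

const isMlx = model.tags.includes("mlx"); // true
const isGguf = "gguf" in model; // false, but one of the two suffices
const template = model.config?.tokenizer_config?.chat_template;

const show =
    (isGguf || isMlx) &&
    model.pipeline_tag === "text-generation" &&
    !!(typeof template === "string" && template.includes("tools")); // template must mention tools
console.log(show); // true: the Pi tab is displayed for this model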
package/src/local-apps.ts
CHANGED
@@ -115,6 +115,26 @@ function isMlxModel(model: ModelData) {
 	return model.tags.includes("mlx");
 }
 
+/**
+ * Returns the model's chat template string, coalescing across sources:
+ * GGUF metadata > chat_template_jinja file > tokenizer_config.json
+ */
+function getChatTemplate(model: ModelData): string | undefined {
+	const ct =
+		model.gguf?.chat_template ?? model.config?.chat_template_jinja ?? model.config?.tokenizer_config?.chat_template;
+	if (typeof ct === "string") {
+		return ct;
+	}
+	if (Array.isArray(ct)) {
+		return ct[0]?.template;
+	}
+	return undefined;
+}
+
+function isUnslothModel(model: ModelData) {
+	return model.tags.includes("unsloth") || isLlamaCppGgufModel(model);
+}
+
 function getQuantTag(filepath?: string): string {
 	const defaultTag = ":{{QUANT_TAG}}";
 
@@ -193,6 +213,48 @@ const snippetOllama = (model: ModelData, filepath?: string): string => {
 	return `ollama run hf.co/${model.id}${getQuantTag(filepath)}`;
 };
 
+const snippetUnsloth = (model: ModelData): LocalAppSnippet[] => {
+	const isGguf = isLlamaCppGgufModel(model);
+
+	const studio_instructions: LocalAppSnippet = {
+		title: "Open model in Unsloth Studio",
+		setup: ["pip install unsloth", "unsloth studio setup"].join("\n"),
+		content: [
+			"# Run unsloth studio",
+			"unsloth studio -H 0.0.0.0 -p 8000",
+			"# Then open http://localhost:8000/chat in your browser",
+			"# Search for " + model.id + " to start chatting",
+		].join("\n"),
+	};
+
+	const hf_spaces_instructions: LocalAppSnippet = {
+		title: "Using HuggingFace Spaces for Unsloth",
+		setup: "# No setup required",
+		content:
+			"# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for " +
+			model.id +
+			" to start chatting",
+	};
+
+	const fastmodel_instructions: LocalAppSnippet = {
+		title: "Load model with FastModel",
+		setup: "pip install unsloth",
+		content: [
+			"from unsloth import FastModel",
+			"model, tokenizer = FastModel.from_pretrained(",
+			'    model_name="' + model.id + '",',
+			"    max_seq_length=2048,",
+			")",
+		].join("\n"),
+	};
+
+	if (isGguf) {
+		return [studio_instructions, hf_spaces_instructions];
+	} else {
+		return [studio_instructions, hf_spaces_instructions, fastmodel_instructions];
+	}
+};
+
 const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
 	const command = (binary: string) =>
 		["# Load and run the model:", `${binary} huggingface://${model.id}/${filepath ?? "{{GGUF_FILE}}"}`].join("\n");
@@ -390,14 +452,28 @@ const snippetMlxLm = (model: ModelData): LocalAppSnippet[] => {
 };
 
 const snippetPi = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
-	const quantTag = getQuantTag(filepath);
 	const modelName = model.id.split("/").pop() ?? model.id;
+	const isMLX = isMlxModel(model);
+
+	// Step 1: Server — differs by backend
+	const serverStep: LocalAppSnippet = isMLX
+		? {
+				title: "Start the MLX server",
+				setup: "# Install MLX LM:\nuv tool install mlx-lm",
+				content: `# Start a local OpenAI-compatible server:\nmlx_lm.server --model "${model.id}"`,
+			}
+		: {
+				title: "Start the llama.cpp server",
+				setup: "# Install llama.cpp:\nbrew install llama.cpp",
+				content: `# Start a local OpenAI-compatible server:\nllama-server -hf ${model.id}${getQuantTag(filepath)} --jinja`,
+			};
 
+	// Step 2: Pi config — port and provider name differ
 	const modelsJson = JSON.stringify(
 		{
 			providers: {
-				"llama-cpp": {
-					baseUrl: "http://localhost:8080/v1",
+				[isMLX ? "mlx-lm" : "llama-cpp"]: {
+					baseUrl: isMLX ? "http://localhost:8000/v1" : "http://localhost:8080/v1",
 					api: "openai-completions",
 					apiKey: "none",
 					models: [{ id: modelName }],
@@ -409,11 +485,7 @@ const snippetPi = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
 	);
 
 	return [
-		{
-			title: "Start the llama.cpp server",
-			setup: "# Install llama.cpp:\nbrew install llama.cpp",
-			content: `# Start a local OpenAI-compatible server:\nllama-server -hf ${model.id}${quantTag} --jinja`,
-		},
+		serverStep,
 		{
 			title: "Configure the model in Pi",
 			setup: "# Install Pi:\nnpm install -g @mariozechner/pi-coding-agent",
@@ -421,7 +493,7 @@ const snippetPi = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
 		},
 		{
 			title: "Run Pi",
-			content:
+			content: "# Start Pi in your project directory:\npi",
 		},
 	];
 };
@@ -643,6 +715,13 @@ export const LOCAL_APPS = {
 		displayOnModelPage: isLlamaCppGgufModel,
 		snippet: snippetOllama,
 	},
+	unsloth: {
+		prettyLabel: "Unsloth",
+		docsUrl: "https://unsloth.ai/docs",
+		mainTask: "text-generation",
+		displayOnModelPage: isUnslothModel,
+		snippet: snippetUnsloth,
+	},
 	"docker-model-runner": {
 		prettyLabel: "Docker Model Runner",
 		docsUrl: "https://docs.docker.com/ai/model-runner/",
@@ -661,7 +740,10 @@ export const LOCAL_APPS = {
 		prettyLabel: "Pi",
 		docsUrl: "https://github.com/badlogic/pi-mono",
 		mainTask: "text-generation",
-		displayOnModelPage: (model) =>
+		displayOnModelPage: (model) =>
+			(isLlamaCppGgufModel(model) || isMlxModel(model)) &&
+			model.pipeline_tag === "text-generation" &&
+			!!getChatTemplate(model)?.includes("tools"),
 		snippet: snippetPi,
 	},
 } satisfies Record<string, LocalApp>;
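The net effect of the snippetPi change: an MLX model is pointed at mlx_lm.server (port 8000, provider key "mlx-lm") and a GGUF model at llama-server (port 8080, provider key "llama-cpp"), with only the config step differing between backends. A sketch of the providers JSON the snippet serializes, using illustrative values:

// Shapes copied from the diff; modelName/isMLX values are illustrative.
const modelName = "Llama-3.2-3B-Instruct-mlx";
const isMLX = true;

const modelsJson = JSON.stringify(
    {
        providers: {
            [isMLX ? "mlx-lm" : "llama-cpp"]: {
                baseUrl: isMLX ? "http://localhost:8000/v1" : "http://localhost:8080/v1",
                api: "openai-completions",
                apiKey: "none",
                models: [{ id: modelName }],
            },
        },
    },
    null,
    2
);
console.log(modelsJson);
// {
//   "providers": {
//     "mlx-lm": {
//       "baseUrl": "http://localhost:8000/v1",
//       "api": "openai-completions",
//       "apiKey": "none",
//       "models": [
//         {
//           "id": "Llama-3.2-3B-Instruct-mlx"
//         }
//       ]
//     }
//   }
// }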