@huggingface/tasks 0.20.3 → 0.20.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,6 +38,7 @@ export declare enum GGMLFileQuantizationType {
  TQ1_0 = 36,
  TQ2_0 = 37,
  MXFP4_MOE = 38,
+ NVFP4 = 39,
  Q2_K_XL = 1000,
  Q3_K_XL = 1001,
  Q4_K_XL = 1002,
@@ -82,6 +83,7 @@ export declare enum GGMLQuantizationType {
  BF16 = 30,
  TQ1_0 = 34,
  TQ2_0 = 35,
- MXFP4 = 39
+ MXFP4 = 39,
+ NVFP4 = 40
  }
  //# sourceMappingURL=gguf.d.ts.map
@@ -1 +1 @@
- {"version":3,"file":"gguf.d.ts","sourceRoot":"","sources":["../../src/gguf.ts"],"names":[],"mappings":"AAGA,oBAAY,wBAAwB;IACnC,GAAG,IAAI;IACP,GAAG,IAAI;IACP,IAAI,IAAI;IACR,IAAI,IAAI;IACR,aAAa,IAAI;IACjB,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,KAAK;IACT,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,IAAI,KAAK;IACT,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,OAAO,KAAK;IACZ,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,IAAI,KAAK;IACT,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,KAAK,KAAK;IACV,KAAK,KAAK;IACV,SAAS,KAAK;IAId,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;CACd;AAGD,eAAO,MAAM,aAAa,QAEzB,CAAC;AACF,eAAO,MAAM,oBAAoB,QAAiC,CAAC;AAEnE,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAGrE;AAKD,eAAO,MAAM,gBAAgB,EAAE,wBAAwB,EA0DtD,CAAC;AAIF,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,wBAAwB,EAC/B,eAAe,EAAE,wBAAwB,EAAE,GACzC,wBAAwB,GAAG,SAAS,CAmCtC;AAGD,oBAAY,oBAAoB;IAC/B,GAAG,IAAI;IACP,GAAG,IAAI;IACP,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,OAAO,KAAK;IACZ,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,KAAK,KAAK;IACV,MAAM,KAAK;IACX,EAAE,KAAK;IACP,GAAG,KAAK;IACR,GAAG,KAAK;IACR,GAAG,KAAK;IACR,GAAG,KAAK;IACR,KAAK,KAAK;IACV,IAAI,KAAK;IACT,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;CACV"}
+ {"version":3,"file":"gguf.d.ts","sourceRoot":"","sources":["../../src/gguf.ts"],"names":[],"mappings":"AAGA,oBAAY,wBAAwB;IACnC,GAAG,IAAI;IACP,GAAG,IAAI;IACP,IAAI,IAAI;IACR,IAAI,IAAI;IACR,aAAa,IAAI;IACjB,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,KAAK;IACT,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,IAAI,KAAK;IACT,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,OAAO,KAAK;IACZ,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,IAAI,KAAK;IACT,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,KAAK,KAAK;IACV,KAAK,KAAK;IACV,SAAS,KAAK;IACd,KAAK,KAAK;IAIV,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;CACd;AAGD,eAAO,MAAM,aAAa,QAEzB,CAAC;AACF,eAAO,MAAM,oBAAoB,QAAiC,CAAC;AAEnE,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAGrE;AAKD,eAAO,MAAM,gBAAgB,EAAE,wBAAwB,EA2DtD,CAAC;AAIF,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,wBAAwB,EAC/B,eAAe,EAAE,wBAAwB,EAAE,GACzC,wBAAwB,GAAG,SAAS,CAmCtC;AAGD,oBAAY,oBAAoB;IAC/B,GAAG,IAAI;IACP,GAAG,IAAI;IACP,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,OAAO,KAAK;IACZ,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,KAAK,KAAK;IACV,MAAM,KAAK;IACX,EAAE,KAAK;IACP,GAAG,KAAK;IACR,GAAG,KAAK;IACR,GAAG,KAAK;IACR,GAAG,KAAK;IACR,KAAK,KAAK;IACV,IAAI,KAAK;IACT,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;CACV"}
@@ -47,6 +47,7 @@ var GGMLFileQuantizationType;
  GGMLFileQuantizationType[GGMLFileQuantizationType["TQ1_0"] = 36] = "TQ1_0";
  GGMLFileQuantizationType[GGMLFileQuantizationType["TQ2_0"] = 37] = "TQ2_0";
  GGMLFileQuantizationType[GGMLFileQuantizationType["MXFP4_MOE"] = 38] = "MXFP4_MOE";
+ GGMLFileQuantizationType[GGMLFileQuantizationType["NVFP4"] = 39] = "NVFP4";
  // custom quants used by unsloth
  // they are not officially a scheme enum value in GGUF, but only here for naming
  GGMLFileQuantizationType[GGMLFileQuantizationType["Q2_K_XL"] = 1000] = "Q2_K_XL";
@@ -96,6 +97,7 @@ exports.GGUF_QUANT_ORDER = [
  GGMLFileQuantizationType.Q4_2,
  GGMLFileQuantizationType.Q4_3,
  GGMLFileQuantizationType.MXFP4_MOE,
+ GGMLFileQuantizationType.NVFP4,
  // 3-bit quantizations
  GGMLFileQuantizationType.Q3_K_XL,
  GGMLFileQuantizationType.Q3_K_L,
@@ -187,4 +189,5 @@ var GGMLQuantizationType;
  GGMLQuantizationType[GGMLQuantizationType["TQ1_0"] = 34] = "TQ1_0";
  GGMLQuantizationType[GGMLQuantizationType["TQ2_0"] = 35] = "TQ2_0";
  GGMLQuantizationType[GGMLQuantizationType["MXFP4"] = 39] = "MXFP4";
+ GGMLQuantizationType[GGMLQuantizationType["NVFP4"] = 40] = "NVFP4";
  })(GGMLQuantizationType || (exports.GGMLQuantizationType = GGMLQuantizationType = {}));
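
The compiled CommonJS output mirrors the declaration change, and the middle hunk also slots NVFP4 into GGUF_QUANT_ORDER immediately after MXFP4_MOE, so ordering-based quant selection treats it as a 4-bit option. A quick check of that placement, assuming the array and enum are re-exported from the package root:

```ts
import { GGMLFileQuantizationType, GGUF_QUANT_ORDER } from "@huggingface/tasks";

// Per this diff, NVFP4 is inserted directly after MXFP4_MOE in the ordering:
const nvfp4Rank = GGUF_QUANT_ORDER.indexOf(GGMLFileQuantizationType.NVFP4);
const mxfp4MoeRank = GGUF_QUANT_ORDER.indexOf(GGMLFileQuantizationType.MXFP4_MOE);
console.log(nvfp4Rank === mxfp4MoeRank + 1); // true
```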
@@ -56,6 +56,7 @@ declare function isTgiModel(model: ModelData): boolean;
  declare function isLlamaCppGgufModel(model: ModelData): boolean;
  declare function isVllmModel(model: ModelData): boolean;
  declare function isDockerModelRunnerModel(model: ModelData): boolean;
+ declare function isUnslothModel(model: ModelData): boolean;
  /**
  * Add your new local app here.
  *
@@ -198,6 +199,13 @@ export declare const LOCAL_APPS: {
  displayOnModelPage: typeof isLlamaCppGgufModel;
  snippet: (model: ModelData, filepath?: string) => string;
  };
+ unsloth: {
+ prettyLabel: string;
+ docsUrl: string;
+ mainTask: "text-generation";
+ displayOnModelPage: typeof isUnslothModel;
+ snippet: (model: ModelData) => LocalAppSnippet[];
+ };
  "docker-model-runner": {
  prettyLabel: string;
  docsUrl: string;
@@ -1 +1 @@
- {"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAKnD,MAAM,WAAW,eAAe;IAC/B;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,GAAG,CAAC;CACtD,GACD;IACA;;;;OAIG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,MAAM,GAAG,MAAM,EAAE,GAAG,eAAe,GAAG,eAAe,EAAE,CAAC;CACzG,CACH,CAAC;AAsBF,iBAAS,UAAU,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE7C;AAED,iBAAS,mBAAmB,CAAC,KAAK,EAAE,SAAS,WAE5C;AAED,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAU9C;AAED,iBAAS,wBAAwB,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE3D;AAiXD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBAvWS,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBAiDzC,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBAyC3D,SAAS,KAAG,eAAe,EAAE;;;;;;oCAuS3B,SAAS;yBAjPT,SAAS,KAAG,eAAe,EAAE;;;;;;;yBAoF9B,SAAS,KAAG,eAAe,EAAE;;;;;;;yBA7B/B,SAAS,KAAG,eAAe,EAAE;;;;;;;;;;;;;;yBApIzB,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;yBAJjD,SAAS,aAAa,MAAM,KAAG,MAAM;;;;;;;yBA6O1B,SAAS,aAAa,MAAM,KAAG,MAAM;;;;;;;yBAM9C,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBA3CtD,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;CAmRtC,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
+ {"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAKnD,MAAM,WAAW,eAAe;IAC/B;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,GAAG,CAAC;CACtD,GACD;IACA;;;;OAIG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,MAAM,GAAG,MAAM,EAAE,GAAG,eAAe,GAAG,eAAe,EAAE,CAAC;CACzG,CACH,CAAC;AAsBF,iBAAS,UAAU,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE7C;AAED,iBAAS,mBAAmB,CAAC,KAAK,EAAE,SAAS,WAE5C;AAED,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAU9C;AAED,iBAAS,wBAAwB,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE3D;AA0BD,iBAAS,cAAc,CAAC,KAAK,EAAE,SAAS,WAEvC;AA6ZD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBA3ZS,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBAiDzC,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBAmF3D,SAAS,KAAG,eAAe,EAAE;;;;;;oCAiT3B,SAAS;yBA3PT,SAAS,KAAG,eAAe,EAAE;;;;;;;yBAoF9B,SAAS,KAAG,eAAe,EAAE;;;;;;;yBA7B/B,SAAS,KAAG,eAAe,EAAE;;;;;;;;;;;;;;yBApIzB,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;yBA9CjD,SAAS,aAAa,MAAM,KAAG,MAAM;;;;;;;yBAIpC,SAAS,KAAG,eAAe,EAAE;;;;;;;yBA6RnB,SAAS,aAAa,MAAM,KAAG,MAAM;;;;;;;yBAM9C,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBArDtD,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;CAuStC,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
@@ -43,6 +43,23 @@ function isAmdRyzenModel(model) {
  function isMlxModel(model) {
  return model.tags.includes("mlx");
  }
+ /**
+ * Returns the model's chat template string, coalescing across sources:
+ * GGUF metadata > chat_template_jinja file > tokenizer_config.json
+ */
+ function getChatTemplate(model) {
+ const ct = model.gguf?.chat_template ?? model.config?.chat_template_jinja ?? model.config?.tokenizer_config?.chat_template;
+ if (typeof ct === "string") {
+ return ct;
+ }
+ if (Array.isArray(ct)) {
+ return ct[0]?.template;
+ }
+ return undefined;
+ }
+ function isUnslothModel(model) {
+ return model.tags.includes("unsloth") || isLlamaCppGgufModel(model);
+ }
  function getQuantTag(filepath) {
  const defaultTag = ":{{QUANT_TAG}}";
  if (!filepath) {
@@ -115,6 +132,43 @@ const snippetNodeLlamaCppCli = (model, filepath) => {
  const snippetOllama = (model, filepath) => {
  return `ollama run hf.co/${model.id}${getQuantTag(filepath)}`;
  };
+ const snippetUnsloth = (model) => {
+ const isGguf = isLlamaCppGgufModel(model);
+ const studio_instructions = {
+ title: "Open model in Unsloth Studio",
+ setup: ["pip install unsloth", "unsloth studio setup"].join("\n"),
+ content: [
+ "# Run unsloth studio",
+ "unsloth studio -H 0.0.0.0 -p 8000",
+ "# Then open http://localhost:8000/chat in your browser",
+ "# Search for " + model.id + " to start chatting",
+ ].join("\n"),
+ };
+ const hf_spaces_instructions = {
+ title: "Using HuggingFace Spaces for Unsloth",
+ setup: "# No setup required",
+ content: "# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for " +
+ model.id +
+ " to start chatting",
+ };
+ const fastmodel_instructions = {
+ title: "Load model with FastModel",
+ setup: "pip install unsloth",
+ content: [
+ "from unsloth import FastModel",
+ "model, tokenizer = FastModel.from_pretrained(",
+ ' model_name="' + model.id + '",',
+ " max_seq_length=2048,",
+ ")",
+ ].join("\n"),
+ };
+ if (isGguf) {
+ return [studio_instructions, hf_spaces_instructions];
+ }
+ else {
+ return [studio_instructions, hf_spaces_instructions, fastmodel_instructions];
+ }
+ };
  const snippetLocalAI = (model, filepath) => {
  const command = (binary) => ["# Load and run the model:", `${binary} huggingface://${model.id}/${filepath ?? "{{GGUF_FILE}}"}`].join("\n");
  return [
@@ -298,12 +352,25 @@ const snippetMlxLm = (model) => {
  ];
  };
  const snippetPi = (model, filepath) => {
- const quantTag = getQuantTag(filepath);
  const modelName = model.id.split("/").pop() ?? model.id;
+ const isMLX = isMlxModel(model);
+ // Step 1: Server — differs by backend
+ const serverStep = isMLX
+ ? {
+ title: "Start the MLX server",
+ setup: "# Install MLX LM:\nuv tool install mlx-lm",
+ content: `# Start a local OpenAI-compatible server:\nmlx_lm.server --model "${model.id}"`,
+ }
+ : {
+ title: "Start the llama.cpp server",
+ setup: "# Install llama.cpp:\nbrew install llama.cpp",
+ content: `# Start a local OpenAI-compatible server:\nllama-server -hf ${model.id}${getQuantTag(filepath)} --jinja`,
+ };
+ // Step 2: Pi config — port and provider name differ
  const modelsJson = JSON.stringify({
  providers: {
- "llama-cpp": {
- baseUrl: "http://localhost:8080/v1",
+ [isMLX ? "mlx-lm" : "llama-cpp"]: {
+ baseUrl: isMLX ? "http://localhost:8000/v1" : "http://localhost:8080/v1",
  api: "openai-completions",
  apiKey: "none",
  models: [{ id: modelName }],
@@ -311,11 +378,7 @@ const snippetPi = (model, filepath) => {
  },
  }, null, 2);
  return [
- {
- title: "Start the llama.cpp server",
- setup: "# Install llama.cpp:\nbrew install llama.cpp",
- content: `# Start a local OpenAI-compatible server:\nllama-server -hf ${model.id}${quantTag} --jinja`,
- },
+ serverStep,
  {
  title: "Configure the model in Pi",
  setup: "# Install Pi:\nnpm install -g @mariozechner/pi-coding-agent",
@@ -323,7 +386,7 @@ const snippetPi = (model, filepath) => {
  },
  {
  title: "Run Pi",
- content: `# Start Pi in your project directory:\npi`,
+ content: "# Start Pi in your project directory:\npi",
  },
  ];
  };
@@ -539,6 +602,13 @@ exports.LOCAL_APPS = {
  displayOnModelPage: isLlamaCppGgufModel,
  snippet: snippetOllama,
  },
+ unsloth: {
+ prettyLabel: "Unsloth",
+ docsUrl: "https://unsloth.ai/docs",
+ mainTask: "text-generation",
+ displayOnModelPage: isUnslothModel,
+ snippet: snippetUnsloth,
+ },
  "docker-model-runner": {
  prettyLabel: "Docker Model Runner",
  docsUrl: "https://docs.docker.com/ai/model-runner/",
@@ -557,7 +627,9 @@ exports.LOCAL_APPS = {
  prettyLabel: "Pi",
  docsUrl: "https://github.com/badlogic/pi-mono",
  mainTask: "text-generation",
- displayOnModelPage: (model) => isLlamaCppGgufModel(model) && !!model.gguf?.chat_template?.includes("tools"),
+ displayOnModelPage: (model) => (isLlamaCppGgufModel(model) || isMlxModel(model)) &&
+ model.pipeline_tag === "text-generation" &&
+ !!getChatTemplate(model)?.includes("tools"),
  snippet: snippetPi,
  },
  };
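
Two behavioral changes stand out in this file. First, `snippetUnsloth` returns two steps for GGUF models and three (adding the `FastModel` loader) otherwise. Second, the Pi gate in the final hunk used to require a GGUF model whose `gguf.chat_template` mentioned "tools"; it now also admits MLX models, additionally requires `pipeline_tag === "text-generation"`, and resolves the template through the new `getChatTemplate` fallback chain. Since `getChatTemplate` and the gate are module-private, here is a standalone mirror for illustration; `isLlamaCppGgufModel` is approximated as "has gguf metadata", which is how the spec diff below exercises it:

```ts
type MinimalModel = {
	tags: string[];
	pipeline_tag?: string;
	gguf?: { chat_template?: string };
	config?: {
		chat_template_jinja?: string;
		tokenizer_config?: { chat_template?: string | { name?: string; template: string }[] };
	};
};

// Mirror of getChatTemplate: GGUF metadata > chat_template_jinja > tokenizer_config.
function chatTemplate(model: MinimalModel): string | undefined {
	const ct =
		model.gguf?.chat_template ?? model.config?.chat_template_jinja ?? model.config?.tokenizer_config?.chat_template;
	if (typeof ct === "string") return ct;
	if (Array.isArray(ct)) return ct[0]?.template; // named-template list form
	return undefined;
}

// Mirror of the new Pi displayOnModelPage gate.
function showPiOnModelPage(model: MinimalModel): boolean {
	const isGgufOrMlx = model.gguf !== undefined || model.tags.includes("mlx");
	return isGgufOrMlx && model.pipeline_tag === "text-generation" && !!chatTemplate(model)?.includes("tools");
}

// An MLX model now qualifies through tokenizer_config.chat_template:
console.log(
	showPiOnModelPage({
		tags: ["mlx"],
		pipeline_tag: "text-generation",
		config: { tokenizer_config: { chat_template: "{% if tools %}...{% endif %}" } },
	}),
); // true
```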
@@ -126,6 +126,27 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
  (0, vitest_1.expect)(snippet[1].content).toContain(`"id": "Llama-3.2-3B-Instruct-GGUF"`);
  (0, vitest_1.expect)(snippet[2].content).toContain("pi");
  });
+ (0, vitest_1.it)("pi - mlx", async () => {
+ const { snippet: snippetFunc } = local_apps_js_1.LOCAL_APPS["pi"];
+ const model = {
+ id: "mlx-community/Llama-3.2-3B-Instruct-mlx",
+ tags: ["mlx", "conversational"],
+ pipeline_tag: "text-generation",
+ config: {
+ tokenizer_config: {
+ chat_template: "{% if tools %}...{% endif %}",
+ },
+ },
+ inference: "",
+ };
+ const snippet = snippetFunc(model);
+ (0, vitest_1.expect)(snippet[0].setup).toContain("uv tool install mlx-lm");
+ (0, vitest_1.expect)(snippet[0].content).toContain('mlx_lm.server --model "mlx-community/Llama-3.2-3B-Instruct-mlx"');
+ (0, vitest_1.expect)(snippet[1].setup).toContain("npm install -g @mariozechner/pi-coding-agent");
+ (0, vitest_1.expect)(snippet[1].content).toContain('"baseUrl": "http://localhost:8000/v1"');
+ (0, vitest_1.expect)(snippet[1].content).toContain('"id": "Llama-3.2-3B-Instruct-mlx"');
+ (0, vitest_1.expect)(snippet[2].content).toContain("pi");
+ });
  (0, vitest_1.it)("docker model runner", async () => {
  const { snippet: snippetFunc } = local_apps_js_1.LOCAL_APPS["docker-model-runner"];
  const model = {
@@ -137,4 +158,55 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
  const snippet = snippetFunc(model);
  (0, vitest_1.expect)(snippet).toEqual(`docker model run hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:{{QUANT_TAG}}`);
  });
+ (0, vitest_1.it)("unsloth tagged model", async () => {
+ const { displayOnModelPage, snippet: snippetFunc } = local_apps_js_1.LOCAL_APPS.unsloth;
+ const model = {
+ id: "some-user/my-unsloth-finetune",
+ tags: ["unsloth", "conversational"],
+ inference: "",
+ };
+ (0, vitest_1.expect)(displayOnModelPage(model)).toBe(true);
+ const snippet = snippetFunc(model);
+ (0, vitest_1.expect)(snippet[0].setup).toBe("pip install unsloth\nunsloth studio setup");
+ (0, vitest_1.expect)(snippet[0].content).toBe("# Run unsloth studio\nunsloth studio -H 0.0.0.0 -p 8000\n# Then open http://localhost:8000/chat in your browser\n# Search for some-user/my-unsloth-finetune to start chatting");
+ (0, vitest_1.expect)(snippet[1].setup).toBe("# No setup required");
+ (0, vitest_1.expect)(snippet[1].content).toBe("# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for some-user/my-unsloth-finetune to start chatting");
+ (0, vitest_1.expect)(snippet[2].setup).toBe("pip install unsloth");
+ (0, vitest_1.expect)(snippet[2].content).toBe('from unsloth import FastModel\nmodel, tokenizer = FastModel.from_pretrained(\n model_name="some-user/my-unsloth-finetune",\n max_seq_length=2048,\n)');
+ });
+ (0, vitest_1.it)("unsloth namespace gguf model", async () => {
+ const { displayOnModelPage, snippet: snippetFunc } = local_apps_js_1.LOCAL_APPS.unsloth;
+ const model = {
+ id: "unsloth/Llama-3.2-3B-Instruct-GGUF",
+ tags: ["conversational"],
+ gguf: { total: 1, context_length: 4096 },
+ inference: "",
+ };
+ (0, vitest_1.expect)(displayOnModelPage(model)).toBe(true);
+ const snippet = snippetFunc(model);
+ (0, vitest_1.expect)(snippet[0].setup).toBe("pip install unsloth\nunsloth studio setup");
+ (0, vitest_1.expect)(snippet[0].content).toBe("# Run unsloth studio\nunsloth studio -H 0.0.0.0 -p 8000\n# Then open http://localhost:8000/chat in your browser\n# Search for unsloth/Llama-3.2-3B-Instruct-GGUF to start chatting");
+ (0, vitest_1.expect)(snippet[1].setup).toBe("# No setup required");
+ (0, vitest_1.expect)(snippet[1].content).toBe("# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for unsloth/Llama-3.2-3B-Instruct-GGUF to start chatting");
+ (0, vitest_1.expect)(snippet).toHaveLength(2); // GGUF models only get 2 snippets
+ });
+ (0, vitest_1.it)("non unsloth namespace gguf model", async () => {
+ const { displayOnModelPage } = local_apps_js_1.LOCAL_APPS.unsloth;
+ const model = {
+ id: "dummy/Llama-3.2-3B-Instruct-GGUF",
+ tags: ["conversational"],
+ gguf: { total: 1, context_length: 4096 },
+ inference: "",
+ };
+ (0, vitest_1.expect)(displayOnModelPage(model)).toBe(true);
+ });
+ (0, vitest_1.it)("unsloth not shown for unrelated model", async () => {
+ const { displayOnModelPage } = local_apps_js_1.LOCAL_APPS.unsloth;
+ const model = {
+ id: "meta-llama/Llama-3.2-3B-Instruct",
+ tags: ["conversational"],
+ inference: "",
+ };
+ (0, vitest_1.expect)(displayOnModelPage(model)).toBe(false);
+ });
  });
@@ -38,6 +38,7 @@ export declare enum GGMLFileQuantizationType {
  TQ1_0 = 36,
  TQ2_0 = 37,
  MXFP4_MOE = 38,
+ NVFP4 = 39,
  Q2_K_XL = 1000,
  Q3_K_XL = 1001,
  Q4_K_XL = 1002,
@@ -82,6 +83,7 @@ export declare enum GGMLQuantizationType {
  BF16 = 30,
  TQ1_0 = 34,
  TQ2_0 = 35,
- MXFP4 = 39
+ MXFP4 = 39,
+ NVFP4 = 40
  }
  //# sourceMappingURL=gguf.d.ts.map
@@ -1 +1 @@
- {"version":3,"file":"gguf.d.ts","sourceRoot":"","sources":["../../src/gguf.ts"],"names":[],"mappings":"AAGA,oBAAY,wBAAwB;IACnC,GAAG,IAAI;IACP,GAAG,IAAI;IACP,IAAI,IAAI;IACR,IAAI,IAAI;IACR,aAAa,IAAI;IACjB,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,KAAK;IACT,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,IAAI,KAAK;IACT,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,OAAO,KAAK;IACZ,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,IAAI,KAAK;IACT,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,KAAK,KAAK;IACV,KAAK,KAAK;IACV,SAAS,KAAK;IAId,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;CACd;AAGD,eAAO,MAAM,aAAa,QAEzB,CAAC;AACF,eAAO,MAAM,oBAAoB,QAAiC,CAAC;AAEnE,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAGrE;AAKD,eAAO,MAAM,gBAAgB,EAAE,wBAAwB,EA0DtD,CAAC;AAIF,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,wBAAwB,EAC/B,eAAe,EAAE,wBAAwB,EAAE,GACzC,wBAAwB,GAAG,SAAS,CAmCtC;AAGD,oBAAY,oBAAoB;IAC/B,GAAG,IAAI;IACP,GAAG,IAAI;IACP,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,OAAO,KAAK;IACZ,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,KAAK,KAAK;IACV,MAAM,KAAK;IACX,EAAE,KAAK;IACP,GAAG,KAAK;IACR,GAAG,KAAK;IACR,GAAG,KAAK;IACR,GAAG,KAAK;IACR,KAAK,KAAK;IACV,IAAI,KAAK;IACT,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;CACV"}
+ {"version":3,"file":"gguf.d.ts","sourceRoot":"","sources":["../../src/gguf.ts"],"names":[],"mappings":"AAGA,oBAAY,wBAAwB;IACnC,GAAG,IAAI;IACP,GAAG,IAAI;IACP,IAAI,IAAI;IACR,IAAI,IAAI;IACR,aAAa,IAAI;IACjB,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,KAAK;IACT,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,IAAI,KAAK;IACT,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,MAAM,KAAK;IACX,MAAM,KAAK;IACX,OAAO,KAAK;IACZ,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,IAAI,KAAK;IACT,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,QAAQ,KAAK;IACb,KAAK,KAAK;IACV,KAAK,KAAK;IACV,SAAS,KAAK;IACd,KAAK,KAAK;IAIV,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;IACd,OAAO,OAAO;CACd;AAGD,eAAO,MAAM,aAAa,QAEzB,CAAC;AACF,eAAO,MAAM,oBAAoB,QAAiC,CAAC;AAEnE,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAGrE;AAKD,eAAO,MAAM,gBAAgB,EAAE,wBAAwB,EA2DtD,CAAC;AAIF,wBAAgB,oBAAoB,CACnC,KAAK,EAAE,wBAAwB,EAC/B,eAAe,EAAE,wBAAwB,EAAE,GACzC,wBAAwB,GAAG,SAAS,CAmCtC;AAGD,oBAAY,oBAAoB;IAC/B,GAAG,IAAI;IACP,GAAG,IAAI;IACP,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,IAAI;IACR,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,IAAI,KAAK;IACT,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,OAAO,KAAK;IACZ,KAAK,KAAK;IACV,MAAM,KAAK;IACX,KAAK,KAAK;IACV,KAAK,KAAK;IACV,MAAM,KAAK;IACX,EAAE,KAAK;IACP,GAAG,KAAK;IACR,GAAG,KAAK;IACR,GAAG,KAAK;IACR,GAAG,KAAK;IACR,KAAK,KAAK;IACV,IAAI,KAAK;IACT,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;IACV,KAAK,KAAK;CACV"}
package/dist/esm/gguf.js CHANGED
@@ -42,6 +42,7 @@ export var GGMLFileQuantizationType;
  GGMLFileQuantizationType[GGMLFileQuantizationType["TQ1_0"] = 36] = "TQ1_0";
  GGMLFileQuantizationType[GGMLFileQuantizationType["TQ2_0"] = 37] = "TQ2_0";
  GGMLFileQuantizationType[GGMLFileQuantizationType["MXFP4_MOE"] = 38] = "MXFP4_MOE";
+ GGMLFileQuantizationType[GGMLFileQuantizationType["NVFP4"] = 39] = "NVFP4";
  // custom quants used by unsloth
  // they are not officially a scheme enum value in GGUF, but only here for naming
  GGMLFileQuantizationType[GGMLFileQuantizationType["Q2_K_XL"] = 1000] = "Q2_K_XL";
@@ -91,6 +92,7 @@ export const GGUF_QUANT_ORDER = [
  GGMLFileQuantizationType.Q4_2,
  GGMLFileQuantizationType.Q4_3,
  GGMLFileQuantizationType.MXFP4_MOE,
+ GGMLFileQuantizationType.NVFP4,
  // 3-bit quantizations
  GGMLFileQuantizationType.Q3_K_XL,
  GGMLFileQuantizationType.Q3_K_L,
@@ -182,4 +184,5 @@ export var GGMLQuantizationType;
  GGMLQuantizationType[GGMLQuantizationType["TQ1_0"] = 34] = "TQ1_0";
  GGMLQuantizationType[GGMLQuantizationType["TQ2_0"] = 35] = "TQ2_0";
  GGMLQuantizationType[GGMLQuantizationType["MXFP4"] = 39] = "MXFP4";
+ GGMLQuantizationType[GGMLQuantizationType["NVFP4"] = 40] = "NVFP4";
  })(GGMLQuantizationType || (GGMLQuantizationType = {}));
@@ -56,6 +56,7 @@ declare function isTgiModel(model: ModelData): boolean;
  declare function isLlamaCppGgufModel(model: ModelData): boolean;
  declare function isVllmModel(model: ModelData): boolean;
  declare function isDockerModelRunnerModel(model: ModelData): boolean;
+ declare function isUnslothModel(model: ModelData): boolean;
  /**
  * Add your new local app here.
  *
@@ -198,6 +199,13 @@ export declare const LOCAL_APPS: {
  displayOnModelPage: typeof isLlamaCppGgufModel;
  snippet: (model: ModelData, filepath?: string) => string;
  };
+ unsloth: {
+ prettyLabel: string;
+ docsUrl: string;
+ mainTask: "text-generation";
+ displayOnModelPage: typeof isUnslothModel;
+ snippet: (model: ModelData) => LocalAppSnippet[];
+ };
  "docker-model-runner": {
  prettyLabel: string;
  docsUrl: string;
@@ -1 +1 @@
- {"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAKnD,MAAM,WAAW,eAAe;IAC/B;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,GAAG,CAAC;CACtD,GACD;IACA;;;;OAIG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,MAAM,GAAG,MAAM,EAAE,GAAG,eAAe,GAAG,eAAe,EAAE,CAAC;CACzG,CACH,CAAC;AAsBF,iBAAS,UAAU,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE7C;AAED,iBAAS,mBAAmB,CAAC,KAAK,EAAE,SAAS,WAE5C;AAED,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAU9C;AAED,iBAAS,wBAAwB,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE3D;AAiXD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBAvWS,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBAiDzC,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBAyC3D,SAAS,KAAG,eAAe,EAAE;;;;;;oCAuS3B,SAAS;yBAjPT,SAAS,KAAG,eAAe,EAAE;;;;;;;yBAoF9B,SAAS,KAAG,eAAe,EAAE;;;;;;;yBA7B/B,SAAS,KAAG,eAAe,EAAE;;;;;;;;;;;;;;yBApIzB,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;yBAJjD,SAAS,aAAa,MAAM,KAAG,MAAM;;;;;;;yBA6O1B,SAAS,aAAa,MAAM,KAAG,MAAM;;;;;;;yBAM9C,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBA3CtD,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;CAmRtC,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
+ {"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAKnD,MAAM,WAAW,eAAe;IAC/B;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,GAAG,CAAC;CACtD,GACD;IACA;;;;OAIG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,MAAM,GAAG,MAAM,EAAE,GAAG,eAAe,GAAG,eAAe,EAAE,CAAC;CACzG,CACH,CAAC;AAsBF,iBAAS,UAAU,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE7C;AAED,iBAAS,mBAAmB,CAAC,KAAK,EAAE,SAAS,WAE5C;AAED,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAU9C;AAED,iBAAS,wBAAwB,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAE3D;AA0BD,iBAAS,cAAc,CAAC,KAAK,EAAE,SAAS,WAEvC;AA6ZD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBA3ZS,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBAiDzC,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBAmF3D,SAAS,KAAG,eAAe,EAAE;;;;;;oCAiT3B,SAAS;yBA3PT,SAAS,KAAG,eAAe,EAAE;;;;;;;yBAoF9B,SAAS,KAAG,eAAe,EAAE;;;;;;;yBA7B/B,SAAS,KAAG,eAAe,EAAE;;;;;;;;;;;;;;yBApIzB,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;yBA9CjD,SAAS,aAAa,MAAM,KAAG,MAAM;;;;;;;yBAIpC,SAAS,KAAG,eAAe,EAAE;;;;;;;yBA6RnB,SAAS,aAAa,MAAM,KAAG,MAAM;;;;;;;yBAM9C,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;;;;;;yBArDtD,SAAS,aAAa,MAAM,KAAG,eAAe,EAAE;;CAuStC,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
@@ -40,6 +40,23 @@ function isAmdRyzenModel(model) {
  function isMlxModel(model) {
  return model.tags.includes("mlx");
  }
+ /**
+ * Returns the model's chat template string, coalescing across sources:
+ * GGUF metadata > chat_template_jinja file > tokenizer_config.json
+ */
+ function getChatTemplate(model) {
+ const ct = model.gguf?.chat_template ?? model.config?.chat_template_jinja ?? model.config?.tokenizer_config?.chat_template;
+ if (typeof ct === "string") {
+ return ct;
+ }
+ if (Array.isArray(ct)) {
+ return ct[0]?.template;
+ }
+ return undefined;
+ }
+ function isUnslothModel(model) {
+ return model.tags.includes("unsloth") || isLlamaCppGgufModel(model);
+ }
  function getQuantTag(filepath) {
  const defaultTag = ":{{QUANT_TAG}}";
  if (!filepath) {
@@ -112,6 +129,43 @@ const snippetNodeLlamaCppCli = (model, filepath) => {
  const snippetOllama = (model, filepath) => {
  return `ollama run hf.co/${model.id}${getQuantTag(filepath)}`;
  };
+ const snippetUnsloth = (model) => {
+ const isGguf = isLlamaCppGgufModel(model);
+ const studio_instructions = {
+ title: "Open model in Unsloth Studio",
+ setup: ["pip install unsloth", "unsloth studio setup"].join("\n"),
+ content: [
+ "# Run unsloth studio",
+ "unsloth studio -H 0.0.0.0 -p 8000",
+ "# Then open http://localhost:8000/chat in your browser",
+ "# Search for " + model.id + " to start chatting",
+ ].join("\n"),
+ };
+ const hf_spaces_instructions = {
+ title: "Using HuggingFace Spaces for Unsloth",
+ setup: "# No setup required",
+ content: "# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for " +
+ model.id +
+ " to start chatting",
+ };
+ const fastmodel_instructions = {
+ title: "Load model with FastModel",
+ setup: "pip install unsloth",
+ content: [
+ "from unsloth import FastModel",
+ "model, tokenizer = FastModel.from_pretrained(",
+ ' model_name="' + model.id + '",',
+ " max_seq_length=2048,",
+ ")",
+ ].join("\n"),
+ };
+ if (isGguf) {
+ return [studio_instructions, hf_spaces_instructions];
+ }
+ else {
+ return [studio_instructions, hf_spaces_instructions, fastmodel_instructions];
+ }
+ };
  const snippetLocalAI = (model, filepath) => {
  const command = (binary) => ["# Load and run the model:", `${binary} huggingface://${model.id}/${filepath ?? "{{GGUF_FILE}}"}`].join("\n");
  return [
@@ -295,12 +349,25 @@ const snippetMlxLm = (model) => {
  ];
  };
  const snippetPi = (model, filepath) => {
- const quantTag = getQuantTag(filepath);
  const modelName = model.id.split("/").pop() ?? model.id;
+ const isMLX = isMlxModel(model);
+ // Step 1: Server — differs by backend
+ const serverStep = isMLX
+ ? {
+ title: "Start the MLX server",
+ setup: "# Install MLX LM:\nuv tool install mlx-lm",
+ content: `# Start a local OpenAI-compatible server:\nmlx_lm.server --model "${model.id}"`,
+ }
+ : {
+ title: "Start the llama.cpp server",
+ setup: "# Install llama.cpp:\nbrew install llama.cpp",
+ content: `# Start a local OpenAI-compatible server:\nllama-server -hf ${model.id}${getQuantTag(filepath)} --jinja`,
+ };
+ // Step 2: Pi config — port and provider name differ
  const modelsJson = JSON.stringify({
  providers: {
- "llama-cpp": {
- baseUrl: "http://localhost:8080/v1",
+ [isMLX ? "mlx-lm" : "llama-cpp"]: {
+ baseUrl: isMLX ? "http://localhost:8000/v1" : "http://localhost:8080/v1",
  api: "openai-completions",
  apiKey: "none",
  models: [{ id: modelName }],
@@ -308,11 +375,7 @@ const snippetPi = (model, filepath) => {
  },
  }, null, 2);
  return [
- {
- title: "Start the llama.cpp server",
- setup: "# Install llama.cpp:\nbrew install llama.cpp",
- content: `# Start a local OpenAI-compatible server:\nllama-server -hf ${model.id}${quantTag} --jinja`,
- },
+ serverStep,
  {
  title: "Configure the model in Pi",
  setup: "# Install Pi:\nnpm install -g @mariozechner/pi-coding-agent",
@@ -320,7 +383,7 @@ const snippetPi = (model, filepath) => {
  },
  {
  title: "Run Pi",
- content: `# Start Pi in your project directory:\npi`,
+ content: "# Start Pi in your project directory:\npi",
  },
  ];
  };
@@ -536,6 +599,13 @@ export const LOCAL_APPS = {
  displayOnModelPage: isLlamaCppGgufModel,
  snippet: snippetOllama,
  },
+ unsloth: {
+ prettyLabel: "Unsloth",
+ docsUrl: "https://unsloth.ai/docs",
+ mainTask: "text-generation",
+ displayOnModelPage: isUnslothModel,
+ snippet: snippetUnsloth,
+ },
  "docker-model-runner": {
  prettyLabel: "Docker Model Runner",
  docsUrl: "https://docs.docker.com/ai/model-runner/",
@@ -554,7 +624,9 @@ export const LOCAL_APPS = {
  prettyLabel: "Pi",
  docsUrl: "https://github.com/badlogic/pi-mono",
  mainTask: "text-generation",
- displayOnModelPage: (model) => isLlamaCppGgufModel(model) && !!model.gguf?.chat_template?.includes("tools"),
+ displayOnModelPage: (model) => (isLlamaCppGgufModel(model) || isMlxModel(model)) &&
+ model.pipeline_tag === "text-generation" &&
+ !!getChatTemplate(model)?.includes("tools"),
  snippet: snippetPi,
  },
  };
@@ -124,6 +124,27 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
  expect(snippet[1].content).toContain(`"id": "Llama-3.2-3B-Instruct-GGUF"`);
  expect(snippet[2].content).toContain("pi");
  });
+ it("pi - mlx", async () => {
+ const { snippet: snippetFunc } = LOCAL_APPS["pi"];
+ const model = {
+ id: "mlx-community/Llama-3.2-3B-Instruct-mlx",
+ tags: ["mlx", "conversational"],
+ pipeline_tag: "text-generation",
+ config: {
+ tokenizer_config: {
+ chat_template: "{% if tools %}...{% endif %}",
+ },
+ },
+ inference: "",
+ };
+ const snippet = snippetFunc(model);
+ expect(snippet[0].setup).toContain("uv tool install mlx-lm");
+ expect(snippet[0].content).toContain('mlx_lm.server --model "mlx-community/Llama-3.2-3B-Instruct-mlx"');
+ expect(snippet[1].setup).toContain("npm install -g @mariozechner/pi-coding-agent");
+ expect(snippet[1].content).toContain('"baseUrl": "http://localhost:8000/v1"');
+ expect(snippet[1].content).toContain('"id": "Llama-3.2-3B-Instruct-mlx"');
+ expect(snippet[2].content).toContain("pi");
+ });
  it("docker model runner", async () => {
  const { snippet: snippetFunc } = LOCAL_APPS["docker-model-runner"];
  const model = {
@@ -135,4 +156,55 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
  const snippet = snippetFunc(model);
  expect(snippet).toEqual(`docker model run hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:{{QUANT_TAG}}`);
  });
+ it("unsloth tagged model", async () => {
+ const { displayOnModelPage, snippet: snippetFunc } = LOCAL_APPS.unsloth;
+ const model = {
+ id: "some-user/my-unsloth-finetune",
+ tags: ["unsloth", "conversational"],
+ inference: "",
+ };
+ expect(displayOnModelPage(model)).toBe(true);
+ const snippet = snippetFunc(model);
+ expect(snippet[0].setup).toBe("pip install unsloth\nunsloth studio setup");
+ expect(snippet[0].content).toBe("# Run unsloth studio\nunsloth studio -H 0.0.0.0 -p 8000\n# Then open http://localhost:8000/chat in your browser\n# Search for some-user/my-unsloth-finetune to start chatting");
+ expect(snippet[1].setup).toBe("# No setup required");
+ expect(snippet[1].content).toBe("# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for some-user/my-unsloth-finetune to start chatting");
+ expect(snippet[2].setup).toBe("pip install unsloth");
+ expect(snippet[2].content).toBe('from unsloth import FastModel\nmodel, tokenizer = FastModel.from_pretrained(\n model_name="some-user/my-unsloth-finetune",\n max_seq_length=2048,\n)');
+ });
+ it("unsloth namespace gguf model", async () => {
+ const { displayOnModelPage, snippet: snippetFunc } = LOCAL_APPS.unsloth;
+ const model = {
+ id: "unsloth/Llama-3.2-3B-Instruct-GGUF",
+ tags: ["conversational"],
+ gguf: { total: 1, context_length: 4096 },
+ inference: "",
+ };
+ expect(displayOnModelPage(model)).toBe(true);
+ const snippet = snippetFunc(model);
+ expect(snippet[0].setup).toBe("pip install unsloth\nunsloth studio setup");
+ expect(snippet[0].content).toBe("# Run unsloth studio\nunsloth studio -H 0.0.0.0 -p 8000\n# Then open http://localhost:8000/chat in your browser\n# Search for unsloth/Llama-3.2-3B-Instruct-GGUF to start chatting");
+ expect(snippet[1].setup).toBe("# No setup required");
+ expect(snippet[1].content).toBe("# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for unsloth/Llama-3.2-3B-Instruct-GGUF to start chatting");
+ expect(snippet).toHaveLength(2); // GGUF models only get 2 snippets
+ });
+ it("non unsloth namespace gguf model", async () => {
+ const { displayOnModelPage } = LOCAL_APPS.unsloth;
+ const model = {
+ id: "dummy/Llama-3.2-3B-Instruct-GGUF",
+ tags: ["conversational"],
+ gguf: { total: 1, context_length: 4096 },
+ inference: "",
+ };
+ expect(displayOnModelPage(model)).toBe(true);
+ });
+ it("unsloth not shown for unrelated model", async () => {
+ const { displayOnModelPage } = LOCAL_APPS.unsloth;
+ const model = {
+ id: "meta-llama/Llama-3.2-3B-Instruct",
+ tags: ["conversational"],
+ inference: "",
+ };
+ expect(displayOnModelPage(model)).toBe(false);
+ });
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@huggingface/tasks",
- "version": "0.20.3",
+ "version": "0.20.5",
  "description": "List of ML tasks for huggingface.co/tasks",
  "keywords": [
  "hub",
package/src/gguf.ts CHANGED
@@ -41,6 +41,7 @@ export enum GGMLFileQuantizationType {
  TQ1_0 = 36,
  TQ2_0 = 37,
  MXFP4_MOE = 38,
+ NVFP4 = 39,

  // custom quants used by unsloth
  // they are not officially a scheme enum value in GGUF, but only here for naming
@@ -99,6 +100,7 @@ export const GGUF_QUANT_ORDER: GGMLFileQuantizationType[] = [
  GGMLFileQuantizationType.Q4_2,
  GGMLFileQuantizationType.Q4_3,
  GGMLFileQuantizationType.MXFP4_MOE,
+ GGMLFileQuantizationType.NVFP4,

  // 3-bit quantizations
  GGMLFileQuantizationType.Q3_K_XL,
@@ -202,4 +204,5 @@ export enum GGMLQuantizationType {
  TQ1_0 = 34,
  TQ2_0 = 35,
  MXFP4 = 39,
+ NVFP4 = 40,
  }
@@ -138,6 +138,29 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
  expect(snippet[2].content).toContain("pi");
  });

+ it("pi - mlx", async () => {
+ const { snippet: snippetFunc } = LOCAL_APPS["pi"];
+ const model: ModelData = {
+ id: "mlx-community/Llama-3.2-3B-Instruct-mlx",
+ tags: ["mlx", "conversational"],
+ pipeline_tag: "text-generation",
+ config: {
+ tokenizer_config: {
+ chat_template: "{% if tools %}...{% endif %}",
+ },
+ },
+ inference: "",
+ };
+ const snippet = snippetFunc(model);
+
+ expect(snippet[0].setup).toContain("uv tool install mlx-lm");
+ expect(snippet[0].content).toContain('mlx_lm.server --model "mlx-community/Llama-3.2-3B-Instruct-mlx"');
+ expect(snippet[1].setup).toContain("npm install -g @mariozechner/pi-coding-agent");
+ expect(snippet[1].content).toContain('"baseUrl": "http://localhost:8000/v1"');
+ expect(snippet[1].content).toContain('"id": "Llama-3.2-3B-Instruct-mlx"');
+ expect(snippet[2].content).toContain("pi");
+ });
+
  it("docker model runner", async () => {
  const { snippet: snippetFunc } = LOCAL_APPS["docker-model-runner"];
  const model: ModelData = {
@@ -150,4 +173,73 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\

  expect(snippet).toEqual(`docker model run hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:{{QUANT_TAG}}`);
  });
+
+ it("unsloth tagged model", async () => {
+ const { displayOnModelPage, snippet: snippetFunc } = LOCAL_APPS.unsloth;
+ const model: ModelData = {
+ id: "some-user/my-unsloth-finetune",
+ tags: ["unsloth", "conversational"],
+ inference: "",
+ };
+
+ expect(displayOnModelPage(model)).toBe(true);
+ const snippet = snippetFunc(model);
+ expect(snippet[0].setup).toBe("pip install unsloth\nunsloth studio setup");
+ expect(snippet[0].content).toBe(
+ "# Run unsloth studio\nunsloth studio -H 0.0.0.0 -p 8000\n# Then open http://localhost:8000/chat in your browser\n# Search for some-user/my-unsloth-finetune to start chatting",
+ );
+ expect(snippet[1].setup).toBe("# No setup required");
+ expect(snippet[1].content).toBe(
+ "# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for some-user/my-unsloth-finetune to start chatting",
+ );
+ expect(snippet[2].setup).toBe("pip install unsloth");
+ expect(snippet[2].content).toBe(
+ 'from unsloth import FastModel\nmodel, tokenizer = FastModel.from_pretrained(\n model_name="some-user/my-unsloth-finetune",\n max_seq_length=2048,\n)',
+ );
+ });
+
+ it("unsloth namespace gguf model", async () => {
+ const { displayOnModelPage, snippet: snippetFunc } = LOCAL_APPS.unsloth;
+ const model: ModelData = {
+ id: "unsloth/Llama-3.2-3B-Instruct-GGUF",
+ tags: ["conversational"],
+ gguf: { total: 1, context_length: 4096 },
+ inference: "",
+ };
+
+ expect(displayOnModelPage(model)).toBe(true);
+ const snippet = snippetFunc(model);
+ expect(snippet[0].setup).toBe("pip install unsloth\nunsloth studio setup");
+ expect(snippet[0].content).toBe(
+ "# Run unsloth studio\nunsloth studio -H 0.0.0.0 -p 8000\n# Then open http://localhost:8000/chat in your browser\n# Search for unsloth/Llama-3.2-3B-Instruct-GGUF to start chatting",
+ );
+ expect(snippet[1].setup).toBe("# No setup required");
+ expect(snippet[1].content).toBe(
+ "# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for unsloth/Llama-3.2-3B-Instruct-GGUF to start chatting",
+ );
+ expect(snippet).toHaveLength(2); // GGUF models only get 2 snippets
+ });
+
+ it("non unsloth namespace gguf model", async () => {
+ const { displayOnModelPage } = LOCAL_APPS.unsloth;
+ const model: ModelData = {
+ id: "dummy/Llama-3.2-3B-Instruct-GGUF",
+ tags: ["conversational"],
+ gguf: { total: 1, context_length: 4096 },
+ inference: "",
+ };
+
+ expect(displayOnModelPage(model)).toBe(true);
+ });
+
+ it("unsloth not shown for unrelated model", async () => {
+ const { displayOnModelPage } = LOCAL_APPS.unsloth;
+ const model: ModelData = {
+ id: "meta-llama/Llama-3.2-3B-Instruct",
+ tags: ["conversational"],
+ inference: "",
+ };
+
+ expect(displayOnModelPage(model)).toBe(false);
+ });
  });
package/src/local-apps.ts CHANGED
@@ -115,6 +115,26 @@ function isMlxModel(model: ModelData) {
  return model.tags.includes("mlx");
  }

+ /**
+ * Returns the model's chat template string, coalescing across sources:
+ * GGUF metadata > chat_template_jinja file > tokenizer_config.json
+ */
+ function getChatTemplate(model: ModelData): string | undefined {
+ const ct =
+ model.gguf?.chat_template ?? model.config?.chat_template_jinja ?? model.config?.tokenizer_config?.chat_template;
+ if (typeof ct === "string") {
+ return ct;
+ }
+ if (Array.isArray(ct)) {
+ return ct[0]?.template;
+ }
+ return undefined;
+ }
+
+ function isUnslothModel(model: ModelData) {
+ return model.tags.includes("unsloth") || isLlamaCppGgufModel(model);
+ }
+
  function getQuantTag(filepath?: string): string {
  const defaultTag = ":{{QUANT_TAG}}";

@@ -193,6 +213,48 @@ const snippetOllama = (model: ModelData, filepath?: string): string => {
  return `ollama run hf.co/${model.id}${getQuantTag(filepath)}`;
  };

+ const snippetUnsloth = (model: ModelData): LocalAppSnippet[] => {
+ const isGguf = isLlamaCppGgufModel(model);
+
+ const studio_instructions: LocalAppSnippet = {
+ title: "Open model in Unsloth Studio",
+ setup: ["pip install unsloth", "unsloth studio setup"].join("\n"),
+ content: [
+ "# Run unsloth studio",
+ "unsloth studio -H 0.0.0.0 -p 8000",
+ "# Then open http://localhost:8000/chat in your browser",
+ "# Search for " + model.id + " to start chatting",
+ ].join("\n"),
+ };
+
+ const hf_spaces_instructions: LocalAppSnippet = {
+ title: "Using HuggingFace Spaces for Unsloth",
+ setup: "# No setup required",
+ content:
+ "# Open https://huggingface.co/spaces/unsloth/studio in your browser\n# Search for " +
+ model.id +
+ " to start chatting",
+ };
+
+ const fastmodel_instructions: LocalAppSnippet = {
+ title: "Load model with FastModel",
+ setup: "pip install unsloth",
+ content: [
+ "from unsloth import FastModel",
+ "model, tokenizer = FastModel.from_pretrained(",
+ ' model_name="' + model.id + '",',
+ " max_seq_length=2048,",
+ ")",
+ ].join("\n"),
+ };
+
+ if (isGguf) {
+ return [studio_instructions, hf_spaces_instructions];
+ } else {
+ return [studio_instructions, hf_spaces_instructions, fastmodel_instructions];
+ }
+ };
+
  const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
  const command = (binary: string) =>
  ["# Load and run the model:", `${binary} huggingface://${model.id}/${filepath ?? "{{GGUF_FILE}}"}`].join("\n");
@@ -390,14 +452,28 @@ const snippetMlxLm = (model: ModelData): LocalAppSnippet[] => {
  };

  const snippetPi = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
- const quantTag = getQuantTag(filepath);
  const modelName = model.id.split("/").pop() ?? model.id;
+ const isMLX = isMlxModel(model);
+
+ // Step 1: Server — differs by backend
+ const serverStep: LocalAppSnippet = isMLX
+ ? {
+ title: "Start the MLX server",
+ setup: "# Install MLX LM:\nuv tool install mlx-lm",
+ content: `# Start a local OpenAI-compatible server:\nmlx_lm.server --model "${model.id}"`,
+ }
+ : {
+ title: "Start the llama.cpp server",
+ setup: "# Install llama.cpp:\nbrew install llama.cpp",
+ content: `# Start a local OpenAI-compatible server:\nllama-server -hf ${model.id}${getQuantTag(filepath)} --jinja`,
+ };

+ // Step 2: Pi config — port and provider name differ
  const modelsJson = JSON.stringify(
  {
  providers: {
- "llama-cpp": {
- baseUrl: "http://localhost:8080/v1",
+ [isMLX ? "mlx-lm" : "llama-cpp"]: {
+ baseUrl: isMLX ? "http://localhost:8000/v1" : "http://localhost:8080/v1",
  api: "openai-completions",
  apiKey: "none",
  models: [{ id: modelName }],
@@ -409,11 +485,7 @@ const snippetPi = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
  );

  return [
- {
- title: "Start the llama.cpp server",
- setup: "# Install llama.cpp:\nbrew install llama.cpp",
- content: `# Start a local OpenAI-compatible server:\nllama-server -hf ${model.id}${quantTag} --jinja`,
- },
+ serverStep,
  {
  title: "Configure the model in Pi",
  setup: "# Install Pi:\nnpm install -g @mariozechner/pi-coding-agent",
@@ -421,7 +493,7 @@ const snippetPi = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
  },
  {
  title: "Run Pi",
- content: `# Start Pi in your project directory:\npi`,
+ content: "# Start Pi in your project directory:\npi",
  },
  ];
  };
@@ -643,6 +715,13 @@ export const LOCAL_APPS = {
  displayOnModelPage: isLlamaCppGgufModel,
  snippet: snippetOllama,
  },
+ unsloth: {
+ prettyLabel: "Unsloth",
+ docsUrl: "https://unsloth.ai/docs",
+ mainTask: "text-generation",
+ displayOnModelPage: isUnslothModel,
+ snippet: snippetUnsloth,
+ },
  "docker-model-runner": {
  prettyLabel: "Docker Model Runner",
  docsUrl: "https://docs.docker.com/ai/model-runner/",
@@ -661,7 +740,10 @@ export const LOCAL_APPS = {
  prettyLabel: "Pi",
  docsUrl: "https://github.com/badlogic/pi-mono",
  mainTask: "text-generation",
- displayOnModelPage: (model) => isLlamaCppGgufModel(model) && !!model.gguf?.chat_template?.includes("tools"),
+ displayOnModelPage: (model) =>
+ (isLlamaCppGgufModel(model) || isMlxModel(model)) &&
+ model.pipeline_tag === "text-generation" &&
+ !!getChatTemplate(model)?.includes("tools"),
  snippet: snippetPi,
  },
  } satisfies Record<string, LocalApp>;
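
Putting the Pi changes together: for an MLX model, `snippetPi` now emits an `mlx-lm` server step on port 8000 where a GGUF model gets the llama.cpp step on port 8080. A sketch of driving it end to end with the same model shape the specs use (the `as ModelData` cast elides the remaining fields):

```ts
import { LOCAL_APPS, type ModelData } from "@huggingface/tasks";

const model = {
	id: "mlx-community/Llama-3.2-3B-Instruct-mlx",
	tags: ["mlx", "conversational"],
	pipeline_tag: "text-generation",
	config: { tokenizer_config: { chat_template: "{% if tools %}...{% endif %}" } },
	inference: "",
} as ModelData;

const pi = LOCAL_APPS.pi;
if (pi.displayOnModelPage(model)) {
	const [server, configure, run] = pi.snippet(model);
	console.log(server.title); // "Start the MLX server"
	console.log(server.content); // mlx_lm.server --model "mlx-community/Llama-3.2-3B-Instruct-mlx"
	console.log(configure.content); // JSON config with provider "mlx-lm" and baseUrl http://localhost:8000/v1
	console.log(run.content); // "# Start Pi in your project directory:\npi"
}
```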