offgrid-ai 0.16.3 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -2
- package/package.json +3 -11
- package/resources/recommendations.json +8 -8
- package/src/cli.mjs +1 -4
- package/src/commands/main.mjs +20 -1
- package/src/commands/models.mjs +296 -39
- package/src/commands/onboard.mjs +6 -106
- package/src/commands/run.mjs +2 -4
- package/src/commands/status.mjs +1 -0
- package/src/commands/stop.mjs +1 -0
- package/src/config.mjs +16 -1
- package/src/discovery-shared.mjs +2 -3
- package/src/download.mjs +221 -0
- package/src/harness-pi.mjs +2 -3
- package/src/huggingface.mjs +72 -72
- package/src/managed.mjs +1 -6
- package/src/model-name.mjs +2 -2
- package/src/model-presenters.mjs +5 -36
- package/src/model-summary.mjs +2 -2
- package/src/omlx-runtime.mjs +29 -4
- package/src/process.mjs +3 -5
- package/src/profile-setup.mjs +206 -49
- package/src/profiles.mjs +1 -1
- package/src/runtime.mjs +2 -2
- package/src/ui.mjs +10 -8
- package/resources/hf-download.py +0 -79
- package/src/backend-installers.mjs +0 -42
- package/src/benchmark/finalize.mjs +0 -169
- package/src/benchmark/flow.mjs +0 -240
- package/src/benchmark/metrics.mjs +0 -107
- package/src/benchmark/prepare.mjs +0 -118
- package/src/benchmark/repo.mjs +0 -77
- package/src/benchmark/sdk-runner.mjs +0 -363
- package/src/benchmark/shared.mjs +0 -46
- package/src/benchmark.mjs +0 -12
- package/src/commands/benchmark.mjs +0 -4
package/resources/hf-download.py
DELETED
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Download a HuggingFace model into the standard HF cache.
|
|
4
|
-
|
|
5
|
-
Usage:
|
|
6
|
-
python3 hf-download.py --repo mlx-community/gemma-4-e2b-it-4bit
|
|
7
|
-
python3 hf-download.py --repo unsloth/gemma-4-E2B-it-GGUF --file gemma-4-E2B-it-Q4_K_S.gguf
|
|
8
|
-
|
|
9
|
-
Streams NDJSON progress events to stdout.
|
|
10
|
-
"""
|
|
11
|
-
import argparse
|
|
12
|
-
import json
|
|
13
|
-
import os
|
|
14
|
-
import sys
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def emit(event):
|
|
18
|
-
print(json.dumps(event), flush=True)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def progress_callback(relative_path, downloaded, total):
|
|
22
|
-
emit({
|
|
23
|
-
"type": "progress",
|
|
24
|
-
"file": relative_path,
|
|
25
|
-
"downloadedBytes": downloaded,
|
|
26
|
-
"totalBytes": total,
|
|
27
|
-
})
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def main():
|
|
31
|
-
parser = argparse.ArgumentParser(description="Download a HuggingFace model into the standard cache.")
|
|
32
|
-
parser.add_argument("--repo", required=True, help="HuggingFace repo ID (e.g. mlx-community/gemma-4-e2b-it-4bit)")
|
|
33
|
-
parser.add_argument("--file", help="Specific filename to download (for GGUF). Omit to download the full repo (MLX).")
|
|
34
|
-
parser.add_argument("--cache-dir", help="HF hub cache directory (where models--org--name/... live). Defaults to $HF_HUB_CACHE or $HF_HOME/hub or ~/.cache/huggingface/hub.")
|
|
35
|
-
args = parser.parse_args()
|
|
36
|
-
|
|
37
|
-
try:
|
|
38
|
-
from huggingface_hub import hf_hub_download, snapshot_download
|
|
39
|
-
except ImportError as e:
|
|
40
|
-
emit({"type": "error", "message": f"huggingface_hub is not installed: {e}"})
|
|
41
|
-
sys.exit(1)
|
|
42
|
-
|
|
43
|
-
cache_dir = args.cache_dir or os.environ.get("HF_HUB_CACHE") or os.path.join(
|
|
44
|
-
os.environ.get("HF_HOME") or os.path.join(os.path.expanduser("~"), ".cache", "huggingface"),
|
|
45
|
-
"hub",
|
|
46
|
-
)
|
|
47
|
-
|
|
48
|
-
try:
|
|
49
|
-
if args.file:
|
|
50
|
-
local_path = hf_hub_download(
|
|
51
|
-
repo_id=args.repo,
|
|
52
|
-
filename=args.file,
|
|
53
|
-
cache_dir=cache_dir,
|
|
54
|
-
resume_download=True,
|
|
55
|
-
)
|
|
56
|
-
emit({
|
|
57
|
-
"type": "complete",
|
|
58
|
-
"localDir": os.path.dirname(local_path),
|
|
59
|
-
"localPath": local_path,
|
|
60
|
-
"format": "gguf",
|
|
61
|
-
})
|
|
62
|
-
else:
|
|
63
|
-
local_dir = snapshot_download(
|
|
64
|
-
repo_id=args.repo,
|
|
65
|
-
cache_dir=cache_dir,
|
|
66
|
-
resume_download=True,
|
|
67
|
-
)
|
|
68
|
-
emit({
|
|
69
|
-
"type": "complete",
|
|
70
|
-
"localDir": local_dir,
|
|
71
|
-
"format": "mlx",
|
|
72
|
-
})
|
|
73
|
-
except Exception as e:
|
|
74
|
-
emit({"type": "error", "message": str(e)})
|
|
75
|
-
sys.exit(1)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
if __name__ == "__main__":
|
|
79
|
-
main()
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
import { pc } from "./ui.mjs";
|
|
2
|
-
|
|
3
|
-
export const BACKEND_INSTALLERS = {
|
|
4
|
-
lmstudio: {
|
|
5
|
-
label: "LM Studio",
|
|
6
|
-
choiceLabel: "LM Studio (recommended)",
|
|
7
|
-
hint: "brew install --cask lm-studio — visual model browser + CLI",
|
|
8
|
-
commands: [["brew", ["install", "--cask", "lm-studio"], "LM Studio"]],
|
|
9
|
-
success(model) {
|
|
10
|
-
console.log(pc.green("✓ LM Studio installed"));
|
|
11
|
-
console.log(pc.yellow("\nOpen LM Studio and download a model to get started."));
|
|
12
|
-
console.log(pc.dim(`Recommended for your machine: ${model.label}`));
|
|
13
|
-
console.log(pc.dim("Then run offgrid-ai again to pick and run a model."));
|
|
14
|
-
},
|
|
15
|
-
failure: "Download it manually from https://lmstudio.ai",
|
|
16
|
-
allFailure: "✗ LM Studio installation failed. Download from https://lmstudio.ai",
|
|
17
|
-
},
|
|
18
|
-
omlx: {
|
|
19
|
-
label: "oMLX",
|
|
20
|
-
choiceLabel: "oMLX",
|
|
21
|
-
hint: "brew tap jundot/omlx && brew install omlx — Apple Silicon optimized",
|
|
22
|
-
commands: [
|
|
23
|
-
["brew", ["tap", "jundot/omlx", "https://github.com/jundot/omlx"], "oMLX tap"],
|
|
24
|
-
["brew", ["install", "omlx"], "oMLX"],
|
|
25
|
-
],
|
|
26
|
-
success(model) {
|
|
27
|
-
console.log(pc.green("✓ oMLX installed"));
|
|
28
|
-
console.log(pc.yellow("\nStart oMLX and download a model:"));
|
|
29
|
-
console.log(pc.bold(" omlx start"));
|
|
30
|
-
console.log(pc.dim(`Recommended for your machine: ${model.label}`));
|
|
31
|
-
console.log(pc.dim("Then run offgrid-ai again to pick and run a model."));
|
|
32
|
-
},
|
|
33
|
-
failure: "Install manually: brew tap jundot/omlx && brew install omlx",
|
|
34
|
-
allFailure: "✗ oMLX installation failed. Install manually: brew tap jundot/omlx && brew install omlx",
|
|
35
|
-
},
|
|
36
|
-
};
|
|
37
|
-
|
|
38
|
-
export const BACKEND_INSTALL_CHOICES = [
|
|
39
|
-
...Object.entries(BACKEND_INSTALLERS).map(([value, installer]) => ({ value, label: installer.choiceLabel, hint: installer.hint })),
|
|
40
|
-
{ value: "all", label: "Install both", hint: "LM Studio + oMLX" },
|
|
41
|
-
{ value: "skip", label: "Skip for now", hint: "I'll set up models myself" },
|
|
42
|
-
];
|
|
@@ -1,169 +0,0 @@
|
|
|
1
|
-
// ── Benchmark finalization (metadata + summary rendering) ───────────────────
|
|
2
|
-
// unloadModelFromServer has been moved to src/process.mjs (it's the managed-server
|
|
3
|
-
// counterpart to stopProfile, used by both the benchmark flow and the Pi chat flow).
|
|
4
|
-
|
|
5
|
-
import { existsSync } from "node:fs";
|
|
6
|
-
import { readFile, writeFile } from "node:fs/promises";
|
|
7
|
-
import { join } from "node:path";
|
|
8
|
-
import { pc, renderRows, renderSection } from "../ui.mjs";
|
|
9
|
-
|
|
10
|
-
export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics, speedMetricsError = null) {
|
|
11
|
-
const metadataPath = join(runDirectory, "metadata.json");
|
|
12
|
-
const metadata = JSON.parse(await readFile(metadataPath, "utf8"));
|
|
13
|
-
const now = new Date();
|
|
14
|
-
const timestamp = now.toISOString();
|
|
15
|
-
|
|
16
|
-
const kind = metadata.kind ?? "visual";
|
|
17
|
-
const isDs = kind === "data-science";
|
|
18
|
-
const requiredFile = isDs ? "analysis.ipynb" : "index.html";
|
|
19
|
-
const requiredPath = join(runDirectory, requiredFile);
|
|
20
|
-
|
|
21
|
-
const outputFiles = [];
|
|
22
|
-
for (const candidate of [requiredFile, isDs ? "summary.json" : "preview.png", isDs ? "chart-distribution.png" : "preview.webm", "preview.mp4"]) {
|
|
23
|
-
if (existsSync(join(runDirectory, candidate))) {
|
|
24
|
-
outputFiles.push(candidate);
|
|
25
|
-
}
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
const success = existsSync(requiredPath) && (await readFile(requiredPath, "utf8")).trim().length > 0;
|
|
29
|
-
const hasTurns = runResult.agentTurns > 0;
|
|
30
|
-
|
|
31
|
-
let failureReason = null;
|
|
32
|
-
if (runResult.error) {
|
|
33
|
-
failureReason = typeof runResult.error === "string" ? runResult.error : (runResult.error.message ?? "Unknown error");
|
|
34
|
-
} else if (!hasTurns) {
|
|
35
|
-
failureReason = "The model did not produce any response turns.";
|
|
36
|
-
} else if (!success) {
|
|
37
|
-
if (runResult.toolCalls === 0) {
|
|
38
|
-
failureReason = `The model finished without writing the required output file (${requiredFile}). It may have returned the response as chat text instead of using the write tool.`;
|
|
39
|
-
} else {
|
|
40
|
-
failureReason = `The required output file (${requiredFile}) was missing or empty after the run.`;
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
const failed = failureReason !== null;
|
|
45
|
-
|
|
46
|
-
metadata.status = failed ? "failed" : "completed";
|
|
47
|
-
metadata.updatedAt = timestamp;
|
|
48
|
-
if (failed) {
|
|
49
|
-
metadata.failedAt = timestamp;
|
|
50
|
-
} else {
|
|
51
|
-
metadata.completedAt = timestamp;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
const totalTokens = runResult.promptTokens + runResult.completionTokens;
|
|
55
|
-
|
|
56
|
-
metadata.runner.tokenMetrics = {
|
|
57
|
-
reported: hasTurns,
|
|
58
|
-
promptTokens: runResult.promptTokens,
|
|
59
|
-
completionTokens: runResult.completionTokens,
|
|
60
|
-
totalTokens,
|
|
61
|
-
};
|
|
62
|
-
|
|
63
|
-
metadata.runner.speedMetrics = speedMetrics;
|
|
64
|
-
metadata.runner.metricSource = speedMetrics?.metricSource ?? null;
|
|
65
|
-
metadata.runner.speedMetricsError = speedMetricsError ?? null;
|
|
66
|
-
|
|
67
|
-
metadata.results = {
|
|
68
|
-
wallClockMs: runResult.wallClockMs,
|
|
69
|
-
agentTurns: runResult.agentTurns,
|
|
70
|
-
toolCalls: runResult.toolCalls,
|
|
71
|
-
toolResults: runResult.toolResults,
|
|
72
|
-
success,
|
|
73
|
-
outputFiles,
|
|
74
|
-
perTurn: runResult.perTurn,
|
|
75
|
-
};
|
|
76
|
-
|
|
77
|
-
if (failureReason) {
|
|
78
|
-
metadata.error = { message: failureReason, ...(typeof runResult.error === "object" && runResult.error?.stack ? { stack: runResult.error.stack } : {}) };
|
|
79
|
-
} else if (runResult.error) {
|
|
80
|
-
metadata.error = typeof runResult.error === "string"
|
|
81
|
-
? { message: runResult.error }
|
|
82
|
-
: { message: runResult.error.message ?? "Unknown error", ...(runResult.error.stack ? { stack: runResult.error.stack } : {}) };
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
await writeFile(metadataPath, JSON.stringify(metadata, null, 2) + "\n", "utf8");
|
|
86
|
-
return metadata;
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
function formatMetric(value, formatter) {
|
|
90
|
-
if (value === null || value === undefined || !Number.isFinite(value)) return pc.dim("—");
|
|
91
|
-
return formatter(value);
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
function formatMs(ms) {
|
|
95
|
-
return formatMetric(ms, (n) => (n < 1000 ? `${Math.round(n)} ms` : `${(n / 1000).toFixed(1)} s`));
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
function formatNumber(n) {
|
|
99
|
-
return formatMetric(n, (v) => v.toLocaleString());
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
function formatTokPerSec(n) {
|
|
103
|
-
return formatMetric(n, (v) => `${v.toFixed(1)} tok/s`);
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
function formatPercent(n) {
|
|
107
|
-
return formatMetric(n, (v) => `${(v * 100).toFixed(0)} %`);
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
export function renderBenchmarkSummary(metadata) {
|
|
111
|
-
const { status, results, runner, error } = metadata;
|
|
112
|
-
|
|
113
|
-
const agentRows = [
|
|
114
|
-
["Status", status === "completed" ? pc.green("completed") : pc.red(status ?? "failed")],
|
|
115
|
-
["Duration", formatMs(results?.wallClockMs)],
|
|
116
|
-
["Agent turns", formatNumber(results?.agentTurns)],
|
|
117
|
-
["Input tokens", formatNumber(runner?.tokenMetrics?.promptTokens)],
|
|
118
|
-
["Output tokens", formatNumber(runner?.tokenMetrics?.completionTokens)],
|
|
119
|
-
["Total tokens", formatNumber(runner?.tokenMetrics?.totalTokens)],
|
|
120
|
-
["Tool calls", formatNumber(results?.toolCalls)],
|
|
121
|
-
["Tool results", formatNumber(results?.toolResults)],
|
|
122
|
-
["Output files", (results?.outputFiles?.length ?? 0) > 0 ? results.outputFiles.join(", ") : pc.dim("—")],
|
|
123
|
-
];
|
|
124
|
-
|
|
125
|
-
console.log("");
|
|
126
|
-
console.log(renderSection("Benchmark Result", renderRows(agentRows)));
|
|
127
|
-
|
|
128
|
-
if (status === "completed" && runner?.speedMetrics) {
|
|
129
|
-
const speed = runner.speedMetrics;
|
|
130
|
-
const speedRows = [
|
|
131
|
-
["Prefill tok/s", formatTokPerSec(speed.prefillTokensPerSecond)],
|
|
132
|
-
["Generation tok/s", formatTokPerSec(speed.generationTokensPerSecond)],
|
|
133
|
-
["TTFT", formatMs(speed.ttftMs)],
|
|
134
|
-
["Speculative decode", formatPercent(speed.speculativeDecodeAcceptance)],
|
|
135
|
-
["KV cache tokens", formatNumber(speed.kvCacheTokens)],
|
|
136
|
-
["Model load time", formatMs(speed.modelLoadMs)],
|
|
137
|
-
["Metric source", speed.metricSource ?? pc.dim("—")],
|
|
138
|
-
];
|
|
139
|
-
console.log(renderSection("Speed Metrics", renderRows(speedRows)));
|
|
140
|
-
} else if (error) {
|
|
141
|
-
const wrappedError = wrapText(error.message ?? "Unknown error");
|
|
142
|
-
console.log(renderSection("Error", pc.red(wrappedError)));
|
|
143
|
-
if (error.message?.includes("write tool") || error.message?.includes("required output file")) {
|
|
144
|
-
const tip = wrapText("Tip: This usually means the model returned the answer as chat text instead of writing the file. Try a model with stronger tool-use support, or run the prompt manually.", 64);
|
|
145
|
-
console.log(pc.dim("\n" + tip));
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
if (status === "completed" && !runner?.speedMetrics && runner?.speedMetricsError) {
|
|
150
|
-
console.log(pc.dim(`\nSpeed metrics unavailable: ${runner.speedMetricsError}`));
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
function wrapText(text, width = 64) {
|
|
155
|
-
if (!text) return "";
|
|
156
|
-
const words = text.split(/\s+/);
|
|
157
|
-
const lines = [];
|
|
158
|
-
let current = "";
|
|
159
|
-
for (const word of words) {
|
|
160
|
-
if ((current + " " + word).trim().length > width) {
|
|
161
|
-
if (current) lines.push(current.trim());
|
|
162
|
-
current = word;
|
|
163
|
-
} else {
|
|
164
|
-
current = current ? `${current} ${word}` : word;
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
if (current) lines.push(current.trim());
|
|
168
|
-
return lines.join("\n");
|
|
169
|
-
}
|
package/src/benchmark/flow.mjs
DELETED
|
@@ -1,240 +0,0 @@
|
|
|
1
|
-
// ── Benchmark command flows ───────────────────────────────────────────────────
|
|
2
|
-
|
|
3
|
-
import { join } from "node:path";
|
|
4
|
-
import { ensureDirs } from "../config.mjs";
|
|
5
|
-
import { backendFor } from "../backends.mjs";
|
|
6
|
-
import { serverReady, startServer, waitForReady, stopProfile, modelAvailableOnServer, unloadModelFromServer } from "../process.mjs";
|
|
7
|
-
import { loadProfiles } from "../profiles.mjs";
|
|
8
|
-
import { pc, createPrompt } from "../ui.mjs";
|
|
9
|
-
import { linkBenchmarkRepo } from "./repo.mjs";
|
|
10
|
-
import { loadBenchmarks } from "./shared.mjs";
|
|
11
|
-
import { prepareBenchmarkRun } from "./prepare.mjs";
|
|
12
|
-
import { runBenchmarkInPi } from "./sdk-runner.mjs";
|
|
13
|
-
import { queryServerMetrics } from "./metrics.mjs";
|
|
14
|
-
import { finalizeBenchmarkRun, renderBenchmarkSummary } from "./finalize.mjs";
|
|
15
|
-
|
|
16
|
-
function benchmarkModelSource(profile) {
|
|
17
|
-
if (!profile) return "cloud";
|
|
18
|
-
return profile.backend === "omlx" ? "omlx" : "llama-cpp";
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
async function chooseBenchmarkAction(prompt, canRun) {
|
|
22
|
-
const choices = [
|
|
23
|
-
{ value: "run", label: "Run Benchmark", hint: "Automated with Pi" },
|
|
24
|
-
{ value: "prepare", label: "Prepare Benchmark (manual)", hint: "Copy prompt and run yourself" },
|
|
25
|
-
];
|
|
26
|
-
return await prompt.choice("Action", canRun ? choices : choices.filter((c) => c.value === "prepare"), canRun ? "run" : "prepare");
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
function managedModelId(profile) {
|
|
30
|
-
return profile.omlxModel ?? profile.modelAlias ?? profile.label;
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
async function ensureManagedModelAvailableForBenchmark(profile, backend) {
|
|
34
|
-
if (backend.type !== "managed-server") return;
|
|
35
|
-
if (await modelAvailableOnServer(profile)) return;
|
|
36
|
-
throw new Error(`${managedModelId(profile)} is not available on ${backend.label} at ${profile.baseUrl}.`);
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
async function ensureServerForBenchmark(profile) {
|
|
40
|
-
const backend = backendFor(profile.backend);
|
|
41
|
-
if (await serverReady(profile.baseUrl)) {
|
|
42
|
-
await ensureManagedModelAvailableForBenchmark(profile, backend);
|
|
43
|
-
console.log(pc.green(`[ready] ${backend.label} at ${profile.baseUrl}`));
|
|
44
|
-
return { started: false };
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
if (backend.type === "managed-server") {
|
|
48
|
-
throw new Error(`${backend.label} is not running at ${profile.baseUrl}. Start it and try again.`);
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
console.log(pc.dim(`Starting ${backend.label} for ${profile.label}...`));
|
|
52
|
-
const state = await startServer(profile);
|
|
53
|
-
await waitForReady(profile, state?.pid, state?.rawLogPath);
|
|
54
|
-
console.log(pc.green(`[ready] ${profile.baseUrl}/models`));
|
|
55
|
-
return { started: true, state };
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
export async function runPreparedBenchmark(profile, runDirectory, options = {}) {
|
|
59
|
-
const controller = new AbortController();
|
|
60
|
-
if (options.signal) {
|
|
61
|
-
options.signal.addEventListener("abort", () => controller.abort(), { once: true });
|
|
62
|
-
}
|
|
63
|
-
let serverStarted = false;
|
|
64
|
-
let benchmarkStarted = false;
|
|
65
|
-
let metadata;
|
|
66
|
-
|
|
67
|
-
const onSigint = () => {
|
|
68
|
-
controller.abort();
|
|
69
|
-
};
|
|
70
|
-
process.on("SIGINT", onSigint);
|
|
71
|
-
|
|
72
|
-
try {
|
|
73
|
-
const serverState = await ensureServerForBenchmark(profile);
|
|
74
|
-
serverStarted = serverState.started;
|
|
75
|
-
|
|
76
|
-
benchmarkStarted = true;
|
|
77
|
-
const runResult = await runBenchmarkInPi(profile, runDirectory, { signal: controller.signal });
|
|
78
|
-
|
|
79
|
-
let speedMetrics = null;
|
|
80
|
-
let speedMetricsError = null;
|
|
81
|
-
if (!runResult.error) {
|
|
82
|
-
try {
|
|
83
|
-
speedMetrics = await queryServerMetrics(profile);
|
|
84
|
-
} catch (err) {
|
|
85
|
-
// Non-fatal: speed metrics are a supplementary measurement, not the
|
|
86
|
-
// benchmark itself. Don't poison the run result; surface it as a note.
|
|
87
|
-
speedMetricsError = err.message;
|
|
88
|
-
}
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
metadata = await finalizeBenchmarkRun(runDirectory, runResult, speedMetrics, speedMetricsError);
|
|
92
|
-
renderBenchmarkSummary(metadata);
|
|
93
|
-
} catch (err) {
|
|
94
|
-
const failedResult = {
|
|
95
|
-
error: { message: err.message },
|
|
96
|
-
wallClockMs: null,
|
|
97
|
-
agentTurns: 0,
|
|
98
|
-
promptTokens: 0,
|
|
99
|
-
completionTokens: 0,
|
|
100
|
-
totalTokens: 0,
|
|
101
|
-
cacheRead: 0,
|
|
102
|
-
cacheWrite: 0,
|
|
103
|
-
toolCalls: 0,
|
|
104
|
-
toolResults: 0,
|
|
105
|
-
perTurn: [],
|
|
106
|
-
};
|
|
107
|
-
metadata = await finalizeBenchmarkRun(runDirectory, failedResult, null);
|
|
108
|
-
renderBenchmarkSummary(metadata);
|
|
109
|
-
} finally {
|
|
110
|
-
process.removeListener("SIGINT", onSigint);
|
|
111
|
-
if (serverStarted && !options.keepServer) {
|
|
112
|
-
const backend = backendFor(profile.backend);
|
|
113
|
-
if (backend.type !== "managed-server") {
|
|
114
|
-
const result = await stopProfile(profile);
|
|
115
|
-
console.log(result.stopped ? pc.green(`[stop] ${result.message}`) : pc.dim(`[stop] ${result.message}`));
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
if (benchmarkStarted) {
|
|
119
|
-
const unloadResult = await unloadModelFromServer(profile);
|
|
120
|
-
if (!unloadResult.unloaded && unloadResult.error) {
|
|
121
|
-
console.log(pc.yellow(`[unload] ${unloadResult.backend}: ${unloadResult.error}`));
|
|
122
|
-
} else if (!unloadResult.unloaded && unloadResult.reason) {
|
|
123
|
-
console.log(pc.dim(`[unload] ${unloadResult.backend}: ${unloadResult.reason}`));
|
|
124
|
-
}
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
return metadata;
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
// ── Shared benchmark selection ────────────────────────────────────────────
|
|
132
|
-
|
|
133
|
-
async function selectBenchmark(prompt, repoPath) {
|
|
134
|
-
const kind = await prompt.choice("Benchmark category", [
|
|
135
|
-
{ value: "visual", label: "Visual Benchmark", hint: "HTML/CSS/JS animation benchmarks" },
|
|
136
|
-
{ value: "data-science", label: "Data Science", hint: "Analysis and charting benchmarks" },
|
|
137
|
-
], "visual");
|
|
138
|
-
|
|
139
|
-
const benchDir = join(repoPath, "benchmarks");
|
|
140
|
-
const benchmarks = (await loadBenchmarks(benchDir)).filter((b) => b.kind === kind);
|
|
141
|
-
if (benchmarks.length === 0) {
|
|
142
|
-
console.log(pc.yellow(`No ${kind} benchmarks found in ${benchDir}`));
|
|
143
|
-
return null;
|
|
144
|
-
}
|
|
145
|
-
const benchmarkId = await prompt.choice("Prompt", benchmarks.map((b) => ({
|
|
146
|
-
value: b.id, label: b.title, hint: b.description || b.id,
|
|
147
|
-
})), benchmarks[0].id);
|
|
148
|
-
const benchmark = benchmarks.find((b) => b.id === benchmarkId);
|
|
149
|
-
if (!benchmark) return null;
|
|
150
|
-
return { kind, benchmark };
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
// ── Shared benchmark setup ───────────────────────────────────────────────
|
|
154
|
-
|
|
155
|
-
async function benchmarkSetup() {
|
|
156
|
-
await ensureDirs();
|
|
157
|
-
const prompt = createPrompt();
|
|
158
|
-
const repoPath = await linkBenchmarkRepo(prompt);
|
|
159
|
-
if (!repoPath) return { prompt, repoPath: null, selected: null };
|
|
160
|
-
const selected = await selectBenchmark(prompt, repoPath);
|
|
161
|
-
return { prompt, repoPath, selected };
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
// ── Benchmark from a selected profile (from model picker) ────────────────
|
|
165
|
-
|
|
166
|
-
export async function benchmarkForProfile(profile) {
|
|
167
|
-
const { prompt, repoPath, selected } = await benchmarkSetup();
|
|
168
|
-
try {
|
|
169
|
-
if (!selected) return;
|
|
170
|
-
const { kind, benchmark: selectedBenchmark } = selected;
|
|
171
|
-
|
|
172
|
-
const modelId = profile.modelAlias;
|
|
173
|
-
const modelSource = benchmarkModelSource(profile);
|
|
174
|
-
const backendLabel = backendFor(profile.backend).label;
|
|
175
|
-
|
|
176
|
-
const canRun = modelSource !== "cloud";
|
|
177
|
-
const action = await chooseBenchmarkAction(prompt, canRun);
|
|
178
|
-
|
|
179
|
-
const runDirectory = await prepareBenchmarkRun({ repoPath, benchmark: selectedBenchmark, kind, modelId, modelSource, backendLabel, profile, showNextSteps: action === "prepare" });
|
|
180
|
-
|
|
181
|
-
if (action === "run") {
|
|
182
|
-
return await runPreparedBenchmark(profile, runDirectory);
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
return runDirectory;
|
|
186
|
-
} finally {
|
|
187
|
-
prompt.close();
|
|
188
|
-
}
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
// ── Standalone benchmark flow (offgrid-ai benchmark) ──────────────────────
|
|
192
|
-
|
|
193
|
-
export async function benchmarkFlow() {
|
|
194
|
-
const { prompt, repoPath, selected } = await benchmarkSetup();
|
|
195
|
-
try {
|
|
196
|
-
if (!selected) return;
|
|
197
|
-
const { kind, benchmark: selectedBenchmark } = selected;
|
|
198
|
-
|
|
199
|
-
const profiles = await loadProfiles();
|
|
200
|
-
const source = await prompt.choice("Model source", [
|
|
201
|
-
{ value: "profile", label: "Use existing profile", hint: "Pick a saved offgrid-ai profile" },
|
|
202
|
-
{ value: "cloud", label: "Custom / cloud", hint: "Free-form model label for cloud runs" },
|
|
203
|
-
], "profile");
|
|
204
|
-
|
|
205
|
-
let modelId, modelSource, backendLabel, profile;
|
|
206
|
-
|
|
207
|
-
if (source === "profile") {
|
|
208
|
-
if (profiles.length === 0) {
|
|
209
|
-
console.log(pc.yellow("No profiles yet. Run: offgrid-ai models"));
|
|
210
|
-
return;
|
|
211
|
-
}
|
|
212
|
-
const profileId = await prompt.choice("Profile", profiles.map((p) => ({
|
|
213
|
-
value: p.id, label: p.label, hint: `${backendFor(p.backend).label} · ${p.modelAlias}`,
|
|
214
|
-
})), profiles[0].id);
|
|
215
|
-
profile = profiles.find((p) => p.id === profileId);
|
|
216
|
-
if (!profile) return;
|
|
217
|
-
modelId = profile.modelAlias;
|
|
218
|
-
modelSource = benchmarkModelSource(profile);
|
|
219
|
-
backendLabel = backendFor(profile.backend).label;
|
|
220
|
-
} else {
|
|
221
|
-
backendLabel = await prompt.text("Backend label", "cloud");
|
|
222
|
-
modelId = await prompt.text("Model name", "");
|
|
223
|
-
if (!modelId) { console.log(pc.yellow("Model name is required.")); return; }
|
|
224
|
-
modelSource = "cloud";
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
const canRun = modelSource !== "cloud" && profile != null;
|
|
228
|
-
const action = await chooseBenchmarkAction(prompt, canRun);
|
|
229
|
-
|
|
230
|
-
const runDirectory = await prepareBenchmarkRun({ repoPath, benchmark: selectedBenchmark, kind, modelId, modelSource, backendLabel, profile, showNextSteps: action === "prepare" });
|
|
231
|
-
|
|
232
|
-
if (action === "run" && profile) {
|
|
233
|
-
return await runPreparedBenchmark(profile, runDirectory);
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
return runDirectory;
|
|
237
|
-
} finally {
|
|
238
|
-
prompt.close();
|
|
239
|
-
}
|
|
240
|
-
}
|
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
// ── Backend-aware server speed metrics ───────────────────────────────────────
|
|
2
|
-
|
|
3
|
-
import { backendFor } from "../backends.mjs";
|
|
4
|
-
|
|
5
|
-
const BENCH_SPEED_PROMPT = "Write a one-sentence summary of machine learning.";
|
|
6
|
-
const SPEED_QUERY_TIMEOUT_MS = 120_000;
|
|
7
|
-
const SPEED_QUERY_MAX_TOKENS = 64;
|
|
8
|
-
|
|
9
|
-
export async function queryServerMetrics(profile) {
|
|
10
|
-
const backend = backendFor(profile.backend);
|
|
11
|
-
|
|
12
|
-
if (backend.id === "llama-cpp") {
|
|
13
|
-
return await queryLlamaCppMetrics(profile);
|
|
14
|
-
}
|
|
15
|
-
if (backend.id === "omlx") {
|
|
16
|
-
return await queryOmlxMetrics(profile);
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
throw new Error(`Unsupported backend for benchmark speed metrics: ${backend.id}`);
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
async function speedQueryFetch(profile, { stream = false, streamOptions = null, errorLabel = "speed query" } = {}) {
|
|
23
|
-
const body = {
|
|
24
|
-
model: profile.modelAlias,
|
|
25
|
-
messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
|
|
26
|
-
stream,
|
|
27
|
-
max_tokens: SPEED_QUERY_MAX_TOKENS,
|
|
28
|
-
...(streamOptions ? { stream_options: streamOptions } : {}),
|
|
29
|
-
};
|
|
30
|
-
|
|
31
|
-
const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
|
|
32
|
-
method: "POST",
|
|
33
|
-
headers: { "Content-Type": "application/json" },
|
|
34
|
-
body: JSON.stringify(body),
|
|
35
|
-
signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
|
|
36
|
-
});
|
|
37
|
-
|
|
38
|
-
if (!response.ok) {
|
|
39
|
-
throw new Error(`${errorLabel} failed: ${response.status} ${response.statusText}`);
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
return response;
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
async function queryLlamaCppMetrics(profile) {
|
|
46
|
-
const response = await speedQueryFetch(profile, { errorLabel: "llama.cpp speed query" });
|
|
47
|
-
|
|
48
|
-
const data = await response.json();
|
|
49
|
-
const timings = data.timings;
|
|
50
|
-
if (!timings || typeof timings.prompt_per_second !== "number" || typeof timings.predicted_per_second !== "number") {
|
|
51
|
-
throw new Error("llama.cpp response did not include usable timings object");
|
|
52
|
-
}
|
|
53
|
-
const draftN = timings.draft_n;
|
|
54
|
-
const draftAccepted = timings.draft_n_accepted;
|
|
55
|
-
|
|
56
|
-
return {
|
|
57
|
-
prefillTokensPerSecond: timings.prompt_per_second ?? null,
|
|
58
|
-
generationTokensPerSecond: timings.predicted_per_second ?? null,
|
|
59
|
-
ttftMs: timings.prompt_ms ?? null,
|
|
60
|
-
modelLoadMs: null,
|
|
61
|
-
speculativeDecodeAcceptance: (draftN && Number.isFinite(draftAccepted) && Number.isFinite(draftN) && draftN > 0)
|
|
62
|
-
? draftAccepted / draftN
|
|
63
|
-
: null,
|
|
64
|
-
kvCacheTokens: timings.cache_n ?? null,
|
|
65
|
-
metricSource: "llama.cpp /v1/chat/completions timings",
|
|
66
|
-
};
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
async function queryOmlxMetrics(profile) {
|
|
70
|
-
const response = await speedQueryFetch(profile, {
|
|
71
|
-
stream: true,
|
|
72
|
-
streamOptions: { include_usage: true },
|
|
73
|
-
errorLabel: "oMLX speed query",
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
const text = await response.text();
|
|
77
|
-
let usage = null;
|
|
78
|
-
for (const line of text.split("\n").reverse()) {
|
|
79
|
-
const trimmed = line.trim();
|
|
80
|
-
if (!trimmed || !trimmed.startsWith("data:")) continue;
|
|
81
|
-
const payload = trimmed.slice(5).trim();
|
|
82
|
-
if (payload === "[DONE]") continue;
|
|
83
|
-
try {
|
|
84
|
-
const chunk = JSON.parse(payload);
|
|
85
|
-
if (chunk.usage) {
|
|
86
|
-
usage = chunk.usage;
|
|
87
|
-
break;
|
|
88
|
-
}
|
|
89
|
-
} catch {
|
|
90
|
-
// Ignore malformed SSE chunks.
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
if (!usage) {
|
|
95
|
-
throw new Error("oMLX speed query did not return usage in streaming response");
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
return {
|
|
99
|
-
prefillTokensPerSecond: usage.prompt_tokens_per_second ?? null,
|
|
100
|
-
generationTokensPerSecond: usage.generation_tokens_per_second ?? null,
|
|
101
|
-
ttftMs: usage.time_to_first_token != null ? usage.time_to_first_token * 1000 : null,
|
|
102
|
-
modelLoadMs: null,
|
|
103
|
-
speculativeDecodeAcceptance: null,
|
|
104
|
-
kvCacheTokens: usage.prompt_tokens_details?.cached_tokens ?? null,
|
|
105
|
-
metricSource: "oMLX /v1/chat/completions streaming include_usage",
|
|
106
|
-
};
|
|
107
|
-
}
|