@freesyntax/notch-cli 0.5.20 → 0.5.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-443G6HCC.js +543 -0
- package/dist/chunk-GFVLHUSS.js +155 -0
- package/dist/chunk-MMBFNIKE.js +509 -0
- package/dist/chunk-OSWUX6TC.js +167 -0
- package/dist/{chunk-6M6CXXWR.js → chunk-PKZKVOAN.js} +209 -1
- package/dist/chunk-QKM27RHS.js +198 -0
- package/dist/{chunk-YBYF7L4A.js → chunk-TU465P2P.js} +1830 -1331
- package/dist/compression-SQAIQ2UU.js +32 -0
- package/dist/index.js +2346 -822
- package/dist/ollama-bench-QQHBIG2D.js +190 -0
- package/dist/ollama-launch-2ASVER3S.js +18 -0
- package/dist/ollama-usage-2WPCZJJI.js +69 -0
- package/dist/{server-W7FRCVRZ.js → server-7UQKCB2Z.js} +1 -1
- package/dist/session-index-SSGOOZXK.js +21 -0
- package/dist/{tools-Q7CDHB4K.js → tools-7WAWS6V4.js} +3 -1
- package/package.json +2 -1
- package/dist/compression-UTB2Y4BB.js +0 -16
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import {
|
|
2
|
+
printNotReachable,
|
|
3
|
+
renderProgressLine,
|
|
4
|
+
resolveEndpoint
|
|
5
|
+
} from "./chunk-MMBFNIKE.js";
|
|
6
|
+
import {
|
|
7
|
+
detectDaemon,
|
|
8
|
+
listModels,
|
|
9
|
+
pullModel
|
|
10
|
+
} from "./chunk-GFVLHUSS.js";
|
|
11
|
+
import {
|
|
12
|
+
resolveByokModel
|
|
13
|
+
} from "./chunk-443G6HCC.js";
|
|
14
|
+
|
|
15
|
+
// src/commands/ollama-bench.ts
|
|
16
|
+
import fs from "fs/promises";
|
|
17
|
+
import path from "path";
|
|
18
|
+
import chalk from "chalk";
|
|
19
|
+
import { streamText } from "ai";
|
|
20
|
+
// Prompt used by runOllamaBench when no positional prompt text is supplied.
const DEFAULT_PROMPT = "Write a 5-line Python function that returns the n-th Fibonacci number.";

// Models benchmarked by runOllamaBench when `flags.model` names none.
const DEFAULT_BENCH_MODELS = ["qwen3-coder:30b", "llama3.2:latest", "gpt-oss:20b"];
|
|
26
|
+
/**
 * Rough token estimate: the number of whitespace-separated words divided by
 * 0.75, rounded to the nearest integer.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count; 0 for empty or missing text.
 */
function approximateTokens(text) {
  if (!text) {
    return 0;
  }
  // Runs of non-whitespace are exactly the non-empty pieces of a /\s+/ split.
  const words = text.match(/\S+/g) ?? [];
  return Math.round(words.length / 0.75);
}
|
|
30
|
+
/**
 * Make sure `modelName` is available on the endpoint, pulling it when allowed.
 *
 * A name without a tag is compared as `<name>:latest`, so asking for
 * `llama3.2` matches an installed `llama3.2:latest` instead of being reported
 * as missing (or needlessly re-pulled).
 *
 * @param {string} modelName - Model to look for.
 * @param {string} baseUrl - Endpoint forwarded to listModels / pullModel.
 * @param {string} [apiKey] - Forwarded to both helpers as `{ apiKey }`.
 * @param {boolean} [allowPull] - When falsy a missing model is never pulled.
 * @returns {Promise<boolean>} true when the model is listed or the pull
 *   finished without throwing; false when listing failed, the model is
 *   missing and pulling is not allowed, or the pull threw.
 */
async function ensureModelPresent(modelName, baseUrl, apiKey, allowPull) {
  // Only the last path segment can carry a tag; a colon earlier in the name
  // may belong to a registry host:port.
  const withDefaultTag = (name) => {
    const lastSegment = name.slice(name.lastIndexOf("/") + 1);
    return lastSegment.includes(":") ? name : `${name}:latest`;
  };
  const wanted = withDefaultTag(modelName);
  try {
    const installed = await listModels(baseUrl, { apiKey });
    const isInstalled = installed.some(
      (m) => m.name === modelName || (typeof m.name === "string" && withDefaultTag(m.name) === wanted)
    );
    if (isInstalled) return true;
  } catch {
    // Deliberate best effort: if the list cannot be read, the model's presence
    // cannot be confirmed, so report it as unavailable rather than crash.
    return false;
  }
  if (!allowPull) return false;
  console.log(chalk.gray(`\nPulling ${modelName} (not installed)...\n`));
  // Mutable holder handed to renderProgressLine on every event.
  const lastStatus = { label: "" };
  try {
    for await (const ev of pullModel(modelName, baseUrl, { apiKey })) {
      renderProgressLine(ev, lastStatus);
    }
  } catch (err) {
    // Move off the in-place progress line before printing the failure.
    process.stdout.write("\n");
    const reason = err instanceof Error ? err.message : String(err);
    console.error(chalk.red(` Pull failed: ${reason}`));
    return false;
  }
  return true;
}
|
|
53
|
+
/**
 * Run a single prompt against one model and measure latency and throughput.
 *
 * Never throws: any failure while resolving the model or streaming is turned
 * into a result whose `error` field is a non-empty string.
 *
 * @param {string} model - Model name to benchmark.
 * @param {string} prompt - User prompt sent as the only message.
 * @param {string} baseUrl - Endpoint forwarded to resolveByokModel.
 * @param {string} [apiKey] - Forwarded to resolveByokModel.
 * @param {boolean} isCloud - Selects the "ollama-cloud" provider id instead of "ollama".
 * @returns {Promise<{model: string, firstTokenMs: number, totalMs: number,
 *   tokensPerSec: number, promptTokens: number, completionTokens: number,
 *   output: string, error?: string}>}
 */
async function runOne(model, prompt, baseUrl, apiKey, isCloud) {
  const started = Date.now();
  // Use the reported count only when it is a real number; `??` alone would
  // let NaN through and poison tokensPerSec.
  const countOr = (reported, estimate) => (Number.isFinite(reported) ? reported : estimate);
  try {
    const providerId = isCloud ? "ollama-cloud" : "ollama";
    const { model: languageModel } = resolveByokModel({
      provider: providerId,
      model,
      apiKey,
      baseUrl
    });
    const messages = [{ role: "user", content: prompt }];
    let firstTokenAt = 0;
    let output = "";
    const stream = streamText({ model: languageModel, messages });
    for await (const chunk of stream.textStream) {
      if (!firstTokenAt) firstTokenAt = Date.now();
      output += chunk;
    }
    const finished = Date.now();
    const usage = await stream.usage;
    const promptTokens = countOr(usage?.promptTokens, approximateTokens(prompt));
    const completionTokens = countOr(usage?.completionTokens, approximateTokens(output));
    const totalMs = finished - started;
    // With no chunks at all, time-to-first-token falls back to the total time.
    const firstTokenMs = firstTokenAt ? firstTokenAt - started : totalMs;
    // Clamp to 1 ms so an instantaneous run cannot divide by zero.
    const seconds = Math.max(1e-3, totalMs / 1e3);
    return {
      model,
      firstTokenMs,
      totalMs,
      // Throughput is measured over the whole wall-clock run, including TTFT.
      tokensPerSec: completionTokens / seconds,
      promptTokens,
      completionTokens,
      output
    };
  } catch (err) {
    // Callers treat a truthy `error` as failure, so it must never be
    // undefined or empty, even when a non-Error value was thrown.
    const reason = err instanceof Error && err.message ? err.message : String(err);
    return {
      model,
      firstTokenMs: 0,
      totalMs: Date.now() - started,
      tokensPerSec: 0,
      promptTokens: 0,
      completionTokens: 0,
      output: "",
      error: reason || "unknown error"
    };
  }
}
|
|
100
|
+
/**
 * Render one benchmark result as a single coloured table row.
 *
 * @param {object} r - Result with model, firstTokenMs, totalMs, tokensPerSec,
 *   promptTokens, completionTokens and an optional error string.
 * @returns {string} A failure row (cross, model, error cut to 70 characters)
 *   when `r.error` is set, otherwise a success row with all metric columns.
 */
function formatRow(r) {
  const { model, firstTokenMs, totalMs, tokensPerSec, promptTokens, completionTokens, error } = r;
  const nameCol = model.padEnd(28);
  const ttftCol = `${firstTokenMs.toString().padStart(5)} ms`;
  const totalCol = `${totalMs.toString().padStart(6)} ms`;
  const speedCol = `${tokensPerSec.toFixed(1).padStart(6)} tok/s`;
  const tokenCol = `${promptTokens}\u2192${completionTokens}`;
  // The leading empty cell produces the single-space row prefix when joined.
  let cells;
  if (error) {
    cells = ["", chalk.red("\u2717"), chalk.white(nameCol), chalk.red(error.slice(0, 70))];
  } else {
    cells = [
      "",
      chalk.green("\u2713"),
      chalk.white(nameCol),
      chalk.gray(ttftCol),
      chalk.gray(totalCol),
      chalk.cyan(speedCol),
      chalk.gray(tokenCol.padEnd(12))
    ];
  }
  return cells.join(" ");
}
|
|
111
|
+
/**
 * Write the benchmark results to `<projectRoot>/.notch/ollama-bench.json`.
 *
 * The directory is created if needed. Each stored row keeps the metrics and
 * the error but not the generated output; tokensPerSec is rounded to two
 * decimals.
 *
 * @param {string} projectRoot - Directory that receives the `.notch` folder.
 * @param {Array<object>} results - Benchmark results to store, in order.
 * @param {string} prompt - Prompt the results were produced with.
 * @returns {Promise<string>} Path of the written file.
 */
async function persistBench(projectRoot, results, prompt) {
  const benchDir = path.join(projectRoot, ".notch");
  await fs.mkdir(benchDir, { recursive: true });
  const target = path.join(benchDir, "ollama-bench.json");

  const ts = new Date().toISOString();
  const rows = [];
  for (const entry of results) {
    const { model, firstTokenMs, totalMs, tokensPerSec, promptTokens, completionTokens, error } = entry;
    rows.push({
      model,
      firstTokenMs,
      totalMs,
      tokensPerSec: Number(tokensPerSec.toFixed(2)),
      promptTokens,
      completionTokens,
      error
    });
  }

  const body = JSON.stringify({ ts, prompt, results: rows }, null, 2);
  await fs.writeFile(target, body + "\n", "utf-8");
  return target;
}
|
|
131
|
+
/**
 * Benchmark one or more models against the resolved Ollama endpoint, print a
 * ranked table and try to persist the results under `opts.projectRoot`.
 *
 * Models come from the comma-separated `flags.model`, falling back to
 * DEFAULT_BENCH_MODELS; the prompt comes from `flags.positional`, falling
 * back to DEFAULT_PROMPT. In "local" mode each model is checked (and pulled
 * when `flags.yes` is set) before it is run; unavailable models are recorded
 * as failed rows and skipped.
 *
 * @param {object} flags - Parsed CLI flags (model, positional, yes, plus
 *   whatever resolveEndpoint reads).
 * @param {{projectRoot: string}} opts - Where the `.notch` results file goes.
 * @returns {Promise<number>} Exit code: 1 when the endpoint is unreachable or
 *   any model failed, otherwise 0. A failure to save results only warns.
 */
async function runOllamaBench(flags, opts) {
  const { baseUrl, apiKey, mode } = await resolveEndpoint(flags);
  if (!await detectDaemon(baseUrl, { apiKey })) {
    printNotReachable(baseUrl, mode);
    return 1;
  }
  const modelsArg = flags.model ?? "";
  const fromFlag = modelsArg.split(",").map((s) => s.trim()).filter(Boolean);
  const models = fromFlag.length > 0 ? fromFlag : DEFAULT_BENCH_MODELS;
  // Tolerate a flags object without positional arguments.
  const prompt = (flags.positional ?? []).join(" ").trim() || DEFAULT_PROMPT;
  console.log("");
  console.log(chalk.cyan(` Ollama bench \u2014 ${models.length} model(s) @ ${baseUrl}`));
  console.log(chalk.gray(` Prompt: ${prompt}`));
  console.log("");
  // Suggesting -y is only useful when it was not already given; with -y a
  // false result means the list or the pull itself failed.
  const unavailableReason = flags.yes
    ? "not available (listing or pull failed)"
    : "not installed (pass -y to auto-pull)";
  const results = [];
  for (const model of models) {
    if (mode === "local") {
      const present = await ensureModelPresent(model, baseUrl, apiKey, flags.yes);
      if (!present) {
        results.push({
          model,
          firstTokenMs: 0,
          totalMs: 0,
          tokensPerSec: 0,
          promptTokens: 0,
          completionTokens: 0,
          output: "",
          error: unavailableReason
        });
        continue;
      }
    }
    process.stdout.write(chalk.gray(` running ${model}... `));
    const result = await runOne(model, prompt, baseUrl, apiKey, mode === "cloud");
    results.push(result);
    // Carriage return + erase-line wipes the transient "running" notice.
    process.stdout.write("\r\x1B[K");
  }
  // Successful runs first, fastest throughput on top; failures last.
  const ranked = [...results].sort((a, b) => {
    if (a.error && !b.error) return 1;
    if (!a.error && b.error) return -1;
    return b.tokensPerSec - a.tokensPerSec;
  });
  const header = ` ${" "}${"model".padEnd(28)} ${"ttft".padStart(8)} ${"total".padStart(9)} ${"throughput".padStart(12)} ${"tokens (p\u2192c)"}`;
  console.log(chalk.gray(header));
  console.log(chalk.gray(` ${"-".repeat(header.length - 2)}`));
  for (const r of ranked) console.log(formatRow(r));
  try {
    const saved = await persistBench(opts.projectRoot, ranked, prompt);
    console.log("");
    console.log(chalk.gray(` Saved: ${saved}`));
    console.log("");
  } catch (err) {
    // Saving is best effort; the table has already been printed.
    console.warn(chalk.yellow(` ! Could not persist bench: ${err.message}`));
  }
  const anyFailed = ranked.some((r) => r.error);
  return anyFailed ? 1 : 0;
}
|
|
188
|
+
export {
|
|
189
|
+
runOllamaBench
|
|
190
|
+
};
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import {
|
|
2
|
+
parseFlags,
|
|
3
|
+
persistOllamaChoice,
|
|
4
|
+
printNotReachable,
|
|
5
|
+
renderProgressLine,
|
|
6
|
+
resolveEndpoint,
|
|
7
|
+
runOllamaCli
|
|
8
|
+
} from "./chunk-MMBFNIKE.js";
|
|
9
|
+
import "./chunk-GFVLHUSS.js";
|
|
10
|
+
import "./chunk-443G6HCC.js";
|
|
11
|
+
export {
|
|
12
|
+
parseFlags,
|
|
13
|
+
persistOllamaChoice,
|
|
14
|
+
printNotReachable,
|
|
15
|
+
renderProgressLine,
|
|
16
|
+
resolveEndpoint,
|
|
17
|
+
runOllamaCli
|
|
18
|
+
};
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import {
|
|
2
|
+
isCloudModel
|
|
3
|
+
} from "./chunk-GFVLHUSS.js";
|
|
4
|
+
|
|
5
|
+
// src/ui/ollama-usage.ts
|
|
6
|
+
import chalk from "chalk";
|
|
7
|
+
/**
 * Collects per-turn usage for Ollama Cloud models during a session and
 * formats short and detailed summaries of the accumulated totals.
 */
class OllamaCloudUsageTracker {
  turns = [];
  /**
   * Record a turn. The tracker auto-filters: only entries whose modelId
   * looks like an Ollama Cloud model are kept. Non-cloud turns (and nullish
   * turns) are silently ignored so callers can fire this once per turn
   * without conditionals.
   *
   * @param {object} turn - Turn with modelId and numeric usage counters.
   */
  record(turn) {
    if (!this.isCloudTurn(turn?.modelId)) return;
    this.turns.push(turn);
  }
  /**
   * @param {string} [modelId]
   * @returns {boolean} true for ids starting with "ollama-cloud:" or ids
   *   that isCloudModel accepts; false for empty or missing ids.
   */
  isCloudTurn(modelId) {
    if (!modelId) return false;
    if (modelId.startsWith("ollama-cloud:")) return true;
    return isCloudModel(modelId);
  }
  /** Number of cloud turns recorded so far. */
  get turnCount() {
    return this.turns.length;
  }
  /**
   * Sum of every counter over all recorded turns. Counters that are missing
   * or not finite on a turn count as zero, so one incomplete turn cannot
   * turn the session totals into NaN.
   *
   * @returns {{promptTokens: number, completionTokens: number,
   *   totalTokens: number, toolCalls: number, iterations: number}}
   */
  get sessionTotals() {
    const totals = { promptTokens: 0, completionTokens: 0, totalTokens: 0, toolCalls: 0, iterations: 0 };
    const counters = Object.keys(totals);
    for (const turn of this.turns) {
      for (const key of counters) {
        const value = turn[key];
        if (Number.isFinite(value)) totals[key] += value;
      }
    }
    return totals;
  }
  /**
   * One-line footer rendered after every turn when the active model is
   * an Ollama Cloud alias. Kept intentionally short — it sits beside
   * the existing UsageTracker.formatLast() output.
   *
   * @param {string} [activeModelId]
   * @returns {string} Empty string for non-cloud models or before any turn.
   */
  formatFooter(activeModelId) {
    if (!this.isCloudTurn(activeModelId) || this.turnCount === 0) return "";
    const total = this.sessionTotals;
    const totalK = (total.totalTokens / 1e3).toFixed(1);
    return chalk.gray(` [cloud: ${this.turnCount} turns, ${totalK}K tokens]`);
  }
  /**
   * Multi-line breakdown for `/usage cloud` / `/status` style surfaces.
   *
   * @returns {string} A placeholder line when nothing was recorded, else the
   *   turn count and prompt/completion/total token sums, one per line.
   */
  formatDetail() {
    if (this.turnCount === 0) {
      return chalk.gray(" No Ollama Cloud traffic this session.");
    }
    const total = this.sessionTotals;
    return [
      chalk.gray(" Ollama Cloud"),
      chalk.gray(` turns ${this.turnCount}`),
      chalk.gray(` prompt ${total.promptTokens.toLocaleString()} tokens`),
      chalk.gray(` completion ${total.completionTokens.toLocaleString()} tokens`),
      chalk.gray(` total ${total.totalTokens.toLocaleString()} tokens`)
    ].join("\n");
  }
}
|
|
67
|
+
export {
|
|
68
|
+
OllamaCloudUsageTracker
|
|
69
|
+
};
|
|
@@ -1315,7 +1315,7 @@ function toolsToMcpList(tools) {
|
|
|
1315
1315
|
}));
|
|
1316
1316
|
}
|
|
1317
1317
|
async function runMcpServer(opts) {
|
|
1318
|
-
const toolsModule = await import("./tools-
|
|
1318
|
+
const toolsModule = await import("./tools-7WAWS6V4.js");
|
|
1319
1319
|
const { pluginManager } = await import("./plugins-OG2P75K5.js");
|
|
1320
1320
|
if (opts.includePlugins !== false) {
|
|
1321
1321
|
try {
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import {
|
|
2
|
+
deriveEntryFromRollout,
|
|
3
|
+
forgetSession,
|
|
4
|
+
getSession,
|
|
5
|
+
indexFilePath,
|
|
6
|
+
querySessions,
|
|
7
|
+
rebuildIndex,
|
|
8
|
+
tagSession,
|
|
9
|
+
upsertSessionIndexEntry
|
|
10
|
+
} from "./chunk-OSWUX6TC.js";
|
|
11
|
+
import "./chunk-QKM27RHS.js";
|
|
12
|
+
export {
|
|
13
|
+
deriveEntryFromRollout,
|
|
14
|
+
forgetSession,
|
|
15
|
+
getSession,
|
|
16
|
+
indexFilePath,
|
|
17
|
+
querySessions,
|
|
18
|
+
rebuildIndex,
|
|
19
|
+
tagSession,
|
|
20
|
+
upsertSessionIndexEntry
|
|
21
|
+
};
|
|
@@ -5,7 +5,8 @@ import {
|
|
|
5
5
|
initMCPServers,
|
|
6
6
|
listToolNames,
|
|
7
7
|
mcpToolCount
|
|
8
|
-
} from "./chunk-
|
|
8
|
+
} from "./chunk-TU465P2P.js";
|
|
9
|
+
import "./chunk-QKM27RHS.js";
|
|
9
10
|
import "./chunk-6CZCFY6H.js";
|
|
10
11
|
import "./chunk-6U3ZAGYA.js";
|
|
11
12
|
import "./chunk-FFB7GK3Y.js";
|
|
@@ -14,6 +15,7 @@ import "./chunk-TH6GKC7E.js";
|
|
|
14
15
|
import "./chunk-KZAS754V.js";
|
|
15
16
|
import "./chunk-UR4XL6OM.js";
|
|
16
17
|
import "./chunk-3QUV4JEX.js";
|
|
18
|
+
import "./chunk-FIFC4V2R.js";
|
|
17
19
|
import "./chunk-CQMAVWLJ.js";
|
|
18
20
|
import "./chunk-O3WZW7GS.js";
|
|
19
21
|
import "./chunk-YAYPQTOU.js";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@freesyntax/notch-cli",
|
|
3
|
-
"version": "0.5.
|
|
3
|
+
"version": "0.5.21",
|
|
4
4
|
"description": "Notch CLI — AI-powered coding assistant by Driftrail",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
"prepublishOnly": "npm run build:publish"
|
|
21
21
|
},
|
|
22
22
|
"dependencies": {
|
|
23
|
+
"@ai-sdk/anthropic": "^1.2.12",
|
|
23
24
|
"@ai-sdk/openai": "^1.2.0",
|
|
24
25
|
"ai": "^4.3.2",
|
|
25
26
|
"chalk": "^5.3.0",
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
autoCompactSummarize,
|
|
3
|
-
autoCompress,
|
|
4
|
-
compressHistory,
|
|
5
|
-
estimateTokens,
|
|
6
|
-
fullCompact,
|
|
7
|
-
microCompact
|
|
8
|
-
} from "./chunk-6M6CXXWR.js";
|
|
9
|
-
export {
|
|
10
|
-
autoCompactSummarize,
|
|
11
|
-
autoCompress,
|
|
12
|
-
compressHistory,
|
|
13
|
-
estimateTokens,
|
|
14
|
-
fullCompact,
|
|
15
|
-
microCompact
|
|
16
|
-
};
|