@freesyntax/notch-cli 0.5.20 → 0.5.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,190 @@
1
+ import {
2
+ printNotReachable,
3
+ renderProgressLine,
4
+ resolveEndpoint
5
+ } from "./chunk-MMBFNIKE.js";
6
+ import {
7
+ detectDaemon,
8
+ listModels,
9
+ pullModel
10
+ } from "./chunk-GFVLHUSS.js";
11
+ import {
12
+ resolveByokModel
13
+ } from "./chunk-443G6HCC.js";
14
+
15
+ // src/commands/ollama-bench.ts
16
+ import fs from "fs/promises";
17
+ import path from "path";
18
+ import chalk from "chalk";
19
+ import { streamText } from "ai";
20
/** Prompt sent to each benchmarked model when no positional prompt text is supplied. */
const DEFAULT_PROMPT = "Write a 5-line Python function that returns the n-th Fibonacci number.";

/**
 * Models benchmarked when the model flag is absent or empty.
 * Frozen because the array is handed out by reference and is only ever read.
 */
const DEFAULT_BENCH_MODELS = Object.freeze([
  "qwen3-coder:30b",
  "llama3.2:latest",
  "gpt-oss:20b"
]);
26
/**
 * Rough token estimate for a piece of text.
 *
 * Counts the whitespace-separated words, divides by 0.75 and rounds to the
 * nearest integer.
 *
 * @param {string} text - Text to measure; any falsy value counts as zero tokens.
 * @returns {number} Estimated token count.
 */
function approximateTokens(text) {
  if (!text) {
    return 0;
  }
  // Runs of non-whitespace are exactly the non-empty pieces a whitespace split yields.
  const words = text.match(/\S+/g);
  const wordCount = words === null ? 0 : words.length;
  return Math.round(wordCount / 0.75);
}
30
/**
 * Checks whether a model is installed on the endpoint and optionally pulls it.
 *
 * @param {string} modelName - Model to look for, with or without a tag.
 * @param {string} baseUrl - Endpoint passed through to `listModels` / `pullModel`.
 * @param {string | undefined} apiKey - Credential passed through to both helpers.
 * @param {boolean} allowPull - When truthy, a missing model is pulled.
 * @returns {Promise<boolean>} `true` when the model is installed or was pulled;
 *   `false` when listing failed, the model is missing and pulling is not
 *   allowed, or the pull failed.
 */
async function ensureModelPresent(modelName, baseUrl, apiKey, allowPull) {
  // Ollama resolves an untagged name to `<name>:latest`, so compare both sides
  // in that canonical form. Only the last path segment is inspected so that a
  // registry port such as `host:5000/model` is not mistaken for a tag.
  const withDefaultTag = (name) => {
    if (typeof name !== "string" || name === "") return name;
    const lastSegment = name.slice(name.lastIndexOf("/") + 1);
    return lastSegment.includes(":") ? name : `${name}:latest`;
  };
  const wanted = withDefaultTag(modelName);

  try {
    const installed = await listModels(baseUrl, { apiKey });
    if (installed.some((m) => withDefaultTag(m.name) === wanted)) return true;
  } catch {
    // Listing failed: the model's presence cannot be confirmed, so report it
    // as unavailable rather than attempting a pull.
    return false;
  }

  if (!allowPull) return false;

  console.log(chalk.gray(`\nPulling ${modelName} (not installed)...\n`));
  // Mutable holder shared with renderProgressLine across progress events.
  const lastStatus = { label: "" };
  try {
    for await (const ev of pullModel(modelName, baseUrl, { apiKey })) {
      renderProgressLine(ev, lastStatus);
    }
  } catch (err) {
    // Move off the in-place progress line before printing the failure.
    process.stdout.write("\n");
    const reason = err instanceof Error ? err.message : String(err);
    console.error(chalk.red(` Pull failed: ${reason}`));
    return false;
  }
  return true;
}
53
/**
 * Streams one completion from a model and measures latency and throughput.
 *
 * @param {string} model - Model identifier handed to `resolveByokModel`.
 * @param {string} prompt - Prompt sent as a single user message.
 * @param {string} baseUrl - Endpoint handed to `resolveByokModel`.
 * @param {string | undefined} apiKey - Credential handed to `resolveByokModel`.
 * @param {boolean} isCloud - Selects the "ollama-cloud" provider id instead of "ollama".
 * @returns {Promise<object>} Result with `model`, `firstTokenMs`, `totalMs`,
 *   `tokensPerSec`, `promptTokens`, `completionTokens`, `output`, and `error`
 *   (a non-empty string, set only when streaming failed). Streaming failures
 *   are reported through `error` instead of being thrown.
 */
async function runOne(model, prompt, baseUrl, apiKey, isCloud) {
  const providerId = isCloud ? "ollama-cloud" : "ollama";
  const { model: languageModel } = resolveByokModel({
    provider: providerId,
    model,
    apiKey,
    baseUrl
  });
  const messages = [{ role: "user", content: prompt }];

  // Usage counters can arrive as NaN when the provider reports no usage, and
  // `??` lets NaN through. Anything that is not a finite number therefore
  // falls back to the local estimate.
  const countOrEstimate = (reported, text) =>
    Number.isFinite(reported) ? reported : approximateTokens(text);

  const started = Date.now();
  let firstTokenAt = 0;
  let output = "";
  try {
    const stream = streamText({ model: languageModel, messages });
    for await (const chunk of stream.textStream) {
      if (!firstTokenAt) firstTokenAt = Date.now();
      output += chunk;
    }
    const finished = Date.now();
    const usage = await stream.usage;
    const promptTokens = countOrEstimate(usage?.promptTokens, prompt);
    const completionTokens = countOrEstimate(usage?.completionTokens, output);
    const totalMs = finished - started;
    // With no streamed chunk there is no first-token time; report the total.
    const firstTokenMs = firstTokenAt ? firstTokenAt - started : totalMs;
    // Floor the duration at 1 ms so a sub-millisecond run cannot divide by zero.
    const seconds = Math.max(1e-3, totalMs / 1e3);
    return {
      model,
      firstTokenMs,
      totalMs,
      tokensPerSec: completionTokens / seconds,
      promptTokens,
      completionTokens,
      output
    };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return {
      model,
      firstTokenMs: 0,
      totalMs: Date.now() - started,
      tokensPerSec: 0,
      promptTokens: 0,
      completionTokens: 0,
      output: "",
      // Consumers test `error` for truthiness, so it must never be empty.
      error: message || "unknown error"
    };
  }
}
100
/**
 * Renders one benchmark result as a single table row.
 *
 * Failed results show a red cross, the padded model name and the first 70
 * characters of the error. Successful results show a green check followed by
 * the time to first token, total time, throughput and prompt→completion
 * token counts.
 *
 * @param {object} r - Result with `model`, `firstTokenMs`, `totalMs`,
 *   `tokensPerSec`, `promptTokens`, `completionTokens` and optional `error`.
 * @returns {string} The coloured row text.
 */
function formatRow(r) {
  const leftPad = (value, width) => value.toString().padStart(width);

  // All cells are built before the error check, so every field is read even
  // for failed rows.
  const modelCell = r.model.padEnd(28);
  const ttftCell = `${leftPad(r.firstTokenMs, 5)} ms`;
  const totalCell = `${leftPad(r.totalMs, 6)} ms`;
  const rateCell = `${r.tokensPerSec.toFixed(1).padStart(6)} tok/s`;
  const tokenCell = `${r.promptTokens}\u2192${r.completionTokens}`;

  let cells;
  if (r.error) {
    cells = [
      chalk.red("\u2717"),
      chalk.white(modelCell),
      chalk.red(r.error.slice(0, 70))
    ];
  } else {
    cells = [
      chalk.green("\u2713"),
      chalk.white(modelCell),
      chalk.gray(ttftCell),
      chalk.gray(totalCell),
      chalk.cyan(rateCell),
      chalk.gray(tokenCell.padEnd(12))
    ];
  }
  return ` ${cells.join(" ")}`;
}
111
/**
 * Writes the benchmark results to `<projectRoot>/.notch/ollama-bench.json`.
 *
 * The directory is created if needed. The file holds an ISO timestamp, the
 * prompt and one record per result; throughput is rounded to two decimals and
 * the generated output text is not stored.
 *
 * @param {string} projectRoot - Directory that receives the `.notch` folder.
 * @param {object[]} results - Benchmark results to store.
 * @param {string} prompt - Prompt that was benchmarked.
 * @returns {Promise<string>} Path of the written file.
 */
async function persistBench(projectRoot, results, prompt) {
  const notchDir = path.join(projectRoot, ".notch");
  await fs.mkdir(notchDir, { recursive: true });
  const target = path.join(notchDir, "ollama-bench.json");

  const ts = new Date().toISOString();
  const records = [];
  for (const entry of results) {
    records.push({
      model: entry.model,
      firstTokenMs: entry.firstTokenMs,
      totalMs: entry.totalMs,
      tokensPerSec: Number(entry.tokensPerSec.toFixed(2)),
      promptTokens: entry.promptTokens,
      completionTokens: entry.completionTokens,
      error: entry.error
    });
  }

  const serialized = JSON.stringify({ ts, prompt, results: records }, null, 2);
  await fs.writeFile(target, `${serialized}\n`, "utf-8");
  return target;
}
131
/**
 * Benchmarks one or more models against the resolved endpoint, prints a
 * ranked table and saves the results under the project root.
 *
 * Models come from the comma-separated `flags.model`, falling back to
 * `DEFAULT_BENCH_MODELS`; the prompt comes from `flags.positional`, falling
 * back to `DEFAULT_PROMPT`. In "local" mode each model must be installed
 * (or pullable when `flags.yes` is set) before it is run.
 *
 * @param {object} flags - Parsed flags; reads `model`, `positional` and `yes`,
 *   and is passed whole to `resolveEndpoint`.
 * @param {object} opts - Options; reads `projectRoot` for the saved report.
 * @returns {Promise<number>} `1` when the endpoint is unreachable or any model
 *   failed, otherwise `0`.
 */
async function runOllamaBench(flags, opts) {
  const { baseUrl, apiKey, mode } = await resolveEndpoint(flags);
  if (!await detectDaemon(baseUrl, { apiKey })) {
    printNotReachable(baseUrl, mode);
    return 1;
  }

  const modelsArg = flags.model ?? "";
  const fromFlag = modelsArg.split(",").map((s) => s.trim()).filter(Boolean);
  const models = fromFlag.length > 0 ? fromFlag : DEFAULT_BENCH_MODELS;
  const prompt = flags.positional.join(" ").trim() || DEFAULT_PROMPT;

  console.log("");
  console.log(chalk.cyan(` Ollama bench \u2014 ${models.length} model(s) @ ${baseUrl}`));
  console.log(chalk.gray(` Prompt: ${prompt}`));
  console.log("");

  // With -y already given, a failed check means listing or pulling failed, so
  // advising the user to pass -y would be misleading.
  const unavailableReason = flags.yes
    ? "unavailable (could not list or pull model)"
    : "not installed (pass -y to auto-pull)";

  const results = [];
  for (const model of models) {
    if (mode === "local") {
      const present = await ensureModelPresent(model, baseUrl, apiKey, flags.yes);
      if (!present) {
        results.push({
          model,
          firstTokenMs: 0,
          totalMs: 0,
          tokensPerSec: 0,
          promptTokens: 0,
          completionTokens: 0,
          output: "",
          error: unavailableReason
        });
        continue;
      }
    }
    process.stdout.write(chalk.gray(` running ${model}... `));
    const result = await runOne(model, prompt, baseUrl, apiKey, mode === "cloud");
    results.push(result);
    // Return to column 0 and erase the transient "running" line.
    process.stdout.write("\r\x1B[K");
  }

  // Successful runs first, fastest throughput on top; failures go last.
  const ranked = [...results].sort((a, b) => {
    if (a.error && !b.error) return 1;
    if (!a.error && b.error) return -1;
    return b.tokensPerSec - a.tokensPerSec;
  });

  const header = ` ${" "}${"model".padEnd(28)} ${"ttft".padStart(8)} ${"total".padStart(9)} ${"throughput".padStart(12)} ${"tokens (p\u2192c)"}`;
  console.log(chalk.gray(header));
  console.log(chalk.gray(` ${"-".repeat(header.length - 2)}`));
  for (const r of ranked) console.log(formatRow(r));

  // Saving is best-effort: a write failure is reported but does not change
  // the exit code.
  try {
    const saved = await persistBench(opts.projectRoot, ranked, prompt);
    console.log("");
    console.log(chalk.gray(` Saved: ${saved}`));
    console.log("");
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(chalk.yellow(` ! Could not persist bench: ${reason}`));
  }

  const anyFailed = ranked.some((r) => r.error);
  return anyFailed ? 1 : 0;
}
188
+ export {
189
+ runOllamaBench
190
+ };
@@ -0,0 +1,18 @@
1
+ import {
2
+ parseFlags,
3
+ persistOllamaChoice,
4
+ printNotReachable,
5
+ renderProgressLine,
6
+ resolveEndpoint,
7
+ runOllamaCli
8
+ } from "./chunk-MMBFNIKE.js";
9
+ import "./chunk-GFVLHUSS.js";
10
+ import "./chunk-443G6HCC.js";
11
+ export {
12
+ parseFlags,
13
+ persistOllamaChoice,
14
+ printNotReachable,
15
+ renderProgressLine,
16
+ resolveEndpoint,
17
+ runOllamaCli
18
+ };
@@ -0,0 +1,69 @@
1
+ import {
2
+ isCloudModel
3
+ } from "./chunk-GFVLHUSS.js";
4
+
5
+ // src/ui/ollama-usage.ts
6
+ import chalk from "chalk";
7
/**
 * Accumulates per-turn usage for Ollama Cloud models during a session and
 * formats it for display.
 */
class OllamaCloudUsageTracker {
  /** Recorded cloud turns, in the order they were recorded. */
  turns = [];

  /**
   * Record a turn. The tracker auto-filters: only entries whose modelId
   * looks like an Ollama Cloud model are kept. Non-cloud turns are
   * silently ignored so callers can fire this once per turn without
   * conditionals.
   *
   * @param {object} turn - Turn with `modelId` and numeric usage counters.
   */
  record(turn) {
    if (!this.isCloudTurn(turn.modelId)) return;
    this.turns.push(turn);
  }

  /**
   * Whether a model id refers to an Ollama Cloud model: either it carries the
   * "ollama-cloud:" prefix or `isCloudModel` accepts it.
   *
   * @param {string | undefined} modelId - Model id to classify.
   * @returns {boolean} `false` for a missing id.
   */
  isCloudTurn(modelId) {
    if (!modelId) return false;
    if (modelId.startsWith("ollama-cloud:")) return true;
    return isCloudModel(modelId);
  }

  /** Number of recorded cloud turns. */
  get turnCount() {
    return this.turns.length;
  }

  /**
   * Sum of every counter across the recorded turns.
   *
   * Counters that are missing or not finite (for example NaN) are skipped, so
   * one incomplete turn cannot turn the session totals into NaN.
   *
   * @returns {{promptTokens: number, completionTokens: number, totalTokens: number, toolCalls: number, iterations: number}}
   */
  get sessionTotals() {
    const totals = {
      promptTokens: 0,
      completionTokens: 0,
      totalTokens: 0,
      toolCalls: 0,
      iterations: 0
    };
    const counters = Object.keys(totals);
    for (const turn of this.turns) {
      for (const counter of counters) {
        const value = turn[counter];
        if (Number.isFinite(value)) totals[counter] += value;
      }
    }
    return totals;
  }

  /**
   * One-line footer rendered after every turn when the active model is
   * an Ollama Cloud alias. Kept intentionally short — it sits beside
   * the existing UsageTracker.formatLast() output.
   *
   * @param {string | undefined} activeModelId - Currently active model id.
   * @returns {string} Footer text, or "" when the active model is not a cloud
   *   model or nothing has been recorded.
   */
  formatFooter(activeModelId) {
    if (!this.isCloudTurn(activeModelId) || this.turnCount === 0) return "";
    const total = this.sessionTotals;
    const totalK = (total.totalTokens / 1e3).toFixed(1);
    return chalk.gray(` [cloud: ${this.turnCount} turns, ${totalK}K tokens]`);
  }

  /**
   * Multi-line breakdown for `/usage cloud` / `/status` style surfaces.
   *
   * @returns {string} Turn count plus prompt, completion and total tokens, or
   *   a placeholder line when nothing has been recorded.
   */
  formatDetail() {
    if (this.turnCount === 0) {
      return chalk.gray(" No Ollama Cloud traffic this session.");
    }
    const total = this.sessionTotals;
    return [
      chalk.gray(" Ollama Cloud"),
      chalk.gray(` turns ${this.turnCount}`),
      chalk.gray(` prompt ${total.promptTokens.toLocaleString()} tokens`),
      chalk.gray(` completion ${total.completionTokens.toLocaleString()} tokens`),
      chalk.gray(` total ${total.totalTokens.toLocaleString()} tokens`)
    ].join("\n");
  }
}
67
+ export {
68
+ OllamaCloudUsageTracker
69
+ };
@@ -1315,7 +1315,7 @@ function toolsToMcpList(tools) {
1315
1315
  }));
1316
1316
  }
1317
1317
  async function runMcpServer(opts) {
1318
- const toolsModule = await import("./tools-Q7CDHB4K.js");
1318
+ const toolsModule = await import("./tools-7WAWS6V4.js");
1319
1319
  const { pluginManager } = await import("./plugins-OG2P75K5.js");
1320
1320
  if (opts.includePlugins !== false) {
1321
1321
  try {
@@ -0,0 +1,21 @@
1
+ import {
2
+ deriveEntryFromRollout,
3
+ forgetSession,
4
+ getSession,
5
+ indexFilePath,
6
+ querySessions,
7
+ rebuildIndex,
8
+ tagSession,
9
+ upsertSessionIndexEntry
10
+ } from "./chunk-OSWUX6TC.js";
11
+ import "./chunk-QKM27RHS.js";
12
+ export {
13
+ deriveEntryFromRollout,
14
+ forgetSession,
15
+ getSession,
16
+ indexFilePath,
17
+ querySessions,
18
+ rebuildIndex,
19
+ tagSession,
20
+ upsertSessionIndexEntry
21
+ };
@@ -5,7 +5,8 @@ import {
5
5
  initMCPServers,
6
6
  listToolNames,
7
7
  mcpToolCount
8
- } from "./chunk-YBYF7L4A.js";
8
+ } from "./chunk-TU465P2P.js";
9
+ import "./chunk-QKM27RHS.js";
9
10
  import "./chunk-6CZCFY6H.js";
10
11
  import "./chunk-6U3ZAGYA.js";
11
12
  import "./chunk-FFB7GK3Y.js";
@@ -14,6 +15,7 @@ import "./chunk-TH6GKC7E.js";
14
15
  import "./chunk-KZAS754V.js";
15
16
  import "./chunk-UR4XL6OM.js";
16
17
  import "./chunk-3QUV4JEX.js";
18
+ import "./chunk-FIFC4V2R.js";
17
19
  import "./chunk-CQMAVWLJ.js";
18
20
  import "./chunk-O3WZW7GS.js";
19
21
  import "./chunk-YAYPQTOU.js";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@freesyntax/notch-cli",
3
- "version": "0.5.20",
3
+ "version": "0.5.21",
4
4
  "description": "Notch CLI — AI-powered coding assistant by Driftrail",
5
5
  "type": "module",
6
6
  "bin": {
@@ -20,6 +20,7 @@
20
20
  "prepublishOnly": "npm run build:publish"
21
21
  },
22
22
  "dependencies": {
23
+ "@ai-sdk/anthropic": "^1.2.12",
23
24
  "@ai-sdk/openai": "^1.2.0",
24
25
  "ai": "^4.3.2",
25
26
  "chalk": "^5.3.0",
@@ -1,16 +0,0 @@
1
- import {
2
- autoCompactSummarize,
3
- autoCompress,
4
- compressHistory,
5
- estimateTokens,
6
- fullCompact,
7
- microCompact
8
- } from "./chunk-6M6CXXWR.js";
9
- export {
10
- autoCompactSummarize,
11
- autoCompress,
12
- compressHistory,
13
- estimateTokens,
14
- fullCompact,
15
- microCompact
16
- };