getmy-ruflo 3.5.55 → 3.5.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
function getOllamaHost() {
|
|
8
8
|
const host = (typeof process !== "undefined" && process.env["OLLAMA_HOST"]) ||
|
|
9
|
-
"http://
|
|
9
|
+
"http://192.168.0.85:11434";
|
|
10
10
|
return host.trim();
|
|
11
11
|
}
|
|
12
12
|
function ok(data) {
|
|
@@ -34,7 +34,7 @@ export const ollamaTools = [
|
|
|
34
34
|
{
|
|
35
35
|
name: "ollama_route",
|
|
36
36
|
description: "Auto-route a task to the best available Ollama model based on complexity. " +
|
|
37
|
-
"Automatically selects
|
|
37
|
+
"Automatically selects qwen3-coder:latest for short tasks or qwen3-coder:latest for longer ones. " +
|
|
38
38
|
"Returns the model response or an error if Ollama is unavailable (caller should fall back to Claude).",
|
|
39
39
|
inputSchema: {
|
|
40
40
|
type: "object",
|
|
@@ -68,23 +68,18 @@ export const ollamaTools = [
|
|
|
68
68
|
const models = (tags.models ?? []).map((m) => m.name);
|
|
69
69
|
if (models.length === 0)
|
|
70
70
|
return fail("No Ollama models loaded — fall back to Claude");
|
|
71
|
-
// Select model
|
|
72
|
-
const
|
|
73
|
-
const
|
|
71
|
+
// Select model — prefer qwen3-coder (primary Mac Mini model), then qwen2.5 variants
|
|
72
|
+
const qwen3 = models.find((m) => m.includes("qwen3"));
|
|
73
|
+
const qwen7b = models.find((m) => m.includes("qwen") && m.includes("7b"));
|
|
74
|
+
const qwen32b = models.find((m) => m.includes("qwen") && m.includes("32b"));
|
|
74
75
|
let model;
|
|
75
|
-
if (preferSmall &&
|
|
76
|
-
model =
|
|
77
|
-
}
|
|
78
|
-
else if (prompt.length < 200 && has7b) {
|
|
79
|
-
model = models.find((m) => m.includes("7b"));
|
|
80
|
-
}
|
|
81
|
-
else if (has32b) {
|
|
82
|
-
model = models.find((m) => m.includes("32b"));
|
|
76
|
+
if (preferSmall && qwen7b) {
|
|
77
|
+
model = qwen7b;
|
|
83
78
|
}
|
|
84
79
|
else {
|
|
85
|
-
model = models[0];
|
|
80
|
+
model = qwen3 || qwen32b || qwen7b || models[0];
|
|
86
81
|
}
|
|
87
|
-
// Query — use timeout for large models (
|
|
82
|
+
// Query — use timeout for large models (qwen3-coder:latest can take 60-120s)
|
|
88
83
|
const timeoutMs = input.timeout || 300000;
|
|
89
84
|
const response = await fetch(`${host}/api/generate`, {
|
|
90
85
|
method: "POST",
|
|
@@ -170,7 +165,7 @@ export const ollamaTools = [
|
|
|
170
165
|
},
|
|
171
166
|
model: {
|
|
172
167
|
type: "string",
|
|
173
|
-
description: "Ollama model (default:
|
|
168
|
+
description: "Ollama model (default: qwen3-coder:latest)",
|
|
174
169
|
},
|
|
175
170
|
apply: {
|
|
176
171
|
type: "boolean",
|
|
@@ -183,7 +178,7 @@ export const ollamaTools = [
|
|
|
183
178
|
tags: ["ollama", "llm", "local-ai", "github", "code-generation"],
|
|
184
179
|
handler: async (input) => {
|
|
185
180
|
const issueNum = input.issue_number;
|
|
186
|
-
const model = input.model || "
|
|
181
|
+
const model = input.model || "qwen3-coder:latest";
|
|
187
182
|
const host = getOllamaHost();
|
|
188
183
|
try {
|
|
189
184
|
// 1. Get issue details via gh CLI
|
|
@@ -235,8 +230,8 @@ export const ollamaTools = [
|
|
|
235
230
|
{
|
|
236
231
|
name: "ollama_pipeline",
|
|
237
232
|
description: "Full zero-cost issue implementation pipeline using local Ollama. " +
|
|
238
|
-
"Lists open GitHub issues, uses
|
|
239
|
-
"
|
|
233
|
+
"Lists open GitHub issues, uses qwen3-coder:latest to identify files, " +
|
|
234
|
+
"qwen3-coder:latest to generate code, then creates branches, commits, and PRs. " +
|
|
240
235
|
"Costs $0 — all inference runs locally on Ollama.",
|
|
241
236
|
inputSchema: {
|
|
242
237
|
type: "object",
|
|
@@ -251,11 +246,11 @@ export const ollamaTools = [
|
|
|
251
246
|
},
|
|
252
247
|
model: {
|
|
253
248
|
type: "string",
|
|
254
|
-
description: "Ollama model for code generation (default:
|
|
249
|
+
description: "Ollama model for code generation (default: qwen3-coder:latest)",
|
|
255
250
|
},
|
|
256
251
|
analysis_model: {
|
|
257
252
|
type: "string",
|
|
258
|
-
description: "Ollama model for file analysis (default:
|
|
253
|
+
description: "Ollama model for file analysis (default: qwen3-coder:latest)",
|
|
259
254
|
},
|
|
260
255
|
limit: {
|
|
261
256
|
type: "number",
|
|
@@ -284,8 +279,8 @@ export const ollamaTools = [
|
|
|
284
279
|
tags: ["ollama", "llm", "local-ai", "github", "pipeline", "automation"],
|
|
285
280
|
handler: async (input) => {
|
|
286
281
|
const host = getOllamaHost();
|
|
287
|
-
const codeModel = input.model || "
|
|
288
|
-
const analysisModel = input.analysis_model || "
|
|
282
|
+
const codeModel = input.model || "qwen3-coder:latest";
|
|
283
|
+
const analysisModel = input.analysis_model || "qwen3-coder:latest";
|
|
289
284
|
const baseBranch = input.base_branch || "main";
|
|
290
285
|
const limit = input.limit || 10;
|
|
291
286
|
const skipIssues = input.skip_issues || [];
|
|
@@ -255,21 +255,20 @@ export class EnhancedModelRouter {
|
|
|
255
255
|
* Prefers a qwen3 model (Q4 quant or qwen3-coder:latest, then any non-Q8 build), then a qwen 7b model, then the first listed model.
|
|
256
256
|
*/
|
|
257
257
|
selectOllamaModel(task) {
|
|
258
|
-
// Prefer
|
|
259
|
-
const
|
|
258
|
+
// Prefer Q4_K_M quants (fast load, ~66 tok/s on M4 Pro) over Q8_0 (too slow to load)
|
|
259
|
+
const qwen3q4 = this.ollamaModels.find((m) => m.includes('qwen3') && (m.includes('q4') || m === 'qwen3-coder:latest'));
|
|
260
|
+
const qwen3 = this.ollamaModels.find((m) => m.includes('qwen3') && !m.includes('q8'));
|
|
260
261
|
const qwen7b = this.ollamaModels.find((m) => m.includes('qwen') && m.includes('7b'));
|
|
261
|
-
|
|
262
|
-
|
|
262
|
+
// qwen3-coder Q4_K_M is the preferred model — fast load, good quality
|
|
263
|
+
if (qwen3q4)
|
|
264
|
+
return qwen3q4;
|
|
263
265
|
if (qwen3)
|
|
264
266
|
return qwen3;
|
|
265
|
-
// Fallback to qwen2.5-coder size variants
|
|
266
267
|
if (task.length < 200 && qwen7b)
|
|
267
268
|
return qwen7b;
|
|
268
|
-
if (qwen32b)
|
|
269
|
-
return qwen32b;
|
|
270
269
|
if (qwen7b)
|
|
271
270
|
return qwen7b;
|
|
272
|
-
return (this.ollamaModels[0] ?? 'qwen3-coder:
|
|
271
|
+
return (this.ollamaModels[0] ?? 'qwen3-coder:latest');
|
|
273
272
|
}
|
|
274
273
|
/**
|
|
275
274
|
* Check if a task is suitable for Ollama.
|