getmy-ruflo 3.5.55 → 3.5.56

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "getmy-ruflo",
3
- "version": "3.5.55",
3
+ "version": "3.5.56",
4
4
  "description": "GetMy Ruflo - AI agent orchestration platform with real terminal execution, 259 MCP tools, 60+ agents, and swarm coordination",
5
5
  "main": "bin/ruflo.js",
6
6
  "type": "module",
@@ -6,7 +6,7 @@
6
6
  */
7
7
  function getOllamaHost() {
8
8
  const host = (typeof process !== "undefined" && process.env["OLLAMA_HOST"]) ||
9
- "http://localhost:11434";
9
+ "http://192.168.0.85:11434";
10
10
  return host.trim();
11
11
  }
12
12
  function ok(data) {
@@ -34,7 +34,7 @@ export const ollamaTools = [
34
34
  {
35
35
  name: "ollama_route",
36
36
  description: "Auto-route a task to the best available Ollama model based on complexity. " +
37
- "Automatically selects qwen2.5-coder:7b for short tasks or qwen2.5-coder:32b for longer ones. " +
37
+ "Automatically selects qwen3-coder:latest for short tasks or qwen3-coder:latest for longer ones. " +
38
38
  "Returns the model response or an error if Ollama is unavailable (caller should fall back to Claude).",
39
39
  inputSchema: {
40
40
  type: "object",
@@ -68,23 +68,18 @@ export const ollamaTools = [
68
68
  const models = (tags.models ?? []).map((m) => m.name);
69
69
  if (models.length === 0)
70
70
  return fail("No Ollama models loaded — fall back to Claude");
71
- // Select model
72
- const has7b = models.some((m) => m.includes("7b"));
73
- const has32b = models.some((m) => m.includes("32b"));
71
+ // Select model — prefer qwen3-coder (primary Mac Mini model), then qwen2.5 variants
72
+ const qwen3 = models.find((m) => m.includes("qwen3"));
73
+ const qwen7b = models.find((m) => m.includes("qwen") && m.includes("7b"));
74
+ const qwen32b = models.find((m) => m.includes("qwen") && m.includes("32b"));
74
75
  let model;
75
- if (preferSmall && has7b) {
76
- model = models.find((m) => m.includes("7b"));
77
- }
78
- else if (prompt.length < 200 && has7b) {
79
- model = models.find((m) => m.includes("7b"));
80
- }
81
- else if (has32b) {
82
- model = models.find((m) => m.includes("32b"));
76
+ if (preferSmall && qwen7b) {
77
+ model = qwen7b;
83
78
  }
84
79
  else {
85
- model = models[0];
80
+ model = qwen3 || qwen32b || qwen7b || models[0];
86
81
  }
87
- // Query — use timeout for large models (qwen2.5-coder:32b can take 60-120s)
82
+ // Query — use timeout for large models (qwen3-coder:latest can take 60-120s)
88
83
  const timeoutMs = input.timeout || 300000;
89
84
  const response = await fetch(`${host}/api/generate`, {
90
85
  method: "POST",
@@ -170,7 +165,7 @@ export const ollamaTools = [
170
165
  },
171
166
  model: {
172
167
  type: "string",
173
- description: "Ollama model (default: qwen2.5-coder:32b)",
168
+ description: "Ollama model (default: qwen3-coder:latest)",
174
169
  },
175
170
  apply: {
176
171
  type: "boolean",
@@ -183,7 +178,7 @@ export const ollamaTools = [
183
178
  tags: ["ollama", "llm", "local-ai", "github", "code-generation"],
184
179
  handler: async (input) => {
185
180
  const issueNum = input.issue_number;
186
- const model = input.model || "qwen2.5-coder:32b";
181
+ const model = input.model || "qwen3-coder:latest";
187
182
  const host = getOllamaHost();
188
183
  try {
189
184
  // 1. Get issue details via gh CLI
@@ -235,8 +230,8 @@ export const ollamaTools = [
235
230
  {
236
231
  name: "ollama_pipeline",
237
232
  description: "Full zero-cost issue implementation pipeline using local Ollama. " +
238
- "Lists open GitHub issues, uses qwen2.5-coder:7b to identify files, " +
239
- "qwen2.5-coder:32b to generate code, then creates branches, commits, and PRs. " +
233
+ "Lists open GitHub issues, uses qwen3-coder:latest to identify files, " +
234
+ "qwen3-coder:latest to generate code, then creates branches, commits, and PRs. " +
240
235
  "Costs $0 — all inference runs locally on Ollama.",
241
236
  inputSchema: {
242
237
  type: "object",
@@ -251,11 +246,11 @@ export const ollamaTools = [
251
246
  },
252
247
  model: {
253
248
  type: "string",
254
- description: "Ollama model for code generation (default: qwen2.5-coder:32b)",
249
+ description: "Ollama model for code generation (default: qwen3-coder:latest)",
255
250
  },
256
251
  analysis_model: {
257
252
  type: "string",
258
- description: "Ollama model for file analysis (default: qwen2.5-coder:7b)",
253
+ description: "Ollama model for file analysis (default: qwen3-coder:latest)",
259
254
  },
260
255
  limit: {
261
256
  type: "number",
@@ -284,8 +279,8 @@ export const ollamaTools = [
284
279
  tags: ["ollama", "llm", "local-ai", "github", "pipeline", "automation"],
285
280
  handler: async (input) => {
286
281
  const host = getOllamaHost();
287
- const codeModel = input.model || "qwen2.5-coder:32b";
288
- const analysisModel = input.analysis_model || "qwen2.5-coder:7b";
282
+ const codeModel = input.model || "qwen3-coder:latest";
283
+ const analysisModel = input.analysis_model || "qwen3-coder:latest";
289
284
  const baseBranch = input.base_branch || "main";
290
285
  const limit = input.limit || 10;
291
286
  const skipIssues = input.skip_issues || [];
@@ -255,21 +255,20 @@ export class EnhancedModelRouter {
255
255
  * Prefers 7b for short/simple prompts, 32b for longer ones.
256
256
  */
257
257
  selectOllamaModel(task) {
258
- // Prefer qwen3-coder (MoE, best quality), then qwen2.5-coder variants
259
- const qwen3 = this.ollamaModels.find((m) => m.includes('qwen3'));
258
+ // Prefer Q4_K_M quants (fast load, ~66 tok/s on M4 Pro) over Q8_0 (too slow to load)
259
+ const qwen3q4 = this.ollamaModels.find((m) => m.includes('qwen3') && (m.includes('q4') || m === 'qwen3-coder:latest'));
260
+ const qwen3 = this.ollamaModels.find((m) => m.includes('qwen3') && !m.includes('q8'));
260
261
  const qwen7b = this.ollamaModels.find((m) => m.includes('qwen') && m.includes('7b'));
261
- const qwen32b = this.ollamaModels.find((m) => m.includes('qwen') && m.includes('32b'));
262
- // qwen3-coder is the primary model on Mac Mini — always prefer it
262
+ // qwen3-coder Q4_K_M is the preferred model — fast load, good quality
263
+ if (qwen3q4)
264
+ return qwen3q4;
263
265
  if (qwen3)
264
266
  return qwen3;
265
- // Fallback to qwen2.5-coder size variants
266
267
  if (task.length < 200 && qwen7b)
267
268
  return qwen7b;
268
- if (qwen32b)
269
- return qwen32b;
270
269
  if (qwen7b)
271
270
  return qwen7b;
272
- return (this.ollamaModels[0] ?? 'qwen3-coder:30b-ctx32k');
271
+ return (this.ollamaModels[0] ?? 'qwen3-coder:latest');
273
272
  }
274
273
  /**
275
274
  * Check if a task is suitable for Ollama.