npm - getmy-ruflo - Versions diffs - 3.5.55 → 3.5.56 - Mend

getmy-ruflo 3.5.55 → 3.5.56

This diff shows the changes between publicly available versions of the package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (3)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "getmy-ruflo",
-  "version": "3.5.55",
+  "version": "3.5.56",
   "description": "GetMy Ruflo - AI agent orchestration platform with real terminal execution, 259 MCP tools, 60+ agents, and swarm coordination",
   "main": "bin/ruflo.js",
   "type": "module",

package/v3/@claude-flow/cli/dist/src/mcp-tools/ollama-tools.js CHANGED Viewed

@@ -6,7 +6,7 @@
  */
 function getOllamaHost() {
     const host = (typeof process !== "undefined" && process.env["OLLAMA_HOST"]) ||
-        "http://localhost:11434";
+        "http://192.168.0.85:11434";
     return host.trim();
 }
 function ok(data) {
@@ -34,7 +34,7 @@ export const ollamaTools = [
     {
         name: "ollama_route",
         description: "Auto-route a task to the best available Ollama model based on complexity. " +
-            "Automatically selects qwen2.5-coder:7b for short tasks or qwen2.5-coder:32b for longer ones. " +
+            "Automatically selects qwen3-coder:latest for short tasks or qwen3-coder:latest for longer ones. " +
             "Returns the model response or an error if Ollama is unavailable (caller should fall back to Claude).",
         inputSchema: {
             type: "object",
@@ -68,23 +68,18 @@ export const ollamaTools = [
                 const models = (tags.models ?? []).map((m) => m.name);
                 if (models.length === 0)
                     return fail("No Ollama models loaded — fall back to Claude");
-                // Select model
-                const has7b = models.some((m) => m.includes("7b"));
-                const has32b = models.some((m) => m.includes("32b"));
+                // Select model — prefer qwen3-coder (primary Mac Mini model), then qwen2.5 variants
+                const qwen3 = models.find((m) => m.includes("qwen3"));
+                const qwen7b = models.find((m) => m.includes("qwen") && m.includes("7b"));
+                const qwen32b = models.find((m) => m.includes("qwen") && m.includes("32b"));
                 let model;
-                if (preferSmall && has7b) {
-                    model = models.find((m) => m.includes("7b"));
-                }
-                else if (prompt.length < 200 && has7b) {
-                    model = models.find((m) => m.includes("7b"));
-                }
-                else if (has32b) {
-                    model = models.find((m) => m.includes("32b"));
+                if (preferSmall && qwen7b) {
+                    model = qwen7b;
                 }
                 else {
-                    model = models[0];
+                    model = qwen3 || qwen32b || qwen7b || models[0];
                 }
-                // Query — use timeout for large models (qwen2.5-coder:32b can take 60-120s)
+                // Query — use timeout for large models (qwen3-coder:latest can take 60-120s)
                 const timeoutMs = input.timeout || 300000;
                 const response = await fetch(`${host}/api/generate`, {
                     method: "POST",
@@ -170,7 +165,7 @@ export const ollamaTools = [
                 },
                 model: {
                     type: "string",
-                    description: "Ollama model (default: qwen2.5-coder:32b)",
+                    description: "Ollama model (default: qwen3-coder:latest)",
                 },
                 apply: {
                     type: "boolean",
@@ -183,7 +178,7 @@ export const ollamaTools = [
         tags: ["ollama", "llm", "local-ai", "github", "code-generation"],
         handler: async (input) => {
             const issueNum = input.issue_number;
-            const model = input.model || "qwen2.5-coder:32b";
+            const model = input.model || "qwen3-coder:latest";
             const host = getOllamaHost();
             try {
                 // 1. Get issue details via gh CLI
@@ -235,8 +230,8 @@ export const ollamaTools = [
     {
         name: "ollama_pipeline",
         description: "Full zero-cost issue implementation pipeline using local Ollama. " +
-            "Lists open GitHub issues, uses qwen2.5-coder:7b to identify files, " +
-            "qwen2.5-coder:32b to generate code, then creates branches, commits, and PRs. " +
+            "Lists open GitHub issues, uses qwen3-coder:latest to identify files, " +
+            "qwen3-coder:latest to generate code, then creates branches, commits, and PRs. " +
             "Costs $0 — all inference runs locally on Ollama.",
         inputSchema: {
             type: "object",
@@ -251,11 +246,11 @@ export const ollamaTools = [
                 },
                 model: {
                     type: "string",
-                    description: "Ollama model for code generation (default: qwen2.5-coder:32b)",
+                    description: "Ollama model for code generation (default: qwen3-coder:latest)",
                 },
                 analysis_model: {
                     type: "string",
-                    description: "Ollama model for file analysis (default: qwen2.5-coder:7b)",
+                    description: "Ollama model for file analysis (default: qwen3-coder:latest)",
                 },
                 limit: {
                     type: "number",
@@ -284,8 +279,8 @@ export const ollamaTools = [
         tags: ["ollama", "llm", "local-ai", "github", "pipeline", "automation"],
         handler: async (input) => {
             const host = getOllamaHost();
-            const codeModel = input.model || "qwen2.5-coder:32b";
-            const analysisModel = input.analysis_model || "qwen2.5-coder:7b";
+            const codeModel = input.model || "qwen3-coder:latest";
+            const analysisModel = input.analysis_model || "qwen3-coder:latest";
             const baseBranch = input.base_branch || "main";
             const limit = input.limit || 10;
             const skipIssues = input.skip_issues || [];

package/v3/@claude-flow/cli/dist/src/ruvector/enhanced-model-router.js CHANGED Viewed

@@ -255,21 +255,20 @@ export class EnhancedModelRouter {
      * Prefers 7b for short/simple prompts, 32b for longer ones.
      */
     selectOllamaModel(task) {
-        // Prefer qwen3-coder (MoE, best quality), then qwen2.5-coder variants
-        const qwen3 = this.ollamaModels.find((m) => m.includes('qwen3'));
+        // Prefer Q4_K_M quants (fast load, ~66 tok/s on M4 Pro) over Q8_0 (too slow to load)
+        const qwen3q4 = this.ollamaModels.find((m) => m.includes('qwen3') && (m.includes('q4') || m === 'qwen3-coder:latest'));
+        const qwen3 = this.ollamaModels.find((m) => m.includes('qwen3') && !m.includes('q8'));
         const qwen7b = this.ollamaModels.find((m) => m.includes('qwen') && m.includes('7b'));
-        const qwen32b = this.ollamaModels.find((m) => m.includes('qwen') && m.includes('32b'));
-        // qwen3-coder is the primary model on Mac Mini — always prefer it
+        // qwen3-coder Q4_K_M is the preferred model — fast load, good quality
+        if (qwen3q4)
+            return qwen3q4;
         if (qwen3)
             return qwen3;
-        // Fallback to qwen2.5-coder size variants
         if (task.length < 200 && qwen7b)
             return qwen7b;
-        if (qwen32b)
-            return qwen32b;
         if (qwen7b)
             return qwen7b;
-        return (this.ollamaModels[0] ?? 'qwen3-coder:30b-ctx32k');
+        return (this.ollamaModels[0] ?? 'qwen3-coder:latest');
     }
     /**
      * Check if a task is suitable for Ollama.