npm - tuna-agent - Versions diffs - 0.1.164 → 0.1.166 - Mend

tuna-agent 0.1.164 → 0.1.166

This diff compares the contents of two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (4)

package/dist/agents/claude-code-adapter.d.ts +20 -0
package/dist/agents/claude-code-adapter.js +93 -1
package/dist/daemon/index.js +71 -39
package/package.json +1 -1

package/dist/agents/claude-code-adapter.d.ts CHANGED Viewed

@@ -22,6 +22,8 @@ export interface AgentMetrics {
     lastReflectionAt: string | null;
     lastPatternAt: string | null;
     latestLearnedRule: string | null;
+    lastScore: number | null;
+    scoreTrend: string | null;
     upSince: string;
 }
 export declare class ClaudeCodeAdapter implements AgentAdapter {
@@ -76,6 +78,24 @@ export declare class ClaudeCodeAdapter implements AgentAdapter {
     private static isSimilarRule;
     /** Cosine similarity between two equal-length embedding vectors. */
     private static cosine;
+    /** Path to the per-agent self-improvement score log (one JSON line per scored task). */
+    private static scoresFile;
+    /**
+     * Derive a 0-10 quality score for a completed task — the signal the self-improvement
+     * loop optimizes against (SIA-style: a measurable score gates whether changes help).
+     * Prefers an explicit rubric score the agent emits ("[SCORE: N]" / "QUALITY_SCORE: N");
+     * otherwise falls back to a deterministic proxy from status + result keywords.
+     */
+    static deriveScore(status: 'done' | 'failed', resultSummary: string): number;
+    /** Append one score record to the agent's score log; update in-memory metrics. */
+    recordRunScore(cwd: string, agentId: string, score: number, note: string): void;
+    /** Read the last N scores from the agent's score log (oldest→newest). */
+    static readRecentScores(cwd: string, n: number): number[];
+    /**
+     * Classify the recent score trend. Compares the latest score to the mean of the
+     * prior window. Needs >=3 points; otherwise 'insufficient'.
+     */
+    static computeScoreTrend(scores: number[]): 'up' | 'flat' | 'down' | 'insufficient';
     runSelfImprovement(cwd: string, agentId: string): Promise<void>;
     /**
      * Parse "## Learned Rules" section from CLAUDE.md and store in learnedRulesMap.

package/dist/agents/claude-code-adapter.js CHANGED Viewed

@@ -36,6 +36,8 @@ export class ClaudeCodeAdapter {
                 lastReflectionAt: null,
                 lastPatternAt: null,
                 latestLearnedRule: null,
+                lastScore: null,
+                scoreTrend: null,
                 upSince: new Date().toISOString(),
             });
         }
@@ -844,6 +846,10 @@ export class ClaudeCodeAdapter {
             return;
         const agentName = path.basename(cwd);
         const agentId = task.agentId || '';
+        // Record a quality score for this task — the signal the self-improvement loop
+        // optimizes against (closes the loop: score in -> trend gates rule changes out).
+        const runScore = ClaudeCodeAdapter.deriveScore(status, resultSummary);
+        this.recordRunScore(cwd, agentId, runScore, `${status}: ${task.description.substring(0, 80)}`);
         try {
             // Step 1: Generate AI-powered reflection via Ollama
             console.log(`[Reflection] Generating AI reflection for task ${task.id} (${status}), input: ${resultSummary.substring(0, 150)}...`);
@@ -934,6 +940,85 @@ export class ClaudeCodeAdapter {
         }
         return (na && nb) ? dot / (Math.sqrt(na) * Math.sqrt(nb)) : 0;
     }
+    /** Path to the per-agent self-improvement score log (one JSON line per scored task). */
+    static scoresFile(cwd) {
+        return path.join(cwd, 'self-improve-scores.jsonl');
+    }
+    /**
+     * Derive a 0-10 quality score for a completed task — the signal the self-improvement
+     * loop optimizes against (SIA-style: a measurable score gates whether changes help).
+     * Prefers an explicit rubric score the agent emits ("[SCORE: N]" / "QUALITY_SCORE: N");
+     * otherwise falls back to a deterministic proxy from status + result keywords.
+     */
+    static deriveScore(status, resultSummary) {
+        const text = resultSummary || '';
+        const m = text.match(/(?:\[score:?\s*|quality_score:?\s*|score:\s*)(\d+(?:\.\d+)?)\s*(?:\/\s*10)?/i);
+        if (m) {
+            const s = parseFloat(m[1]);
+            if (!isNaN(s))
+                return Math.max(0, Math.min(10, s));
+        }
+        if (status === 'failed')
+            return 2;
+        let score = 6; // baseline for a completed task
+        const low = text.toLowerCase();
+        if (/\b(verified|confirmed|passed|deployed|completed successfully|all tests pass)\b/.test(low))
+            score += 2;
+        if (/\b(improved|optimi[sz]ed|fixed|resolved)\b/.test(low))
+            score += 1;
+        if (/\b(partial|incomplete|could not|unable|skipped|blocked|warning|fallback|degraded)\b/.test(low))
+            score -= 2;
+        if (/\b(error|failed|exception|timeout)\b/.test(low))
+            score -= 1;
+        return Math.max(0, Math.min(10, score));
+    }
+    /** Append one score record to the agent's score log; update in-memory metrics. */
+    recordRunScore(cwd, agentId, score, note) {
+        try {
+            const m = this.getMetricsForAgent(agentId);
+            const entry = { date: new Date().toISOString(), score, rulesCount: m.rulesCount, note: note.substring(0, 200) };
+            fs.appendFileSync(ClaudeCodeAdapter.scoresFile(cwd), JSON.stringify(entry) + '\n');
+            m.lastScore = score;
+        }
+        catch (err) {
+            console.warn(`[Self-Improve] recordRunScore failed:`, err instanceof Error ? err.message : err);
+        }
+    }
+    /** Read the last N scores from the agent's score log (oldest→newest). */
+    static readRecentScores(cwd, n) {
+        try {
+            const f = ClaudeCodeAdapter.scoresFile(cwd);
+            if (!fs.existsSync(f))
+                return [];
+            const lines = fs.readFileSync(f, 'utf-8').trim().split('\n').filter(Boolean);
+            return lines.slice(-n).map(l => { try {
+                return JSON.parse(l).score;
+            }
+            catch {
+                return NaN;
+            } }).filter(s => !isNaN(s));
+        }
+        catch {
+            return [];
+        }
+    }
+    /**
+     * Classify the recent score trend. Compares the latest score to the mean of the
+     * prior window. Needs >=3 points; otherwise 'insufficient'.
+     */
+    static computeScoreTrend(scores) {
+        if (scores.length < 3)
+            return 'insufficient';
+        const recent = scores[scores.length - 1];
+        const prior = scores.slice(0, -1);
+        const priorAvg = prior.reduce((a, b) => a + b, 0) / prior.length;
+        const delta = recent - priorAvg;
+        if (delta > 0.5)
+            return 'up';
+        if (delta < -0.5)
+            return 'down';
+        return 'flat';
+    }
     async runSelfImprovement(cwd, agentId) {
         if (!process.env.MEM0_SSH_HOST)
             return;
@@ -975,7 +1060,14 @@ export class ClaudeCodeAdapter {
             }
             // Filter 1: Quality gate — reject garbage rules
             const MAX_LEARNED_RULES = 50;
-            const MIN_CONFIDENCE = 2;
+            // Score-gate (SIA-style): if recent self-improvements have NOT improved the agent's
+            // score (flat/down trend), raise the confidence bar so we stop piling on rules that
+            // aren't helping. Only loosen the bar when the trend is actually improving.
+            const recentScores = ClaudeCodeAdapter.readRecentScores(cwd, 5);
+            const trend = ClaudeCodeAdapter.computeScoreTrend(recentScores);
+            this.getMetricsForAgent(agentId).scoreTrend = trend;
+            const MIN_CONFIDENCE = (trend === 'down' || trend === 'flat') ? 4 : 2;
+            console.log(`[Self-Improve] Score trend '${trend}' (recent=[${recentScores.join(',')}]) → confidence bar ${MIN_CONFIDENCE}${(trend === 'down' || trend === 'flat') ? ' (raised — recent rule additions not improving outcomes)' : ''}`);
             const qualityPatterns = patterns.filter(p => {
                 const r = p.rule.trim();
                 // Confidence gate — only persist rules seen 2+ times

package/dist/daemon/index.js CHANGED Viewed

@@ -137,6 +137,7 @@ export async function startDaemon(config) {
     const pendingPermissionResolvers = new Map();
     // Track active tasks per agent (agentId → taskId)
     const activeAgentTasks = new Map();
+    const agentQueues = new Map();
     // Track abort controllers per task
     const taskAbortControllers = new Map();
     // Note: currentTaskId/currentTaskAbort removed — use taskAbortControllers + activeAgentTasks instead
@@ -218,41 +219,13 @@ export async function startDaemon(config) {
                 if (task.repoPath?.startsWith('~/')) {
                     task.repoPath = path.join(os.homedir(), task.repoPath.slice(2));
                 }
-                // Check per-agent concurrency (each agent can run 1 task at a time)
+                // Per-agent concurrency: if busy, queue instead of rejecting.
                 const agentId = task.agentId || '__default__';
                 if (activeAgentTasks.has(agentId)) {
-                    console.log(`[Daemon] Agent ${agentId} busy — rejecting task ${task.id}`);
-                    ws.send({ action: 'task_rejected', taskId: task.id, reason: 'agent_busy' });
+                    enqueueForAgent(agentId, { kind: 'task', task });
                     break;
                 }
-                activeTasks++;
-                activeAgentTasks.set(agentId, task.id);
-                const abort = new AbortController();
-                taskAbortControllers.set(task.id, abort);
-                console.log(`[Daemon] Received task: ${task.id} agent=${agentId} — ${task.description.slice(0, 80)} (attachments: ${task.attachments?.length ?? 0}) [active: ${activeTasks}]`);
-                // Run task in background (non-blocking) to allow parallel agent execution
-                (async () => {
-                    try {
-                        await adapter.handleTask(task, ws, pendingInputResolvers, abort.signal, pendingPermissionResolvers);
-                    }
-                    catch (err) {
-                        const errMsg = err instanceof Error ? err.message : String(err);
-                        if (abort.signal.aborted) {
-                            console.log(`[Daemon] Task ${task.id} cancelled`);
-                        }
-                        else {
-                            ws.sendTaskFailed(task.id, errMsg);
-                            console.error(`[Daemon] Task ${task.id} error:`, errMsg);
-                        }
-                    }
-                    finally {
-                        activeTasks--;
-                        activeAgentTasks.delete(agentId);
-                        taskAbortControllers.delete(task.id);
-                        pendingInputResolvers.delete(task.id);
-                        ws.send({ action: 'agent_ready', agentId });
-                    }
-                })();
+                runTaskNow(task);
                 break;
             }
             case 'task_cancelled': {
@@ -479,19 +452,15 @@ ${skillContent.slice(0, 15000)}`;
                     // No resolver — check if we have a persisted session to resume
                     const savedState = loadPMState(taskId);
                     if (savedState) {
-                        // Check per-agent concurrency before resuming
+                        // Per-agent concurrency: if busy, queue the reply instead of dropping it.
                         const resumeAgentId = savedState.agentId || '__default__';
                         if (activeAgentTasks.has(resumeAgentId)) {
                             const busyTask = activeAgentTasks.get(resumeAgentId);
-                            console.warn(`[Daemon] Cannot resume task ${taskId} — agent ${resumeAgentId} is busy with task ${busyTask}`);
-                            // Notify user and revert task status so it doesn't get stuck at "executing"
+                            console.log(`[Daemon] Agent ${resumeAgentId} busy with ${busyTask} — queuing reply for task ${taskId}`);
+                            enqueueForAgent(resumeAgentId, { kind: 'input', taskId, answer, attachments, savedState });
                             ws.sendPMMessage(taskId, {
                                 sender: 'pm',
-                                content: 'Agent is currently busy with another task. Please try again shortly.',
-                            });
-                            ws.sendTaskDone(taskId, {
-                                result: 'Agent busy — message not processed',
-                                durationMs: 0,
+                                content: '⏳ Đang xử lý việc khác, sẽ trả lời ngay khi xong.',
                             });
                             break;
                         }
@@ -697,6 +666,67 @@ ${skillContent.slice(0, 15000)}`;
                 console.log(`[Daemon] Unknown message type: ${type}`);
         }
     }, onAuthFailed);
+    /** Queue work for an agent that is currently busy. */
+    function enqueueForAgent(agentId, item) {
+        let q = agentQueues.get(agentId);
+        if (!q) {
+            q = [];
+            agentQueues.set(agentId, q);
+        }
+        q.push(item);
+        console.log(`[Daemon] Agent ${agentId} busy — queued ${item.kind} (queue: ${q.length})`);
+    }
+    /** Start the next queued item for an agent, if any and the agent is free. */
+    function processNextForAgent(agentId) {
+        if (activeAgentTasks.has(agentId))
+            return; // still busy
+        const q = agentQueues.get(agentId);
+        if (!q || q.length === 0)
+            return;
+        const item = q.shift();
+        console.log(`[Daemon] Dequeue ${item.kind} for agent ${agentId} (remaining: ${q.length})`);
+        if (item.kind === 'task') {
+            runTaskNow(item.task);
+        }
+        else if (item.savedState.mode === 'agent_team') {
+            resumeAgentTeamChat(item.taskId, item.answer, item.attachments, item.savedState, ws, pendingInputResolvers);
+        }
+        else {
+            resumePMChat(item.taskId, item.answer, item.attachments, item.savedState, ws, pendingInputResolvers);
+        }
+    }
+    /** Run a task immediately (agent assumed free). Drains the queue when done. */
+    function runTaskNow(task) {
+        const agentId = task.agentId || '__default__';
+        activeTasks++;
+        activeAgentTasks.set(agentId, task.id);
+        const abort = new AbortController();
+        taskAbortControllers.set(task.id, abort);
+        console.log(`[Daemon] Received task: ${task.id} agent=${agentId} — ${task.description.slice(0, 80)} (attachments: ${task.attachments?.length ?? 0}) [active: ${activeTasks}]`);
+        (async () => {
+            try {
+                await adapter.handleTask(task, ws, pendingInputResolvers, abort.signal, pendingPermissionResolvers);
+            }
+            catch (err) {
+                const errMsg = err instanceof Error ? err.message : String(err);
+                if (abort.signal.aborted) {
+                    console.log(`[Daemon] Task ${task.id} cancelled`);
+                }
+                else {
+                    ws.sendTaskFailed(task.id, errMsg);
+                    console.error(`[Daemon] Task ${task.id} error:`, errMsg);
+                }
+            }
+            finally {
+                activeTasks--;
+                activeAgentTasks.delete(agentId);
+                taskAbortControllers.delete(task.id);
+                pendingInputResolvers.delete(task.id);
+                ws.send({ action: 'agent_ready', agentId });
+                processNextForAgent(agentId);
+            }
+        })();
+    }
     /**
      * Resume PM chat for a task after agent restart or done task reopen.
      * Loads persisted pmSessionId and runs a chat loop.
@@ -803,6 +833,7 @@ ${skillContent.slice(0, 15000)}`;
             resolvers.delete(taskId);
             cleanupAttachments(taskId);
             wsClient.send({ action: 'agent_ready', agentId });
+            processNextForAgent(agentId);
         }
     }
     /**
@@ -1049,6 +1080,7 @@ ${skillContent.slice(0, 15000)}`;
             resolvers.delete(taskId);
             cleanupAttachments(taskId);
             wsClient.send({ action: 'agent_ready', agentId });
+            processNextForAgent(agentId);
         }
     }
     // Wire up agent metrics to heartbeat

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "tuna-agent",
-  "version": "0.1.164",
+  "version": "0.1.166",
   "description": "Tuna Agent - Run AI coding tasks on your machine",
   "bin": {
     "tuna-agent": "dist/cli/index.js"