tuna-agent 0.1.164 → 0.1.166

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,6 +22,8 @@ export interface AgentMetrics {
22
22
  lastReflectionAt: string | null;
23
23
  lastPatternAt: string | null;
24
24
  latestLearnedRule: string | null;
25
+ lastScore: number | null;
26
+ scoreTrend: string | null;
25
27
  upSince: string;
26
28
  }
27
29
  export declare class ClaudeCodeAdapter implements AgentAdapter {
@@ -76,6 +78,24 @@ export declare class ClaudeCodeAdapter implements AgentAdapter {
76
78
  private static isSimilarRule;
77
79
  /** Cosine similarity between two equal-length embedding vectors. */
78
80
  private static cosine;
81
+ /** Path to the per-agent self-improvement score log (one JSON line per scored task). */
82
+ private static scoresFile;
83
+ /**
84
+ * Derive a 0-10 quality score for a completed task — the signal the self-improvement
85
+ * loop optimizes against (SIA-style: a measurable score gates whether changes help).
86
+ * Prefers an explicit rubric score the agent emits ("[SCORE: N]" / "QUALITY_SCORE: N");
87
+ * otherwise falls back to a deterministic proxy from status + result keywords.
88
+ */
89
+ static deriveScore(status: 'done' | 'failed', resultSummary: string): number;
90
+ /** Append one score record to the agent's score log; update in-memory metrics. */
91
+ recordRunScore(cwd: string, agentId: string, score: number, note: string): void;
92
+ /** Read the last N scores from the agent's score log (oldest→newest). */
93
+ static readRecentScores(cwd: string, n: number): number[];
94
+ /**
95
+ * Classify the recent score trend. Compares the latest score to the mean of the
96
+ * prior window. Needs >=3 points; otherwise 'insufficient'.
97
+ */
98
+ static computeScoreTrend(scores: number[]): 'up' | 'flat' | 'down' | 'insufficient';
79
99
  runSelfImprovement(cwd: string, agentId: string): Promise<void>;
80
100
  /**
81
101
  * Parse "## Learned Rules" section from CLAUDE.md and store in learnedRulesMap.
@@ -36,6 +36,8 @@ export class ClaudeCodeAdapter {
36
36
  lastReflectionAt: null,
37
37
  lastPatternAt: null,
38
38
  latestLearnedRule: null,
39
+ lastScore: null,
40
+ scoreTrend: null,
39
41
  upSince: new Date().toISOString(),
40
42
  });
41
43
  }
@@ -844,6 +846,10 @@ export class ClaudeCodeAdapter {
844
846
  return;
845
847
  const agentName = path.basename(cwd);
846
848
  const agentId = task.agentId || '';
849
+ // Record a quality score for this task — the signal the self-improvement loop
850
+ // optimizes against (closes the loop: score in -> trend gates rule changes out).
851
+ const runScore = ClaudeCodeAdapter.deriveScore(status, resultSummary);
852
+ this.recordRunScore(cwd, agentId, runScore, `${status}: ${task.description.substring(0, 80)}`);
847
853
  try {
848
854
  // Step 1: Generate AI-powered reflection via Ollama
849
855
  console.log(`[Reflection] Generating AI reflection for task ${task.id} (${status}), input: ${resultSummary.substring(0, 150)}...`);
@@ -934,6 +940,85 @@ export class ClaudeCodeAdapter {
934
940
  }
935
941
  return (na && nb) ? dot / (Math.sqrt(na) * Math.sqrt(nb)) : 0;
936
942
  }
943
+ /** Path to the per-agent self-improvement score log (one JSON line per scored task). */
944
+ static scoresFile(cwd) {
945
+ return path.join(cwd, 'self-improve-scores.jsonl');
946
+ }
947
+ /**
948
+ * Derive a 0-10 quality score for a completed task — the signal the self-improvement
949
+ * loop optimizes against (SIA-style: a measurable score gates whether changes help).
950
+ * Prefers an explicit rubric score the agent emits ("[SCORE: N]" / "QUALITY_SCORE: N");
951
+ * otherwise falls back to a deterministic proxy from status + result keywords.
952
+ */
953
+ static deriveScore(status, resultSummary) {
954
+ const text = resultSummary || '';
955
+ const m = text.match(/(?:\[score:?\s*|quality_score:?\s*|score:\s*)(\d+(?:\.\d+)?)\s*(?:\/\s*10)?/i);
956
+ if (m) {
957
+ const s = parseFloat(m[1]);
958
+ if (!isNaN(s))
959
+ return Math.max(0, Math.min(10, s));
960
+ }
961
+ if (status === 'failed')
962
+ return 2;
963
+ let score = 6; // baseline for a completed task
964
+ const low = text.toLowerCase();
965
+ if (/\b(verified|confirmed|passed|deployed|completed successfully|all tests pass)\b/.test(low))
966
+ score += 2;
967
+ if (/\b(improved|optimi[sz]ed|fixed|resolved)\b/.test(low))
968
+ score += 1;
969
+ if (/\b(partial|incomplete|could not|unable|skipped|blocked|warning|fallback|degraded)\b/.test(low))
970
+ score -= 2;
971
+ if (/\b(error|failed|exception|timeout)\b/.test(low))
972
+ score -= 1;
973
+ return Math.max(0, Math.min(10, score));
974
+ }
975
+ /** Append one score record to the agent's score log; update in-memory metrics. */
976
+ recordRunScore(cwd, agentId, score, note) {
977
+ try {
978
+ const m = this.getMetricsForAgent(agentId);
979
+ const entry = { date: new Date().toISOString(), score, rulesCount: m.rulesCount, note: note.substring(0, 200) };
980
+ fs.appendFileSync(ClaudeCodeAdapter.scoresFile(cwd), JSON.stringify(entry) + '\n');
981
+ m.lastScore = score;
982
+ }
983
+ catch (err) {
984
+ console.warn(`[Self-Improve] recordRunScore failed:`, err instanceof Error ? err.message : err);
985
+ }
986
+ }
987
+ /** Read the last N scores from the agent's score log (oldest→newest). */
988
+ static readRecentScores(cwd, n) {
989
+ try {
990
+ const f = ClaudeCodeAdapter.scoresFile(cwd);
991
+ if (!fs.existsSync(f))
992
+ return [];
993
+ const lines = fs.readFileSync(f, 'utf-8').trim().split('\n').filter(Boolean);
994
+ return lines.slice(-n).map(l => { try {
995
+ return JSON.parse(l).score;
996
+ }
997
+ catch {
998
+ return NaN;
999
+ } }).filter(s => !isNaN(s));
1000
+ }
1001
+ catch {
1002
+ return [];
1003
+ }
1004
+ }
1005
+ /**
1006
+ * Classify the recent score trend. Compares the latest score to the mean of the
1007
+ * prior window. Needs >=3 points; otherwise 'insufficient'.
1008
+ */
1009
+ static computeScoreTrend(scores) {
1010
+ if (scores.length < 3)
1011
+ return 'insufficient';
1012
+ const recent = scores[scores.length - 1];
1013
+ const prior = scores.slice(0, -1);
1014
+ const priorAvg = prior.reduce((a, b) => a + b, 0) / prior.length;
1015
+ const delta = recent - priorAvg;
1016
+ if (delta > 0.5)
1017
+ return 'up';
1018
+ if (delta < -0.5)
1019
+ return 'down';
1020
+ return 'flat';
1021
+ }
937
1022
  async runSelfImprovement(cwd, agentId) {
938
1023
  if (!process.env.MEM0_SSH_HOST)
939
1024
  return;
@@ -975,7 +1060,14 @@ export class ClaudeCodeAdapter {
975
1060
  }
976
1061
  // Filter 1: Quality gate — reject garbage rules
977
1062
  const MAX_LEARNED_RULES = 50;
978
- const MIN_CONFIDENCE = 2;
1063
+ // Score-gate (SIA-style): if recent self-improvements have NOT improved the agent's
1064
+ // score (flat/down trend), raise the confidence bar so we stop piling on rules that
1065
+ // aren't helping. The bar stays at the default (2) when the trend is 'up' or 'insufficient'.
1066
+ const recentScores = ClaudeCodeAdapter.readRecentScores(cwd, 5);
1067
+ const trend = ClaudeCodeAdapter.computeScoreTrend(recentScores);
1068
+ this.getMetricsForAgent(agentId).scoreTrend = trend;
1069
+ const MIN_CONFIDENCE = (trend === 'down' || trend === 'flat') ? 4 : 2;
1070
+ console.log(`[Self-Improve] Score trend '${trend}' (recent=[${recentScores.join(',')}]) → confidence bar ${MIN_CONFIDENCE}${(trend === 'down' || trend === 'flat') ? ' (raised — recent rule additions not improving outcomes)' : ''}`);
979
1071
  const qualityPatterns = patterns.filter(p => {
980
1072
  const r = p.rule.trim();
981
1073
  // Confidence gate — only persist rules seen MIN_CONFIDENCE+ times (2 by default, 4 when the score trend is flat/down)
@@ -137,6 +137,7 @@ export async function startDaemon(config) {
137
137
  const pendingPermissionResolvers = new Map();
138
138
  // Track active tasks per agent (agentId → taskId)
139
139
  const activeAgentTasks = new Map();
140
+ const agentQueues = new Map();
140
141
  // Track abort controllers per task
141
142
  const taskAbortControllers = new Map();
142
143
  // Note: currentTaskId/currentTaskAbort removed — use taskAbortControllers + activeAgentTasks instead
@@ -218,41 +219,13 @@ export async function startDaemon(config) {
218
219
  if (task.repoPath?.startsWith('~/')) {
219
220
  task.repoPath = path.join(os.homedir(), task.repoPath.slice(2));
220
221
  }
221
- // Check per-agent concurrency (each agent can run 1 task at a time)
222
+ // Per-agent concurrency: if busy, queue instead of rejecting.
222
223
  const agentId = task.agentId || '__default__';
223
224
  if (activeAgentTasks.has(agentId)) {
224
- console.log(`[Daemon] Agent ${agentId} busy rejecting task ${task.id}`);
225
- ws.send({ action: 'task_rejected', taskId: task.id, reason: 'agent_busy' });
225
+ enqueueForAgent(agentId, { kind: 'task', task });
226
226
  break;
227
227
  }
228
- activeTasks++;
229
- activeAgentTasks.set(agentId, task.id);
230
- const abort = new AbortController();
231
- taskAbortControllers.set(task.id, abort);
232
- console.log(`[Daemon] Received task: ${task.id} agent=${agentId} — ${task.description.slice(0, 80)} (attachments: ${task.attachments?.length ?? 0}) [active: ${activeTasks}]`);
233
- // Run task in background (non-blocking) to allow parallel agent execution
234
- (async () => {
235
- try {
236
- await adapter.handleTask(task, ws, pendingInputResolvers, abort.signal, pendingPermissionResolvers);
237
- }
238
- catch (err) {
239
- const errMsg = err instanceof Error ? err.message : String(err);
240
- if (abort.signal.aborted) {
241
- console.log(`[Daemon] Task ${task.id} cancelled`);
242
- }
243
- else {
244
- ws.sendTaskFailed(task.id, errMsg);
245
- console.error(`[Daemon] Task ${task.id} error:`, errMsg);
246
- }
247
- }
248
- finally {
249
- activeTasks--;
250
- activeAgentTasks.delete(agentId);
251
- taskAbortControllers.delete(task.id);
252
- pendingInputResolvers.delete(task.id);
253
- ws.send({ action: 'agent_ready', agentId });
254
- }
255
- })();
228
+ runTaskNow(task);
256
229
  break;
257
230
  }
258
231
  case 'task_cancelled': {
@@ -479,19 +452,15 @@ ${skillContent.slice(0, 15000)}`;
479
452
  // No resolver — check if we have a persisted session to resume
480
453
  const savedState = loadPMState(taskId);
481
454
  if (savedState) {
482
- // Check per-agent concurrency before resuming
455
+ // Per-agent concurrency: if busy, queue the reply instead of dropping it.
483
456
  const resumeAgentId = savedState.agentId || '__default__';
484
457
  if (activeAgentTasks.has(resumeAgentId)) {
485
458
  const busyTask = activeAgentTasks.get(resumeAgentId);
486
- console.warn(`[Daemon] Cannot resume task ${taskId} agent ${resumeAgentId} is busy with task ${busyTask}`);
487
- // Notify user and revert task status so it doesn't get stuck at "executing"
459
+ console.log(`[Daemon] Agent ${resumeAgentId} busy with ${busyTask} queuing reply for task ${taskId}`);
460
+ enqueueForAgent(resumeAgentId, { kind: 'input', taskId, answer, attachments, savedState });
488
461
  ws.sendPMMessage(taskId, {
489
462
  sender: 'pm',
490
- content: 'Agent is currently busy with another task. Please try again shortly.',
491
- });
492
- ws.sendTaskDone(taskId, {
493
- result: 'Agent busy — message not processed',
494
- durationMs: 0,
463
+ content: ' Đang xử việc khác, sẽ trả lời ngay khi xong.',
495
464
  });
496
465
  break;
497
466
  }
@@ -697,6 +666,67 @@ ${skillContent.slice(0, 15000)}`;
697
666
  console.log(`[Daemon] Unknown message type: ${type}`);
698
667
  }
699
668
  }, onAuthFailed);
669
/**
 * Append a work item to the given agent's pending queue, creating the queue on first
 * use, and log the resulting queue length.
 */
function enqueueForAgent(agentId, item) {
    const existing = agentQueues.get(agentId);
    const queue = existing ? existing : [];
    if (!existing) {
        agentQueues.set(agentId, queue);
    }
    queue.push(item);
    console.log(`[Daemon] Agent ${agentId} busy — queued ${item.kind} (queue: ${queue.length})`);
}
679
/**
 * Pull the oldest queued item for an agent and start it.
 *
 * Does nothing while the agent still has an active task or when its queue is empty.
 * A 'task' item is run via runTaskNow; any other item is treated as a queued reply and
 * is resumed through the agent-team or PM chat path depending on its saved mode.
 */
function processNextForAgent(agentId) {
    if (activeAgentTasks.has(agentId)) {
        return; // still busy
    }
    const pending = agentQueues.get(agentId);
    if (!pending?.length) {
        return;
    }
    const next = pending.shift();
    console.log(`[Daemon] Dequeue ${next.kind} for agent ${agentId} (remaining: ${pending.length})`);
    if (next.kind === 'task') {
        runTaskNow(next.task);
        return;
    }
    // Queued reply: pick the resume path from the persisted session mode.
    const resume = next.savedState.mode === 'agent_team' ? resumeAgentTeamChat : resumePMChat;
    resume(next.taskId, next.answer, next.attachments, next.savedState, ws, pendingInputResolvers);
}
698
/**
 * Start a task right away; the caller is expected to have checked that the agent is free.
 *
 * Marks the agent as busy and registers an abort controller for the task, then runs
 * the adapter in the background without blocking the caller. When the run settles,
 * the bookkeeping is cleared, 'agent_ready' is sent, and the agent's next queued item
 * (if any) is started.
 */
function runTaskNow(task) {
    const agentId = task.agentId || '__default__';
    activeTasks++;
    activeAgentTasks.set(agentId, task.id);
    const abort = new AbortController();
    taskAbortControllers.set(task.id, abort);
    console.log(`[Daemon] Received task: ${task.id} agent=${agentId} — ${task.description.slice(0, 80)} (attachments: ${task.attachments?.length ?? 0}) [active: ${activeTasks}]`);
    // Clears per-task state, announces readiness, then drains the agent's queue.
    const releaseAgent = () => {
        activeTasks--;
        activeAgentTasks.delete(agentId);
        taskAbortControllers.delete(task.id);
        pendingInputResolvers.delete(task.id);
        ws.send({ action: 'agent_ready', agentId });
        processNextForAgent(agentId);
    };
    const execute = async () => {
        try {
            await adapter.handleTask(task, ws, pendingInputResolvers, abort.signal, pendingPermissionResolvers);
        }
        catch (err) {
            const errMsg = err instanceof Error ? err.message : String(err);
            if (abort.signal.aborted) {
                // An aborted run is a cancellation, not a failure — no failure report is sent.
                console.log(`[Daemon] Task ${task.id} cancelled`);
                return;
            }
            ws.sendTaskFailed(task.id, errMsg);
            console.error(`[Daemon] Task ${task.id} error:`, errMsg);
        }
        finally {
            releaseAgent();
        }
    };
    // Fire and forget: the caller does not wait for the task to finish.
    void execute();
}
700
730
  /**
701
731
  * Resume PM chat for a task after agent restart or done task reopen.
702
732
  * Loads persisted pmSessionId and runs a chat loop.
@@ -803,6 +833,7 @@ ${skillContent.slice(0, 15000)}`;
803
833
  resolvers.delete(taskId);
804
834
  cleanupAttachments(taskId);
805
835
  wsClient.send({ action: 'agent_ready', agentId });
836
+ processNextForAgent(agentId);
806
837
  }
807
838
  }
808
839
  /**
@@ -1049,6 +1080,7 @@ ${skillContent.slice(0, 15000)}`;
1049
1080
  resolvers.delete(taskId);
1050
1081
  cleanupAttachments(taskId);
1051
1082
  wsClient.send({ action: 'agent_ready', agentId });
1083
+ processNextForAgent(agentId);
1052
1084
  }
1053
1085
  }
1054
1086
  // Wire up agent metrics to heartbeat
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tuna-agent",
3
- "version": "0.1.164",
3
+ "version": "0.1.166",
4
4
  "description": "Tuna Agent - Run AI coding tasks on your machine",
5
5
  "bin": {
6
6
  "tuna-agent": "dist/cli/index.js"