claude-teammate 0.1.294 → 0.1.296
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/claude/process.js +13 -5
- package/src/claude.js +18 -6
- package/src/skills/fixer.js +54 -6
- package/src/skills/index.js +167 -24
- package/src/skills/observer.js +1 -1
package/package.json
CHANGED
package/src/claude/process.js
CHANGED
|
@@ -38,12 +38,19 @@ export function buildStreamArgs(args) {
|
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
/**
 * Build a one-line, human-readable message describing a failed Claude CLI run.
 *
 * @param {unknown} error - Rejection value from the CLI runner. When it is an
 *   object, its optional `stderr`, `stdout`, `killed`, `signal` and `code`
 *   properties are read; any other value yields the generic message.
 * @param {number} timeoutMs - Timeout that was in effect; only reported when
 *   `error.killed` is truthy.
 * @returns {string} Message starting with "Claude CLI invocation failed".
 */
export function formatClaudeInvocationError(error, timeoutMs) {
  const isObj = error !== null && typeof error === "object";
  const read = (key) => (isObj && key in error ? error[key] : undefined);

  const stderr = String(read("stderr") || "");
  const output = String(read("stdout") || "");
  const timeout = Boolean(read("killed"));
  const signal = String(read("signal") || "");
  const rawCode = read("code");
  const code = rawCode === null || rawCode === undefined ? "" : String(rawCode);

  // stderr first, then stdout; capped so a chatty failure cannot flood logs.
  const details = [stderr.trim(), output.trim()].filter(Boolean).join("\n").slice(0, 1000);

  // The code surfaces E2BIG/OOM/etc when stderr/stdout are empty. It is only
  // included when nothing else is available so well-behaved errors stay
  // readable. Numeric codes are process exit codes; string codes (for example
  // "ENOENT" from a failed spawn) are errno names and must not be labelled "exit".
  let codeFragment = "";
  if (!timeout && !signal && !details && code !== "") {
    codeFragment = /^-?\d+$/.test(code) ? ` (exit ${code})` : ` (${code})`;
  }

  const timeoutFragment = timeout ? ` after ${timeoutMs}ms` : "";
  const signalFragment = signal ? ` (${signal})` : "";
  const tail = details ? `: ${details}` : ".";
  return `Claude CLI invocation failed${timeoutFragment}${signalFragment}${codeFragment}${tail}`;
}
|
|
48
55
|
|
|
49
56
|
export function shouldRetryClaudeCommand(options = {}, attempt) {
|
|
@@ -157,6 +164,7 @@ function runClaudeCommandOnce(command, args, options) {
|
|
|
157
164
|
}
|
|
158
165
|
|
|
159
166
|
reject({
|
|
167
|
+
code,
|
|
160
168
|
stdout,
|
|
161
169
|
stderr,
|
|
162
170
|
killed: timedOut,
|
package/src/claude.js
CHANGED
|
@@ -498,7 +498,7 @@ export async function runClaudeClarification(input) {
|
|
|
498
498
|
);
|
|
499
499
|
}
|
|
500
500
|
|
|
501
|
-
const SKILL_CORRECTION_SCHEMA = {
|
|
501
|
+
export const SKILL_CORRECTION_SCHEMA = {
|
|
502
502
|
type: "object",
|
|
503
503
|
properties: {
|
|
504
504
|
isCorrection: { type: "boolean" },
|
|
@@ -509,17 +509,29 @@ const SKILL_CORRECTION_SCHEMA = {
|
|
|
509
509
|
additionalProperties: false
|
|
510
510
|
};
|
|
511
511
|
|
|
512
|
-
const SKILL_CORRECTION_SYSTEM = `You analyze user messages that reply to a previous AI response generated by a skill (slash command). Your job is to extract feedback so the skill can be improved.
|
|
512
|
+
export const SKILL_CORRECTION_SYSTEM = `You analyze user messages that reply to a previous AI response generated by a skill (slash command). Your job is to extract concrete feedback so the skill can be improved — and to refuse vague, off-topic, or low-signal replies that would only thrash the skill fixer.
|
|
513
513
|
|
|
514
514
|
User messages may be in any language (English, Vietnamese, Chinese, etc.). Treat all languages equally.
|
|
515
515
|
|
|
516
|
-
Default stance:
|
|
516
|
+
Default stance: isCorrection=FALSE. Only flip to true when the message clearly meets ALL of:
|
|
517
|
+
1. The user is replying about the previous bot output (not asking a new task, not status-checking, not chit-chat).
|
|
518
|
+
2. The user identifies something concrete that should change — wrong content, missing information, wrong format/structure, factual mistake, broken instruction, mis-applied rule.
|
|
519
|
+
3. The change is actionable — a maintainer could read correctionSummary and edit the skill instructions to satisfy it.
|
|
517
520
|
|
|
518
|
-
Set isCorrection=
|
|
521
|
+
Set isCorrection=FALSE in any of these cases (be conservative — false negatives are cheap, false positives waste a fix run):
|
|
522
|
+
- The message asks a follow-up or new task unrelated to revising the previous output.
|
|
523
|
+
- The message is a status/progress check ("done?", "any update?", "ETA?").
|
|
524
|
+
- The message is acknowledgement, thanks, or off-topic remarks.
|
|
525
|
+
- The complaint is vague ("not good", "redo", "this is wrong") with no specific change.
|
|
526
|
+
- The user is asking about a different feature/skill than the one that produced the previous output.
|
|
527
|
+
- The previous output is missing or unrelated to the user's reply.
|
|
519
528
|
|
|
520
529
|
When isCorrection=true:
|
|
521
|
-
- skillName: required best-effort.
|
|
522
|
-
|
|
530
|
+
- skillName: required, best-effort. Acceptable sources, in priority order:
|
|
531
|
+
1. The message explicitly references a slash command or skill identifier (e.g. /generate-test-design, "the test-design skill").
|
|
532
|
+
2. The previous bot output explicitly identifies the skill that produced it.
|
|
533
|
+
Otherwise return null. DO NOT guess from generic English words ("plan", "review", "test"); a skill with that exact name probably does not exist and a wrong guess sends the fixer down a dead end.
|
|
534
|
+
- correctionSummary: one English sentence capturing the concrete change. Include both what was wrong/missing and what it should be instead. Must be specific enough to act on; if you cannot write a specific sentence, set isCorrection=false instead.`;
|
|
523
535
|
|
|
524
536
|
/**
|
|
525
537
|
* Lightweight haiku call to detect if a human comment is correcting a skill's output.
|
package/src/skills/fixer.js
CHANGED
|
@@ -106,7 +106,7 @@ General:
|
|
|
106
106
|
- analysis must summarize what you read and what the improvement is (this becomes the PR description).
|
|
107
107
|
- reason must be one sentence explaining the concrete improvement, or "no improvement needed" when returning an empty payload.`;
|
|
108
108
|
|
|
109
|
-
const SKILL_FIX_TIMEOUT_MS =
|
|
109
|
+
const SKILL_FIX_TIMEOUT_MS = 360_000;
|
|
110
110
|
|
|
111
111
|
// Per-repo serialization: one PR creation at a time per repo. Different repos
|
|
112
112
|
// run in parallel. Keyed by absolute project root path. Worktree creation under
|
|
@@ -465,6 +465,52 @@ async function getDefaultBranch(projectRoot) {
|
|
|
465
465
|
return "main";
|
|
466
466
|
}
|
|
467
467
|
|
|
468
|
+
/**
 * Push the fix branch upstream. Default strategy is `--force-with-lease` (safe
 * against accidental overwrite of work the remote knows about that we don't).
 *
 * Stale-info recovery: a closed-but-undeleted remote branch from a prior fix
 * can leave us without a valid lease. We pre-fetch the branch (failure is
 * logged and ignored, e.g. when the branch does not exist remotely yet), and
 * if the lease is still rejected as "stale info", fall back to a plain
 * `--force` push. The caller builds these branches with the `fix/skill-` or
 * `improve/skill-` prefix; the fallback assumes this code path is their only
 * writer.
 *
 * Only a lease failure triggers the fallback. Any other push error (for
 * example a server-side "remote rejected" from a protected branch or hook, an
 * auth failure, or a timeout) is rethrown unchanged, because retrying with
 * `--force` cannot fix it and would mislabel the cause in the logs.
 *
 * Exposed for testing via `__testing.pushBranchWithLease`. The `exec` parameter
 * lets tests inject a fake exec without spinning up real git.
 *
 * @param {object} params
 * @param {string} params.cwd - Working directory for the git commands.
 * @param {string} params.branch - Branch name to fetch and push.
 * @param {object} [params.env] - Environment passed to git when provided.
 * @param {object} [params.logger] - Optional logger; `info`/`warn` are used if present.
 * @param {string} [params.skill] - Skill name, included in log context only.
 * @param {Function} [params.exec] - `(cmd, args, opts) => Promise` runner.
 * @returns {Promise<{used: "force-with-lease" | "force"}>} Which strategy succeeded.
 * @throws Rethrows the push error when it is not a lease failure, or when the
 *   `--force` retry also fails.
 */
async function pushBranchWithLease({ cwd, branch, env, logger, skill, exec = execFileAsync }) {
  const opts = { cwd, timeout: 60000, ...(env && { env }) };
  const fetchOpts = { cwd, timeout: 30000, ...(env && { env }) };

  // Pre-fetch so --force-with-lease has a known remote SHA. Branch may not
  // exist on the remote yet (first push) — that exit code is non-fatal.
  try {
    await exec("git", ["fetch", "origin", branch], fetchOpts);
  } catch (fetchErr) {
    logger?.info?.("skill-fix: pre-push fetch of fix branch failed (likely first push)", {
      skill,
      branch,
      error: fetchErr?.message
    });
  }

  try {
    await exec("git", ["push", "--force-with-lease", "-u", "origin", branch], opts);
    return { used: "force-with-lease" };
  } catch (err) {
    const stderr = String(err?.stderr || err?.message || "");
    // git reports a failed lease as "! [rejected] ... (stale info)". Matching
    // the bare word "rejected" would also catch "[remote rejected]".
    const isStale = /stale info/i.test(stderr);
    if (!isStale) throw err;
    logger?.warn?.("skill-fix: --force-with-lease rejected (stale info), retrying with --force", {
      skill,
      branch
    });
    await exec("git", ["push", "--force", "-u", "origin", branch], opts);
    return { used: "force" };
  }
}
|
|
513
|
+
|
|
468
514
|
async function createSkillFixPR({ skillName, files, reason, analysis, location, projectRoot, logger, mode }) {
|
|
469
515
|
const isImprove = mode === "improve";
|
|
470
516
|
const branchPrefix = isImprove ? "improve/skill-" : "fix/skill-";
|
|
@@ -588,11 +634,12 @@ async function createSkillFixPR({ skillName, files, reason, analysis, location,
|
|
|
588
634
|
],
|
|
589
635
|
{ cwd: worktreePath, timeout: 10000 }
|
|
590
636
|
);
|
|
591
|
-
|
|
592
|
-
await execFileAsync("git", ["push", "--force-with-lease", "-u", "origin", branch], {
|
|
637
|
+
await pushBranchWithLease({
|
|
593
638
|
cwd: worktreePath,
|
|
594
|
-
|
|
595
|
-
|
|
639
|
+
branch,
|
|
640
|
+
env: gitAuthEnv,
|
|
641
|
+
logger,
|
|
642
|
+
skill: skillName
|
|
596
643
|
});
|
|
597
644
|
|
|
598
645
|
const prUrl = await openPR({ branch, prTitle, prBody, defaultBranch, provider, repo, projectRoot });
|
|
@@ -702,5 +749,6 @@ export const __testing = {
|
|
|
702
749
|
},
|
|
703
750
|
resetBackupMax() {
|
|
704
751
|
SKILL_BACKUP_MAX = 5;
|
|
705
|
-
}
|
|
752
|
+
},
|
|
753
|
+
pushBranchWithLease
|
|
706
754
|
};
|
package/src/skills/index.js
CHANGED
|
@@ -26,6 +26,15 @@ const COOLDOWN_SUCCESS_STATUSES = new Set(["patched", "patched-with-backup", "pr
|
|
|
26
26
|
// successful evaluation — count it so repeated sample hits don't burn Claude calls.
|
|
27
27
|
const IMPROVE_COOLDOWN_SUCCESS_STATUSES = new Set([...COOLDOWN_SUCCESS_STATUSES, "no-fix"]);
|
|
28
28
|
|
|
29
|
+
// Failure cooldown: stop retrying when the same (skill, errorType) keeps failing.
|
|
30
|
+
// Distinct from success cooldown — catches loops where the generator/CLI/git push
|
|
31
|
+
// keeps erroring out (e.g. repeated "Claude CLI invocation failed" or push rejected).
|
|
32
|
+
// Without this, user feedback or detector retries thrash the same skill indefinitely.
|
|
33
|
+
const COOLDOWN_FAILURE_STATUSES = new Set(["generation-error", "error", "patch-failed", "no-fix"]);
|
|
34
|
+
let SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD = 3;
|
|
35
|
+
export const SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS_DEFAULT = 30 * 60 * 1000;
|
|
36
|
+
let SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS = SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS_DEFAULT;
|
|
37
|
+
|
|
29
38
|
// Phase 4: proactive improvement detector. Default off — enabling can spawn
|
|
30
39
|
// improvement PRs on every successful task. Sample rate keeps cost bounded.
|
|
31
40
|
// Cooldown 24h prevents repeat improvement PRs for the same skill.
|
|
@@ -257,6 +266,33 @@ async function fixSkillsAsync(
|
|
|
257
266
|
continue;
|
|
258
267
|
}
|
|
259
268
|
|
|
269
|
+
// Failure cooldown: skip when prior attempts keep erroring (CLI invocation
|
|
270
|
+
// crashes, generation errors, push failures, no-fix loops). Without this,
|
|
271
|
+
// every detection cycle re-runs the same broken pipeline.
|
|
272
|
+
const recentFailureCount = await getRecentFailedAttemptCount({
|
|
273
|
+
eventsRoot,
|
|
274
|
+
skill: skillName,
|
|
275
|
+
errorType,
|
|
276
|
+
windowMs: SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS,
|
|
277
|
+
mode
|
|
278
|
+
});
|
|
279
|
+
if (recentFailureCount >= SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD) {
|
|
280
|
+
logger?.info("skill-fix: cooldown — repeated failures within window, skipping", {
|
|
281
|
+
skill: skillName,
|
|
282
|
+
errorType,
|
|
283
|
+
mode,
|
|
284
|
+
recentFailureCount
|
|
285
|
+
});
|
|
286
|
+
await appendSkillFixEvent(eventsRoot, {
|
|
287
|
+
skill: skillName,
|
|
288
|
+
errorType,
|
|
289
|
+
status: "cooldown",
|
|
290
|
+
mode,
|
|
291
|
+
recentFailureCount
|
|
292
|
+
});
|
|
293
|
+
continue;
|
|
294
|
+
}
|
|
295
|
+
|
|
260
296
|
// Resolve location AFTER cooldown so cooldown'd skills don't pay the FS read.
|
|
261
297
|
// Lock keyed by absolute location dir: global skills share a single key across
|
|
262
298
|
// projects (preventing concurrent overwrites of `~/.claude/skills/<name>`),
|
|
@@ -360,21 +396,92 @@ export function scheduleSkillFixWithFeedback({
|
|
|
360
396
|
return;
|
|
361
397
|
}
|
|
362
398
|
|
|
363
|
-
|
|
364
|
-
|
|
399
|
+
// Failure cooldown applies here too — without it, every Jira reply retriggers
|
|
400
|
+
// the same broken fix pipeline (CLI crashes, push errors). Cooldown is per
|
|
401
|
+
// (skill, "user-feedback") so unrelated detector failures do not block
|
|
402
|
+
// legitimate user corrections, and vice versa.
|
|
403
|
+
_runFeedbackFix({
|
|
365
404
|
skillName,
|
|
366
|
-
|
|
367
|
-
errorType: "user-feedback",
|
|
405
|
+
correctionSummary,
|
|
368
406
|
location,
|
|
369
407
|
projectRoot,
|
|
370
|
-
|
|
408
|
+
resolvedEventsRoot,
|
|
409
|
+
lockKey,
|
|
371
410
|
logger,
|
|
372
411
|
invokeClaudeTask,
|
|
373
412
|
epicContext,
|
|
374
413
|
issueKey
|
|
375
|
-
})
|
|
376
|
-
|
|
377
|
-
|
|
414
|
+
});
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
/**
 * Run a user-feedback-driven fix for one skill, guarded by the failure
 * cooldown and the in-process fix lock.
 *
 * The caller fires this without awaiting it, so this function must never
 * reject: every awaited step is either best-effort or wrapped.
 *
 * Flow:
 *  1. Count recent failed "user-feedback" attempts for the skill. If the count
 *     reaches the failure threshold, record a "cooldown" event and stop.
 *  2. If another fix holds `lockKey`, record "lock-skipped" and stop.
 *  3. Otherwise take the lock, run `fixSingleSkill`, and always release the lock.
 *
 * @param {object} params
 * @param {string} params.skillName - Skill to fix.
 * @param {string} params.correctionSummary - User correction, forwarded as error content.
 * @param {object} params.location - Resolved skill location; `location.type` is logged.
 * @param {string} params.projectRoot
 * @param {string} params.resolvedEventsRoot - Root used for reading/writing fix events.
 * @param {string} params.lockKey - Key in `activeFixLocks` guarding this skill location.
 * @param {object} [params.logger]
 * @param {Function} params.invokeClaudeTask
 * @param {*} [params.epicContext]
 * @param {string} [params.issueKey]
 * @returns {Promise<void>}
 */
async function _runFeedbackFix({
  skillName,
  correctionSummary,
  location,
  projectRoot,
  resolvedEventsRoot,
  lockKey,
  logger,
  invokeClaudeTask,
  epicContext,
  issueKey
}) {
  // Event logging is bookkeeping only: a failed write must neither change the
  // cooldown/lock decision nor surface as an unhandled rejection.
  const recordEvent = async (fields) => {
    try {
      await appendSkillFixEvent(resolvedEventsRoot, fields);
    } catch (err) {
      logger?.warn?.("skill-fix: failed to record user-feedback event", {
        skill: skillName,
        status: fields.status,
        error: err?.message
      });
    }
  };

  let recentFailureCount = 0;
  try {
    recentFailureCount = await getRecentFailedAttemptCount({
      eventsRoot: resolvedEventsRoot,
      skill: skillName,
      errorType: "user-feedback",
      windowMs: SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS,
      mode: "fix"
    });
  } catch {
    // Counting failures is best-effort; never let it block user feedback fixes.
  }

  if (recentFailureCount >= SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD) {
    logger?.info?.("skill-fix: cooldown — repeated user-feedback failures within window, skipping", {
      skill: skillName,
      recentFailureCount
    });
    await recordEvent({
      skill: skillName,
      location: location?.type,
      errorType: "user-feedback",
      status: "cooldown",
      recentFailureCount
    });
    return;
  }

  if (activeFixLocks.has(lockKey)) {
    // Recheck under the same flow (cooldown read above is async — another
    // feedback may have started in the meantime).
    await recordEvent({
      skill: skillName,
      errorType: "user-feedback",
      status: "lock-skipped"
    });
    return;
  }

  activeFixLocks.add(lockKey);
  try {
    await fixSingleSkill({
      skillName,
      errorContent: `User correction: ${correctionSummary}`,
      errorType: "user-feedback",
      location,
      projectRoot,
      eventsRoot: resolvedEventsRoot,
      logger,
      invokeClaudeTask,
      epicContext,
      issueKey
    });
  } catch (err) {
    // fixSingleSkill is not expected to throw; log instead of hiding it so a
    // regression there stays visible.
    logger?.warn?.("skill-fix: user-feedback fix threw unexpectedly", {
      skill: skillName,
      error: err?.message
    });
  } finally {
    activeFixLocks.delete(lockKey);
  }
}
|
|
379
486
|
|
|
380
487
|
async function fixSingleSkill({
|
|
@@ -575,22 +682,19 @@ async function appendSkillFixEvent(eventsRoot, fields) {
|
|
|
575
682
|
}
|
|
576
683
|
|
|
577
684
|
/**
|
|
578
|
-
* Count recent
|
|
579
|
-
* `
|
|
580
|
-
* concurrent write at most causes one extra fix to slip through
|
|
581
|
-
*
|
|
685
|
+
* Count recent skill-fix events for a (skill, errorType) pair within `windowMs`
|
|
686
|
+
* matching any of `statusSet`. Snapshot read — no lock against `appendSkillFixEvent`
|
|
687
|
+
* writes; a concurrent write at most causes one extra fix to slip through.
|
|
688
|
+
*
|
|
689
|
+
* `errorType` may be null/undefined to count across all errorTypes for a skill
|
|
690
|
+
* (intended for paths where errorType is uniform but the underlying cause varies,
|
|
691
|
+
* so per-skill is the right grain). NOTE(review): the user-feedback path passes "user-feedback", not null — confirm.
|
|
582
692
|
*/
|
|
583
|
-
async function
|
|
693
|
+
async function countRecentEventsByStatus({ eventsRoot, skill, errorType, windowMs, mode, statusSet }) {
|
|
584
694
|
if (!eventsRoot || !skill) return 0;
|
|
585
|
-
// Pre-Phase-4 events have no `mode` field — treat them as "fix" so legacy fix
|
|
586
|
-
// history still throttles new fixes correctly. Improve mode requires explicit match.
|
|
587
695
|
const expectedMode = mode || "fix";
|
|
588
|
-
// Compound errorTypes carry a trailing "+" (multiple modes contributed in one
|
|
589
|
-
// detection cycle). Normalize so e.g. "bash-error-in-skill" matches a prior
|
|
590
|
-
// "bash-error-in-skill+" event — same root cause, cooldown should still bite.
|
|
591
696
|
const normalize = (t) => (typeof t === "string" ? t.replace(/\+$/, "") : t);
|
|
592
|
-
const normalizedErrorType = normalize(errorType);
|
|
593
|
-
const successSet = expectedMode === "improve" ? IMPROVE_COOLDOWN_SUCCESS_STATUSES : COOLDOWN_SUCCESS_STATUSES;
|
|
697
|
+
const normalizedErrorType = errorType == null ? null : normalize(errorType);
|
|
594
698
|
try {
|
|
595
699
|
const file = path.join(eventsRoot, "memory", "skill-fixes.json");
|
|
596
700
|
const events = JSON.parse(await readFile(file, "utf8"));
|
|
@@ -599,8 +703,8 @@ async function getRecentSuccessfulFixCount({ eventsRoot, skill, errorType, windo
|
|
|
599
703
|
let count = 0;
|
|
600
704
|
for (const e of events) {
|
|
601
705
|
if (e?.skill !== skill) continue;
|
|
602
|
-
if (normalize(e?.errorType) !== normalizedErrorType) continue;
|
|
603
|
-
if (!
|
|
706
|
+
if (normalizedErrorType !== null && normalize(e?.errorType) !== normalizedErrorType) continue;
|
|
707
|
+
if (!statusSet.has(e?.status)) continue;
|
|
604
708
|
const eventMode = e?.mode || "fix";
|
|
605
709
|
if (eventMode !== expectedMode) continue;
|
|
606
710
|
const ts = Date.parse(e.ts);
|
|
@@ -609,22 +713,61 @@ async function getRecentSuccessfulFixCount({ eventsRoot, skill, errorType, windo
|
|
|
609
713
|
}
|
|
610
714
|
return count;
|
|
611
715
|
} catch {
|
|
612
|
-
// Missing file or parse error — treat as no history (consistent with append path)
|
|
613
716
|
return 0;
|
|
614
717
|
}
|
|
615
718
|
}
|
|
616
719
|
|
|
720
|
+
/**
 * Number of recent successful skill-fix events for a (skill, errorType) pair
 * inside `windowMs`.
 *
 * Which statuses count as "successful" depends on the mode: "improve" uses
 * IMPROVE_COOLDOWN_SUCCESS_STATUSES, every other mode (including a missing
 * one, which defaults to "fix") uses COOLDOWN_SUCCESS_STATUSES. The counting
 * itself is delegated to `countRecentEventsByStatus`, which reads a snapshot
 * without locking against `appendSkillFixEvent` writes; a concurrent write at
 * most lets one extra fix slip through, which is preferable to serializing
 * every detection cycle behind the events mutex.
 */
async function getRecentSuccessfulFixCount({ eventsRoot, skill, errorType, windowMs, mode }) {
  const isImproveMode = (mode || "fix") === "improve";
  const statusSet = isImproveMode ? IMPROVE_COOLDOWN_SUCCESS_STATUSES : COOLDOWN_SUCCESS_STATUSES;
  const query = { eventsRoot, skill, errorType, windowMs, mode, statusSet };
  return countRecentEventsByStatus(query);
}
|
|
738
|
+
|
|
739
|
+
/**
 * Number of recent failed skill-fix attempts for the same (skill, errorType)
 * inside `windowMs`, i.e. events whose status is in COOLDOWN_FAILURE_STATUSES.
 *
 * Callers use the result to break out of CLI/git/generation error loops where
 * the fix pipeline keeps retrying but never produces a usable patch.
 */
async function getRecentFailedAttemptCount({ eventsRoot, skill, errorType, windowMs, mode }) {
  const failedAttemptQuery = {
    eventsRoot,
    skill,
    errorType,
    windowMs,
    mode,
    statusSet: COOLDOWN_FAILURE_STATUSES
  };
  return countRecentEventsByStatus(failedAttemptQuery);
}
|
|
754
|
+
|
|
617
755
|
// Exported for tests only
|
|
618
756
|
export const __testing = {
|
|
619
757
|
appendSkillFixEvent,
|
|
620
758
|
getRecentSuccessfulFixCount,
|
|
621
|
-
|
|
759
|
+
getRecentFailedAttemptCount,
|
|
760
|
+
setCooldownConstants({ windowMs, threshold, failureWindowMs, failureThreshold } = {}) {
|
|
622
761
|
if (typeof windowMs === "number") SKILL_FIX_COOLDOWN_WINDOW_MS = windowMs;
|
|
623
762
|
if (typeof threshold === "number") SKILL_FIX_COOLDOWN_THRESHOLD = threshold;
|
|
763
|
+
if (typeof failureWindowMs === "number") SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS = failureWindowMs;
|
|
764
|
+
if (typeof failureThreshold === "number") SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD = failureThreshold;
|
|
624
765
|
},
|
|
625
766
|
resetCooldownConstants() {
|
|
626
767
|
SKILL_FIX_COOLDOWN_WINDOW_MS = SKILL_FIX_COOLDOWN_WINDOW_MS_DEFAULT;
|
|
627
768
|
SKILL_FIX_COOLDOWN_THRESHOLD = 2;
|
|
769
|
+
SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS = SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS_DEFAULT;
|
|
770
|
+
SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD = 3;
|
|
628
771
|
},
|
|
629
772
|
setImprovementConstants({ proactive, sampleRate, cooldownMs, cooldownThreshold } = {}) {
|
|
630
773
|
if (typeof proactive === "boolean") SKILL_IMPROVEMENT_PROACTIVE = proactive;
|
package/src/skills/observer.js
CHANGED
|
@@ -3,7 +3,7 @@ import { applySkillFailures } from "./index.js";
|
|
|
3
3
|
|
|
4
4
|
// Observer call ceiling. Hard cap so a runaway log read can never indefinitely
|
|
5
5
|
// block the fire-and-forget chain.
|
|
6
|
-
export const OBSERVER_TIMEOUT_MS =
|
|
6
|
+
export const OBSERVER_TIMEOUT_MS = 360_000;
|
|
7
7
|
|
|
8
8
|
// Cap raw log size sent to the model. Issue logs can grow to MB; observer only
|
|
9
9
|
// needs filtered signal lines. Truncation keeps prompt cost bounded and the
|