claude-teammate 0.1.294 → 0.1.296

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-teammate",
3
- "version": "0.1.294",
3
+ "version": "0.1.296",
4
4
  "description": "CLI bootstrapper for Claude Teammate.",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -38,12 +38,19 @@ export function buildStreamArgs(args) {
38
38
  }
39
39
 
40
40
  export function formatClaudeInvocationError(error, timeoutMs) {
41
- const stderr = error instanceof Error && "stderr" in error ? String(error.stderr || "") : "";
42
- const output = error instanceof Error && "stdout" in error ? String(error.stdout || "") : "";
43
- const timeout = Boolean(error && typeof error === "object" && "killed" in error && error.killed);
44
- const signal = error && typeof error === "object" && "signal" in error ? String(error.signal || "") : "";
41
+ const isObj = error && typeof error === "object";
42
+ const stderr = isObj && "stderr" in error ? String(error.stderr || "") : "";
43
+ const output = isObj && "stdout" in error ? String(error.stdout || "") : "";
44
+ const timeout = Boolean(isObj && "killed" in error && error.killed);
45
+ const signal = isObj && "signal" in error ? String(error.signal || "") : "";
46
+ // Exit codes surface E2BIG/OOM/etc when stderr/stdout are empty — vital for
47
+ // diagnosing "Claude CLI invocation failed." with no other context. Only
48
+ // include when no other information was available so well-behaved errors
49
+ // stay readable.
50
+ const code = isObj && "code" in error && error.code !== null && error.code !== undefined ? String(error.code) : "";
45
51
  const details = [stderr.trim(), output.trim()].filter(Boolean).join("\n").slice(0, 1000);
46
- return `Claude CLI invocation failed${timeout ? ` after ${timeoutMs}ms` : ""}${signal ? ` (${signal})` : ""}${details ? `: ${details}` : "."}`;
52
+ const codeFragment = !timeout && !signal && !details && code !== "" ? ` (exit ${code})` : "";
53
+ return `Claude CLI invocation failed${timeout ? ` after ${timeoutMs}ms` : ""}${signal ? ` (${signal})` : ""}${codeFragment}${details ? `: ${details}` : "."}`;
47
54
  }
48
55
 
49
56
  export function shouldRetryClaudeCommand(options = {}, attempt) {
@@ -157,6 +164,7 @@ function runClaudeCommandOnce(command, args, options) {
157
164
  }
158
165
 
159
166
  reject({
167
+ code,
160
168
  stdout,
161
169
  stderr,
162
170
  killed: timedOut,
package/src/claude.js CHANGED
@@ -498,7 +498,7 @@ export async function runClaudeClarification(input) {
498
498
  );
499
499
  }
500
500
 
501
- const SKILL_CORRECTION_SCHEMA = {
501
+ export const SKILL_CORRECTION_SCHEMA = {
502
502
  type: "object",
503
503
  properties: {
504
504
  isCorrection: { type: "boolean" },
@@ -509,17 +509,29 @@ const SKILL_CORRECTION_SCHEMA = {
509
509
  additionalProperties: false
510
510
  };
511
511
 
512
- const SKILL_CORRECTION_SYSTEM = `You analyze user messages that reply to a previous AI response generated by a skill (slash command). Your job is to extract feedback so the skill can be improved.
512
+ export const SKILL_CORRECTION_SYSTEM = `You analyze user messages that reply to a previous AI response generated by a skill (slash command). Your job is to extract concrete feedback so the skill can be improved — and to refuse vague, off-topic, or low-signal replies that would only thrash the skill fixer.
513
513
 
514
514
  User messages may be in any language (English, Vietnamese, Chinese, etc.). Treat all languages equally.
515
515
 
516
- Default stance: if the user's reply requests ANY change to the previous bot output — add content, remove content, restructure, fix format, fix behavior, narrow scope, broaden scope, correct a mistake, change style, change content, follow a different rule next time — then isCorrection=true. The skill produced output that did not fully meet the user's needs on the first try; that is enough signal to attempt skill improvement. Cooldown and the downstream generator will filter false positives.
516
+ Default stance: isCorrection=FALSE. Only flip to true when the message clearly meets ALL of:
517
+ 1. The user is replying about the previous bot output (not asking a new task, not status-checking, not chit-chat).
518
+ 2. The user identifies something concrete that should change — wrong content, missing information, wrong format/structure, factual mistake, broken instruction, mis-applied rule.
519
+ 3. The change is actionable — a maintainer could read correctionSummary and edit the skill instructions to satisfy it.
517
520
 
518
- Set isCorrection=false ONLY when the user's message is clearly unrelated to improving the previous output — e.g. a brand-new unrelated task, a status question, a thank-you, an off-topic remark.
521
+ Set isCorrection=FALSE in any of these cases (be conservative — false negatives are cheap, false positives waste a fix run):
522
+ - The message asks a follow-up or new task unrelated to revising the previous output.
523
+ - The message is a status/progress check ("done?", "any update?", "ETA?").
524
+ - The message is acknowledgement, thanks, or off-topic remarks.
525
+ - The complaint is vague ("not good", "redo", "this is wrong") with no specific change.
526
+ - The user is asking about a different feature/skill than the one that produced the previous output.
527
+ - The previous output is missing or unrelated to the user's reply.
519
528
 
520
529
  When isCorrection=true:
521
- - skillName: required best-effort. If the message contains /skill-name, use that. Otherwise infer from the previous bot output (what skill most likely produced it — e.g. test design output → test-design skill, code review output → review skill). Use null only when there is genuinely no signal.
522
- - correctionSummary: one sentence (in English) capturing what the user wants different. Include both what was wrong/missing and what it should be instead. Keep it concrete enough that a maintainer can patch the skill instructions.`;
530
+ - skillName: required, best-effort. Acceptable sources, in priority order:
531
+ 1. The message explicitly references a slash command or skill identifier (e.g. /generate-test-design, "the test-design skill").
532
+ 2. The previous bot output explicitly identifies the skill that produced it.
533
+ Otherwise return null. DO NOT guess from generic English words ("plan", "review", "test"); a skill with that exact name probably does not exist and a wrong guess sends the fixer down a dead end.
534
+ - correctionSummary: one English sentence capturing the concrete change. Include both what was wrong/missing and what it should be instead. Must be specific enough to act on; if you cannot write a specific sentence, set isCorrection=false instead.`;
523
535
 
524
536
  /**
525
537
  * Lightweight haiku call to detect if a human comment is correcting a skill's output.
@@ -106,7 +106,7 @@ General:
106
106
  - analysis must summarize what you read and what the improvement is (this becomes the PR description).
107
107
  - reason must be one sentence explaining the concrete improvement, or "no improvement needed" when returning an empty payload.`;
108
108
 
109
- const SKILL_FIX_TIMEOUT_MS = 90_000;
109
+ const SKILL_FIX_TIMEOUT_MS = 360_000;
110
110
 
111
111
  // Per-repo serialization: one PR creation at a time per repo. Different repos
112
112
  // run in parallel. Keyed by absolute project root path. Worktree creation under
@@ -465,6 +465,52 @@ async function getDefaultBranch(projectRoot) {
465
465
  return "main";
466
466
  }
467
467
 
468
+ /**
469
+ * Push the fix branch upstream. Default strategy is `--force-with-lease` (safe
470
+ * against accidental overwrite of work the remote knows about that we don't).
471
+ *
472
+ * Stale-info recovery: a closed-but-undeleted remote branch from a prior fix
473
+ * can leave us without a valid lease. We pre-fetch the branch (no-op if it
474
+ * doesn't exist remotely), and if the lease is still rejected as "stale info",
475
+ * fall back to a plain `--force` push. This is safe in our flow because the
476
+ * worktree was just built off `origin/<defaultBranch>` and the only writer to
477
+ * `fix/skill-<name>` branches is this code path.
478
+ *
479
+ * Exposed for testing via `__testing.pushBranchWithLease`. The `exec` parameter
480
+ * lets tests inject a fake exec without spinning up real git.
481
+ */
482
+ async function pushBranchWithLease({ cwd, branch, env, logger, skill, exec = execFileAsync }) {
483
+ const opts = { cwd, timeout: 60000, ...(env && { env }) };
484
+ const fetchOpts = { cwd, timeout: 30000, ...(env && { env }) };
485
+
486
+ // Pre-fetch so --force-with-lease has a known remote SHA. Branch may not
487
+ // exist on the remote yet (first push) — that exit code is non-fatal.
488
+ try {
489
+ await exec("git", ["fetch", "origin", branch], fetchOpts);
490
+ } catch (fetchErr) {
491
+ logger?.info?.("skill-fix: pre-push fetch of fix branch failed (likely first push)", {
492
+ skill,
493
+ branch,
494
+ error: fetchErr?.message
495
+ });
496
+ }
497
+
498
+ try {
499
+ await exec("git", ["push", "--force-with-lease", "-u", "origin", branch], opts);
500
+ return { used: "force-with-lease" };
501
+ } catch (err) {
502
+ const stderr = String(err?.stderr || err?.message || "");
503
+ const isStale = /stale info|rejected/i.test(stderr);
504
+ if (!isStale) throw err;
505
+ logger?.warn?.("skill-fix: --force-with-lease rejected (stale info), retrying with --force", {
506
+ skill,
507
+ branch
508
+ });
509
+ await exec("git", ["push", "--force", "-u", "origin", branch], opts);
510
+ return { used: "force" };
511
+ }
512
+ }
513
+
468
514
  async function createSkillFixPR({ skillName, files, reason, analysis, location, projectRoot, logger, mode }) {
469
515
  const isImprove = mode === "improve";
470
516
  const branchPrefix = isImprove ? "improve/skill-" : "fix/skill-";
@@ -588,11 +634,12 @@ async function createSkillFixPR({ skillName, files, reason, analysis, location,
588
634
  ],
589
635
  { cwd: worktreePath, timeout: 10000 }
590
636
  );
591
- // --force-with-lease: safe retry after partial failure — only overwrites if remote matches expected
592
- await execFileAsync("git", ["push", "--force-with-lease", "-u", "origin", branch], {
637
+ await pushBranchWithLease({
593
638
  cwd: worktreePath,
594
- timeout: 60000,
595
- ...(gitAuthEnv && { env: gitAuthEnv })
639
+ branch,
640
+ env: gitAuthEnv,
641
+ logger,
642
+ skill: skillName
596
643
  });
597
644
 
598
645
  const prUrl = await openPR({ branch, prTitle, prBody, defaultBranch, provider, repo, projectRoot });
@@ -702,5 +749,6 @@ export const __testing = {
702
749
  },
703
750
  resetBackupMax() {
704
751
  SKILL_BACKUP_MAX = 5;
705
- }
752
+ },
753
+ pushBranchWithLease
706
754
  };
@@ -26,6 +26,15 @@ const COOLDOWN_SUCCESS_STATUSES = new Set(["patched", "patched-with-backup", "pr
26
26
  // successful evaluation — count it so repeated sample hits don't burn Claude calls.
27
27
  const IMPROVE_COOLDOWN_SUCCESS_STATUSES = new Set([...COOLDOWN_SUCCESS_STATUSES, "no-fix"]);
28
28
 
29
+ // Failure cooldown: stop retrying when the same (skill, errorType) keeps failing.
30
+ // Distinct from success cooldown — catches loops where the generator/CLI/git push
31
+ // keeps erroring out (e.g. repeated "Claude CLI invocation failed" or push rejected).
32
+ // Without this, user feedback or detector retries thrash the same skill indefinitely.
33
+ const COOLDOWN_FAILURE_STATUSES = new Set(["generation-error", "error", "patch-failed", "no-fix"]);
34
+ let SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD = 3;
35
+ export const SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS_DEFAULT = 30 * 60 * 1000;
36
+ let SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS = SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS_DEFAULT;
37
+
29
38
  // Phase 4: proactive improvement detector. Default off — enabling can spawn
30
39
  // improvement PRs on every successful task. Sample rate keeps cost bounded.
31
40
  // Cooldown 24h prevents repeat improvement PRs for the same skill.
@@ -257,6 +266,33 @@ async function fixSkillsAsync(
257
266
  continue;
258
267
  }
259
268
 
269
+ // Failure cooldown: skip when prior attempts keep erroring (CLI invocation
270
+ // crashes, generation errors, push failures, no-fix loops). Without this,
271
+ // every detection cycle re-runs the same broken pipeline.
272
+ const recentFailureCount = await getRecentFailedAttemptCount({
273
+ eventsRoot,
274
+ skill: skillName,
275
+ errorType,
276
+ windowMs: SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS,
277
+ mode
278
+ });
279
+ if (recentFailureCount >= SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD) {
280
+ logger?.info("skill-fix: cooldown — repeated failures within window, skipping", {
281
+ skill: skillName,
282
+ errorType,
283
+ mode,
284
+ recentFailureCount
285
+ });
286
+ await appendSkillFixEvent(eventsRoot, {
287
+ skill: skillName,
288
+ errorType,
289
+ status: "cooldown",
290
+ mode,
291
+ recentFailureCount
292
+ });
293
+ continue;
294
+ }
295
+
260
296
  // Resolve location AFTER cooldown so cooldown'd skills don't pay the FS read.
261
297
  // Lock keyed by absolute location dir: global skills share a single key across
262
298
  // projects (preventing concurrent overwrites of `~/.claude/skills/<name>`),
@@ -360,21 +396,92 @@ export function scheduleSkillFixWithFeedback({
360
396
  return;
361
397
  }
362
398
 
363
- activeFixLocks.add(lockKey);
364
- fixSingleSkill({
399
+ // Failure cooldown applies here too — without it, every Jira reply retriggers
400
+ // the same broken fix pipeline (CLI crashes, push errors). Cooldown is per
401
+ // (skill, "user-feedback") so unrelated detector failures do not block
402
+ // legitimate user corrections, and vice versa.
403
+ _runFeedbackFix({
365
404
  skillName,
366
- errorContent: `User correction: ${correctionSummary}`,
367
- errorType: "user-feedback",
405
+ correctionSummary,
368
406
  location,
369
407
  projectRoot,
370
- eventsRoot: resolvedEventsRoot,
408
+ resolvedEventsRoot,
409
+ lockKey,
371
410
  logger,
372
411
  invokeClaudeTask,
373
412
  epicContext,
374
413
  issueKey
375
- })
376
- .catch(() => {})
377
- .finally(() => activeFixLocks.delete(lockKey));
414
+ });
415
+ }
416
+
417
+ async function _runFeedbackFix({
418
+ skillName,
419
+ correctionSummary,
420
+ location,
421
+ projectRoot,
422
+ resolvedEventsRoot,
423
+ lockKey,
424
+ logger,
425
+ invokeClaudeTask,
426
+ epicContext,
427
+ issueKey
428
+ }) {
429
+ try {
430
+ const recentFailureCount = await getRecentFailedAttemptCount({
431
+ eventsRoot: resolvedEventsRoot,
432
+ skill: skillName,
433
+ errorType: "user-feedback",
434
+ windowMs: SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS,
435
+ mode: "fix"
436
+ });
437
+ if (recentFailureCount >= SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD) {
438
+ logger?.info("skill-fix: cooldown — repeated user-feedback failures within window, skipping", {
439
+ skill: skillName,
440
+ recentFailureCount
441
+ });
442
+ await appendSkillFixEvent(resolvedEventsRoot, {
443
+ skill: skillName,
444
+ location: location.type,
445
+ errorType: "user-feedback",
446
+ status: "cooldown",
447
+ recentFailureCount
448
+ });
449
+ return;
450
+ }
451
+ } catch {
452
+ // Counting failures is best-effort; never let it block user feedback fixes.
453
+ }
454
+
455
+ if (activeFixLocks.has(lockKey)) {
456
+ // Recheck under the same flow (cooldown read above is async — another
457
+ // feedback may have started in the meantime).
458
+ await appendSkillFixEvent(resolvedEventsRoot, {
459
+ skill: skillName,
460
+ errorType: "user-feedback",
461
+ status: "lock-skipped"
462
+ });
463
+ return;
464
+ }
465
+
466
+ activeFixLocks.add(lockKey);
467
+ try {
468
+ await fixSingleSkill({
469
+ skillName,
470
+ errorContent: `User correction: ${correctionSummary}`,
471
+ errorType: "user-feedback",
472
+ location,
473
+ projectRoot,
474
+ eventsRoot: resolvedEventsRoot,
475
+ logger,
476
+ invokeClaudeTask,
477
+ epicContext,
478
+ issueKey
479
+ });
480
+ } catch {
481
+ // fixSingleSkill itself never throws upstream; this is just defensive.
482
+ } finally {
483
+ activeFixLocks.delete(lockKey);
484
+ }
378
485
  }
379
486
 
380
487
  async function fixSingleSkill({
@@ -575,22 +682,19 @@ async function appendSkillFixEvent(eventsRoot, fields) {
575
682
  }
576
683
 
577
684
  /**
578
- * Count recent successful skill-fix events for a (skill, errorType) pair within
579
- * `windowMs`. Snapshot read — no lock against `appendSkillFixEvent` writes; a
580
- * concurrent write at most causes one extra fix to slip through, which is
581
- * preferable to serializing every detection cycle behind the events mutex.
685
+ * Count recent skill-fix events for a (skill, errorType) pair within `windowMs`
686
+ * matching any of `statusSet`. Snapshot read — no lock against `appendSkillFixEvent`
687
+ * writes; a concurrent write at most causes one extra fix to slip through.
688
+ *
689
+ * `errorType` may be null/undefined to count across all errorTypes for a skill
690
+ * (used by the user-feedback path where errorType is uniform but the underlying
691
+ * cause varies — counting per-skill is the right grain).
582
692
  */
583
- async function getRecentSuccessfulFixCount({ eventsRoot, skill, errorType, windowMs, mode }) {
693
+ async function countRecentEventsByStatus({ eventsRoot, skill, errorType, windowMs, mode, statusSet }) {
584
694
  if (!eventsRoot || !skill) return 0;
585
- // Pre-Phase-4 events have no `mode` field — treat them as "fix" so legacy fix
586
- // history still throttles new fixes correctly. Improve mode requires explicit match.
587
695
  const expectedMode = mode || "fix";
588
- // Compound errorTypes carry a trailing "+" (multiple modes contributed in one
589
- // detection cycle). Normalize so e.g. "bash-error-in-skill" matches a prior
590
- // "bash-error-in-skill+" event — same root cause, cooldown should still bite.
591
696
  const normalize = (t) => (typeof t === "string" ? t.replace(/\+$/, "") : t);
592
- const normalizedErrorType = normalize(errorType);
593
- const successSet = expectedMode === "improve" ? IMPROVE_COOLDOWN_SUCCESS_STATUSES : COOLDOWN_SUCCESS_STATUSES;
697
+ const normalizedErrorType = errorType == null ? null : normalize(errorType);
594
698
  try {
595
699
  const file = path.join(eventsRoot, "memory", "skill-fixes.json");
596
700
  const events = JSON.parse(await readFile(file, "utf8"));
@@ -599,8 +703,8 @@ async function getRecentSuccessfulFixCount({ eventsRoot, skill, errorType, windo
599
703
  let count = 0;
600
704
  for (const e of events) {
601
705
  if (e?.skill !== skill) continue;
602
- if (normalize(e?.errorType) !== normalizedErrorType) continue;
603
- if (!successSet.has(e?.status)) continue;
706
+ if (normalizedErrorType !== null && normalize(e?.errorType) !== normalizedErrorType) continue;
707
+ if (!statusSet.has(e?.status)) continue;
604
708
  const eventMode = e?.mode || "fix";
605
709
  if (eventMode !== expectedMode) continue;
606
710
  const ts = Date.parse(e.ts);
@@ -609,22 +713,61 @@ async function getRecentSuccessfulFixCount({ eventsRoot, skill, errorType, windo
609
713
  }
610
714
  return count;
611
715
  } catch {
612
- // Missing file or parse error — treat as no history (consistent with append path)
613
716
  return 0;
614
717
  }
615
718
  }
616
719
 
720
+ /**
721
+ * Count recent successful skill-fix events for a (skill, errorType) pair within
722
+ * `windowMs`. Snapshot read — no lock against `appendSkillFixEvent` writes; a
723
+ * concurrent write at most causes one extra fix to slip through, which is
724
+ * preferable to serializing every detection cycle behind the events mutex.
725
+ */
726
+ async function getRecentSuccessfulFixCount({ eventsRoot, skill, errorType, windowMs, mode }) {
727
+ const expectedMode = mode || "fix";
728
+ const successSet = expectedMode === "improve" ? IMPROVE_COOLDOWN_SUCCESS_STATUSES : COOLDOWN_SUCCESS_STATUSES;
729
+ return countRecentEventsByStatus({
730
+ eventsRoot,
731
+ skill,
732
+ errorType,
733
+ windowMs,
734
+ mode,
735
+ statusSet: successSet
736
+ });
737
+ }
738
+
739
+ /**
740
+ * Count recent failed skill-fix attempts for the same (skill, errorType) within
741
+ * `windowMs`. Used to break out of CLI/git/generation error loops where the fix
742
+ * pipeline keeps trying but never produces a usable patch.
743
+ */
744
+ async function getRecentFailedAttemptCount({ eventsRoot, skill, errorType, windowMs, mode }) {
745
+ return countRecentEventsByStatus({
746
+ eventsRoot,
747
+ skill,
748
+ errorType,
749
+ windowMs,
750
+ mode,
751
+ statusSet: COOLDOWN_FAILURE_STATUSES
752
+ });
753
+ }
754
+
617
755
  // Exported for tests only
618
756
  export const __testing = {
619
757
  appendSkillFixEvent,
620
758
  getRecentSuccessfulFixCount,
621
- setCooldownConstants({ windowMs, threshold } = {}) {
759
+ getRecentFailedAttemptCount,
760
+ setCooldownConstants({ windowMs, threshold, failureWindowMs, failureThreshold } = {}) {
622
761
  if (typeof windowMs === "number") SKILL_FIX_COOLDOWN_WINDOW_MS = windowMs;
623
762
  if (typeof threshold === "number") SKILL_FIX_COOLDOWN_THRESHOLD = threshold;
763
+ if (typeof failureWindowMs === "number") SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS = failureWindowMs;
764
+ if (typeof failureThreshold === "number") SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD = failureThreshold;
624
765
  },
625
766
  resetCooldownConstants() {
626
767
  SKILL_FIX_COOLDOWN_WINDOW_MS = SKILL_FIX_COOLDOWN_WINDOW_MS_DEFAULT;
627
768
  SKILL_FIX_COOLDOWN_THRESHOLD = 2;
769
+ SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS = SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS_DEFAULT;
770
+ SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD = 3;
628
771
  },
629
772
  setImprovementConstants({ proactive, sampleRate, cooldownMs, cooldownThreshold } = {}) {
630
773
  if (typeof proactive === "boolean") SKILL_IMPROVEMENT_PROACTIVE = proactive;
@@ -3,7 +3,7 @@ import { applySkillFailures } from "./index.js";
3
3
 
4
4
  // Observer call ceiling. Hard cap so a runaway log read can never indefinitely
5
5
  // block the fire-and-forget chain.
6
- export const OBSERVER_TIMEOUT_MS = 90_000;
6
+ export const OBSERVER_TIMEOUT_MS = 360_000;
7
7
 
8
8
  // Cap raw log size sent to the model. Issue logs can grow to MB; observer only
9
9
  // needs filtered signal lines. Truncation keeps prompt cost bounded and the