npm - claude-teammate - Versions diffs - 0.1.294 → 0.1.296 - Mend

claude-teammate 0.1.294 → 0.1.296

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-teammate",
-  "version": "0.1.294",
+  "version": "0.1.296",
   "description": "CLI bootstrapper for Claude Teammate.",
   "license": "MIT",
   "type": "module",

package/src/claude/process.js CHANGED Viewed

@@ -38,12 +38,19 @@ export function buildStreamArgs(args) {
 }
 export function formatClaudeInvocationError(error, timeoutMs) {
-  const stderr = error instanceof Error && "stderr" in error ? String(error.stderr || "") : "";
-  const output = error instanceof Error && "stdout" in error ? String(error.stdout || "") : "";
-  const timeout = Boolean(error && typeof error === "object" && "killed" in error && error.killed);
-  const signal = error && typeof error === "object" && "signal" in error ? String(error.signal || "") : "";
+  const isObj = error && typeof error === "object"; // any non-null object qualifies, not only Error instances
+  const stderr = isObj && "stderr" in error ? String(error.stderr || "") : "";
+  const output = isObj && "stdout" in error ? String(error.stdout || "") : "";
+  const timeout = Boolean(isObj && "killed" in error && error.killed); // truthy `killed` is reported as "after <timeoutMs>ms"
+  const signal = isObj && "signal" in error ? String(error.signal || "") : "";
+  // Exit codes surface E2BIG/OOM/etc when stderr/stdout are empty — vital for
+  // diagnosing "Claude CLI invocation failed." with no other context. Only
+  // include when no other information was available so well-behaved errors
+  // stay readable.
+  const code = isObj && "code" in error && error.code !== null && error.code !== undefined ? String(error.code) : "";
   const details = [stderr.trim(), output.trim()].filter(Boolean).join("\n").slice(0, 1000); // stderr first, then stdout; capped at 1000 chars
-  return `Claude CLI invocation failed${timeout ? ` after ${timeoutMs}ms` : ""}${signal ? ` (${signal})` : ""}${details ? `: ${details}` : "."}`;
+  const codeFragment = !timeout && !signal && !details && code !== "" ? ` (exit ${code})` : ""; // code 0 still prints: only the empty string is excluded
+  return `Claude CLI invocation failed${timeout ? ` after ${timeoutMs}ms` : ""}${signal ? ` (${signal})` : ""}${codeFragment}${details ? `: ${details}` : "."}`;
 }
 export function shouldRetryClaudeCommand(options = {}, attempt) {
@@ -157,6 +164,7 @@ function runClaudeCommandOnce(command, args, options) {
       }
       reject({
+        code,
         stdout,
         stderr,
         killed: timedOut,

package/src/claude.js CHANGED Viewed

@@ -498,7 +498,7 @@ export async function runClaudeClarification(input) {
   );
 }
-const SKILL_CORRECTION_SCHEMA = {
+export const SKILL_CORRECTION_SCHEMA = {
   type: "object",
   properties: {
     isCorrection: { type: "boolean" },
@@ -509,17 +509,29 @@ const SKILL_CORRECTION_SCHEMA = {
   additionalProperties: false
 };
-const SKILL_CORRECTION_SYSTEM = `You analyze user messages that reply to a previous AI response generated by a skill (slash command). Your job is to extract feedback so the skill can be improved.
+export const SKILL_CORRECTION_SYSTEM = `You analyze user messages that reply to a previous AI response generated by a skill (slash command). Your job is to extract concrete feedback so the skill can be improved — and to refuse vague, off-topic, or low-signal replies that would only thrash the skill fixer.
 User messages may be in any language (English, Vietnamese, Chinese, etc.). Treat all languages equally.
-Default stance: if the user's reply requests ANY change to the previous bot output — add content, remove content, restructure, fix format, fix behavior, narrow scope, broaden scope, correct a mistake, change style, change content, follow a different rule next time — then isCorrection=true. The skill produced output that did not fully meet the user's needs on the first try; that is enough signal to attempt skill improvement. Cooldown and the downstream generator will filter false positives.
+Default stance: isCorrection=FALSE. Only flip to true when the message clearly meets ALL of:
+1. The user is replying about the previous bot output (not asking a new task, not status-checking, not chit-chat).
+2. The user identifies something concrete that should change — wrong content, missing information, wrong format/structure, factual mistake, broken instruction, mis-applied rule.
+3. The change is actionable — a maintainer could read correctionSummary and edit the skill instructions to satisfy it.
-Set isCorrection=false ONLY when the user's message is clearly unrelated to improving the previous output — e.g. a brand-new unrelated task, a status question, a thank-you, an off-topic remark.
+Set isCorrection=FALSE in any of these cases (be conservative — false negatives are cheap, false positives waste a fix run):
+- The message asks a follow-up or new task unrelated to revising the previous output.
+- The message is a status/progress check ("done?", "any update?", "ETA?").
+- The message is acknowledgement, thanks, or off-topic remarks.
+- The complaint is vague ("not good", "redo", "this is wrong") with no specific change.
+- The user is asking about a different feature/skill than the one that produced the previous output.
+- The previous output is missing or unrelated to the user's reply.
 When isCorrection=true:
-- skillName: required best-effort. If the message contains /skill-name, use that. Otherwise infer from the previous bot output (what skill most likely produced it — e.g. test design output → test-design skill, code review output → review skill). Use null only when there is genuinely no signal.
-- correctionSummary: one sentence (in English) capturing what the user wants different. Include both what was wrong/missing and what it should be instead. Keep it concrete enough that a maintainer can patch the skill instructions.`;
+- skillName: required, best-effort. Acceptable sources, in priority order:
+  1. The message explicitly references a slash command or skill identifier (e.g. /generate-test-design, "the test-design skill").
+  2. The previous bot output explicitly identifies the skill that produced it.
+  Otherwise return null. DO NOT guess from generic English words ("plan", "review", "test"); a skill with that exact name probably does not exist and a wrong guess sends the fixer down a dead end.
+- correctionSummary: one English sentence capturing the concrete change. Include both what was wrong/missing and what it should be instead. Must be specific enough to act on; if you cannot write a specific sentence, set isCorrection=false instead.`;
 /**
  * Lightweight haiku call to detect if a human comment is correcting a skill's output.

package/src/skills/fixer.js CHANGED Viewed

@@ -106,7 +106,7 @@ General:
 - analysis must summarize what you read and what the improvement is (this becomes the PR description).
 - reason must be one sentence explaining the concrete improvement, or "no improvement needed" when returning an empty payload.`;
-const SKILL_FIX_TIMEOUT_MS = 90_000;
+const SKILL_FIX_TIMEOUT_MS = 360_000;
 // Per-repo serialization: one PR creation at a time per repo. Different repos
 // run in parallel. Keyed by absolute project root path. Worktree creation under
@@ -465,6 +465,52 @@ async function getDefaultBranch(projectRoot) {
   return "main";
 }
+/**
+ * Push the fix branch upstream. Default strategy is `--force-with-lease` (safe
+ * against accidental overwrite of work the remote knows about that we don't).
+ *
+ * Stale-info recovery: a closed-but-undeleted remote branch from a prior fix
+ * can leave us without a valid lease. We pre-fetch the branch (no-op if it
+ * doesn't exist remotely), and if the lease is still rejected as "stale info",
+ * fall back to a plain `--force` push. This is safe in our flow because the
+ * worktree was just built off `origin/<defaultBranch>` and the only writer to
+ * `fix/skill-<name>` branches is this code path.
+ *
+ * Exposed for testing via `__testing.pushBranchWithLease`. The `exec` parameter
+ * lets tests inject a fake exec without spinning up real git.
+ */
+async function pushBranchWithLease({ cwd, branch, env, logger, skill, exec = execFileAsync }) {
+  const opts = { cwd, timeout: 60000, ...(env && { env }) };
+  const fetchOpts = { cwd, timeout: 30000, ...(env && { env }) };
+  // Pre-fetch so --force-with-lease has a known remote SHA. Branch may not
+  // exist on the remote yet (first push) — that exit code is non-fatal.
+  try {
+    await exec("git", ["fetch", "origin", branch], fetchOpts);
+  } catch (fetchErr) {
+    logger?.info?.("skill-fix: pre-push fetch of fix branch failed (likely first push)", {
+      skill,
+      branch,
+      error: fetchErr?.message
+    });
+  }
+  try {
+    await exec("git", ["push", "--force-with-lease", "-u", "origin", branch], opts);
+    return { used: "force-with-lease" };
+  } catch (err) {
+    const stderr = String(err?.stderr || err?.message || "");
+    const isStale = /stale info|rejected/i.test(stderr);
+    if (!isStale) throw err;
+    logger?.warn?.("skill-fix: --force-with-lease rejected (stale info), retrying with --force", {
+      skill,
+      branch
+    });
+    await exec("git", ["push", "--force", "-u", "origin", branch], opts);
+    return { used: "force" };
+  }
+}
 async function createSkillFixPR({ skillName, files, reason, analysis, location, projectRoot, logger, mode }) {
   const isImprove = mode === "improve";
   const branchPrefix = isImprove ? "improve/skill-" : "fix/skill-";
@@ -588,11 +634,12 @@ async function createSkillFixPR({ skillName, files, reason, analysis, location,
       ],
       { cwd: worktreePath, timeout: 10000 }
     );
-    // --force-with-lease: safe retry after partial failure — only overwrites if remote matches expected
-    await execFileAsync("git", ["push", "--force-with-lease", "-u", "origin", branch], {
+    await pushBranchWithLease({
       cwd: worktreePath,
-      timeout: 60000,
-      ...(gitAuthEnv && { env: gitAuthEnv })
+      branch,
+      env: gitAuthEnv,
+      logger,
+      skill: skillName
     });
     const prUrl = await openPR({ branch, prTitle, prBody, defaultBranch, provider, repo, projectRoot });
@@ -702,5 +749,6 @@ export const __testing = {
   },
   resetBackupMax() {
     SKILL_BACKUP_MAX = 5;
-  }
+  },
+  pushBranchWithLease
 };

package/src/skills/index.js CHANGED Viewed

@@ -26,6 +26,15 @@ const COOLDOWN_SUCCESS_STATUSES = new Set(["patched", "patched-with-backup", "pr
 // successful evaluation — count it so repeated sample hits don't burn Claude calls.
 const IMPROVE_COOLDOWN_SUCCESS_STATUSES = new Set([...COOLDOWN_SUCCESS_STATUSES, "no-fix"]);
+// Failure cooldown: stop retrying when the same (skill, errorType) keeps failing.
+// Distinct from success cooldown — catches loops where the generator/CLI/git push
+// keeps erroring out (e.g. repeated "Claude CLI invocation failed" or push rejected).
+// Without this, user feedback or detector retries thrash the same skill indefinitely.
+const COOLDOWN_FAILURE_STATUSES = new Set(["generation-error", "error", "patch-failed", "no-fix"]); // NOTE(review): "no-fix" is also an improve-mode success status — confirm the overlap is intended
+let SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD = 3; // skip once this many failures fall inside the window (compared with >=); `let` so tests can override
+export const SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS_DEFAULT = 30 * 60 * 1000; // 30 minutes, in milliseconds
+let SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS = SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS_DEFAULT; // live window; `let` so tests can override and reset
 // Phase 4: proactive improvement detector. Default off — enabling can spawn
 // improvement PRs on every successful task. Sample rate keeps cost bounded.
 // Cooldown 24h prevents repeat improvement PRs for the same skill.
@@ -257,6 +266,33 @@ async function fixSkillsAsync(
       continue;
     }
+    // Failure cooldown: skip when prior attempts keep erroring (CLI invocation
+    // crashes, generation errors, push failures, no-fix loops). Without this,
+    // every detection cycle re-runs the same broken pipeline.
+    const recentFailureCount = await getRecentFailedAttemptCount({
+      eventsRoot,
+      skill: skillName,
+      errorType,
+      windowMs: SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS,
+      mode
+    });
+    if (recentFailureCount >= SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD) {
+      logger?.info("skill-fix: cooldown — repeated failures within window, skipping", {
+        skill: skillName,
+        errorType,
+        mode,
+        recentFailureCount
+      });
+      await appendSkillFixEvent(eventsRoot, {
+        skill: skillName,
+        errorType,
+        status: "cooldown",
+        mode,
+        recentFailureCount
+      });
+      continue;
+    }
     // Resolve location AFTER cooldown so cooldown'd skills don't pay the FS read.
     // Lock keyed by absolute location dir: global skills share a single key across
     // projects (preventing concurrent overwrites of `~/.claude/skills/<name>`),
@@ -360,21 +396,92 @@ export function scheduleSkillFixWithFeedback({
     return;
   }
-  activeFixLocks.add(lockKey);
-  fixSingleSkill({
+  // Failure cooldown applies here too — without it, every Jira reply retriggers
+  // the same broken fix pipeline (CLI crashes, push errors). Cooldown is per
+  // (skill, "user-feedback") so unrelated detector failures do not block
+  // legitimate user corrections, and vice versa.
+  _runFeedbackFix({
     skillName,
-    errorContent: `User correction: ${correctionSummary}`,
-    errorType: "user-feedback",
+    correctionSummary,
     location,
     projectRoot,
-    eventsRoot: resolvedEventsRoot,
+    resolvedEventsRoot,
+    lockKey,
     logger,
     invokeClaudeTask,
     epicContext,
     issueKey
-  })
-    .catch(() => {})
-    .finally(() => activeFixLocks.delete(lockKey));
+  });
+}
+async function _runFeedbackFix({
+  skillName,
+  correctionSummary,
+  location,
+  projectRoot,
+  resolvedEventsRoot,
+  lockKey,
+  logger,
+  invokeClaudeTask,
+  epicContext,
+  issueKey
+}) {
+  try {
+    const recentFailureCount = await getRecentFailedAttemptCount({
+      eventsRoot: resolvedEventsRoot,
+      skill: skillName,
+      errorType: "user-feedback",
+      windowMs: SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS,
+      mode: "fix"
+    });
+    if (recentFailureCount >= SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD) {
+      logger?.info("skill-fix: cooldown — repeated user-feedback failures within window, skipping", {
+        skill: skillName,
+        recentFailureCount
+      });
+      await appendSkillFixEvent(resolvedEventsRoot, {
+        skill: skillName,
+        location: location.type,
+        errorType: "user-feedback",
+        status: "cooldown",
+        recentFailureCount
+      });
+      return;
+    }
+  } catch {
+    // Counting failures is best-effort; never let it block user feedback fixes.
+  }
+  if (activeFixLocks.has(lockKey)) {
+    // Recheck under the same flow (cooldown read above is async — another
+    // feedback may have started in the meantime).
+    await appendSkillFixEvent(resolvedEventsRoot, {
+      skill: skillName,
+      errorType: "user-feedback",
+      status: "lock-skipped"
+    });
+    return;
+  }
+  activeFixLocks.add(lockKey);
+  try {
+    await fixSingleSkill({
+      skillName,
+      errorContent: `User correction: ${correctionSummary}`,
+      errorType: "user-feedback",
+      location,
+      projectRoot,
+      eventsRoot: resolvedEventsRoot,
+      logger,
+      invokeClaudeTask,
+      epicContext,
+      issueKey
+    });
+  } catch {
+    // fixSingleSkill itself never throws upstream; this is just defensive.
+  } finally {
+    activeFixLocks.delete(lockKey);
+  }
 }
 async function fixSingleSkill({
@@ -575,22 +682,19 @@ async function appendSkillFixEvent(eventsRoot, fields) {
 }
 /**
- * Count recent successful skill-fix events for a (skill, errorType) pair within
- * `windowMs`. Snapshot read — no lock against `appendSkillFixEvent` writes; a
- * concurrent write at most causes one extra fix to slip through, which is
- * preferable to serializing every detection cycle behind the events mutex.
+ * Count recent skill-fix events for a (skill, errorType) pair within `windowMs`
+ * matching any of `statusSet`. Snapshot read — no lock against `appendSkillFixEvent`
+ * writes; a concurrent write at most causes one extra fix to slip through.
+ *
+ * `errorType` may be null/undefined to count across all errorTypes for a skill
+ * (used by the user-feedback path where errorType is uniform but the underlying
+ * cause varies — counting per-skill is the right grain).
  */
-async function getRecentSuccessfulFixCount({ eventsRoot, skill, errorType, windowMs, mode }) {
+async function countRecentEventsByStatus({ eventsRoot, skill, errorType, windowMs, mode, statusSet }) {
   if (!eventsRoot || !skill) return 0;
-  // Pre-Phase-4 events have no `mode` field — treat them as "fix" so legacy fix
-  // history still throttles new fixes correctly. Improve mode requires explicit match.
   const expectedMode = mode || "fix";
-  // Compound errorTypes carry a trailing "+" (multiple modes contributed in one
-  // detection cycle). Normalize so e.g. "bash-error-in-skill" matches a prior
-  // "bash-error-in-skill+" event — same root cause, cooldown should still bite.
   const normalize = (t) => (typeof t === "string" ? t.replace(/\+$/, "") : t);
-  const normalizedErrorType = normalize(errorType);
-  const successSet = expectedMode === "improve" ? IMPROVE_COOLDOWN_SUCCESS_STATUSES : COOLDOWN_SUCCESS_STATUSES;
+  const normalizedErrorType = errorType == null ? null : normalize(errorType);
   try {
     const file = path.join(eventsRoot, "memory", "skill-fixes.json");
     const events = JSON.parse(await readFile(file, "utf8"));
@@ -599,8 +703,8 @@ async function getRecentSuccessfulFixCount({ eventsRoot, skill, errorType, windo
     let count = 0;
     for (const e of events) {
       if (e?.skill !== skill) continue;
-      if (normalize(e?.errorType) !== normalizedErrorType) continue;
-      if (!successSet.has(e?.status)) continue;
+      if (normalizedErrorType !== null && normalize(e?.errorType) !== normalizedErrorType) continue;
+      if (!statusSet.has(e?.status)) continue;
       const eventMode = e?.mode || "fix";
       if (eventMode !== expectedMode) continue;
       const ts = Date.parse(e.ts);
@@ -609,22 +713,61 @@ async function getRecentSuccessfulFixCount({ eventsRoot, skill, errorType, windo
     }
     return count;
   } catch {
-    // Missing file or parse error — treat as no history (consistent with append path)
     return 0;
   }
 }
+/**
+ * Count recent successful skill-fix events for a (skill, errorType) pair within
+ * `windowMs`. Snapshot read — no lock against `appendSkillFixEvent` writes; a
+ * concurrent write at most causes one extra fix to slip through, which is
+ * preferable to serializing every detection cycle behind the events mutex.
+ */
+async function getRecentSuccessfulFixCount({ eventsRoot, skill, errorType, windowMs, mode }) {
+  const expectedMode = mode || "fix";
+  const successSet = expectedMode === "improve" ? IMPROVE_COOLDOWN_SUCCESS_STATUSES : COOLDOWN_SUCCESS_STATUSES;
+  return countRecentEventsByStatus({
+    eventsRoot,
+    skill,
+    errorType,
+    windowMs,
+    mode,
+    statusSet: successSet
+  });
+}
+/**
+ * Count recent failed skill-fix attempts for the same (skill, errorType) within
+ * `windowMs`. Used to break out of CLI/git/generation error loops where the fix
+ * pipeline keeps trying but never produces a usable patch.
+ */
+async function getRecentFailedAttemptCount({ eventsRoot, skill, errorType, windowMs, mode }) {
+  return countRecentEventsByStatus({
+    eventsRoot,
+    skill,
+    errorType,
+    windowMs,
+    mode,
+    statusSet: COOLDOWN_FAILURE_STATUSES
+  });
+}
 // Exported for tests only
 export const __testing = {
   appendSkillFixEvent,
   getRecentSuccessfulFixCount,
-  setCooldownConstants({ windowMs, threshold } = {}) {
+  getRecentFailedAttemptCount,
+  setCooldownConstants({ windowMs, threshold, failureWindowMs, failureThreshold } = {}) {
     if (typeof windowMs === "number") SKILL_FIX_COOLDOWN_WINDOW_MS = windowMs;
     if (typeof threshold === "number") SKILL_FIX_COOLDOWN_THRESHOLD = threshold;
+    if (typeof failureWindowMs === "number") SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS = failureWindowMs;
+    if (typeof failureThreshold === "number") SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD = failureThreshold;
   },
   resetCooldownConstants() {
     SKILL_FIX_COOLDOWN_WINDOW_MS = SKILL_FIX_COOLDOWN_WINDOW_MS_DEFAULT;
     SKILL_FIX_COOLDOWN_THRESHOLD = 2;
+    SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS = SKILL_FIX_FAILURE_COOLDOWN_WINDOW_MS_DEFAULT;
+    SKILL_FIX_FAILURE_COOLDOWN_THRESHOLD = 3;
   },
   setImprovementConstants({ proactive, sampleRate, cooldownMs, cooldownThreshold } = {}) {
     if (typeof proactive === "boolean") SKILL_IMPROVEMENT_PROACTIVE = proactive;

package/src/skills/observer.js CHANGED Viewed

@@ -3,7 +3,7 @@ import { applySkillFailures } from "./index.js";
 // Observer call ceiling. Hard cap so a runaway log read can never indefinitely
 // block the fire-and-forget chain.
-export const OBSERVER_TIMEOUT_MS = 90_000;
+export const OBSERVER_TIMEOUT_MS = 360_000;
 // Cap raw log size sent to the model. Issue logs can grow to MB; observer only
 // needs filtered signal lines. Truncation keeps prompt cost bounded and the