npm - code-ai-installer - Versions diffs - 4.3.1 → 4.3.2 - Mend

code-ai-installer 4.3.1 → 4.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/README.md +2 -1
package/dist/mcp/audit_ledger.d.ts +6 -4
package/dist/mcp/audit_ledger.js +6 -5
package/dist/mcp/scorecard.d.ts +4 -2
package/dist/mcp/scorecard.js +12 -7
package/dist/mcp/tools/aggregate_run_metrics.d.ts +3 -2
package/dist/mcp/tools/aggregate_run_metrics.js +4 -3
package/dist/mcp/tools/sign_off.js +7 -7
package/domains/development/agents/auditor.md +2 -2
package/domains/development/locales/en/agents/auditor.md +2 -2
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -157,8 +157,9 @@ Depending on `--target`, `code-ai` restructures your project:
 ## 🧬 Versions & migration
-`code-ai-installer` is on **v4.3.1**.
+`code-ai-installer` is on **v4.3.2**.
+- **v4.3.2** — the Auditor now sees **`/bugfix`** runs. A run scorecard is recorded when a run reaches its mode's final gate (full / hotfix → RG, bugfix → TEST) instead of only at RG, so bugfix work counts toward the ≥3-run audit threshold rather than staying invisible. The final gate is read from each domain's `pipeline.yaml` (no hardcoded gate); existing ledger entries are unaffected, and only runs completed after the update are recorded.
 - **v4.3.1** — the `code-ai-mcp` registration is now **pinned** to the installed version (`npx -p code-ai-installer@<version>`) and re-pinned on every reinstall, so an updated server actually takes effect instead of an unpinned `npx` silently reusing a stale global/cache copy. The server also logs `code-ai-mcp v<version> · domain=<domain>` to stderr at startup, so the live build is visible in Claude's MCP logs.
 - **v4.3.0** — `render_diff` MCP tool (unified diff → a standalone HTML review page); MCP gate-flow + stop-at-user-gate sections added to the content / analytics / product conductors; Auditor trigger — a `/audit` command plus a Release-Gate nudge that surfaces after every 3rd completed run (development pilot).
 - **v4.1.0** — MCP servers now register in your **global (user-scope)** config via a direct, idempotent `~/.claude.json` merge (no dependency on the `claude` CLI); the conductor halts at each user gate — one at a time, no batching, no auto-pass on green.

package/dist/mcp/audit_ledger.d.ts CHANGED Viewed

@@ -1,15 +1,17 @@
 import { RunScorecard } from "./scorecard.js";
 import type { TaskState } from "./task_state.js";
+import type { GateName } from "../shared/index.js";
 /** Append one scorecard as a JSON line to the run ledger (creates dir if needed). */
 export declare function appendScorecard(card: RunScorecard): Promise<void>;
 /** Read all scorecards from the ledger. Malformed / partial lines are skipped. */
 export declare function readLedger(): Promise<RunScorecard[]>;
 /**
- * Build + append a scorecard for a completed run. Called from sign_off when RG
- * is signed. Best-effort by contract: callers MUST NOT let a ledger failure
- * break a sign-off (telemetry is never load-bearing for the gate).
+ * Build + append a scorecard for a completed run. Called from sign_off when the
+ * mode's terminal gate is signed (full/hotfix → RG, bugfix → TEST). Best-effort
+ * by contract: callers MUST NOT let a ledger failure break a sign-off (telemetry
+ * is never load-bearing for the gate).
  */
-export declare function recordRunScorecard(state: TaskState): Promise<void>;
+export declare function recordRunScorecard(state: TaskState, terminalGate: GateName): Promise<void>;
 /** Number of COMPLETED runs in the ledger (mode's terminal gate signed — RG for full/hotfix, TEST for bugfix). */
 export declare function countCompletedRuns(): Promise<number>;
 /**

package/dist/mcp/audit_ledger.js CHANGED Viewed

@@ -72,13 +72,14 @@ async function readSideCounts(taskId) {
     };
 }
 /**
- * Build + append a scorecard for a completed run. Called from sign_off when RG
- * is signed. Best-effort by contract: callers MUST NOT let a ledger failure
- * break a sign-off (telemetry is never load-bearing for the gate).
+ * Build + append a scorecard for a completed run. Called from sign_off when the
+ * mode's terminal gate is signed (full/hotfix → RG, bugfix → TEST). Best-effort
+ * by contract: callers MUST NOT let a ledger failure break a sign-off (telemetry
+ * is never load-bearing for the gate).
  */
-export async function recordRunScorecard(state) {
+export async function recordRunScorecard(state, terminalGate) {
     const extras = await readSideCounts(state.task_id);
-    await appendScorecard(buildScorecard(state, extras));
+    await appendScorecard(buildScorecard(state, extras, terminalGate));
 }
 /** Number of COMPLETED runs in the ledger (mode's terminal gate signed — RG for full/hotfix, TEST for bugfix). */
 export async function countCompletedRuns() {

package/dist/mcp/scorecard.d.ts CHANGED Viewed

@@ -1,10 +1,12 @@
 import { z } from "zod";
+import { GateName } from "../shared/index.js";
 import type { TaskState } from "./task_state.js";
 /**
  * Run scorecard — a compact, per-pipeline-run summary derived ENTIRELY from
  * telemetry the MCP state machine already persists (task state + jsonl side
  * logs). One scorecard is appended to the run ledger when a run completes
- * (RG signed). The Auditor's aggregation tool crunches >=3 of these.
+ * (its mode's terminal gate is signed — RG for full/hotfix, TEST for bugfix).
+ * The Auditor's aggregation tool crunches >=3 of these.
  *
  * Deliberately records raw signals, not judgments — interpretation belongs to
  * the aggregator (numbers) and, later, the Auditor agent (findings).
@@ -137,4 +139,4 @@ export type ScorecardExtras = {
  * Build a run scorecard from a task's persisted state plus side-log counts.
  * Pure + synchronous: deterministic and unit-testable on synthetic state.
  */
-export declare function buildScorecard(state: TaskState, extras: ScorecardExtras): RunScorecard;
+export declare function buildScorecard(state: TaskState, extras: ScorecardExtras, terminalGate?: GateName): RunScorecard;

package/dist/mcp/scorecard.js CHANGED Viewed

@@ -4,7 +4,8 @@ import { ClassificationOutcome, GateName, PipelineMode, Signer, } from "../share
  * Run scorecard — a compact, per-pipeline-run summary derived ENTIRELY from
  * telemetry the MCP state machine already persists (task state + jsonl side
  * logs). One scorecard is appended to the run ledger when a run completes
- * (RG signed). The Auditor's aggregation tool crunches >=3 of these.
+ * (its mode's terminal gate is signed — RG for full/hotfix, TEST for bugfix).
+ * The Auditor's aggregation tool crunches >=3 of these.
  *
  * Deliberately records raw signals, not judgments — interpretation belongs to
  * the aggregator (numbers) and, later, the Auditor agent (findings).
@@ -35,10 +36,10 @@ export const RunScorecard = z.object({
     schema_version: z.literal(SCORECARD_SCHEMA_VERSION),
     task_id: z.string().min(1),
     mode: PipelineMode,
-    /** True when an RG sign-off exists (the state machine does not set completed_at). */
+    /** True when the mode's terminal gate has been signed (RG for full/hotfix, TEST for bugfix). */
     completed: z.boolean(),
     created_at: z.string(),
-    /** Timestamp of the RG sign-off, or null if not completed. */
+    /** Timestamp of the terminal-gate sign-off, or null if not completed. */
     completed_at: z.string().nullable(),
     gates: z.array(GateScore),
     dev_rollback_count: z.number().int().nonnegative(),
@@ -52,7 +53,7 @@ export const RunScorecard = z.object({
  * Build a run scorecard from a task's persisted state plus side-log counts.
  * Pure + synchronous: deterministic and unit-testable on synthetic state.
  */
-export function buildScorecard(state, extras) {
+export function buildScorecard(state, extras, terminalGate = "RG") {
     // Group sign-offs by gate: count + last signer (last in chronological array).
     const signoffCount = new Map();
     const lastSigner = new Map();
@@ -73,9 +74,13 @@ export function buildScorecard(state, extras) {
         classification: lastClass.get(gate) ?? null,
         exceptions_count: extras.exceptions_by_gate[gate] ?? 0,
     }));
-    const rgSignoffs = state.signoffs.filter((s) => s.gate === "RG");
-    const completed = rgSignoffs.length > 0;
-    const completed_at = completed ? rgSignoffs[rgSignoffs.length - 1].timestamp : null;
+    // A run is complete when its mode's TERMINAL gate is signed (full/hotfix → RG,
+    // bugfix → TEST). The terminal gate is supplied by the caller (sign_off knows it
+    // from pipeline.yaml); defaults to "RG" so the full-mode path and standalone
+    // callers are unchanged.
+    const terminalSignoffs = state.signoffs.filter((s) => s.gate === terminalGate);
+    const completed = terminalSignoffs.length > 0;
+    const completed_at = completed ? terminalSignoffs[terminalSignoffs.length - 1].timestamp : null;
     const exceptions_count = Object.values(extras.exceptions_by_gate).reduce((a, b) => a + (b ?? 0), 0);
     // Group skill invocations by skill + gate into counts.
     const skillAgg = new Map();

package/dist/mcp/tools/aggregate_run_metrics.d.ts CHANGED Viewed

@@ -6,7 +6,8 @@ import type { AggregateRunMetricsInput, AggregateRunMetricsOutput } from "../../
  * the Auditor agent's job (a later ADR).
  *
  * Attribution: per-AGENT via gate→produced_by[0] from pipeline.yaml (single
- * source); per-WORKFLOW via mode. Only COMPLETED runs (RG signed) are
+ * source); per-WORKFLOW via mode. Only COMPLETED runs (the mode's terminal gate
+ * signed — RG for full/hotfix, TEST for bugfix) are
  * aggregated; `min_runs` (default 3) is the design's small-sample guard — below
  * it `met_threshold=false` and the Auditor should stay silent.
  *
@@ -14,6 +15,6 @@ import type { AggregateRunMetricsInput, AggregateRunMetricsOutput } from "../../
  * per_skill (incl. the gates a skill was pulled at) via get_skill instrumentation
  * (ADR-DEV-121) — the remaining gap is trigger_accuracy (relevant-vs-invoked),
  * which needs a relevance oracle; no explicit human-rejection signal; completed =
- * RG signed (completed_at unset).
+ * the mode's terminal gate signed (RG for full/hotfix, TEST for bugfix).
  */
 export declare function aggregateRunMetrics(input: AggregateRunMetricsInput): Promise<AggregateRunMetricsOutput>;

package/dist/mcp/tools/aggregate_run_metrics.js CHANGED Viewed

@@ -8,7 +8,8 @@ import { resolveActiveDomain } from "../config.js";
  * the Auditor agent's job (a later ADR).
  *
  * Attribution: per-AGENT via gate→produced_by[0] from pipeline.yaml (single
- * source); per-WORKFLOW via mode. Only COMPLETED runs (RG signed) are
+ * source); per-WORKFLOW via mode. Only COMPLETED runs (the mode's terminal gate
+ * signed — RG for full/hotfix, TEST for bugfix) are
  * aggregated; `min_runs` (default 3) is the design's small-sample guard — below
  * it `met_threshold=false` and the Auditor should stay silent.
  *
@@ -16,7 +17,7 @@ import { resolveActiveDomain } from "../config.js";
  * per_skill (incl. the gates a skill was pulled at) via get_skill instrumentation
  * (ADR-DEV-121) — the remaining gap is trigger_accuracy (relevant-vs-invoked),
  * which needs a relevance oracle; no explicit human-rejection signal; completed =
- * RG signed (completed_at unset).
+ * the mode's terminal gate signed (RG for full/hotfix, TEST for bugfix).
  */
 export async function aggregateRunMetrics(input) {
     const minRuns = input.min_runs;
@@ -117,7 +118,7 @@ export async function aggregateRunMetrics(input) {
         gates: [...a.gates].sort(),
     }));
     const notes = [
-        "completed = RG sign-off present (state machine does not populate completed_at).",
+        "completed = the mode's terminal gate is signed (RG for full/hotfix, TEST for bugfix).",
         "Skill invocations ARE captured (get_skill instrumentation, ADR-DEV-121): per_skill carries invocation counts + the gates each skill was pulled at, so skill.gates can be tuned to observed usage. The remaining gap is trigger_accuracy (relevant-vs-invoked), which needs a relevance oracle — not this data layer.",
         "No explicit human gate-rejection signal; rejections manifest as rollbacks/exceptions.",
         "Per-agent attribution uses gate→produced_by[0] from the active domain's pipeline.yaml.",

package/dist/mcp/tools/sign_off.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { getGateConfig, loadPipeline } from "../pipeline.js";
+import { getGateConfig, getNextGate, loadPipeline } from "../pipeline.js";
 import { readTaskState, writeTaskState } from "../task_state.js";
 import { recordRunScorecard, countCompletedRuns, auditNudgeFor } from "../audit_ledger.js";
 import { resolveActiveDomain } from "../config.js";
@@ -57,14 +57,14 @@ export async function signOff(input) {
         evidence: input.evidence,
     });
     await writeTaskState(state);
-    // Auditor telemetry: when the terminal gate is signed, the run is complete —
-    // append a scorecard to the local ledger, then compute the /audit nudge.
-    // Best-effort: a ledger failure (or nudge failure) must NEVER break a
-    // sign-off (telemetry is not load-bearing for the gate).
+    // Auditor telemetry: when the run reaches its mode's TERMINAL gate (full/hotfix
+    // → RG, bugfix → TEST), the run is complete — append a scorecard to the local
+    // ledger, then compute the /audit nudge. Best-effort: a ledger failure (or nudge
+    // failure) must NEVER break a sign-off (telemetry is not load-bearing for the gate).
     let audit_nudge;
-    if (input.gate === "RG") {
+    if (getNextGate(pipeline, state.mode, input.gate) === null) {
         try {
-            await recordRunScorecard(state);
+            await recordRunScorecard(state, input.gate);
             audit_nudge = auditNudgeFor(await countCompletedRuns());
         }
         catch {

package/domains/development/agents/auditor.md CHANGED Viewed

@@ -22,7 +22,7 @@ schema_version: 1
 - НЕ на каждом гейте и НЕ в фоне. Один проход, когда накопилось **≥3 завершённых прогона** (порог настраивается).
 - Ниже порога — молчит. На n=1 выводов не делает (малая выборка).
 - Per-gate телеметрию уже пишет стейт-машина; Аудитор делает один проход по накопленным данным.
-- Запуск прохода: команда `/audit` (вручную) или подсказка `audit_nudge`, которую `sign_off` возвращает после RG.
+- Запуск прохода: команда `/audit` (вручную) или подсказка `audit_nudge`, которую `sign_off` возвращает после завершения прогона (подписан финальный гейт режима — RG для full/hotfix, TEST для bugfix).
 ---
@@ -57,7 +57,7 @@ schema_version: 1
   - **Через человека даже в автономии:** разрушительное по существующему (удаление, крупная переписка, снятие возможностей).
   - **Всегда:** обязательный отчёт после любого автономного действия. Ничего невидимого.
   - **Дедуп при добавлении:** перед авто-добавлением скила — проверка пересечения (`related` + контролируемый словарь); отчёт перечисляет добавления для последующей чистки.
-- Механизм предложение→одобрение и тумблер автономии уже реализованы (`propose_change` → `review_proposal`: матрица рисков + дедуп). Проход запускается командой `/audit` или подсказкой после RG.
+- Механизм предложение→одобрение и тумблер автономии уже реализованы (`propose_change` → `review_proposal`: матрица рисков + дедуп). Проход запускается командой `/audit` или подсказкой после завершения прогона.
 ---

package/domains/development/locales/en/agents/auditor.md CHANGED Viewed

@@ -22,7 +22,7 @@ Close the self-improvement loop: build → run → measure → improve. Once rea
 - NOT at every gate and NOT in the background. One pass, once **≥3 completed runs** have accumulated (threshold configurable).
 - Below the threshold — silent. It draws no conclusions from n=1 (small sample).
 - Per-gate telemetry is already persisted by the state machine; the Auditor makes one pass over the accumulated data.
-- Triggering a pass: the `/audit` command (manual) or the `audit_nudge` that `sign_off` returns after RG.
+- Triggering a pass: the `/audit` command (manual) or the `audit_nudge` that `sign_off` returns when a run completes (its mode's terminal gate is signed — RG for full/hotfix, TEST for bugfix).
 ---
@@ -57,7 +57,7 @@ Close the self-improvement loop: build → run → measure → improve. Once rea
   - **Human-gated even under autonomy:** destructive changes to existing assets (delete, major rewrite, capability removal).
   - **Always:** a mandatory report after any autonomous action. Nothing invisible.
   - **Additive dedup:** before auto-adding a skill — an overlap check (`related` + controlled vocab); the report lists additions for later pruning.
-- The propose→approve mechanism and the autonomy toggle already exist (`propose_change` → `review_proposal`: risk matrix + dedup). A pass is triggered by `/audit` or the post-RG nudge.
+- The propose→approve mechanism and the autonomy toggle already exist (`propose_change` → `review_proposal`: risk matrix + dedup). A pass is triggered by `/audit` or the post-completion nudge.
 ---

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "code-ai-installer",
-  "version": "4.3.1",
+  "version": "4.3.2",
   "description": "Production-ready CLI to install code-ai agents and skills for multiple AI coding assistants. Bundles the code-ai-mcp MCP server for Claude Code.",
   "license": "MIT",
   "author": "Denish1209",