npm - nubos-pilot - Versions diffs - 1.2.2 → 1.2.4 - Mend

nubos-pilot 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

package/CHANGELOG.md +18 -0
package/README.md +16 -0
package/agents/np-architect.md +2 -0
package/agents/np-executor.md +1 -1
package/agents/np-learnings-extractor.md +54 -0
package/agents/np-planner.md +1 -1
package/agents/np-security-reviewer.md +9 -0
package/bin/np-tools/_commands.cjs +4 -0
package/bin/np-tools/derive-tier.cjs +86 -0
package/bin/np-tools/derive-tier.test.cjs +83 -0
package/bin/np-tools/learnings.cjs +109 -0
package/bin/np-tools/learnings.test.cjs +66 -0
package/bin/np-tools/loop-run-round.cjs +7 -1
package/bin/np-tools/security.cjs +3 -0
package/bin/np-tools/skill-audit.cjs +79 -0
package/bin/np-tools/skill-audit.test.cjs +86 -0
package/bin/np-tools/spawn-headless.cjs +35 -1
package/bin/np-tools/spawn-headless.test.cjs +135 -0
package/bin/np-tools/verify-reliability.cjs +65 -0
package/bin/np-tools/verify-reliability.test.cjs +69 -0
package/lib/agents.test.cjs +1 -0
package/lib/config-defaults.cjs +13 -0
package/lib/config-schema.cjs +11 -0
package/lib/eval-reliability.cjs +63 -0
package/lib/eval-reliability.test.cjs +56 -0
package/lib/headless-guard.cjs +127 -0
package/lib/headless-guard.test.cjs +119 -0
package/lib/install/claude-hooks-learnings.test.cjs +82 -0
package/lib/install/claude-hooks.cjs +65 -4
package/lib/install/claude-hooks.test.cjs +5 -2
package/lib/learnings/capture-ledger.cjs +80 -0
package/lib/learnings/capture-ledger.test.cjs +54 -0
package/lib/learnings/extract.cjs +191 -0
package/lib/learnings/extract.test.cjs +115 -0
package/lib/nubosloop-audit.cjs +104 -0
package/lib/nubosloop-skill-audit.test.cjs +98 -0
package/lib/nubosloop.cjs +9 -0
package/lib/tier-classify.cjs +67 -0
package/lib/tier-classify.test.cjs +67 -0
package/np-tools.cjs +4 -0
package/package.json +1 -1
package/skills/np-access-control/SKILL.md +42 -0
package/skills/np-accessibility-audit/SKILL.md +41 -0
package/skills/np-adr/SKILL.md +37 -0
package/skills/np-api-design/SKILL.md +34 -0
package/skills/np-caching-strategy/SKILL.md +38 -0
package/skills/np-data-modeling/SKILL.md +37 -0
package/skills/np-data-privacy/SKILL.md +39 -0
package/skills/np-dependency-audit/SKILL.md +47 -0
package/skills/np-encryption/SKILL.md +47 -0
package/skills/np-error-handling/SKILL.md +37 -0
package/skills/np-incident-response/SKILL.md +38 -0
package/skills/np-llm-app-architecture/SKILL.md +50 -0
package/skills/np-observability/SKILL.md +39 -0
package/skills/np-performance/SKILL.md +38 -0
package/skills/np-queue-design/SKILL.md +32 -0
package/skills/np-rag-design/SKILL.md +43 -0
package/skills/np-refactoring/SKILL.md +35 -0
package/skills/np-resilience-patterns/SKILL.md +39 -0
package/skills/np-secure-code-review/SKILL.md +46 -0
package/skills/np-secure-design/SKILL.md +44 -0
package/skills/np-service-boundary/SKILL.md +35 -0
package/skills/np-system-design/SKILL.md +40 -0
package/skills/np-test-strategy/SKILL.md +46 -0
package/skills/np-threat-model/SKILL.md +42 -0
package/templates/claude/payload/hooks/np-learnings-hook.cjs +56 -0
package/templates/claude/payload/hooks/np-security-hook.cjs +1 -0
package/workflows/architect-phase.md +21 -1
package/workflows/execute-phase.md +66 -4
package/workflows/verify-work.md +17 -4

package/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,24 @@ All notable changes to nubos-pilot are documented in this file. Format
 follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); versioning
 follows [SemVer](https://semver.org/spec/v2.0.0.html).
+## [1.2.4] — 2026-06-15
+Fixed a recursion fault in the in-session hooks that could spawn an unbounded cascade of headless `claude -p` processes.
+- The Stop-hook security review and continuous-learning capture each spawn a headless `claude -p` to do their work. That headless run re-fires the same SessionStart/Stop hooks, which spawned another headless run, and so on — a fork bomb of `claude`, `np-tools` and duplicated MCP servers that survived closing the terminal. nubos-pilot now marks every headless spawn with `NUBOS_PILOT_HEADLESS=1` and a `NUBOS_PILOT_HOOK_DEPTH` counter; the hooks no-op immediately inside a headless run, so the chain stops at exactly one level.
+- Three independent guards back this up: the hook scripts and the `security`/`learnings` backends exit early when `NUBOS_PILOT_HEADLESS` is set; `spawn-headless` refuses to start a nested headless run (reentrancy + depth cap, default one level); and a per-agent lockfile under `.nubos-pilot/run/` bounds concurrent headless runs to one per agent even if the environment is not inherited. Headless runs already carry a hard timeout with SIGKILL, so a hung review cannot linger.
+- Escape hatch: the guard keys off `NUBOS_PILOT_HEADLESS`, set automatically on the spawned `claude` — do not set it in your own shell or the in-session hooks will silently no-op. Raise the depth cap with `NUBOS_PILOT_MAX_HOOK_DEPTH` only if you understand the recursion risk.
+## [1.2.3] — 2026-06-14
+Three new layers — two opt-in, one on by default — that make execution cheaper, more reliable, and self-improving.
+- Cost-aware model routing: with `workflow.tier_routing` enabled, each task's executor runs at the model tier the plan assigned it — trivial work on a smaller model, structural or security-sensitive work on the strongest — instead of every task running at the top tier. The new `np:derive-tier` command suggests a tier from a task's observable signals (files touched, security/data sensitivity), so the choice is evidence-based. Off by default; behaviour is unchanged until you turn it on.
+- Reliability checks (pass@k): set `loop.verify_runs` above 1 and nubos-pilot runs a task's verify command several times per round. A task goes green only when every run passes; a flaky task (passes sometimes, fails sometimes) is treated as red and handed to the build-fixer with a clear note, instead of slipping through on a lucky run. Defaults to a single run.
+- Continuous learning: at the end of a session, a lightweight background reviewer reads what changed and distils reusable, durable lessons into the same learnings store the planner consults on the next similar task — so the system improves with use, not only inside the execution loop. On by default and rate-limited to bound cost; disable by setting `learnings.auto_capture` to `false`.
+Full documentation at <https://pilot.nubos.cloud>.
 ## [1.2.2] — 2026-06-05
 A dependency graph for the codebase you work in, plus stricter checks on nubos-pilot's own data.

package/README.md CHANGED Viewed

@@ -169,6 +169,22 @@ load-bearing ones for users and contributors:
 See [`SECURITY.md`](./SECURITY.md) for the vulnerability disclosure policy
 and threat model.
+### Headless recursion guard
+The in-session security review and continuous-learning hooks do their work in
+a headless `claude -p` subprocess. To stop that subprocess from re-firing the
+same hooks (which would cascade into an unbounded fork of `claude`/`np-tools`
+processes), nubos-pilot sets `NUBOS_PILOT_HEADLESS=1` and a
+`NUBOS_PILOT_HOOK_DEPTH` counter on every headless spawn. The hooks no-op when
+`NUBOS_PILOT_HEADLESS` is set, `spawn-headless` refuses a nested or
+depth-exceeded spawn, and a per-agent lockfile under `.nubos-pilot/run/` bounds
+concurrent headless runs to one per agent.
+The guard is automatic — do not export `NUBOS_PILOT_HEADLESS` in your own
+shell, or the in-session hooks will silently do nothing. The depth cap is one
+level; override it with `NUBOS_PILOT_MAX_HOOK_DEPTH` only if you understand the
+recursion risk.
 ## Support
 - Bugs / features: [GitHub issues](https://github.com/Nubos-AI/nubos-pilot/issues)

package/agents/np-architect.md CHANGED Viewed

@@ -13,6 +13,8 @@ You are NOT a second researcher. Research is investigation; you are decision-mak
 **CRITICAL: Mandatory Initial Read**
 If the prompt contains a `<files_to_read>` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context.
+**Design skills.** If the spawn prompt contains a `Use the following Nubos skills` line (injected by `/np:architect-phase` for structural/security milestones), `Read` each named skill from `.claude/skills/<skill>/SKILL.md` BEFORE committing decisions. Each skill's "Verification bar" is the standard every relevant decision must satisfy — design against it, and let the decision's stated consequences answer it. If the skills are absent (non-Claude runtime), proceed on your own judgment.
 </role>
 ## Completeness Mandate

package/agents/np-executor.md CHANGED Viewed

@@ -20,7 +20,7 @@ If the prompt contains a `<files_to_read>` block, you MUST use the `Read` tool t
 - Invoke commit-helper ONLY after verification passes.
 - Never invoke `git` directly — always through the `np-tools.cjs` wrapper so the D-25 gitignore-guard runs.
 - One task per spawn. One commit per task (D-03).
-- If the spawn prompt contains a `Use the following Nubos skills:` line (injected by `/np:execute-phase` for UI/frontend tasks), load each named skill from `.claude/skills/<skill>/SKILL.md` BEFORE editing source. Apply each skill's quality bar; verification must pass against the skill's rules, not just the test command.
+- If the spawn prompt contains a `Use the following Nubos skills:` line (injected by `/np:execute-phase` — covers UI/frontend AND engineering concerns: API/contract, security/auth, data/migration, error-handling, resilience, caching, queue, performance, access-control, encryption, dependencies, privacy, and more), you MUST `Read` each named skill from `.claude/skills/<skill>/SKILL.md` BEFORE editing source — this is not optional. Apply each skill's quality bar; the task is NOT done until its diff satisfies every matched skill's "Verification bar", not just the test command. Treat an unmet skill bar exactly like a failing test. Immediately after reading each skill, stamp consultation via Bash: `node .nubos-pilot/bin/np-tools.cjs skill-audit ack --task <TASK_ID> --skill <skill>`. This is audited at post-critics exactly like the Rule-9 search evidence — an injected skill with no `ack` becomes a `skill-bar-unconsulted` finding that bounces the task back to you.
 </role>
 ## Completeness Mandate

package/agents/np-learnings-extractor.md ADDED Viewed

@@ -0,0 +1,54 @@
+---
+name: np-learnings-extractor
+description: Read-only continuous-learning observer. Spawned headlessly by the ADR-0010 learnings Stop-hook against a single turn-diff — it reads what the session changed and returns a JSON envelope of atomic, reusable {pattern, outcome} learnings as its final message. Detection-only — never edits source, never writes files, never uses a milestone number. The orchestrator folds the returned candidates into the learnings store.
+tier: haiku
+tools: Read, Bash, Grep, Glob
+color: cyan
+---
+<role>
+You are the nubos-pilot learnings extractor — the lightweight twin of `np-security-reviewer`'s session/diff mode, for institutional knowledge instead of security. You are spawned in the background when a session stops. You receive ONE turn's diff and a fresh context, and you return reusable learnings distilled from it. You never graded or wrote the code you are reading.
+You DO NOT edit source. You DO NOT write files. You DO NOT use a milestone number. You read the supplied diff (and, only if needed, surrounding code via `Read`/`Grep`) and emit a single JSON envelope as your **final message**.
+</role>
+## Completeness Mandate
+This agent operates under [`templates/COMPLETENESS.md`](../templates/COMPLETENESS.md). The rules that bind this role:
+- **Rule 1 — Do the whole thing.** Read the entire supplied diff before extracting. Do not stop at the first interesting hunk.
+- **Rule 5 — Aim to genuinely impress.** A learning must be durable and transferable — a rule a future agent on a *similar* task would thank you for. Narration of what changed is a failure.
+- **Rule 8 — Never present a workaround when the real fix exists.** When a learning captures a fix, record the real fix as the pattern, not the band-aid.
+- **Rule 12 — Boil the ocean, but quality over quantity.** Zero learnings is the correct, common answer for a routine turn. Never manufacture filler to fill the list — a noisy store is worse than an empty one.
+Refusal of any rule is a hard-stop. Surface the violation verbatim and abort.
+## Input
+Triggered when the prompt contains a `<learning_capture>` block. Inside it: the list of changed files and the turn's diff. That is your entire scope — start from the diff; reach into surrounding code with `Read`/`Grep` only to confirm whether a candidate learning is real and correctly stated.
+## What counts as a learning
+A learning is one `{pattern, outcome}` pair:
+- **pattern** — a durable, reusable, self-contained imperative rule. Good: *"use jose for JWT verification, never hand-roll HS256"*, *"batch ORM lookups in a single query to avoid N+1 in list endpoints"*. Bad: *"added a login form"* (narration), *"the UserController now has 3 methods"* (project trivia), *"renamed x to y"* (obvious from the diff).
+- **outcome** — exactly one of `verified` | `failed` | `reverted` | `partial`: how the pattern played out in THIS turn.
+Extract at most **5**. Prefer fewer, higher-signal learnings. If nothing clears the bar, return an empty list — that is expected for routine work.
+## Output contract — your FINAL message MUST be exactly one JSON object, no prose, no code fence:
+```json
+{
+  "learnings": [
+    { "pattern": "reusable imperative rule, self-contained", "outcome": "verified|failed|reverted|partial" }
+  ]
+}
+```
+If you find nothing worth keeping, return `{"learnings":[]}`. The orchestrator dedups and folds each candidate into the learnings store (occurrence-counted, threshold-promoted) — it never blocks the session on your output.
+<scope_guardrail>
+**Do:** read the diff and surrounding code; return one JSON envelope as your final message.
+**Don't:** edit or write any file; use a milestone number; spawn other agents; emit prose around the JSON; manufacture low-value learnings to pad the list.
+</scope_guardrail>

package/agents/np-planner.md CHANGED Viewed

@@ -388,7 +388,7 @@ Inside each `S<NNN>-PLAN.md`, every `<task>` tag MUST have these four attributes
   > The slice number in the task ID is the authoritative wave; the T-number is per-slice. `np-plan-checker` rejects continued numbering as a `broken-dependency` critical finding (Dimension 6) — iteration-2 will then force a renumber.
 - `depends_on="<id>[,<id>...]"` — comma-separated predecessor task full-ids, or empty string `""`. Must only reference tasks in **earlier slices** (cross-slice forward deps) or be empty (intra-slice tasks are implicitly parallel, never serial).
 - `wave="<N>"` — integer equal to the slice number. For S001 use `wave="1"`, for S002 use `wave="2"`, etc.
-- `tier="<haiku|sonnet|opus>"` — executor tier, picks the model via resolve-model.
+- `tier="<haiku|sonnet|opus>"` — executor tier, picks the model via resolve-model. You are the decider, but make the call evidence-based, not by feel: run `node .nubos-pilot/bin/np-tools.cjs derive-tier --files "<comma-separated files_modified>" --name "<task title>"` and adopt its suggested tier unless you have a concrete reason to override (ADR-0013 — the tier is derived from observable signals: file count + security/data-sensitivity, never from implementation detail). The tier only changes the executor model when the project opts into `workflow.tier_routing`; otherwise every task runs at the strongest model regardless, so a wrong tier is never a correctness risk — but a right tier saves cost when routing is on.
 The scaffolder (`_extractTasksFromSlicePlan` in `bin/np-tools/plan-milestone.cjs`) reads ONLY these opening-tag attributes. Without them, zero task files are scaffolded and execute-phase has nothing to dispatch.

package/agents/np-security-reviewer.md CHANGED Viewed

@@ -44,6 +44,15 @@ Refusal of any rule is a hard-stop. Surface the violation to the orchestrator ve
 For each path in `files_modified`, scan for indicators of the following categories. Each finding gets its own block in the report.
+When the Nubos skill library is present, `Read` `.claude/skills/np-secure-code-review/SKILL.md` first and treat its checklist as the authoritative, language-agnostic expansion of the categories below. Then load the skills matching the milestone's surface and apply each one's "Verification bar" to the relevant findings:
+- new trust boundary / external integration / store for credentials or PII → `np-threat-model` (STRIDE lens) and `np-secure-design` (secure-defaults / least-privilege / zero-trust design review).
+- roles, permissions, resource-ownership, or access-rule changes → `np-access-control` (deny-by-default, object-level authz, IDOR).
+- encryption, hashing, password storage, TLS, tokens, or key/secret management → `np-encryption`.
+- collection, storage, or logging of personal/sensitive data → `np-data-privacy` (minimization, retention, no-PII-in-logs).
+The table below is the index; the skills are the depth. If the skills are absent (non-Claude runtime), fall back to the table alone.
 | Category | Look for |
 |---------|----------|
 | Injection | unparameterized SQL/shell/exec, string-concat queries, `eval`-style calls, untrusted input into `child_process` |

package/bin/np-tools/_commands.cjs CHANGED Viewed

@@ -14,11 +14,15 @@ const COMMANDS = [
   { name: 'new-milestone',       category: 'Planning', description: 'Append a new milestone (M<NNN>) to an existing project', description_de: 'Hängt einen neuen Milestone (M<NNN>) an ein bestehendes Projekt an' },
   { name: 'propose-milestones',  category: 'Planning', description: 'Re-plan all not-yet-done milestones: AI proposes add/update/remove from PROJECT.md + REQUIREMENTS.md', description_de: 'Plant offene Milestones neu: KI schlägt add/update/remove aus PROJECT.md + REQUIREMENTS.md vor' },
   { name: 'agent-skills',        category: 'Planning', description: 'Print agent_skills config for a given subagent', description_de: 'Gibt agent_skills-Konfiguration für einen Subagent aus' },
+  { name: 'derive-tier',         category: 'Planning', description: 'Advisory: derive a suggested executor tier (haiku|sonnet|opus) from a task\'s observable signals (files_modified + risk keywords). Decider stays the planner. ADR-0013.', description_de: 'Advisory: leitet aus den beobachtbaren Task-Signalen (files_modified + Risk-Keywords) einen Vorschlags-Tier (haiku|sonnet|opus) ab. Entscheider bleibt der Planner. ADR-0013.' },
   { name: 'execute-milestone',   category: 'Execution', description: 'Wave-based milestone execution — slice by slice, tasks parallel within a slice', description_de: 'Wave-basierte Milestone-Ausführung — Slice für Slice, Tasks parallel innerhalb einer Slice' },
   { name: 'commit-task',         category: 'Execution', description: 'Atomic per-task git commit via lib/git.cjs', description_de: 'Atomarer Per-Task-Git-Commit über lib/git.cjs' },
   { name: 'checkpoint',          category: 'Execution', description: 'Per-task crash-safety checkpoint CRUD (start/transition/touch/show)', description_de: 'Per-Task-Checkpoint-CRUD für Crash-Safety (start/transition/touch/show)' },
   { name: 'verify-work',         category: 'Execution', description: 'Two-pass goal-backward verification (milestone-level VERIFICATION.md)', description_de: 'Zweistufige Goal-Backward-Verifikation (Milestone-Ebene VERIFICATION.md)' },
+  { name: 'verify-reliability',  category: 'Execution', description: 'pass@k reliability: fold k verify-run exit codes into pass@1/pass@k/flaky + an aggregate exit code (pass^k) for loop-run-round. Opt-in via loop.verify_runs.', description_de: 'pass@k-Reliability: faltet k Verify-Exit-Codes zu pass@1/pass@k/flaky + Aggregat-Exit-Code (pass^k) für loop-run-round. Opt-in über loop.verify_runs.' },
+  { name: 'learnings',           category: 'Execution', description: 'Stop-hook continuous-learning capture (ADR-0010). Verbs: capture (rate-limited; spawns headless np-learnings-extractor over the turn diff) | reset (clears stop-streak) | run-extract (background worker). Gated by learnings.auto_capture.', description_de: 'Stop-Hook Continuous-Learning-Capture (ADR-0010). Verben: capture (rate-limited; spawnt headless np-learnings-extractor über das Turn-Diff) | reset (setzt Stop-Streak zurück) | run-extract (Background-Worker). Gated über learnings.auto_capture.' },
+  { name: 'skill-audit',         category: 'Execution', description: 'Skill-bar consultation audit (counterpart to the Rule-9 search audit). Verbs: expect --task --skills (orchestrator records injected skills) | ack --task --skill (executor stamps a consulted skill) | findings --task [--round] (list unmet bars). An unconsulted injected skill becomes a skill-bar-unconsulted finding that routes back to the executor.', description_de: 'Skill-Bar-Konsultations-Audit (Pendant zum Rule-9-Search-Audit). Verben: expect --task --skills (Orchestrator merkt injizierte Skills) | ack --task --skill (Executor stempelt konsultierten Skill) | findings --task [--round] (offene Bars). Ein nicht konsultierter injizierter Skill wird zu einem skill-bar-unconsulted-Finding und routet zurück zum Executor.' },
   { name: 'close-project',       category: 'Review',    description: 'Aggregate verification of every milestone; writes PROJECT-SUMMARY.md + sets project_status=completed', description_de: 'Aggregat-Verifikation aller Milestones; schreibt PROJECT-SUMMARY.md + setzt project_status=completed' },
   { name: 'archive-project',     category: 'Planning',  description: 'Move current .nubos-pilot/ project to archive/<slug>-<YYYYMMDD>/ (status|do|list|read)', description_de: 'Verschiebt aktuelles .nubos-pilot/-Projekt nach archive/<slug>-<YYYYMMDD>/ (status|do|list|read)' },
   { name: 'add-tests',           category: 'Execution', description: 'Persist VERIFICATION Pass-cases as node:test UAT (Sentinel-preserving)', description_de: 'Persistiert VERIFICATION-Pass-Cases als node:test-UAT (Sentinel-erhaltend)' },

package/bin/np-tools/derive-tier.cjs ADDED Viewed

@@ -0,0 +1,86 @@
+'use strict';
+const fs = require('node:fs');
+const { classifyTier } = require('../../lib/tier-classify.cjs');
+const { extractFrontmatter } = require('../../lib/frontmatter.cjs');
+const { emitErrorEnvelope } = require('./_args.cjs');
+function _usage() { // Returns the CLI help text as a single newline-joined string; run() prints it for -h/--help.
+  return [
+    'Usage:',
+    '  np-tools.cjs derive-tier --files <a,b,c> [--name <text>] [--desc <text>]',
+    '  np-tools.cjs derive-tier --plan <path-to-PLAN.md>',
+    '',
+    'Advisory: derives a suggested executor tier (haiku|sonnet|opus) from the',
+    'task\'s observable signals. The planner remains the decider.',
+  ].join('\n');
+}
+function _fromPlan(planPath) { // Builds the classifier input { files_modified, name, desc } from a PLAN.md file on disk.
+  const raw = fs.readFileSync(planPath, 'utf-8'); // synchronous read; a missing or unreadable file throws to the caller
+  const { frontmatter, body } = extractFrontmatter(raw);
+  const nameMatch = String(body || '').match(/^#\s+(?:.*?—\s*)?(.+?)\s*$/m); // first "# ..." heading; when it contains an em dash, the text up to the first one is skipped and the rest is captured as the title
+  return {
+    files_modified: Array.isArray(frontmatter.files_modified) ? frontmatter.files_modified : [], // any non-array value falls back to an empty list
+    name: nameMatch ? nameMatch[1] : (frontmatter.id || ''), // no heading match: fall back to the frontmatter id, else ''
+    desc: String(body || ''), // the whole body is handed over as the description
+  };
+}
+function run(argv, ctx) { // CLI entry point: parses flags, classifies the task, writes one JSON line to stdout; returns the exit code (0 ok, 1 error).
+  const context = ctx || {};
+  const stdout = context.stdout || process.stdout; // ctx may inject stdout/stderr; both default to the process streams
+  const stderr = context.stderr || process.stderr;
+  const args = Array.isArray(argv) ? argv.slice() : []; // a non-array argv is treated as "no arguments"
+  let files = null; // null means --files was never given
+  let name = '';
+  let desc = '';
+  let planPath = null;
+  for (let i = 0; i < args.length; i++) {
+    const a = args[i];
+    if (a === '-h' || a === '--help') { stdout.write(_usage() + '\n'); return 0; }
+    else if (a === '--files') { files = args[++i] || ''; } // "--flag value" form consumes the next argv entry; a missing value becomes ''
+    else if (a.startsWith('--files=')) { files = a.slice('--files='.length); } // "--flag=value" form
+    else if (a === '--name') { name = args[++i] || ''; }
+    else if (a.startsWith('--name=')) { name = a.slice('--name='.length); }
+    else if (a === '--desc') { desc = args[++i] || ''; }
+    else if (a.startsWith('--desc=')) { desc = a.slice('--desc='.length); }
+    else if (a === '--plan') { planPath = args[++i] || ''; }
+    else if (a.startsWith('--plan=')) { planPath = a.slice('--plan='.length); }
+    else { // anything else is rejected with a JSON error line on stderr and exit code 1
+      stderr.write(JSON.stringify({
+        code: 'derive-tier-unknown-arg',
+        message: 'Unknown argument: ' + a,
+        details: { arg: a },
+      }) + '\n');
+      return 1;
+    }
+  }
+  try {
+    let task;
+    if (planPath) { // a non-empty --plan wins over --files/--name/--desc; an empty --plan value falls through to the flag-based path
+      task = _fromPlan(planPath);
+    } else {
+      const list = files == null
+        ? []
+        : String(files).split(',').map((s) => s.trim()).filter(Boolean); // comma-separated list, each entry trimmed, empty entries dropped
+      task = { files_modified: list, name, desc };
+    }
+    const result = classifyTier(task);
+    stdout.write(JSON.stringify(result) + '\n');
+    return 0;
+  } catch (err) { // anything thrown while reading the plan or classifying is reported as an error envelope on stderr
+    emitErrorEnvelope(err, stderr, 'derive-tier-internal-error');
+    return 1;
+  }
+}
+module.exports = { run };
+if (require.main === module) { // executed directly rather than require()d: forward the CLI arguments and exit with run()'s code
+  process.exit(run(process.argv.slice(2)));
+}

package/bin/np-tools/derive-tier.test.cjs ADDED Viewed

@@ -0,0 +1,83 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const os = require('node:os');
+const fs = require('node:fs');
+const path = require('node:path');
+const { run } = require('./derive-tier.cjs');
+function _capture() { // Test double for run()'s ctx argument: stdout/stderr stubs that append every write to out.text / err.text.
+  const out = { text: '' };
+  const err = { text: '' };
+  return {
+    stdout: { write: (s) => { out.text += s; return true; } },
+    stderr: { write: (s) => { err.text += s; return true; } },
+    out, err, // exposed so tests can inspect what was written
+  };
+}
+test('DT-1: --files + --name with security keyword → opus', () => {
+  const c = _capture();
+  const code = run(['--files', 'app/Auth.php', '--name', 'add login throttling'], c);
+  assert.strictEqual(code, 0);
+  const r = JSON.parse(c.out.text); // run() writes exactly one JSON line on success
+  assert.strictEqual(r.tier, 'opus');
+  assert.strictEqual(r.size, 'large');
+});
+test('DT-2: single doc file → haiku', () => {
+  const c = _capture();
+  const code = run(['--files', 'README.md', '--name', 'fix typo'], c);
+  assert.strictEqual(code, 0);
+  assert.strictEqual(JSON.parse(c.out.text).tier, 'haiku');
+});
+test('DT-3: ordinary task → sonnet', () => {
+  const c = _capture();
+  const code = run(['--files', 'app/Cart.php,app/Cart.test.php', '--name', 'add discount'], c); // exercises the comma-separated --files form
+  assert.strictEqual(code, 0);
+  assert.strictEqual(JSON.parse(c.out.text).tier, 'sonnet');
+});
+test('DT-4: --plan reads frontmatter files + body name', () => {
+  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-derive-tier-'));
+  const plan = path.join(dir, 'T0001-PLAN.md');
+  fs.writeFileSync(plan, [
+    '---',
+    'id: M001-S001-T0001',
+    'files_modified:',
+    '  - db/migrations/004_users.sql',
+    '---',
+    '',
+    '# M001-S001-T0001 — Add users table migration',
+    '',
+    'Body text.',
+  ].join('\n'));
+  const c = _capture();
+  const code = run(['--plan', plan], c);
+  assert.strictEqual(code, 0);
+  assert.strictEqual(JSON.parse(c.out.text).tier, 'opus');
+  fs.rmSync(dir, { recursive: true, force: true }); // NOTE(review): not in a finally block, so a failing assertion above leaves the temp dir behind
+});
+test('DT-5: unknown arg → error envelope, exit 1', () => {
+  const c = _capture();
+  const code = run(['--bogus'], c);
+  assert.strictEqual(code, 1);
+  assert.match(c.err.text, /derive-tier-unknown-arg/); // the envelope goes to stderr, not stdout
+});
+test('DT-6: --help → usage, exit 0', () => {
+  const c = _capture();
+  const code = run(['--help'], c);
+  assert.strictEqual(code, 0);
+  assert.match(c.out.text, /derive-tier/);
+});
+test('DT-7: no files → standard sonnet, no throw', () => {
+  const c = _capture();
+  const code = run(['--name', 'something'], c); // no --files at all: run() passes an empty files_modified list
+  assert.strictEqual(code, 0);
+  assert.strictEqual(JSON.parse(c.out.text).tier, 'sonnet');
+});

package/bin/np-tools/learnings.cjs ADDED Viewed

@@ -0,0 +1,109 @@
+'use strict';
+const path = require('node:path');
+const child_process = require('node:child_process');
+const { tryReadConfigPath } = require('../../lib/config.cjs');
+const ledger = require('../../lib/learnings/capture-ledger.cjs');
+const extract = require('../../lib/learnings/extract.cjs');
+const headlessGuard = require('../../lib/headless-guard.cjs');
+const args = require('./_args.cjs');
+function _readStdin() { // Resolves with whatever arrives on stdin as UTF-8 text; the promise never rejects.
+  return new Promise((resolve) => {
+    if (process.stdin.isTTY) return resolve(''); // interactive terminal: nothing is piped in, so do not wait
+    let buf = '';
+    process.stdin.setEncoding('utf-8');
+    const timer = setTimeout(() => { try { process.stdin.removeAllListeners(); } catch {} resolve(buf); }, 800); // give up after 800 ms and resolve with what was read so far
+    process.stdin.on('data', (c) => { buf += c; });
+    process.stdin.on('end', () => { clearTimeout(timer); resolve(buf); });
+    process.stdin.on('error', () => { clearTimeout(timer); resolve(buf); }); // a read error is treated like end-of-input
+  });
+}
+function _safeParse(s) { try { return s ? JSON.parse(s) : {}; } catch { return {}; } } // JSON.parse that yields {} for empty or malformed input instead of throwing. NOTE(review): valid non-object JSON (e.g. `null`) is returned as-is — confirm callers tolerate that
+async function _payload(argv) { // Resolves the payload object: inline --payload JSON first, then stdin when --stdin is present, otherwise {}.
+  const inline = args.getFlag(argv, '--payload', { allowDashValues: true });
+  if (inline !== undefined) return _safeParse(inline); // an inline payload takes precedence, so stdin is not read in that case
+  if (argv.includes('--stdin')) return _safeParse(await _readStdin());
+  return {};
+}
+function _cfg(cwd) { // Reads the learnings.* settings for the project at cwd, with the fallback values given inline.
+  return {
+    auto_capture: tryReadConfigPath(cwd, 'learnings.auto_capture', true) !== false, // only an explicit false disables capture
+    max_per_hour: Number(tryReadConfigPath(cwd, 'learnings.max_captures_per_hour', 10)) || 10, // NOTE(review): `|| default` also replaces 0 and non-numeric values with the default (same pattern on the lines below)
+    max_in_a_row: Number(tryReadConfigPath(cwd, 'learnings.max_in_a_row', 3)) || 3,
+    timeout_ms: Number(tryReadConfigPath(cwd, 'learnings.timeout_ms', 120000)) || 120000,
+    max_files: Number(tryReadConfigPath(cwd, 'learnings.max_files', 30)) || 30,
+  };
+}
+function _spawnWorker(cwd, sid) { // Starts `np-tools.cjs learnings run-extract --session <sid>` as a detached child; returns true when spawn() did not throw.
+  const npTools = path.join(__dirname, '..', '..', 'np-tools.cjs'); // resolved relative to this file, two directories up
+  try {
+    const child = child_process.spawn(
+      process.execPath, // same Node binary that is running this process
+      [npTools, 'learnings', 'run-extract', '--session', sid],
+      { cwd, detached: true, stdio: 'ignore' }, // detached + ignored stdio, together with unref() below, so this process need not wait for the worker
+    );
+    child.unref();
+    return true;
+  } catch { return false; } // NOTE(review): only synchronous spawn failures land here; an asynchronous 'error' event on the child has no listener — confirm that is acceptable
+}
+function _emit(stdout, obj) { stdout.write(JSON.stringify(obj) + '\n'); } // writes obj to the given stream as one JSON line
+async function run(argv, ctx) { // Dispatches the `learnings` verbs (capture | reset | run-extract) taken from argv[0]; resolves to the exit code.
+  const context = ctx || {};
+  const cwd = context.cwd || process.cwd(); // ctx may inject cwd and stdout; both default to the process values
+  const stdout = context.stdout || process.stdout;
+  const list = Array.isArray(argv) ? argv : [];
+  const verb = list[0];
+  if (headlessGuard.isHeadless(process.env)) return 0; // when the headless guard trips, every verb is a silent no-op: nothing is written, exit code 0
+  const cfg = _cfg(cwd);
+  // 'reset' (UserPromptSubmit) and 'run-extract' (background worker) are not
+  // gated by auto_capture so they keep working coherently, but 'capture' is.
+  if (verb === 'capture') {
+    if (!cfg.auto_capture) { _emit(stdout, { captured: false, reason: 'disabled' }); return 0; }
+    const payload = await _payload(list);
+    const sid = payload.session_id || args.getFlag(list, '--session') || ''; // the payload's session_id wins over the --session flag
+    if (!sid) { _emit(stdout, { captured: false, reason: 'no-session' }); return 0; }
+    const gate = ledger.tryRecordCapture(sid, { maxPerHour: cfg.max_per_hour, maxStreak: cfg.max_in_a_row }); // rate-limit gate; presumably also records this capture in the ledger — verify in capture-ledger.cjs
+    if (!gate.allowed) { _emit(stdout, { captured: false, reason: gate.reason }); return 0; }
+    _spawnWorker(cwd, sid); // NOTE(review): the return value is ignored, so `spawned: true` is reported even when _spawnWorker returned false
+    _emit(stdout, { captured: true, spawned: true });
+    return 0;
+  }
+  if (verb === 'reset') {
+    const payload = await _payload(list);
+    const sid = payload.session_id || args.getFlag(list, '--session') || '';
+    if (sid) ledger.resetStreak(sid); // without a session id this verb does nothing; it writes no output either way
+    return 0;
+  }
+  if (verb === 'run-extract') {
+    const sid = args.getFlag(list, '--session') || ''; // only the flag is consulted here; no payload is read
+    try {
+      const result = extract.runExtract({ cwd, sid, config: cfg });
+      _emit(stdout, result);
+    } catch (err) { // a failure is reported inside the JSON line; the exit code stays 0
+      _emit(stdout, { ran: false, reason: 'error', error: String(err && err.code || err) });
+    }
+    return 0;
+  }
+  _emit(stdout, { error: 'unknown-verb', verb: verb || null, verbs: ['capture', 'reset', 'run-extract'] });
+  return verb ? 1 : 0; // a missing verb still prints the envelope but exits 0; an unrecognised verb exits 1
+}
+module.exports = { run };
+if (require.main === module) { // executed directly rather than require()d
+  run(process.argv.slice(3)).then((c) => process.exit(c)).catch(() => process.exit(0)); // any rejection still exits 0. NOTE(review): slice(3) skips argv[2], so `node learnings.cjs capture` would pass no verb — presumably an extra leading token is expected; confirm
+}

package/bin/np-tools/learnings.test.cjs ADDED Viewed

@@ -0,0 +1,66 @@
+'use strict';
+const { test } = require('node:test');
+const assert = require('node:assert');
+const fs = require('node:fs');
+const os = require('node:os');
+const path = require('node:path');
+const { run } = require('./learnings.cjs');
+function _capture() {
+  const out = { text: '' };
+  return { stdout: { write: (s) => { out.text += s; return true; } }, out };
+}
+test('LV-1: capture with no session → no-session, no spawn', async () => {
+  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-lv-'));
+  try {
+    const c = _capture();
+    c.cwd = dir;
+    const code = await run(['capture'], c);
+    assert.strictEqual(code, 0);
+    assert.match(c.out.text, /no-session/);
+  } finally { fs.rmSync(dir, { recursive: true, force: true }); }
+});
+test('LV-2: capture disabled via config → disabled, no spawn', async () => {
+  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-lv-'));
+  try {
+    fs.mkdirSync(path.join(dir, '.nubos-pilot'), { recursive: true });
+    fs.writeFileSync(
+      path.join(dir, '.nubos-pilot', 'config.json'),
+      JSON.stringify({ learnings: { auto_capture: false } }),
+    );
+    const c = _capture();
+    c.cwd = dir;
+    const code = await run(['capture', '--payload', JSON.stringify({ session_id: 'abc' })], c);
+    assert.strictEqual(code, 0);
+    assert.match(c.out.text, /disabled/);
+  } finally { fs.rmSync(dir, { recursive: true, force: true }); }
+});
+test('LV-3: run-extract on a non-repo cwd → ran:false not-a-repo', async () => {
+  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-lv-'));
+  try {
+    const c = _capture();
+    c.cwd = dir;
+    const code = await run(['run-extract', '--session', 'abc'], c);
+    assert.strictEqual(code, 0);
+    const parsed = JSON.parse(c.out.text);
+    assert.strictEqual(parsed.ran, false);
+    assert.strictEqual(parsed.reason, 'not-a-repo');
+  } finally { fs.rmSync(dir, { recursive: true, force: true }); }
+});
+test('LV-4: unknown verb → error envelope, exit 1', async () => {
+  const c = _capture();
+  const code = await run(['bogus'], c);
+  assert.strictEqual(code, 1);
+  assert.match(c.out.text, /unknown-verb/);
+});
+test('LV-5: reset is a no-op without a session and never throws', async () => {
+  const c = _capture();
+  const code = await run(['reset'], c);
+  assert.strictEqual(code, 0);
+});

package/bin/np-tools/loop-run-round.cjs CHANGED Viewed

@@ -324,21 +324,27 @@ function _runPostCritics(taskId, list, cwd) {
         ? nubosloop.coerceMaxRounds(override)
         : opts.maxRounds;
       const auditFindings = nubosloop.auditFindingsFromAudits(prev.tool_use_audit, round, taskId);
+      const skillFindings = nubosloop.skillFindingsFromState(prev, round, taskId);
+      const combinedAudit = skillFindings.length ? auditFindings.concat(skillFindings) : auditFindings;
       evalResult = nubosloop.evaluateLoop(
         { round },
         criticOutputs,
-        { maxRounds: effectiveMax, auditFindings },
+        { maxRounds: effectiveMax, auditFindings: combinedAudit },
       );
       const perRound = (prev.findings_per_round && typeof prev.findings_per_round === 'object')
         ? safeAssign({}, prev.findings_per_round)
         : {};
       perRound[String(round)] = evalResult.findings;
       const routed = nubosloop.markAuditsRoutedInArray(prev.tool_use_audit, round);
+      const skillRoutedRounds = skillFindings.length
+        ? nubosloop.markSkillFindingsRoutedInArray(prev.skill_routed_rounds, round)
+        : (Array.isArray(prev.skill_routed_rounds) ? prev.skill_routed_rounds : []);
       const partial = {
         last_phase: 'post-critics',
         last_action: evalResult.next_action,
         findings: evalResult.findings,
         findings_per_round: perRound,
+        skill_routed_rounds: skillRoutedRounds,
         tool_use_audit: routed.audits,
       };
       if (force) partial.forced_post_critics = true;

package/bin/np-tools/security.cjs CHANGED Viewed

@@ -8,6 +8,7 @@ const { tryReadConfigPath } = require('../../lib/config.cjs');
 const scan = require('../../lib/security/scan.cjs');
 const ledger = require('../../lib/security/ledger.cjs');
 const review = require('../../lib/security/review.cjs');
+const headlessGuard = require('../../lib/headless-guard.cjs');
 const args = require('./_args.cjs');
 const COMMIT_RE = /\bgit\b[\s\S]*\b(commit|push)\b/;
@@ -93,6 +94,8 @@ async function run(argv, ctx) {
   const list = Array.isArray(argv) ? argv : [];
   const verb = list[0];
+  if (headlessGuard.isHeadless(process.env)) return 0;
   const cfg = _cfg(cwd);
   if (!cfg.enabled && verb !== 'run-review') return 0;

package/bin/np-tools/skill-audit.cjs ADDED Viewed

@@ -0,0 +1,79 @@
+'use strict';
+const nubosloop = require('../../lib/nubosloop.cjs');
+const checkpoint = require('../../lib/checkpoint.cjs');
+const { TASK_ID_RE } = require('../../lib/ids.cjs');
+const args = require('./_args.cjs');
+const { NubosPilotError } = require('../../lib/core.cjs');
+function _usage() {
+  return [
+    'Usage:',
+    '  np-tools.cjs skill-audit expect   --task <id> --skills <a,b,c>   (orchestrator: record injected skills)',
+    '  np-tools.cjs skill-audit ack      --task <id> --skill <name>     (executor: stamp a consulted skill)',
+    '  np-tools.cjs skill-audit findings --task <id> [--round <n>]      (read-only: list unmet skill bars)',
+    '',
+    'Mechanical counterpart to the Rule-9 search audit: a skill injected as a task\'s',
+    'quality bar that the executor never consulted becomes a `skill-bar-unconsulted`',
+    'finding at post-critics, routing the task back to the executor (once per round).',
+  ].join('\n');
+}
+function _assertTask(taskId) {
+  args.assertMatch(taskId, TASK_ID_RE, 'skill-audit-invalid-task-id', 'taskId');
+}
+function run(argv, ctx) {
+  const context = ctx || {};
+  const cwd = context.cwd || process.cwd();
+  const stdout = context.stdout || process.stdout;
+  const stderr = context.stderr || process.stderr;
+  const list = Array.isArray(argv) ? argv : [];
+  const verb = list[0];
+  const tail = list.slice(1);
+  if (!verb || verb === '-h' || verb === '--help') { stdout.write(_usage() + '\n'); return 0; }
+  try {
+    if (verb === 'expect') {
+      const taskId = args.getFlag(tail, '--task');
+      _assertTask(taskId);
+      const raw = args.getFlag(tail, '--skills') || '';
+      const skills = String(raw).split(',').map((s) => s.trim()).filter(Boolean);
+      const res = nubosloop.recordExpectedSkills(taskId, skills, cwd);
+      stdout.write(JSON.stringify(res) + '\n');
+      return 0;
+    }
+    if (verb === 'ack') {
+      const taskId = args.getFlag(tail, '--task');
+      _assertTask(taskId);
+      const skill = args.getFlag(tail, '--skill');
+      if (!skill) throw new NubosPilotError('skill-audit-missing-skill', 'ack requires --skill <name>', {});
+      const res = nubosloop.recordSkillEvidence(taskId, skill, cwd);
+      stdout.write(JSON.stringify(res) + '\n');
+      return 0;
+    }
+    if (verb === 'findings') {
+      const taskId = args.getFlag(tail, '--task');
+      _assertTask(taskId);
+      const cp = checkpoint.readCheckpoint(taskId, cwd) || {};
+      const prev = cp.nubosloop || {};
+      const roundArg = args.getFlag(tail, '--round');
+      const round = roundArg != null ? Number(roundArg) : (Number(prev.round) || 1);
+      const findings = nubosloop.skillFindingsFromState(prev, round, taskId);
+      stdout.write(JSON.stringify({ task_id: taskId, round, findings }) + '\n');
+      return 0;
+    }
+    stderr.write(JSON.stringify({ code: 'skill-audit-unknown-verb', message: 'Unknown verb: ' + verb, details: { verb, verbs: ['expect', 'ack', 'findings'] } }) + '\n');
+    return 1;
+  } catch (err) {
+    args.emitErrorEnvelope(err, stderr, 'skill-audit-internal-error');
+    return 1;
+  }
+}
+module.exports = { run };
+if (require.main === module) {
+  process.exit(run(process.argv.slice(3)));
+}