nubos-pilot 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/CHANGELOG.md +43 -1
  2. package/agents/np-architect.md +2 -0
  3. package/agents/np-executor.md +1 -1
  4. package/agents/np-learnings-extractor.md +54 -0
  5. package/agents/np-planner.md +1 -1
  6. package/agents/np-security-reviewer.md +9 -0
  7. package/bin/np-tools/_commands.cjs +5 -0
  8. package/bin/np-tools/derive-tier.cjs +86 -0
  9. package/bin/np-tools/derive-tier.test.cjs +83 -0
  10. package/bin/np-tools/doctor.cjs +15 -2
  11. package/bin/np-tools/graph-impact.cjs +111 -0
  12. package/bin/np-tools/graph-impact.test.cjs +119 -0
  13. package/bin/np-tools/learnings.cjs +105 -0
  14. package/bin/np-tools/learnings.test.cjs +66 -0
  15. package/bin/np-tools/loop-run-round.cjs +7 -1
  16. package/bin/np-tools/scan-codebase.cjs +21 -1
  17. package/bin/np-tools/skill-audit.cjs +79 -0
  18. package/bin/np-tools/skill-audit.test.cjs +86 -0
  19. package/bin/np-tools/verify-reliability.cjs +65 -0
  20. package/bin/np-tools/verify-reliability.test.cjs +69 -0
  21. package/lib/agents.test.cjs +1 -0
  22. package/lib/checkpoint.cjs +3 -0
  23. package/lib/codebase-graph.cjs +0 -0
  24. package/lib/codebase-graph.test.cjs +174 -0
  25. package/lib/codebase-manifest.cjs +3 -0
  26. package/lib/config-defaults.cjs +13 -0
  27. package/lib/config-schema.cjs +11 -0
  28. package/lib/eval-reliability.cjs +63 -0
  29. package/lib/eval-reliability.test.cjs +56 -0
  30. package/lib/install/claude-hooks-learnings.test.cjs +82 -0
  31. package/lib/install/claude-hooks.cjs +65 -4
  32. package/lib/install/claude-hooks.test.cjs +5 -2
  33. package/lib/learnings/capture-ledger.cjs +80 -0
  34. package/lib/learnings/capture-ledger.test.cjs +54 -0
  35. package/lib/learnings/extract.cjs +191 -0
  36. package/lib/learnings/extract.test.cjs +115 -0
  37. package/lib/learnings.cjs +19 -95
  38. package/lib/memory.cjs +38 -33
  39. package/lib/messaging.cjs +12 -6
  40. package/lib/metrics-aggregate.cjs +14 -2
  41. package/lib/migrate.cjs +29 -0
  42. package/lib/migrate.test.cjs +91 -0
  43. package/lib/nubosloop-audit.cjs +104 -0
  44. package/lib/nubosloop-skill-audit.test.cjs +98 -0
  45. package/lib/nubosloop.cjs +9 -0
  46. package/lib/schemas/data/checkpoint.v1.json +13 -0
  47. package/lib/schemas/data/codebase-manifest.v1.json +22 -0
  48. package/lib/schemas/data/learnings.v1.json +28 -0
  49. package/lib/schemas/data/memory-manifest.v1.json +14 -0
  50. package/lib/schemas/data/memory-record.v1.json +16 -0
  51. package/lib/schemas/data/message.v1.json +19 -0
  52. package/lib/schemas/data/metrics-record.v1.json +11 -0
  53. package/lib/tier-classify.cjs +67 -0
  54. package/lib/tier-classify.test.cjs +67 -0
  55. package/lib/validate.cjs +301 -0
  56. package/lib/validate.test.cjs +242 -0
  57. package/np-tools.cjs +5 -0
  58. package/package.json +3 -1
  59. package/skills/np-access-control/SKILL.md +42 -0
  60. package/skills/np-accessibility-audit/SKILL.md +41 -0
  61. package/skills/np-adr/SKILL.md +37 -0
  62. package/skills/np-api-design/SKILL.md +34 -0
  63. package/skills/np-caching-strategy/SKILL.md +38 -0
  64. package/skills/np-data-modeling/SKILL.md +37 -0
  65. package/skills/np-data-privacy/SKILL.md +39 -0
  66. package/skills/np-dependency-audit/SKILL.md +47 -0
  67. package/skills/np-encryption/SKILL.md +47 -0
  68. package/skills/np-error-handling/SKILL.md +37 -0
  69. package/skills/np-incident-response/SKILL.md +38 -0
  70. package/skills/np-llm-app-architecture/SKILL.md +50 -0
  71. package/skills/np-observability/SKILL.md +39 -0
  72. package/skills/np-performance/SKILL.md +38 -0
  73. package/skills/np-queue-design/SKILL.md +32 -0
  74. package/skills/np-rag-design/SKILL.md +43 -0
  75. package/skills/np-refactoring/SKILL.md +35 -0
  76. package/skills/np-resilience-patterns/SKILL.md +39 -0
  77. package/skills/np-secure-code-review/SKILL.md +46 -0
  78. package/skills/np-secure-design/SKILL.md +44 -0
  79. package/skills/np-service-boundary/SKILL.md +35 -0
  80. package/skills/np-system-design/SKILL.md +40 -0
  81. package/skills/np-test-strategy/SKILL.md +46 -0
  82. package/skills/np-threat-model/SKILL.md +42 -0
  83. package/templates/claude/payload/hooks/np-learnings-hook.cjs +55 -0
  84. package/workflows/architect-phase.md +21 -1
  85. package/workflows/execute-phase.md +66 -4
  86. package/workflows/verify-work.md +17 -4
package/CHANGELOG.md CHANGED
@@ -4,7 +4,49 @@ All notable changes to nubos-pilot are documented in this file. Format
4
4
  follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); versioning
5
5
  follows [SemVer](https://semver.org/spec/v2.0.0.html).
6
6
 
7
- ## [1.1.4] — 2026-05-25
7
+ ## [1.2.3] — 2026-06-14
8
+
9
+ Three opt-in layers that make execution cheaper, more reliable, and self-improving.
10
+
11
+ - Cost-aware model routing: with `workflow.tier_routing` enabled, each task's executor runs at the model tier the plan assigned it — trivial work on a smaller model, structural or security-sensitive work on the strongest — instead of every task running at the top tier. The new `np:derive-tier` command suggests a tier from a task's observable signals (files touched, security/data sensitivity), so the choice is evidence-based. Off by default; behaviour is unchanged until you turn it on.
12
+ - Reliability checks (pass@k): set `loop.verify_runs` above 1 and nubos-pilot runs a task's verify command several times per round. A task goes green only when every run passes; a flaky task (passes sometimes, fails sometimes) is treated as red and handed to the build-fixer with a clear note, instead of slipping through on a lucky run. Defaults to a single run.
13
+ - Continuous learning: at the end of a session, a lightweight background reviewer reads what changed and distils reusable, durable lessons into the same learnings store the planner consults on the next similar task — so the system improves with use, not only inside the execution loop. On by default and rate-limited to bound cost; disable with `learnings.auto_capture`.
14
+
15
+ Full documentation at <https://pilot.nubos.cloud>.
16
+
17
+ ## [1.2.2] — 2026-06-05
18
+
19
+ A dependency graph for the codebase you work in, plus stricter checks on nubos-pilot's own data.
20
+
21
+ - `np:scan-codebase` now builds a module dependency graph and writes it to `.nubos-pilot/codebase/.graph.json`. The new `np:graph-impact` command shows what a change touches before you make it. It reports which modules depend on a file, what that file depends on, and any dependency cycle it sits in. The graph reads relative imports only. It builds no AST and adds no dependencies.
22
+ - Persisted state files are now validated on read against versioned schemas. A corrupt single-document store fails with a clear error code. A bad line in an append-only log is skipped, not fatal.
23
+ - The reference docs now list every error code. That list is generated from source and checked on each build, so it cannot drift from the code.
24
+ - Internal logging goes through one structured logger. A test keeps `console.*` out of `lib/` and `bin/np-tools/`.
25
+ - Added `ATTRIBUTIONS.md`. It names the third-party packages nubos-pilot uses and their licenses.
26
+
27
+ Full documentation at <https://pilot.nubos.cloud>.
28
+
29
+ ## [1.2.1] — 2026-06-02
30
+
31
+ Two always-on quality layers that act while the agent writes code.
32
+
33
+ - In-session security review: nubos-pilot reviews the code it writes for
34
+ vulnerabilities while it works and fixes findings in the same session,
35
+ before they reach a pull request. Three non-blocking depths — an instant
36
+ per-edit pattern scan with no model call, a background semantic review of
37
+ the turn's diff at end of turn, and a deeper review that reads surrounding
38
+ code on each commit or push the agent makes.
39
+ - The security reviewer runs independently with a fresh context, reports each
40
+ finding once, and never blocks a write or commit. Extend it with custom
41
+ pattern rules and a review guidance file; built-in checks stay on.
42
+ - Requirements-aware executor: `/np:execute-phase` injects the milestone
43
+ success criteria into the executor as its acceptance target, so it writes
44
+ against the requirements from the first round, not just the verify command.
45
+ - New configuration blocks `security.*` and `conformance.*`.
46
+
47
+ Full documentation at <https://pilot.nubos.cloud>.
48
+
49
+ ## [1.2.0] — 2026-05-25
8
50
 
9
51
  Public release.
10
52
 
@@ -13,6 +13,8 @@ You are NOT a second researcher. Research is investigation; you are decision-mak
13
13
 
14
14
  **CRITICAL: Mandatory Initial Read**
15
15
  If the prompt contains a `<files_to_read>` block, you MUST use the `Read` tool to load every file listed there before performing any other actions. This is your primary context.
16
+
17
+ **Design skills.** If the spawn prompt contains a `Use the following Nubos skills` line (injected by `/np:architect-phase` for structural/security milestones), `Read` each named skill from `.claude/skills/<skill>/SKILL.md` BEFORE committing decisions. Each skill's "Verification bar" is the standard every relevant decision must satisfy — design against it, and let the decision's stated consequences answer it. If the skills are absent (non-Claude runtime), proceed on your own judgment.
16
18
  </role>
17
19
 
18
20
  ## Completeness Mandate
@@ -20,7 +20,7 @@ If the prompt contains a `<files_to_read>` block, you MUST use the `Read` tool t
20
20
  - Invoke commit-helper ONLY after verification passes.
21
21
  - Never invoke `git` directly — always through the `np-tools.cjs` wrapper so the D-25 gitignore-guard runs.
22
22
  - One task per spawn. One commit per task (D-03).
23
- - If the spawn prompt contains a `Use the following Nubos skills:` line (injected by `/np:execute-phase` for UI/frontend tasks), load each named skill from `.claude/skills/<skill>/SKILL.md` BEFORE editing source. Apply each skill's quality bar; verification must pass against the skill's rules, not just the test command.
23
+ - If the spawn prompt contains a `Use the following Nubos skills:` line (injected by `/np:execute-phase`; it covers UI/frontend AND engineering concerns: API/contract, security/auth, data/migration, error-handling, resilience, caching, queue, performance, access-control, encryption, dependencies, privacy, and more), you MUST `Read` each named skill from `.claude/skills/<skill>/SKILL.md` BEFORE editing source — this is not optional. Apply each skill's quality bar; the task is NOT done until its diff satisfies every matched skill's "Verification bar", not just the test command. Treat an unmet skill bar exactly like a failing test. Immediately after reading each skill, stamp consultation via Bash: `node .nubos-pilot/bin/np-tools.cjs skill-audit ack --task <TASK_ID> --skill <skill>`. This is audited at post-critics exactly like the Rule-9 search evidence — an injected skill with no `ack` becomes a `skill-bar-unconsulted` finding that bounces the task back to you.
24
24
  </role>
25
25
 
26
26
  ## Completeness Mandate
@@ -0,0 +1,54 @@
1
+ ---
2
+ name: np-learnings-extractor
3
+ description: Read-only continuous-learning observer. Spawned headlessly by the ADR-0010 learnings Stop-hook against a single turn-diff — it reads what the session changed and returns a JSON envelope of atomic, reusable {pattern, outcome} learnings as its final message. Detection-only — never edits source, never writes files, never uses a milestone number. The orchestrator folds the returned candidates into the learnings store.
4
+ tier: haiku
5
+ tools: Read, Bash, Grep, Glob
6
+ color: cyan
7
+ ---
8
+
9
+ <role>
10
+ You are the nubos-pilot learnings extractor — the lightweight twin of `np-security-reviewer`'s session/diff mode, for institutional knowledge instead of security. You are spawned in the background when a session stops. You receive ONE turn's diff and a fresh context, and you return reusable learnings distilled from it. You never graded or wrote the code you are reading.
11
+
12
+ You DO NOT edit source. You DO NOT write files. You DO NOT use a milestone number. You read the supplied diff (and, only if needed, surrounding code via `Read`/`Grep`) and emit a single JSON envelope as your **final message**.
13
+ </role>
14
+
15
+ ## Completeness Mandate
16
+
17
+ This agent operates under [`templates/COMPLETENESS.md`](../templates/COMPLETENESS.md). The rules that bind this role:
18
+
19
+ - **Rule 1 — Do the whole thing.** Read the entire supplied diff before extracting. Do not stop at the first interesting hunk.
20
+ - **Rule 5 — Aim to genuinely impress.** A learning must be durable and transferable — a rule a future agent on a *similar* task would thank you for. Narration of what changed is a failure.
21
+ - **Rule 8 — Never present a workaround when the real fix exists.** When a learning captures a fix, record the real fix as the pattern, not the band-aid.
22
+ - **Rule 12 — Boil the ocean, but quality over quantity.** Zero learnings is the correct, common answer for a routine turn. Never manufacture filler to pad the list — a noisy store is worse than an empty one.
23
+
24
+ Refusal of any rule is a hard-stop. Surface the violation verbatim and abort.
25
+
26
+ ## Input
27
+
28
+ Triggered when the prompt contains a `<learning_capture>` block. Inside it: the list of changed files and the turn's diff. That is your entire scope — start from the diff; reach into surrounding code with `Read`/`Grep` only to confirm whether a candidate learning is real and correctly stated.
29
+
30
+ ## What counts as a learning
31
+
32
+ A learning is one `{pattern, outcome}` pair:
33
+
34
+ - **pattern** — a durable, reusable, self-contained imperative rule. Good: *"use jose for JWT verification, never hand-roll HS256"*, *"batch ORM lookups in a single query to avoid N+1 in list endpoints"*. Bad: *"added a login form"* (narration), *"the UserController now has 3 methods"* (project trivia), *"renamed x to y"* (obvious from the diff).
35
+ - **outcome** — exactly one of `verified` | `failed` | `reverted` | `partial`: how the pattern played out in THIS turn.
36
+
37
+ Extract at most **5**. Prefer fewer, higher-signal learnings. If nothing clears the bar, return an empty list — that is expected for routine work.
38
+
39
+ ## Output contract — your FINAL message MUST be exactly one JSON object, no prose, no code fence:
40
+
41
+ ```json
42
+ {
43
+ "learnings": [
44
+ { "pattern": "reusable imperative rule, self-contained", "outcome": "verified|failed|reverted|partial" }
45
+ ]
46
+ }
47
+ ```
48
+
49
+ If you find nothing worth keeping, return `{"learnings":[]}`. The orchestrator dedups and folds each candidate into the learnings store (occurrence-counted, threshold-promoted) — it never blocks the session on your output.
50
+
51
+ <scope_guardrail>
52
+ **Do:** read the diff and surrounding code; return one JSON envelope as your final message.
53
+ **Don't:** edit or write any file; use a milestone number; spawn other agents; emit prose around the JSON; manufacture low-value learnings to pad the list.
54
+ </scope_guardrail>
@@ -388,7 +388,7 @@ Inside each `S<NNN>-PLAN.md`, every `<task>` tag MUST have these four attributes
388
388
  > The slice number in the task ID is the authoritative wave; the T-number is per-slice. `np-plan-checker` rejects continued numbering as a `broken-dependency` critical finding (Dimension 6) — iteration-2 will then force a renumber.
389
389
  - `depends_on="<id>[,<id>...]"` — comma-separated predecessor task full-ids, or empty string `""`. Must only reference tasks in **earlier slices** (cross-slice forward deps) or be empty (intra-slice tasks are implicitly parallel, never serial).
390
390
  - `wave="<N>"` — integer equal to the slice number. For S001 use `wave="1"`, for S002 use `wave="2"`, etc.
391
- - `tier="<haiku|sonnet|opus>"` — executor tier, picks the model via resolve-model.
391
+ - `tier="<haiku|sonnet|opus>"` — executor tier, picks the model via resolve-model. You are the decider, but make the call evidence-based, not by feel: run `node .nubos-pilot/bin/np-tools.cjs derive-tier --files "<comma-separated files_modified>" --name "<task title>"` and adopt its suggested tier unless you have a concrete reason to override (ADR-0013 — the tier is derived from observable signals: file count + security/data-sensitivity, never from implementation detail). The tier only changes the executor model when the project opts into `workflow.tier_routing`; otherwise every task runs at the strongest model regardless, so a wrong tier is never a correctness risk — but a right tier saves cost when routing is on.
392
392
 
393
393
  The scaffolder (`_extractTasksFromSlicePlan` in `bin/np-tools/plan-milestone.cjs`) reads ONLY these opening-tag attributes. Without them, zero task files are scaffolded and execute-phase has nothing to dispatch.
394
394
 
@@ -44,6 +44,15 @@ Refusal of any rule is a hard-stop. Surface the violation to the orchestrator ve
44
44
 
45
45
  For each path in `files_modified`, scan for indicators of the following categories. Each finding gets its own block in the report.
46
46
 
47
+ When the Nubos skill library is present, `Read` `.claude/skills/np-secure-code-review/SKILL.md` first and treat its checklist as the authoritative, language-agnostic expansion of the categories below. Then load the skills matching the milestone's surface and apply each one's "Verification bar" to the relevant findings:
48
+
49
+ - new trust boundary / external integration / store for credentials or PII → `np-threat-model` (STRIDE lens) and `np-secure-design` (secure-defaults / least-privilege / zero-trust design review).
50
+ - roles, permissions, resource-ownership, or access-rule changes → `np-access-control` (deny-by-default, object-level authz, IDOR).
51
+ - encryption, hashing, password storage, TLS, tokens, or key/secret management → `np-encryption`.
52
+ - collection, storage, or logging of personal/sensitive data → `np-data-privacy` (minimization, retention, no-PII-in-logs).
53
+
54
+ The table below is the index; the skills are the depth. If the skills are absent (non-Claude runtime), fall back to the table alone.
55
+
47
56
  | Category | Look for |
48
57
  |---------|----------|
49
58
  | Injection | unparameterized SQL/shell/exec, string-concat queries, `eval`-style calls, untrusted input into `child_process` |
@@ -14,11 +14,15 @@ const COMMANDS = [
14
14
  { name: 'new-milestone', category: 'Planning', description: 'Append a new milestone (M<NNN>) to an existing project', description_de: 'Hängt einen neuen Milestone (M<NNN>) an ein bestehendes Projekt an' },
15
15
  { name: 'propose-milestones', category: 'Planning', description: 'Re-plan all not-yet-done milestones: AI proposes add/update/remove from PROJECT.md + REQUIREMENTS.md', description_de: 'Plant offene Milestones neu: KI schlägt add/update/remove aus PROJECT.md + REQUIREMENTS.md vor' },
16
16
  { name: 'agent-skills', category: 'Planning', description: 'Print agent_skills config for a given subagent', description_de: 'Gibt agent_skills-Konfiguration für einen Subagent aus' },
17
+ { name: 'derive-tier', category: 'Planning', description: 'Advisory: derive a suggested executor tier (haiku|sonnet|opus) from a task\'s observable signals (files_modified + risk keywords). Decider stays the planner. ADR-0013.', description_de: 'Advisory: leitet aus den beobachtbaren Task-Signalen (files_modified + Risk-Keywords) einen Vorschlags-Tier (haiku|sonnet|opus) ab. Entscheider bleibt der Planner. ADR-0013.' },
17
18
 
18
19
  { name: 'execute-milestone', category: 'Execution', description: 'Wave-based milestone execution — slice by slice, tasks parallel within a slice', description_de: 'Wave-basierte Milestone-Ausführung — Slice für Slice, Tasks parallel innerhalb einer Slice' },
19
20
  { name: 'commit-task', category: 'Execution', description: 'Atomic per-task git commit via lib/git.cjs', description_de: 'Atomarer Per-Task-Git-Commit über lib/git.cjs' },
20
21
  { name: 'checkpoint', category: 'Execution', description: 'Per-task crash-safety checkpoint CRUD (start/transition/touch/show)', description_de: 'Per-Task-Checkpoint-CRUD für Crash-Safety (start/transition/touch/show)' },
21
22
  { name: 'verify-work', category: 'Execution', description: 'Two-pass goal-backward verification (milestone-level VERIFICATION.md)', description_de: 'Zweistufige Goal-Backward-Verifikation (Milestone-Ebene VERIFICATION.md)' },
23
+ { name: 'verify-reliability', category: 'Execution', description: 'pass@k reliability: fold k verify-run exit codes into pass@1/pass@k/flaky + an aggregate exit code (pass^k) for loop-run-round. Opt-in via loop.verify_runs.', description_de: 'pass@k-Reliability: faltet k Verify-Exit-Codes zu pass@1/pass@k/flaky + Aggregat-Exit-Code (pass^k) für loop-run-round. Opt-in über loop.verify_runs.' },
24
+ { name: 'learnings', category: 'Execution', description: 'Stop-hook continuous-learning capture (ADR-0010). Verbs: capture (rate-limited; spawns headless np-learnings-extractor over the turn diff) | reset (clears stop-streak) | run-extract (background worker). Gated by learnings.auto_capture.', description_de: 'Stop-Hook Continuous-Learning-Capture (ADR-0010). Verben: capture (rate-limited; spawnt headless np-learnings-extractor über das Turn-Diff) | reset (setzt Stop-Streak zurück) | run-extract (Background-Worker). Gated über learnings.auto_capture.' },
25
+ { name: 'skill-audit', category: 'Execution', description: 'Skill-bar consultation audit (counterpart to the Rule-9 search audit). Verbs: expect --task --skills (orchestrator records injected skills) | ack --task --skill (executor stamps a consulted skill) | findings --task [--round] (list unmet bars). An unconsulted injected skill becomes a skill-bar-unconsulted finding that routes back to the executor.', description_de: 'Skill-Bar-Konsultations-Audit (Pendant zum Rule-9-Search-Audit). Verben: expect --task --skills (Orchestrator merkt injizierte Skills) | ack --task --skill (Executor stempelt konsultierten Skill) | findings --task [--round] (offene Bars). Ein nicht konsultierter injizierter Skill wird zu einem skill-bar-unconsulted-Finding und routet zurück zum Executor.' },
22
26
  { name: 'close-project', category: 'Review', description: 'Aggregate verification of every milestone; writes PROJECT-SUMMARY.md + sets project_status=completed', description_de: 'Aggregat-Verifikation aller Milestones; schreibt PROJECT-SUMMARY.md + setzt project_status=completed' },
23
27
  { name: 'archive-project', category: 'Planning', description: 'Move current .nubos-pilot/ project to archive/<slug>-<YYYYMMDD>/ (status|do|list|read)', description_de: 'Verschiebt aktuelles .nubos-pilot/-Projekt nach archive/<slug>-<YYYYMMDD>/ (status|do|list|read)' },
24
28
  { name: 'add-tests', category: 'Execution', description: 'Persist VERIFICATION Pass-cases as node:test UAT (Sentinel-preserving)', description_de: 'Persistiert VERIFICATION-Pass-Cases als node:test-UAT (Sentinel-erhaltend)' },
@@ -36,6 +40,7 @@ const COMMANDS = [
36
40
  { name: 'doctor', category: 'Install', description: '12-check install-integrity scan (--fix for auto-safe fixes)', description_de: '12-Check-Install-Integritäts-Scan (--fix für auto-sichere Fixes)' },
37
41
  { name: 'scan-codebase', category: 'Install', description: 'Initial deep codebase inventory → .nubos-pilot/codebase/ skill docs', description_de: 'Initiale tiefe Codebase-Inventur → .nubos-pilot/codebase/ Skill-Docs' },
38
42
  { name: 'update-docs', category: 'Install', description: 'Refresh stale module docs after code changes', description_de: 'Aktualisiert veraltete Modul-Docs nach Code-Änderungen' },
43
+ { name: 'graph-impact', category: 'Utility', description: 'Query the module dependency graph (.graph.json from np:scan-codebase): impact (transitive dependents), dependencies, cluster, cycle membership. Flags: --module <id> | --path <relpath> | --cycles', description_de: 'Fragt den Modul-Dependency-Graphen ab (.graph.json aus np:scan-codebase): Impact (transitive Dependents), Dependencies, Cluster, Zyklus-Zugehörigkeit. Flags: --module <id> | --path <relpath> | --cycles' },
39
44
 
40
45
  { name: 'resolve-model', category: 'Utility', description: 'Resolve agent/tier to model alias or id (Tier×Profile matrix)', description_de: 'Löst Agent/Tier zu Model-Alias oder -ID auf (Tier×Profile-Matrix)' },
41
46
  { name: 'metrics', category: 'Utility', description: 'Record JSONL metrics entry (record | now | start-timestamp | end-timestamp)', description_de: 'Schreibt JSONL-Metrics-Eintrag (record | now | start-timestamp | end-timestamp)' },
@@ -0,0 +1,86 @@
1
+ 'use strict';
2
+
3
+ const fs = require('node:fs');
4
+
5
+ const { classifyTier } = require('../../lib/tier-classify.cjs');
6
+ const { extractFrontmatter } = require('../../lib/frontmatter.cjs');
7
+ const { emitErrorEnvelope } = require('./_args.cjs');
8
+
9
/**
 * Build the help text shown for `-h` / `--help`.
 *
 * @returns {string} The usage lines joined with newlines (no trailing newline).
 */
function _usage() {
  const lines = [];
  lines.push('Usage:');
  lines.push(' np-tools.cjs derive-tier --files <a,b,c> [--name <text>] [--desc <text>]');
  lines.push(' np-tools.cjs derive-tier --plan <path-to-PLAN.md>');
  lines.push('');
  lines.push('Advisory: derives a suggested executor tier (haiku|sonnet|opus) from the');
  lines.push('task\'s observable signals. The planner remains the decider.');
  return lines.join('\n');
}
19
+
20
/**
 * Read a PLAN.md file and turn it into the task descriptor handed to
 * `classifyTier`.
 *
 * The task name is taken from the first line in the body that starts with `#`
 * followed by whitespace; when that heading contains an em dash, only the text
 * after the dash is kept. If no such heading exists, the frontmatter `id` (or
 * an empty string) is used instead.
 *
 * @param {string} planPath - Path of the plan file to read.
 * @returns {{files_modified: Array, name: string, desc: string}} Task signals:
 *   the frontmatter `files_modified` list (empty when it is not an array), the
 *   derived name, and the whole body as the description.
 * @throws Propagates any error raised while reading or parsing the file.
 */
function _fromPlan(planPath) {
  const parsed = extractFrontmatter(fs.readFileSync(planPath, 'utf-8'));
  const meta = parsed.frontmatter;
  const text = String(parsed.body || '');

  // Optional "<prefix> — " part is skipped lazily so the capture is the title.
  const heading = text.match(/^#\s+(?:.*?—\s*)?(.+?)\s*$/m);

  const files_modified = Array.isArray(meta.files_modified) ? meta.files_modified : [];

  let name;
  if (heading) {
    name = heading[1];
  } else {
    name = meta.id || '';
  }

  return { files_modified, name, desc: text };
}
30
+
31
/**
 * CLI entry point for `derive-tier`.
 *
 * Recognised arguments: `-h` / `--help`, and the value flags `--files`,
 * `--name`, `--desc`, `--plan`, each accepted as `--flag value` or
 * `--flag=value`. Arguments are processed left to right; the first help flag
 * prints usage and returns 0, the first unrecognised argument writes a JSON
 * error object to stderr and returns 1.
 *
 * When a non-empty `--plan` path is given, the task is loaded from that file;
 * otherwise it is assembled from `--files` (comma-separated, trimmed, empty
 * entries dropped), `--name` and `--desc`.
 *
 * @param {string[]} argv - Command arguments (a non-array is treated as empty).
 * @param {{stdout?: {write: Function}, stderr?: {write: Function}}} [ctx] -
 *   Optional stream overrides; defaults to the process streams.
 * @returns {number} 0 on success or help, 1 on an unknown argument or when
 *   classification throws.
 */
function run(argv, ctx) {
  const io = ctx || {};
  const stdout = io.stdout || process.stdout;
  const stderr = io.stderr || process.stderr;
  const args = Array.isArray(argv) ? argv.slice() : [];

  // Flags that carry a value, plus their initial (unset) states.
  const valueFlags = ['files', 'name', 'desc', 'plan'];
  const given = { files: null, name: '', desc: '', plan: null };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (arg === '-h' || arg === '--help') {
      stdout.write(_usage() + '\n');
      return 0;
    }

    let recognised = false;
    for (const key of valueFlags) {
      const flag = '--' + key;
      if (arg === flag) {
        // Space-separated form: the value is the next argument (or '').
        given[key] = args[++i] || '';
        recognised = true;
        break;
      }
      if (arg.startsWith(flag + '=')) {
        given[key] = arg.slice(flag.length + 1);
        recognised = true;
        break;
      }
    }

    if (!recognised) {
      const envelope = {
        code: 'derive-tier-unknown-arg',
        message: 'Unknown argument: ' + arg,
        details: { arg },
      };
      stderr.write(JSON.stringify(envelope) + '\n');
      return 1;
    }
  }

  try {
    let task;
    if (given.plan) {
      task = _fromPlan(given.plan);
    } else {
      let fileList = [];
      if (given.files != null) {
        fileList = String(given.files)
          .split(',')
          .map((entry) => entry.trim())
          .filter(Boolean);
      }
      task = { files_modified: fileList, name: given.name, desc: given.desc };
    }
    stdout.write(JSON.stringify(classifyTier(task)) + '\n');
    return 0;
  } catch (err) {
    emitErrorEnvelope(err, stderr, 'derive-tier-internal-error');
    return 1;
  }
}
81
+
82
+ module.exports = { run };
83
+
84
+ if (require.main === module) {
85
+ process.exit(run(process.argv.slice(2)));
86
+ }
@@ -0,0 +1,83 @@
1
+ 'use strict';
2
+
3
+ const { test } = require('node:test');
4
+ const assert = require('node:assert');
5
+ const os = require('node:os');
6
+ const fs = require('node:fs');
7
+ const path = require('node:path');
8
+ const { run } = require('./derive-tier.cjs');
9
+
10
/**
 * Create in-memory stand-ins for stdout/stderr so a test can inspect what a
 * command wrote.
 *
 * @returns {{stdout: {write: Function}, stderr: {write: Function},
 *   out: {text: string}, err: {text: string}}} Two writable sinks plus the
 *   buffers (`out`, `err`) that accumulate everything written to them.
 */
function _capture() {
  // Each sink appends to its own buffer and reports success like a stream.
  const makeSink = () => {
    const buffer = { text: '' };
    const stream = {
      write: (chunk) => {
        buffer.text += chunk;
        return true;
      },
    };
    return { buffer, stream };
  };

  const standardOut = makeSink();
  const standardErr = makeSink();

  return {
    stdout: standardOut.stream,
    stderr: standardErr.stream,
    out: standardOut.buffer,
    err: standardErr.buffer,
  };
}
19
+
20
// DT-1: a security-related task name must be routed to the strongest tier.
test('DT-1: --files + --name with security keyword → opus', () => {
  const io = _capture();
  const exitCode = run(['--files', 'app/Auth.php', '--name', 'add login throttling'], io);
  assert.strictEqual(exitCode, 0);
  const result = JSON.parse(io.out.text);
  assert.strictEqual(result.tier, 'opus');
  assert.strictEqual(result.size, 'large');
});

// DT-2: a one-file documentation change is the cheapest tier.
test('DT-2: single doc file → haiku', () => {
  const io = _capture();
  const exitCode = run(['--files', 'README.md', '--name', 'fix typo'], io);
  assert.strictEqual(exitCode, 0);
  assert.strictEqual(JSON.parse(io.out.text).tier, 'haiku');
});

// DT-3: a small, non-sensitive code change lands on the middle tier.
test('DT-3: ordinary task → sonnet', () => {
  const io = _capture();
  const exitCode = run(['--files', 'app/Cart.php,app/Cart.test.php', '--name', 'add discount'], io);
  assert.strictEqual(exitCode, 0);
  assert.strictEqual(JSON.parse(io.out.text).tier, 'sonnet');
});

// DT-4: --plan mode takes files from the frontmatter and the name from the heading.
test('DT-4: --plan reads frontmatter files + body name', () => {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'np-derive-tier-'));
  try {
    const plan = path.join(dir, 'T0001-PLAN.md');
    fs.writeFileSync(plan, [
      '---',
      'id: M001-S001-T0001',
      'files_modified:',
      ' - db/migrations/004_users.sql',
      '---',
      '',
      '# M001-S001-T0001 — Add users table migration',
      '',
      'Body text.',
    ].join('\n'));
    const io = _capture();
    const exitCode = run(['--plan', plan], io);
    assert.strictEqual(exitCode, 0);
    assert.strictEqual(JSON.parse(io.out.text).tier, 'opus');
  } finally {
    // Remove the temp directory even when an assertion above fails, so a red
    // test run does not leave fixtures behind in the OS temp folder.
    fs.rmSync(dir, { recursive: true, force: true });
  }
});

// DT-5: unrecognised flags are reported on stderr with a stable error code.
test('DT-5: unknown arg → error envelope, exit 1', () => {
  const io = _capture();
  const exitCode = run(['--bogus'], io);
  assert.strictEqual(exitCode, 1);
  assert.match(io.err.text, /derive-tier-unknown-arg/);
});

// DT-6: --help prints the usage text and succeeds.
test('DT-6: --help → usage, exit 0', () => {
  const io = _capture();
  const exitCode = run(['--help'], io);
  assert.strictEqual(exitCode, 0);
  assert.match(io.out.text, /derive-tier/);
});

// DT-7: omitting --files must not throw; the result is the middle tier.
test('DT-7: no files → standard sonnet, no throw', () => {
  const io = _capture();
  const exitCode = run(['--name', 'something'], io);
  assert.strictEqual(exitCode, 0);
  assert.strictEqual(JSON.parse(io.out.text).tier, 'sonnet');
});
@@ -399,14 +399,27 @@ function _checkNubosloopKnowledgeStore(projectRoot) {
399
399
  }
400
400
  try {
401
401
  const parsed = JSON.parse(fs.readFileSync(learningsPath, 'utf-8'));
402
- if (!parsed || typeof parsed !== 'object' || !Array.isArray(parsed.learnings)) {
402
+ const { STORE_VERSION } = require('../../lib/learnings.cjs');
403
+ const { validate } = require('../../lib/validate.cjs');
404
+ const isObject = parsed && typeof parsed === 'object' && !Array.isArray(parsed);
405
+ let errors;
406
+ if (isObject && parsed.version === STORE_VERSION) {
407
+ errors = validate(parsed, 'learnings.v1');
408
+ } else if (!isObject || !Array.isArray(parsed.learnings)) {
409
+ errors = [{ message: 'expected JSON object with `version` and `learnings[]`' }];
410
+ } else {
411
+ errors = [];
412
+ }
413
+ if (errors.length) {
403
414
  issues.push({
404
415
  id: 'nubosloop-knowledge-store-corrupt',
405
416
  severity: 'warn',
406
417
  fixable: 'manual',
407
418
  details: {
408
419
  path: learningsPath,
409
- hint: 'expected JSON with `version` and `learnings[]`; remove or restore from a backup.',
420
+ violations: errors.length,
421
+ first: errors[0].message,
422
+ hint: 'store violates the learnings.v1 schema; remove or restore from a backup.',
410
423
  },
411
424
  });
412
425
  }
@@ -0,0 +1,111 @@
1
+ const fs = require('node:fs');
2
+ const path = require('node:path');
3
+
4
+ const { NubosPilotError } = require('../../lib/core.cjs');
5
+ const g = require('../../lib/codebase-graph.cjs');
6
+
7
/**
 * Parse the graph-impact command-line flags.
 *
 * Recognised flags: `--cwd <dir>`, `--module <id>`, `--path <relpath>` and the
 * boolean `--cycles`. Anything else is ignored. A value flag given as the last
 * argument receives `undefined` (the slot after the end of the list).
 *
 * @param {string[]|null|undefined} args raw argument list
 * @returns {{cwd: *, module: *, filePath: *, cycles: boolean}} parsed flags
 */
function _parseArgs(args) {
  const argv = args || [];
  const parsed = { cwd: null, module: null, filePath: null, cycles: false };
  let idx = 0;
  while (idx < argv.length) {
    switch (argv[idx]) {
      case '--cwd':
        idx += 1; // consume the value that follows the flag
        parsed.cwd = argv[idx];
        break;
      case '--module':
        idx += 1;
        parsed.module = argv[idx];
        break;
      case '--path':
        idx += 1;
        parsed.filePath = argv[idx];
        break;
      case '--cycles':
        parsed.cycles = true;
        break;
      default:
        break;
    }
    idx += 1;
  }
  return parsed;
}
18
+
19
/**
 * Build the location of the module-graph file under a project root.
 *
 * @param {string} projectRoot project root directory
 * @returns {string} `<projectRoot>/.nubos-pilot/codebase/.graph.json`, joined with the platform separator
 */
function _graphPath(projectRoot) {
  const segments = ['.nubos-pilot', 'codebase', '.graph.json'];
  return path.join(projectRoot, ...segments);
}
22
+
23
/**
 * Read and parse the module graph stored under `projectRoot`.
 *
 * @param {string} projectRoot project root directory
 * @returns {object} the parsed graph object
 * @throws {NubosPilotError} `graph-not-found` when the file cannot be read,
 *   `graph-unreadable` when it is not valid JSON or does not hold a JSON object
 */
function _load(projectRoot) {
  const relPath = '.nubos-pilot/codebase/.graph.json';
  let raw;
  try {
    raw = fs.readFileSync(_graphPath(projectRoot), 'utf-8');
  } catch (err) {
    // Keep the underlying reason (ENOENT, EACCES, ...) so it is not lost.
    throw new NubosPilotError(
      'graph-not-found',
      'module graph not found — run np:scan-codebase first',
      { path: relPath, cause: err && err.message },
    );
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    throw new NubosPilotError(
      'graph-unreadable',
      'module graph is not valid JSON — re-run np:scan-codebase',
      { path: relPath, cause: err && err.message },
    );
  }

  // `null`, arrays and primitives are valid JSON but not a graph; reject them
  // here so callers get a typed error instead of a TypeError on property access.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new NubosPilotError(
      'graph-unreadable',
      'module graph is not a JSON object — re-run np:scan-codebase',
      { path: relPath, cause: 'expected a JSON object' },
    );
  }
  return parsed;
}
45
+
46
/**
 * Find the module whose `directory` equals the directory part of a file path.
 *
 * The path is converted to forward slashes and normalised first, so `./a/x.js`
 * and `a//x.js` resolve the same way as `a/x.js`. A path with no directory part
 * is looked up under the empty directory `''`.
 *
 * @param {{nodes?: Array<{id: string, directory: string}>}} graph parsed module graph
 * @param {string} rel file path, relative to the project root
 * @returns {string|null} the id of the matching node, or null when none matches
 */
function _moduleForPath(graph, rel) {
  const slashed = rel.split(path.sep).join('/');
  // Strip leading "./", duplicate slashes and "x/.." segments before matching.
  const norm = path.posix.normalize(slashed);
  const cut = norm.lastIndexOf('/');
  const dir = cut === -1 ? '' : norm.slice(0, cut);
  const node = (graph.nodes || []).find((n) => n.directory === dir);
  return node ? node.id : null;
}
52
+
53
/**
 * Entry point of the graph-impact command.
 *
 * Loads the module graph for the project root (taken from `--cwd`, then
 * `ctx.cwd`, then `process.cwd()`) and writes one pretty-printed JSON document
 * to `ctx.stdout` (default `process.stdout`):
 *  - with `--cycles` and no target: module count plus the cycle list;
 *  - otherwise: dependents, dependencies, cluster and cycle info for the module
 *    given by `--module`, or resolved from `--path`.
 *
 * @param {string[]} args command-line arguments
 * @param {{cwd?: string, stdout?: {write: Function}}} [ctx] optional overrides
 * @returns {number} 0 on success
 * @throws {NubosPilotError} `graph-path-unmapped`, `graph-missing-target`,
 *   `graph-unknown-module`, plus whatever `_load` throws
 */
function run(args, ctx) {
  const { stdout: ctxStdout, cwd: ctxCwd } = ctx || {};
  const out = ctxStdout || process.stdout;
  const flags = _parseArgs(args);
  const projectRoot = path.resolve(flags.cwd || ctxCwd || process.cwd());
  const graph = _load(projectRoot);

  // Serialise a payload and report success.
  const emit = (payload) => {
    out.write(JSON.stringify(payload, null, 2));
    return 0;
  };

  // `--cycles` only produces the cycle dump when no target was named.
  const cyclesOnly = flags.cycles && !flags.module && !flags.filePath;
  if (cyclesOnly) {
    const cycles = graph.cycles || [];
    return emit({
      module_count: graph.module_count,
      cycle_count: cycles.length,
      cycles,
    });
  }

  // An explicit --module wins; --path is only consulted without it.
  let target = flags.module;
  if (!target) {
    if (!flags.filePath) {
      throw new NubosPilotError(
        'graph-missing-target',
        '--module <id> or --path <relpath> required',
        {},
      );
    }
    target = _moduleForPath(graph, flags.filePath);
    if (!target) {
      throw new NubosPilotError(
        'graph-path-unmapped',
        `no module owns that path: ${flags.filePath}`,
        { path: flags.filePath },
      );
    }
  }

  const known = (graph.nodes || []).some((node) => node.id === target);
  if (!known) {
    throw new NubosPilotError(
      'graph-unknown-module',
      `module not in graph: ${target}`,
      { module: target },
    );
  }

  return emit({
    module: target,
    direct_dependents: g.directDependents(graph, target),
    impact: g.transitiveDependents(graph, target),
    direct_dependencies: g.directDependencies(graph, target),
    transitive_dependencies: g.transitiveDependencies(graph, target),
    cluster: g.clusterOf(graph, target),
    in_cycle: g.cycleFor(graph, target),
  });
}
106
+
107
// Public surface: the command entry point plus the flag parser.
module.exports = { run, _parseArgs };

// When executed directly, run with the process arguments and exit with the result.
if (require.main === module) {
  const exitCode = run(process.argv.slice(2));
  process.exit(exitCode || 0);
}
@@ -0,0 +1,119 @@
1
+ const { test, afterEach } = require('node:test');
2
+ const assert = require('node:assert/strict');
3
+ const fs = require('node:fs');
4
+ const path = require('node:path');
5
+ const os = require('node:os');
6
+
7
+ const cli = require('./graph-impact.cjs');
8
+
9
+ const _sandboxes = [];
10
+
11
/**
 * Create a fresh temporary directory (prefix `np-gi-`) and register it in
 * `_sandboxes` so the afterEach hook can remove it.
 *
 * @returns {string} path of the new directory
 */
function makeSandbox() {
  const prefix = path.join(os.tmpdir(), 'np-gi-');
  const sandbox = fs.mkdtempSync(prefix);
  _sandboxes.push(sandbox);
  return sandbox;
}
16
+
17
/**
 * Serialise `graph` to `<root>/.nubos-pilot/codebase/.graph.json`, creating the
 * parent directories when needed.
 *
 * @param {string} root sandbox project root
 * @param {object} graph graph fixture to write
 */
function writeGraph(root, graph) {
  const graphFile = path.join(root, '.nubos-pilot', 'codebase', '.graph.json');
  fs.mkdirSync(path.dirname(graphFile), { recursive: true });
  fs.writeFileSync(graphFile, JSON.stringify(graph));
}
22
+
23
/**
 * Build an in-memory stand-in for a writable stream.
 *
 * @returns {{stream: {write: Function}, read: Function}} `stream.write(s)`
 *   appends to an internal buffer; `read()` returns everything written so far
 */
function capture() {
  let collected = '';
  const stream = {
    write(chunk) {
      collected += chunk;
    },
  };
  const read = () => collected;
  return { stream, read };
}
27
+
28
// Fixture graph: three single-file JavaScript modules chained a → b → c,
// with no cycles and one cluster holding all of them.
const SAMPLE = {
  schema_version: 1,
  module_count: 3,
  edge_count: 2,
  nodes: ['a', 'b', 'c'].map((id) => ({
    id,
    directory: id,
    primary_language: 'javascript',
    file_count: 1,
  })),
  edges: [['a', 'b'], ['b', 'c']].map(([from, to]) => ({ from, to, weight: 1 })),
  cycles: [],
  clusters: [{ id: 0, members: ['a', 'b', 'c'] }],
  metrics: {
    unresolved_internal_deps: 0,
    max_fan_in: 1,
    max_fan_out: 1,
    isolated_modules: 0,
  },
};
45
+
46
// Remove every sandbox created during the test, newest first, and leave the
// registry empty. Removal is best-effort: a failed delete must not fail the test.
afterEach(() => {
  const pending = _sandboxes.splice(0).reverse();
  for (const dir of pending) {
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch {
      // ignore: a temp directory left behind is harmless
    }
  }
});
52
+
53
// GI-1: querying the leaf module `c` lists everything upstream of it and no dependencies.
test('GI-1: --module reports impact and dependencies', () => {
  const sandbox = makeSandbox();
  writeGraph(sandbox, SAMPLE);
  const sink = capture();

  const exitCode = cli.run(['--module', 'c'], { cwd: sandbox, stdout: sink.stream });
  assert.equal(exitCode, 0);

  const report = JSON.parse(sink.read());
  assert.equal(report.module, 'c');
  assert.deepEqual(report.direct_dependents, ['b']);
  assert.deepEqual(report.impact, ['a', 'b']);
  assert.deepEqual(report.transitive_dependencies, []);
});
65
+
66
// GI-2: a file path is resolved to the module whose directory contains it.
// The exit code is checked as well, matching GI-1.
test('GI-2: --path maps a file to its owning module', () => {
  const root = makeSandbox();
  writeGraph(root, SAMPLE);
  const out = capture();
  const rc = cli.run(['--path', 'a/login.js'], { cwd: root, stdout: out.stream });
  assert.equal(rc, 0);
  const res = JSON.parse(out.read());
  assert.equal(res.module, 'a');
  assert.deepEqual(res.direct_dependencies, ['b']);
});
75
+
76
// GI-3: an empty sandbox has no graph file, so the command must fail with graph-not-found.
test('GI-3: missing graph throws graph-not-found', () => {
  const sandbox = makeSandbox();
  const invoke = () => cli.run(['--module', 'a'], { cwd: sandbox, stdout: capture().stream });
  assert.throws(invoke, (err) => err.code === 'graph-not-found');
});
83
+
84
// GI-4: a module id that is absent from the graph is rejected with graph-unknown-module.
test('GI-4: unknown module throws graph-unknown-module', () => {
  const sandbox = makeSandbox();
  writeGraph(sandbox, SAMPLE);
  const invoke = () => cli.run(['--module', 'nope'], { cwd: sandbox, stdout: capture().stream });
  assert.throws(invoke, (err) => err.code === 'graph-unknown-module');
});
91
+ });
92
+
93
// GI-5: without --module, --path or --cycles there is nothing to report on.
test('GI-5: no target throws graph-missing-target', () => {
  const sandbox = makeSandbox();
  writeGraph(sandbox, SAMPLE);
  const invoke = () => cli.run([], { cwd: sandbox, stdout: capture().stream });
  assert.throws(invoke, (err) => err.code === 'graph-missing-target');
});
101
+
102
// GI-6: --cycles on its own prints the module count and the stored cycle list.
// The exit code and module_count are checked too, so the whole payload is pinned.
test('GI-6: --cycles dumps the cycle list', () => {
  const root = makeSandbox();
  writeGraph(root, Object.assign({}, SAMPLE, { cycles: [['a', 'b']] }));
  const out = capture();
  const rc = cli.run(['--cycles'], { cwd: root, stdout: out.stream });
  assert.equal(rc, 0);
  const res = JSON.parse(out.read());
  assert.equal(res.module_count, 3);
  assert.equal(res.cycle_count, 1);
  assert.deepEqual(res.cycles[0], ['a', 'b']);
});
111
+
112
// GI-7: a path whose directory matches no module is rejected with graph-path-unmapped.
test('GI-7: unmappable --path throws graph-path-unmapped', () => {
  const sandbox = makeSandbox();
  writeGraph(sandbox, SAMPLE);
  const invoke = () => cli.run(['--path', 'ghost/x.js'], { cwd: sandbox, stdout: capture().stream });
  assert.throws(invoke, (err) => err.code === 'graph-path-unmapped');
});