nubos-pilot 1.2.2 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/CHANGELOG.md +10 -0
  2. package/agents/np-architect.md +2 -0
  3. package/agents/np-executor.md +1 -1
  4. package/agents/np-learnings-extractor.md +54 -0
  5. package/agents/np-planner.md +1 -1
  6. package/agents/np-security-reviewer.md +9 -0
  7. package/bin/np-tools/_commands.cjs +4 -0
  8. package/bin/np-tools/derive-tier.cjs +86 -0
  9. package/bin/np-tools/derive-tier.test.cjs +83 -0
  10. package/bin/np-tools/learnings.cjs +105 -0
  11. package/bin/np-tools/learnings.test.cjs +66 -0
  12. package/bin/np-tools/loop-run-round.cjs +7 -1
  13. package/bin/np-tools/skill-audit.cjs +79 -0
  14. package/bin/np-tools/skill-audit.test.cjs +86 -0
  15. package/bin/np-tools/verify-reliability.cjs +65 -0
  16. package/bin/np-tools/verify-reliability.test.cjs +69 -0
  17. package/lib/agents.test.cjs +1 -0
  18. package/lib/config-defaults.cjs +13 -0
  19. package/lib/config-schema.cjs +11 -0
  20. package/lib/eval-reliability.cjs +63 -0
  21. package/lib/eval-reliability.test.cjs +56 -0
  22. package/lib/install/claude-hooks-learnings.test.cjs +82 -0
  23. package/lib/install/claude-hooks.cjs +65 -4
  24. package/lib/install/claude-hooks.test.cjs +5 -2
  25. package/lib/learnings/capture-ledger.cjs +80 -0
  26. package/lib/learnings/capture-ledger.test.cjs +54 -0
  27. package/lib/learnings/extract.cjs +191 -0
  28. package/lib/learnings/extract.test.cjs +115 -0
  29. package/lib/nubosloop-audit.cjs +104 -0
  30. package/lib/nubosloop-skill-audit.test.cjs +98 -0
  31. package/lib/nubosloop.cjs +9 -0
  32. package/lib/tier-classify.cjs +67 -0
  33. package/lib/tier-classify.test.cjs +67 -0
  34. package/np-tools.cjs +4 -0
  35. package/package.json +1 -1
  36. package/skills/np-access-control/SKILL.md +42 -0
  37. package/skills/np-accessibility-audit/SKILL.md +41 -0
  38. package/skills/np-adr/SKILL.md +37 -0
  39. package/skills/np-api-design/SKILL.md +34 -0
  40. package/skills/np-caching-strategy/SKILL.md +38 -0
  41. package/skills/np-data-modeling/SKILL.md +37 -0
  42. package/skills/np-data-privacy/SKILL.md +39 -0
  43. package/skills/np-dependency-audit/SKILL.md +47 -0
  44. package/skills/np-encryption/SKILL.md +47 -0
  45. package/skills/np-error-handling/SKILL.md +37 -0
  46. package/skills/np-incident-response/SKILL.md +38 -0
  47. package/skills/np-llm-app-architecture/SKILL.md +50 -0
  48. package/skills/np-observability/SKILL.md +39 -0
  49. package/skills/np-performance/SKILL.md +38 -0
  50. package/skills/np-queue-design/SKILL.md +32 -0
  51. package/skills/np-rag-design/SKILL.md +43 -0
  52. package/skills/np-refactoring/SKILL.md +35 -0
  53. package/skills/np-resilience-patterns/SKILL.md +39 -0
  54. package/skills/np-secure-code-review/SKILL.md +46 -0
  55. package/skills/np-secure-design/SKILL.md +44 -0
  56. package/skills/np-service-boundary/SKILL.md +35 -0
  57. package/skills/np-system-design/SKILL.md +40 -0
  58. package/skills/np-test-strategy/SKILL.md +46 -0
  59. package/skills/np-threat-model/SKILL.md +42 -0
  60. package/templates/claude/payload/hooks/np-learnings-hook.cjs +55 -0
  61. package/workflows/architect-phase.md +21 -1
  62. package/workflows/execute-phase.md +66 -4
  63. package/workflows/verify-work.md +17 -4
package/lib/nubosloop.cjs CHANGED
@@ -24,6 +24,7 @@ const ROUTE_TABLE = {
24
24
  'lint-violation': 'executor',
25
25
  'critic-error': 'stuck',
26
26
  'rule-9-violation': 'executor',
27
+ 'skill-bar-unconsulted': 'executor',
27
28
  'missing-test': 'executor',
28
29
  'edge-case-gap': 'executor',
29
30
  'weak-assertion': 'executor',
@@ -311,6 +312,10 @@ const {
311
312
  auditToolUse,
312
313
  recordSearchEvidence,
313
314
  searchEvidenceForRound,
315
+ recordSkillEvidence,
316
+ recordExpectedSkills,
317
+ skillFindingsFromState,
318
+ markSkillFindingsRoutedInArray,
314
319
  readToolUseAudit,
315
320
  auditFindingsForRound,
316
321
  auditFindingsFromAudits,
@@ -502,6 +507,10 @@ module.exports = {
502
507
  auditToolUse,
503
508
  recordSearchEvidence,
504
509
  searchEvidenceForRound,
510
+ recordSkillEvidence,
511
+ recordExpectedSkills,
512
+ skillFindingsFromState,
513
+ markSkillFindingsRoutedInArray,
505
514
  readToolUseAudit,
506
515
  auditFindingsForRound,
507
516
  auditFindingsFromAudits,
@@ -0,0 +1,67 @@
1
+ 'use strict';
2
+
3
+ const { VALID_TIERS } = require('./model-profiles.cjs');
4
+
5
+ // ADR-0013: a tier is a routing/meta property derived from OBSERVABLE task
6
+ // signals (files touched + risk keywords), never invented from implementation
7
+ // detail. classifyTier is advisory — the planner remains the decider; this
8
+ // helper only makes that decision evidence-based. Output is deterministic
9
+ // (no clock, no randomness) so a given task always classifies the same way.
10
+
11
// Keyword sets used to classify a task. All three are case-insensitive and
// anchored on word boundaries; none carries the `g` flag, so `.test()` is
// stateless and safe to call repeatedly.
//
// RISK_RE: security / data-sensitive vocabulary. Covers "authenticate" /
// "authentication" (the bare `auth` alternative cannot match them because of
// the trailing `\b`) and the plural forms of the noun keywords, so phrases
// such as "rotate refresh tokens" or "store secrets" are recognised.
const RISK_RE = /\b(auth|authn|authz|authenticat\w*|authoriz\w*|logins?|crypto|encrypt\w*|decrypt\w*|passwords?|secrets?|credentials?|tokens?|jwts?|oauth|saml|sessions?|payments?|billing|invoices?|permissions?|roles?|access[\s-]?controls?|migrat\w*|schemas?)\b/i;
// ARCH_RE: architectural / cross-cutting vocabulary, including plural nouns
// ("contracts", "interfaces", "protocols", "state machines").
const ARCH_RE = /\b(architect\w*|cross[\s-]?cutting|multi[\s-]?module|redesign|breaking[\s-]?changes?|public[\s-]?apis?|contracts?|interfaces?|protocols?|state[\s-]?machines?|concurren\w*|distributed|orchestrat\w*)\b/i;
// TRIVIAL_RE: mechanical-edit vocabulary. Deliberately left narrow: widening
// it would route more work to the cheapest tier.
const TRIVIAL_RE = /\b(typo|comment|rename|docs?|readme|changelog|copy(?:writing)?|wording|spelling|version[\s-]?bump|bump[\s-]?version|lint|format(?:ting)?|whitespace|config[\s-]?value|constant|string[\s-]?literal)\b/i;

// Maps the classifier's size bucket to the model tier it recommends.
const SIZE_TO_TIER = Object.freeze({ trivial: 'haiku', standard: 'sonnet', large: 'opus' });

// A task touching at least this many files is classified as large.
const LARGE_FILE_THRESHOLD = 6;
18
+
19
/**
 * Build the free-text part of the classifier input: the task name and the
 * task description separated by a single space. Falsy values (missing name
 * or description) contribute an empty string, so the separator is always
 * present.
 *
 * @param {*} name Task name; coerced with String().
 * @param {*} desc Task description; coerced with String().
 * @returns {string} `<name> <desc>`
 */
function _text(name, desc) {
  const title = String(name || '');
  const details = String(desc || '');
  return `${title} ${details}`;
}
22
+
23
/**
 * Classify a task into an advisory size bucket and model tier from its
 * observable signals: the number of files it touches and the keywords found
 * in its name, description, and file paths.
 *
 * Precedence: a risk keyword always yields `large`; otherwise an architecture
 * keyword or a file count of at least LARGE_FILE_THRESHOLD yields `large`;
 * otherwise a trivial keyword on at most one file yields `trivial`;
 * everything else is `standard`. The result is a pure function of the input.
 *
 * @param {{files_modified?: string[], name?: string, desc?: string}} task
 *   Task descriptor. A null/undefined task, or a non-array `files_modified`,
 *   is treated as a task with no files and no text.
 * @returns {{tier: string, size: string, rationale: string, signals: {file_count: number, risk: boolean, arch: boolean, trivial: boolean}}}
 */
function classifyTier(task) {
  const t = task || {};
  const files = Array.isArray(t.files_modified) ? t.files_modified : [];
  const fileCount = files.length;
  const haystack = _text(t.name, t.desc) + ' ' + files.join(' ');

  // The keyword regexes are anchored with `\b`, and neither `_` nor a
  // camelCase hump is a word boundary. Paths like `app/AuthController.php`
  // or `svc/user_session_store.py` would therefore slip past the escalation
  // checks. Test the escalation signals against the raw text plus an
  // identifier-split copy; the raw text is kept so every previous match
  // still matches.
  const escalationHaystack = haystack + ' ' + _splitIdentifiers(haystack);

  const risk = RISK_RE.test(escalationHaystack);
  const arch = ARCH_RE.test(escalationHaystack);
  // The trivial signal stays on the raw text: widening it could only
  // downgrade tasks to the cheapest tier, which is the unsafe direction.
  const trivial = TRIVIAL_RE.test(haystack);

  let size;
  let rationale;
  if (risk) {
    size = 'large';
    rationale = 'security/data-sensitive surface (auth, crypto, secrets, or migration) — escalate to the strongest tier';
  } else if (arch || fileCount >= LARGE_FILE_THRESHOLD) {
    size = 'large';
    rationale = arch
      ? 'architectural / cross-cutting change — invariants span multiple units'
      : `broad change touching ${fileCount} files — cross-file invariants likely`;
  } else if (fileCount <= 1 && trivial) {
    size = 'trivial';
    rationale = 'single-file mechanical edit (docs/rename/format/config) — narrow, low-risk';
  } else {
    size = 'standard';
    rationale = 'ordinary single-concern implementation';
  }

  return {
    tier: SIZE_TO_TIER[size],
    size,
    rationale,
    signals: { file_count: fileCount, risk, arch, trivial },
  };
}

// Splits snake_case and camelCase identifiers into space-separated words so
// that word-boundary regexes can see the individual keywords.
function _splitIdentifiers(text) {
  return text
    .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
    .replace(/_+/g, ' ');
}
62
+
63
/**
 * Report whether `tier` is listed in VALID_TIERS (imported from
 * ./model-profiles.cjs).
 *
 * NOTE(review): VALID_TIERS is presumably an array of tier names; confirm
 * against model-profiles.cjs.
 *
 * @param {*} tier Candidate tier value.
 * @returns {boolean} Result of `VALID_TIERS.includes(tier)`.
 */
function isValidTier(tier) {
  const recognised = VALID_TIERS.includes(tier);
  return recognised;
}
66
+
67
+ module.exports = { classifyTier, isValidTier, SIZE_TO_TIER, LARGE_FILE_THRESHOLD };
@@ -0,0 +1,67 @@
1
+ 'use strict';
2
+
3
+ const { test } = require('node:test');
4
+ const assert = require('node:assert');
5
+ const { classifyTier, isValidTier, SIZE_TO_TIER } = require('./tier-classify.cjs');
6
+
7
+ test('TC-1: security keyword forces large→opus regardless of file count', () => {
8
+ const r = classifyTier({ files_modified: ['app/Auth.php'], name: 'Add password reset flow' });
9
+ assert.strictEqual(r.size, 'large');
10
+ assert.strictEqual(r.tier, 'opus');
11
+ assert.strictEqual(r.signals.risk, true);
12
+ });
13
+
14
+ test('TC-2: migration path escalates to large', () => {
15
+ const r = classifyTier({ files_modified: ['db/migrations/003_add_col.sql'], name: 'add column' });
16
+ assert.strictEqual(r.tier, 'opus');
17
+ assert.strictEqual(r.signals.risk, true);
18
+ });
19
+
20
+ test('TC-3: many files → large even without risk/arch keywords', () => {
21
+ const r = classifyTier({ files_modified: ['a.ts', 'b.ts', 'c.ts', 'd.ts', 'e.ts', 'f.ts'], name: 'wire feature' });
22
+ assert.strictEqual(r.size, 'large');
23
+ assert.strictEqual(r.tier, 'opus');
24
+ });
25
+
26
+ test('TC-4: architectural keyword → large', () => {
27
+ const r = classifyTier({ files_modified: ['svc.ts'], name: 'refactor the orchestration interface' });
28
+ assert.strictEqual(r.size, 'large');
29
+ assert.strictEqual(r.signals.arch, true);
30
+ });
31
+
32
+ test('TC-5: single-file doc/typo → trivial→haiku', () => {
33
+ const r = classifyTier({ files_modified: ['README.md'], name: 'fix typo in readme' });
34
+ assert.strictEqual(r.size, 'trivial');
35
+ assert.strictEqual(r.tier, 'haiku');
36
+ assert.strictEqual(r.signals.trivial, true);
37
+ });
38
+
39
+ test('TC-6: ordinary single-concern → standard→sonnet', () => {
40
+ const r = classifyTier({ files_modified: ['app/Service.php', 'app/Service.test.php'], name: 'add discount calculation' });
41
+ assert.strictEqual(r.size, 'standard');
42
+ assert.strictEqual(r.tier, 'sonnet');
43
+ });
44
+
45
+ test('TC-7: trivial keyword but multiple files is NOT trivial', () => {
46
+ const r = classifyTier({ files_modified: ['a.ts', 'b.ts'], name: 'rename helper' });
47
+ assert.strictEqual(r.size, 'standard');
48
+ });
49
+
50
+ test('TC-8: empty/missing input → standard, no throw', () => {
51
+ const r = classifyTier({});
52
+ assert.strictEqual(r.size, 'standard');
53
+ assert.strictEqual(r.signals.file_count, 0);
54
+ const r2 = classifyTier(null);
55
+ assert.strictEqual(r2.size, 'standard');
56
+ });
57
+
58
+ test('TC-9: every emitted tier is a valid tier', () => {
59
+ for (const size of Object.keys(SIZE_TO_TIER)) {
60
+ assert.ok(isValidTier(SIZE_TO_TIER[size]), size + ' maps to a valid tier');
61
+ }
62
+ });
63
+
64
+ test('TC-10: deterministic — same input twice yields identical result', () => {
65
+ const input = { files_modified: ['x.ts'], name: 'add token validation' };
66
+ assert.deepStrictEqual(classifyTier(input), classifyTier(input));
67
+ });
package/np-tools.cjs CHANGED
@@ -26,6 +26,10 @@ const initWorkflows = {
26
26
 
27
27
  const topLevelCommands = {
28
28
  'agent-skills': require('./bin/np-tools/agent-skills.cjs'),
29
+ 'derive-tier': require('./bin/np-tools/derive-tier.cjs'),
30
+ 'verify-reliability': require('./bin/np-tools/verify-reliability.cjs'),
31
+ 'learnings': require('./bin/np-tools/learnings.cjs'),
32
+ 'skill-audit': require('./bin/np-tools/skill-audit.cjs'),
29
33
 
30
34
  'commit-task': require('./bin/np-tools/commit-task.cjs'),
31
35
  'checkpoint': require('./bin/np-tools/checkpoint.cjs'),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nubos-pilot",
3
- "version": "1.2.2",
3
+ "version": "1.2.3",
4
4
  "description": "Self-hosted AI pilot for any codebase. Researcher and critic agents plan, execute and verify each change.",
5
5
  "homepage": "https://pilot.nubos.cloud",
6
6
  "repository": {
@@ -0,0 +1,42 @@
1
+ ---
2
+ name: np-access-control
3
+ description: "Quality bar for any change that adds or modifies authorization — roles, permissions, policies, scopes, resource ownership, RBAC/ABAC rules, or the checks that gate a protected action. Triggered for executor and architect work on policies, guards, middleware, permission tables, ownership lookups, or anywhere code decides what an identity may do. Encodes authorization rules the change MUST satisfy before commit — not a document to author. Language- and framework-agnostic."
4
+ user-invocable: false
5
+ ---
6
+
7
+ # Access Control
8
+
9
+ Authorization decides what an authenticated identity may do. It is the layer attackers reach *after* login, so a logged-in user is not an authorized one. Apply this bar to every access decision the change adds or touches. A missing check is a blocking finding, not a nit.
10
+
11
+ ## Before editing
12
+
13
+ - Read the existing authz model: `node .nubos-pilot/bin/np-tools.cjs knowledge-search "authorization policy roles permissions" --task $TASK_ID`. Match the established enforcement pattern, role names, and policy idiom; do not introduce a second one.
14
+ - Locate the single point where access is enforced for this surface. Add your check there, not in a new ad-hoc spot.
15
+
16
+ ## Deny by default
17
+
18
+ - Access is granted explicitly, never assumed. A new action, route, field, or resource starts forbidden and is opened deliberately.
19
+ - A missing or unrecognized role/permission means deny, not allow. No fail-open branches, no "if no rule matched, proceed".
20
+ - Adding a route or handler never silently widens access.
21
+
22
+ ## Enforce server-side, at the right object
23
+
24
+ - Every protected action is authorized on the server. Client-side hiding is UX, never a control — the agent and the user can both call the endpoint directly.
25
+ - Check against the *acting* identity, resolved from the session/token — never a role, user id, or `isAdmin` supplied in the request body or params.
26
+ - Check against the *specific* resource (object-level authz). Prevent IDOR: a resource id from the request is untrusted until an ownership/permission lookup confirms this identity may act on *that* object. Listing endpoints filter to the caller's scope; they do not return all rows and trim client-side.
27
+
28
+ ## Model least privilege, keep it auditable
29
+
30
+ - Grant the narrowest role/scope that makes the change work. No "admin to ship it", no wildcard scope where a specific one suffices.
31
+ - Separate authentication from authorization: who-you-are and what-you-may-do are distinct decisions; passing the first never implies the second.
32
+ - Roles, permissions, and policies are modeled explicitly and centrally — readable as data/code, not scattered as inline `if user.email ==` conditionals across handlers.
33
+ - Privilege changes (grant, revoke, role change) take effect immediately on the next request — no stale cached grant — and are logged with who/what/when.
34
+
35
+ ## Verification bar (must hold before commit)
36
+
37
+ - Every protected action has a server-side check on the acting identity and the specific resource; no IDOR path remains.
38
+ - Default is deny: unmatched/unknown permissions reject, and the new surface is not reachable without an explicit grant.
39
+ - The change uses the existing authz model and the narrowest privilege that works — no scattered checks, no client-trusted input, no over-broad grant.
40
+ - Grant/revoke takes effect immediately and is logged.
41
+ - The forbidden case is proven forbidden: a negative-path test asserts an unauthorized identity and a wrong-owner resource are denied — not just that the happy path is allowed. Pair with [np-test-strategy] for those negative cases, [np-secure-code-review] for the surrounding input/auth surface, and [np-secure-design] when the change introduces a new trust boundary or privilege tier.
42
+ - If any item cannot be satisfied within task scope, stop and surface it as an authorization finding — do not commit around it.
@@ -0,0 +1,41 @@
1
+ ---
2
+ name: np-accessibility-audit
3
+ description: "Quality bar for changes that add or modify any UI surface — components, pages, forms, or markup (.tsx/.jsx/.vue/.svelte, views/components/pages). Triggered for executor work on user-facing rendering; encodes a WCAG 2.x AA checklist the change MUST satisfy before commit, not an audit document to author. Language- and framework-agnostic."
4
+ user-invocable: false
5
+ ---
6
+
7
+ # Accessibility Audit
8
+
9
+ Any UI you build or touch must be usable with a keyboard, a screen reader, and at low vision. This is a bar to meet, not a report to write. Automated checkers catch only part of WCAG — reason about the rest.
10
+
11
+ ## Before editing
12
+ - Read existing conventions: `node .nubos-pilot/bin/np-tools.cjs knowledge-search "accessibility component conventions" --task $TASK_ID`.
13
+
14
+ ## Semantics first
15
+ - Use real elements: `button` for actions, `a[href]` for navigation, `nav`/`main`/`header`/`footer` landmarks, real list and table markup. Never a `div`/`span` with a click handler when an element exists.
16
+ - One `h1` per page; headings descend in order (no skipped levels) and describe structure, not styling.
17
+ - Reach for ARIA only to fill a gap native HTML cannot. Wrong or redundant ARIA is worse than none — no `role="button"` on a `button`, no `aria-label` that contradicts visible text.
18
+
19
+ ## Keyboard and focus
20
+ - Everything interactive is reachable and operable by keyboard alone, in a logical tab order; no keyboard trap.
21
+ - Focus is always visible — never strip the outline without an equal-or-better replacement.
22
+ - Custom widgets implement their expected keys (Esc closes, arrows move within a group, Enter/Space activate).
23
+ - Manage focus on change: move focus into an opened modal and restore it on close; move focus on route change so it doesn't sit on stale content.
24
+
25
+ ## Names, contrast, and signal
26
+ - Every input has a programmatically associated label (`label[for]` or wrapping). Icon-only controls get an accessible name.
27
+ - Meaningful images have descriptive `alt`; decorative images have empty `alt=""`. Use `aria-label` only when there is no visible text to name the element.
28
+ - Text contrast meets AA: 4.5:1 normal, 3:1 large text and UI/graphical boundaries.
29
+ - Never use color as the only carrier of meaning — pair it with text, icon, or shape (errors, status, required fields).
30
+
31
+ ## Dynamic and motion
32
+ - Form errors are associated with their field (`aria-describedby`) and announced; do not signal validity by color alone.
33
+ - Asynchronous updates (toasts, async results, validation) are announced via a live region.
34
+ - Respect `prefers-reduced-motion`: gate non-essential animation and avoid motion that could trigger vestibular issues.
35
+
36
+ ## Verification bar (must hold before commit)
37
+ - The full flow is operable with keyboard only — tab order is logical, focus stays visible, nothing is trapped.
38
+ - Run an automated checker (axe/Lighthouse or equivalent) with zero violations, then manually confirm what it cannot: semantics, focus order, names, and meaningful contrast.
39
+ - Every control and image has an accessible name; no information is conveyed by color alone.
40
+ - Modal/route/async transitions manage focus and announce updates correctly.
41
+ - Cross-check visual choices against [np-web-design-guidelines] and [np-frontend-design]; accessibility constrains, never contradicts, them.
@@ -0,0 +1,37 @@
1
+ ---
2
+ name: np-adr
3
+ description: "Quality bar for the reasoning behind an architecturally significant decision — a datastore choice, sync vs async, a new external dependency, an auth model, a public contract, or any choice that is costly to reverse, spans multiple components, or constrains future work. Triggered for architect/planner/executor work that makes such a call (typically captured in the milestone ARCHITECTURE artifact). Encodes the decision-recording discipline the reasoning MUST satisfy, not a mandate to author a separate document. Language- and framework-agnostic."
4
+ user-invocable: false
5
+ ---
6
+
7
+ # Architecture Decision Records
8
+
9
+ A significant decision with no recorded alternatives is a guess wearing a suit. This bar governs the reasoning you commit when you make an architecturally significant call — np's architect already emits ADR-style decisions into the milestone ARCHITECTURE artifact; this is the quality bar for that reasoning, not an instruction to spawn new files.
10
+
11
+ ## Before editing
12
+
13
+ - Check whether the decision is already recorded or constrained: `node .nubos-pilot/bin/np-tools.cjs knowledge-search "<decision topic>" --task $TASK_ID`. If a prior ADR-style decision already covers this, you are superseding it — say so explicitly; don't silently contradict it.
14
+
15
+ ## What is worth recording
16
+
17
+ - **Significant only.** Record a decision when it is costly to reverse, affects multiple components, introduces a new external dependency, or constrains future choices: datastore choice, sync vs async, an auth/permission model, a public contract, a serialization format. Ignore the trivial — a local variable name or a one-file refactor is not an ADR.
18
+ - **One decision per record.** Don't bundle "we'll use Postgres and also restructure auth" — split them. Each stands or falls on its own forces.
19
+
20
+ ## What the record must capture
21
+
22
+ - **Context / forces.** The constraints and pressures that make this a real decision — load, team, deadlines, existing stack, compliance. Without forces the decision looks arbitrary later.
23
+ - **The decision.** Stated plainly, in the active voice: what was chosen.
24
+ - **Consequences, good and bad.** Name what this buys you *and* what it costs — the lock-in, the new failure mode, the operational burden. A record listing only upsides is marketing, not engineering.
25
+ - **Alternatives considered and why rejected (load-bearing).** This is the value. At least one real alternative with the concrete reason it lost. "We considered X but rejected it because Y" — no Y means no decision was actually made.
26
+
27
+ ## Timing & immutability
28
+
29
+ - **Record at decision time, not retroactively.** Reconstructed reasoning launders out the forces that were actually live; capture it while the trade-off is still in your hands.
30
+ - **Immutable once accepted.** Don't rewrite an accepted decision — supersede it with a new one that references the old. The history of *why we changed our minds* is itself the asset.
31
+
32
+ ## Verification bar (must hold before commit)
33
+
34
+ - The decision is genuinely significant (hard to reverse / cross-component / constrains the future) — trivial calls are not recorded as ADRs.
35
+ - At least one real rejected alternative is named *with* its reason; consequences list both the wins and the costs.
36
+ - It is a single decision, recorded now (not reconstructed), and any prior decision it overrides is explicitly superseded, not silently edited.
37
+ - The forces tie to the actual system constraints — cross-check the structural framing against [np-system-design] and any boundary impact against [np-service-boundary].
@@ -0,0 +1,34 @@
1
+ ---
2
+ name: np-api-design
3
+ description: "Quality bar for changes that add or modify an API surface — HTTP/REST endpoints, RPC handlers, GraphQL resolvers, public library/SDK functions, CLI flags, or any contract another system consumes. Triggered for executor work on controllers, routes, handlers, resolvers, or public interfaces. Encodes contract-design rules the change MUST satisfy before commit, not a spec document to author. Language- and framework-agnostic."
4
+ user-invocable: false
5
+ ---
6
+
7
+ # API Design
8
+
9
+ An API is a promise. Every endpoint, flag, or exported function you add or touch is a contract you will have to keep. Apply this bar to the contract you are about to commit.
10
+
11
+ ## Before editing
12
+
13
+ - Read the project's existing API conventions first: `node .nubos-pilot/bin/np-tools.cjs knowledge-search "api endpoint conventions" --task $TASK_ID`. Match the established naming, versioning, error-shape, and pagination idiom — consistency beats personal preference.
14
+
15
+ ## Contract rules
16
+
17
+ - **Names describe resources and intent**, not implementation. Plural nouns for collections, verbs only where REST nouns don't fit (RPC/CLI). No leaking internal table/class names into the public surface.
18
+ - **Inputs are validated and documented at the boundary.** Required vs optional is explicit; unknown fields are rejected or ignored deliberately, never silently mutating behavior.
19
+ - **Responses are stable and typed.** One consistent envelope/error shape across the surface. Don't return a bare array where the rest of the API returns an object — additive evolution requires room to grow.
20
+ - **Errors are actionable and consistent.** Correct status/category, a stable machine-readable code, and a message that tells the caller what to fix. Same error model everywhere.
21
+ - **Idempotency & methods match semantics.** Safe methods don't mutate; retries of idempotent operations don't double-apply. State this for any write path.
22
+ - **Pagination, filtering, sorting** follow the existing pattern for any collection that can grow unbounded. No unbounded list endpoints.
23
+
24
+ ## Compatibility (load-bearing)
25
+
26
+ - A change to an *existing* contract is breaking until proven otherwise: removed/renamed fields, narrowed types, new required inputs, changed defaults, altered error codes. If breaking, either version it or stop and surface it — never silently break consumers.
27
+ - Additive change is the default safe move: new optional input, new field, new endpoint.
28
+
29
+ ## Verification bar (must hold before commit)
30
+
31
+ - Every new/changed input is validated; every response and error follows the surface's existing shape.
32
+ - No accidental breaking change to an existing contract (or it is explicitly versioned + flagged).
33
+ - Auth and rate/abuse considerations for the new surface are handled — pair with [np-secure-code-review] for any authenticated or input-accepting endpoint.
34
+ - The contract is discoverable: types/signatures are explicit enough that a consumer needs no source-reading to call it correctly.
@@ -0,0 +1,38 @@
1
+ ---
2
+ name: np-caching-strategy
3
+ description: "Quality bar for changes that add or modify a cache — in-memory caches, Redis/Memcached or other distributed caches, HTTP/CDN response caching, or memoization of a computed value. Triggered for executor work that introduces or alters any layer that stores and reuses a previously computed or fetched result. Encodes caching-correctness rules the change MUST satisfy before commit, not a design document to author. Language- and framework-agnostic."
4
+ user-invocable: false
5
+ ---
6
+
7
+ # Caching Strategy
8
+
9
+ A cache is a correctness liability you take on to buy speed, not a free win. Every entry you store is a second source of truth that can lie. Apply this bar to the cache you are about to commit.
10
+
11
+ ## Before editing
12
+
13
+ - Read the project's existing caching conventions first: `node .nubos-pilot/bin/np-tools.cjs knowledge-search "cache invalidation TTL key" --task $TASK_ID`. Match the established cache layer, key prefix, serialization, and invalidation idiom — don't introduce a second caching mechanism alongside one that already works.
14
+
15
+ ## Justify the cache
16
+
17
+ - **Cache only when measurement shows it helps and correctness allows it.** Have a real number — the cost being avoided (slow query, expensive computation, remote call) and the hit rate you expect. No cache "just in case"; an unmeasured cache adds risk and buys nothing.
18
+ - **The cache is an optimization, not a dependency.** The system MUST stay correct with the cache empty or unavailable — same answer, slower. A miss recomputes the true value; the cache never becomes the only place data lives.
19
+
20
+ ## Key & correctness
21
+
22
+ - **The key captures every input that changes the value.** Identity, tenant, locale, permission scope, feature flags, API version — anything that varies the result varies the key. A cache that serves one user's data to another is a security incident, not a performance bug.
23
+ - **Never store per-user secrets or sensitive data in a shared cache.** Scope sensitive entries per principal or keep them out entirely — pair with [np-secure-code-review] for anything auth- or PII-adjacent.
24
+
25
+ ## Invalidation & bounds (load-bearing)
26
+
27
+ - **Every cache has an explicit invalidation story** — TTL, write-through, or event-based. "Stale forever" is a bug. State, for this cache, how an entry becomes wrong and what removes or refreshes it; write paths that change cached data must invalidate or update it.
28
+ - **The cache is bounded.** Max size plus an eviction policy (LRU/TTL) so it cannot grow until it exhausts memory. An unbounded cache is a memory leak with a delay.
29
+ - **Expiry handles the stampede.** When a hot key expires, concurrent misses must not all stampede the backing store — use a lock/single-flight, staggered TTLs, or stale-while-revalidate.
30
+
31
+ ## Verification bar (must hold before commit)
32
+
33
+ - A measured reason this cache exists; the system is provably correct with the cache empty (miss recomputes the true value).
34
+ - The key includes every value-varying input (identity/tenant/locale/permissions) — no cross-principal or cross-context bleed.
35
+ - An explicit invalidation path exists and every write that changes the cached value triggers it — no path leaves an entry stale forever.
36
+ - The cache is size-bounded with an eviction policy; expiry of a hot key cannot stampede the backing store.
37
+ - No per-user secret or sensitive value lands in a shared cache — pair with [np-secure-code-review].
38
+ - Cross-link [np-performance] for the hit-rate / latency claim and [np-data-modeling] for what the cached shape represents and how it stays consistent with its source.
@@ -0,0 +1,37 @@
1
+ ---
2
+ name: np-data-modeling
3
+ description: "Quality bar for changes that touch a data model, database schema, or migration — new tables/columns/entities, type or nullability changes, constraints, indexes, ORM model edits, or any backfill/transform of persisted data. Triggered for executor work on migrations, schema definitions, entity/model classes, or data-shape changes. Encodes modeling-correctness and migration-safety rules the change MUST satisfy before commit, not a schema document to author. Language- and database-agnostic."
4
+ user-invocable: false
5
+ ---
6
+
7
+ # Data Modeling
8
+
9
+ Persisted data outlives the code that writes it. A schema change is a one-way door under load: it runs against live data, against the old code still in flight, and it cannot be casually undone. Apply this bar to the change you are about to commit.
10
+
11
+ ## Before editing
12
+
13
+ - Read the project's existing schema and migration conventions first: `node .nubos-pilot/bin/np-tools.cjs knowledge-search "schema migration conventions" --task $TASK_ID`. Match the established naming, key strategy, timestamp/soft-delete idiom, and migration tooling — consistency beats personal preference.
14
+
15
+ ## Modeling correctness
16
+
17
+ - **Model the domain, not the screen.** Shape tables around entities and their real relationships, not around one view's convenience. Normalize by default; denormalize only with a stated reason.
18
+ - **Types and precision are exact.** Money is not a float; timestamps carry timezone/UTC intent; enums/identifiers use a bounded type. No string-typing of structured data.
19
+ - **Nullability and defaults are deliberate.** NULL must mean "unknown/absent" by design, never "I didn't decide." Every default is chosen, not inherited by accident.
20
+ - **Invariants live in the database.** Enforce with FK, unique, and check constraints — not application code alone. App-only invariants drift the moment a second writer or a backfill appears.
21
+ - **Index what you query, not everything.** Add indexes for real read paths and FK lookups; each index taxes every write, so don't over-index. Justify composite-column order.
22
+
23
+ ## Migration safety (load-bearing)
24
+
25
+ - **Backward-compatible and reversible.** Old and new code must both work during rollout. Provide a real down/rollback path, or stop and surface why none exists.
26
+ - **Expand then contract** for any rename, type change, or drop: add the new column → backfill → switch reads then writes → drop the old — as separate, independently deployable steps. Never rename/drop in the same step that introduces the replacement.
27
+ - **No long locks, no blocking table rewrites.** Adding NOT NULL or a default to a large table, or rewriting it in place, must not hold a blocking lock. Split into add-nullable → batched backfill → enforce.
28
+ - **Backfills are batched and idempotent.** Process in bounded chunks; re-running the migration must not double-apply or corrupt. No single statement that rewrites an unbounded table at once.
29
+ - **Destructive is never silent.** Dropping a column/table, narrowing a type, or deleting rows requires an explicit, surfaced accepted-risk finding — never folded quietly into an unrelated change.
30
+
31
+ ## Verification bar (must hold before commit)
32
+
33
+ - Types, nullability, and defaults are each a deliberate decision, not a copy-paste default; structured data is not string-typed.
34
+ - Every modeled invariant is backed by a DB constraint, not only app code.
35
+ - The migration is reversible and backward-compatible: old code keeps working mid-rollout, and renames/drops/type-changes use separate expand-then-contract steps.
36
+ - No long lock or unbatched rewrite on a large table; backfills are chunked and idempotent.
37
+ - Any destructive operation carries an explicit, surfaced accepted-risk finding — see [np-secure-code-review] for data exposure and retention, [np-performance] for index/query-shape impact.
@@ -0,0 +1,39 @@
1
+ ---
2
+ name: np-data-privacy
3
+ description: "Quality bar for executor or architect work that collects, stores, processes, exports, or logs personal or sensitive data — user profiles, contact data, identifiers, location, tracking and analytics events, anything tied to a natural person. Triggered for changes touching such flows. Encodes privacy obligations the change MUST satisfy before commit, not a GDPR essay or DPIA document to author. Jurisdiction-agnostic, GDPR-informed. Language- and framework-agnostic."
4
+ user-invocable: false
5
+ ---
6
+
7
+ # Data Privacy (PII)
8
+
9
+ Personal data is a liability, not a free asset. Every field you collect, store, or move is something the system must protect, justify, and eventually delete. This bar applies the moment a change touches data tied to a person.
10
+
11
+ ## Before editing
12
+ - Read existing data-handling conventions/classification: `node .nubos-pilot/bin/np-tools.cjs knowledge-search "<query>" --task $TASK_ID`.
13
+
14
+ ## Classify and minimize
15
+ - Know which fields you touch are personal or sensitive (identity, contact, location, health, financial, biometric, behavioral). Treat anything that singles out a person as PII.
16
+ - Collect only what the stated purpose needs. No fields gathered "just in case" or because the schema had room.
17
+ - Prefer references and derived signals over raw PII where the feature allows; pseudonymize or anonymize when identity is not required for the use case.
18
+
19
+ ## Purpose and retention
20
+ - Data collected for one purpose is not silently reused for another. A new use of existing data is a new decision, not a convenience.
21
+ - Every piece of personal data has a defined lifetime and a real deletion path — including derived copies, caches, search indexes, message queues, exports, and backups. Retention with no expiry is a bug.
22
+ - Deletion must actually remove or irreversibly anonymize the data, not just hide a row behind a flag — unless that flag is a documented soft-delete backed by a real purge job.
23
+
24
+ ## Boundaries that leak
25
+ - Never write PII into logs, traces, analytics events, error reports, crash dumps, or LLM prompts. Redact or tokenize before it crosses those seams.
26
+ - Do not pass raw personal data to third-party services or models unless the purpose and lawful basis explicitly cover it.
27
+ - Access to personal data is least-privilege and auditable. A read of someone's record leaves a trail.
28
+
29
+ ## Subject rights
30
+ - If the system promises export or deletion of a person's data, the change must keep that promise reachable — new stores and copies are included in export and erasure, not orphaned.
31
+
32
+ ## Verification bar (must hold before commit)
33
+ - Every personal/sensitive field the change introduces is identified and justified by a concrete purpose; nothing is collected speculatively.
34
+ - Each new store of PII has a defined retention and a deletion/anonymization path that also covers caches, indexes, and backups.
35
+ - No PII reaches logs, analytics, error reporting, or LLM prompts — verified against [np-observability] redaction conventions.
36
+ - Data is not reused beyond its original purpose without an explicit, recorded decision.
37
+ - Sensitive personal data at rest and in transit is encrypted per [np-encryption]; access is least-privilege and audited per [np-access-control].
38
+ - Export and deletion flows the system promises still cover every store this change adds.
39
+ - Any new high-risk processing flow (large-scale, sensitive, profiling, or cross-border) is flagged for privacy review before it ships, not after.
@@ -0,0 +1,47 @@
1
+ ---
2
+ name: np-dependency-audit
3
+ description: "Quality bar for changes that add or upgrade a third-party dependency — a new package/library, a version bump, or an edit to a manifest (package.json, composer.json, go.mod, Cargo.toml, requirements/pyproject, Gemfile, pom/gradle) or its lockfile. Triggered for executor work that touches dependency declarations or lockfiles. Encodes supply-chain and dependency-hygiene rules the change MUST satisfy before commit, not a document to author. Language- and ecosystem-agnostic."
4
+ user-invocable: false
5
+ ---
6
+
7
+ # Dependency Audit
8
+
9
+ Every dependency you add is a permanent liability you now maintain and a new surface for a supply-chain attack. The bar is not "does it work" — it is "is this dependency justified, vetted, pinned, and minimal." Apply it to the manifest/lockfile change you are about to commit.
10
+
11
+ ## Before editing
12
+
13
+ - Check what's already in the tree and prior decisions: `node .nubos-pilot/bin/np-tools.cjs knowledge-search "dependency for <need>" --task $TASK_ID`. A capability already pulled in transitively, or a deliberately-rejected package, beats a new top-level dep.
14
+
15
+ ## Justify it
16
+
17
+ - **Default to no.** Could a few lines of your own code cover the need? Trivial things (padding, slug, uuid, simple debounce) are not worth a dependency and its update treadmill.
18
+ - **Prefer the standard library / platform.** Reach for a dependency only when it earns its keep over what the language or runtime already gives you.
19
+ - **Don't duplicate the tree.** If something equivalent is already a dependency, use it instead of adding a second one for the same job.
20
+
21
+ ## Vet it before adding
22
+
23
+ - **Maintained and alive** — recent commits/releases, issues being addressed, not a one-commit abandonware repo.
24
+ - **Healthy adoption** — real usage, not a near-zero-install package that happens to match the name you typed.
25
+ - **Sane license** — compatible with the project's licensing; no copyleft/unknown license slipping into a proprietary build.
26
+ - **No known critical vulnerabilities** in the version you pick (run the ecosystem's audit/advisory check).
27
+ - **Exact name, no typosquat** — verify the precise package identifier and namespace/scope; a transposed letter or look-alike org is a known attack.
28
+ - **Wary of install hooks** — postinstall/build scripts run arbitrary code on every machine that installs; treat their presence as a reason to look closer, not a default to accept.
29
+
30
+ ## Minimize the surface
31
+
32
+ - A small package that drags in 50 transitive sub-deps is a big dependency — judge the whole subtree, not the top-level line.
33
+ - Avoid pulling a heavy framework-grade dep for one function.
34
+
35
+ ## Pin and record
36
+
37
+ - **Pin the version and commit the lockfile** so every build resolves identically — no floating ranges that silently upgrade a transitive package under you.
38
+ - **An upgrade is a change, not a chore.** Read the changelog for breaking changes, and confirm the bump does not introduce a vulnerable or yanked transitive version.
39
+ - **Stay SBOM-aware** — know what actually ships; don't leave dead or unused deps declared.
40
+
41
+ ## Verification bar (must hold before commit)
42
+
43
+ - The dependency is justified (own code / stdlib / existing dep was considered and rejected for a reason) — not added on reflex.
44
+ - It is vetted: maintained, adopted, license-clean, free of known critical CVEs in the pinned version, exact-name-verified, install-hooks reviewed.
45
+ - Version is pinned and the lockfile is committed; the transitive surface was weighed, not ignored.
46
+ - For an upgrade: changelog read for breaking changes, and no vulnerable transitive version pulled in.
47
+ - Anything the new dep can reach (network, filesystem, credentials, deserialization) is treated as new attack surface — pair with [np-secure-design] for what it touches and [np-secure-code-review] for how it's wired in.
@@ -0,0 +1,47 @@
1
+ ---
2
+ name: np-encryption
3
+ description: "Quality bar for any change that encrypts, decrypts, hashes, signs, or verifies data; stores or checks passwords; sets up TLS or certificate handling; generates tokens, nonces, IVs, or salts; or reads, writes, or rotates keys and secrets. Triggered for executor work touching cryptography, password storage, transport security, signing/HMAC, or key/secret management. Encodes crypto rules the change MUST satisfy before commit — not a spec to author. Language- and framework-agnostic."
4
+ user-invocable: false
5
+ ---
6
+
7
+ # Encryption & Key Management
8
+
9
+ Crypto code is not done when it round-trips in a test — it is done when it would survive a hostile reviewer. The failure mode is silent: a broken cipher mode, a reused nonce, or a leaked key produces output that looks correct. Apply every relevant section to the diff. A single unaddressed item is a blocking finding, not a nit.
10
+
11
+ ## Before editing
12
+
13
+ - Read existing crypto conventions / locked decisions: `node .nubos-pilot/bin/np-tools.cjs knowledge-search "<query>" --task $TASK_ID`. Reuse the project's chosen library and key store; do not introduce a second.
14
+ - Locked decisions in RULES/CONTEXT (cipher, hasher, key source, rotation policy) override every generic default below.
15
+
16
+ ## Never roll your own
17
+
18
+ - Use the platform's vetted, current high-level crypto library — not raw block-cipher calls, not a hand-built construction. If you are choosing modes, padding, or combining primitives by hand, stop.
19
+ - Pick the right tool. Hashing is one-way (integrity, dedup, fingerprints). Encryption is reversible (confidentiality). Signing/HMAC proves authenticity. Do not substitute one for another.
20
+
21
+ ## Hashing & passwords
22
+
23
+ - Passwords go through a slow, salted password hasher (argon2/bcrypt/scrypt or the project's chosen one) with a per-secret salt. Never MD5/SHA1/plain-SHA256 for passwords.
24
+ - General-purpose fast hashes are for integrity/identity only, never for secrets that must resist guessing.
25
+
26
+ ## Encryption
27
+
28
+ - Use authenticated encryption (AEAD, e.g. AES-GCM / ChaCha20-Poly1305). Never ECB. Never unauthenticated CBC where tampering matters.
29
+ - A fresh IV/nonce per message from a CSPRNG. Never a static, zero, or reused IV/nonce — reuse breaks the cipher.
30
+ - Encrypt sensitive data in transit: TLS everywhere, verify certificates, no protocol/cipher downgrade, no disabled verification. Encrypt at rest where the threat model requires it.
31
+
32
+ ## Keys, secrets & randomness
33
+
34
+ - Keys and secrets live in a secret store / KMS / env — NEVER in source, config-in-repo, fixtures, or logs. No key material in error messages or URLs.
35
+ - Scope keys to purpose and plan rotation: rotation must not orphan data encrypted under the old key.
36
+ - Use a CSPRNG for anything security-relevant — tokens, IVs, salts, session ids. Never a normal/seedable RNG.
37
+ - Compare secrets, tokens, MACs, and signatures in constant time. Never `==` on a secret.
38
+
39
+ ## Verification bar (must hold before commit)
40
+
41
+ - No home-rolled crypto; a vetted current primitive/library is used for the right job (hash vs encrypt vs sign).
42
+ - Passwords use a slow salted hasher; no MD5/SHA1/plain-SHA256 or other fast hash for passwords or any secret that must resist guessing.
43
+ - Encryption is AEAD with a CSPRNG-fresh IV/nonce; no ECB, no static/reused nonce.
44
+ - Data is encrypted in transit with verified TLS; at rest where the threat model demands it.
45
+ - No key or secret introduced into source, config-in-repo, or logs; rotation and scoping are accounted for.
46
+ - All security-relevant randomness is CSPRNG; secret comparisons are constant-time.
47
+ - If any item cannot be satisfied within task scope, stop and surface it as a finding — do not commit around it. Pair with [np-secure-code-review] for the sink-level review, [np-secure-design] when the change adds a new key or trust boundary, and [np-data-privacy] when the data is personal/regulated.
@@ -0,0 +1,37 @@
1
+ ---
2
+ name: np-error-handling
3
+ description: "Quality bar for changes that touch backend, service, integration, or IO code that can fail — network calls, database writes, external APIs, file/queue/process work, batch loops. Triggered for executor work on any failure-prone path; encodes the resilience checklist the change MUST satisfy before commit (fail loud, preserve cause, timeouts, bounded retries, idempotency, resource cleanup, actionable errors), not a doc to author. Language- and framework-agnostic."
4
+ user-invocable: false
5
+ ---
6
+
7
+ # Error Handling & Resilience
8
+
9
+ Code that can fail must fail predictably. The bar below is about what the change does on the unhappy path, not the happy one.
10
+
11
+ ## Before editing
12
+ - Read existing conventions: `node .nubos-pilot/bin/np-tools.cjs knowledge-search "error handling retry conventions" --task $TASK_ID`. Match the established error/retry idiom rather than inventing a new one.
13
+
14
+ ## Fail loud, preserve context
15
+ - No empty catch and no catch-and-continue that hides a failure. If you catch, you handle, rethrow, or log-and-escalate.
16
+ - Distinguish recoverable errors (retry, fallback, degrade) from programmer errors (bug — let it crash, don't paper over).
17
+ - When wrapping an error, preserve the original cause/stack/chain. Never discard the inner error to throw a vague new one.
18
+ - Surface actionable errors to callers: enough to act on, no internals (no stack traces, secrets, SQL, or host details leaking across a trust boundary).
19
+
20
+ ## Outbound calls & retries
21
+ - Every outbound or blocking IO call (network, DB, queue, subprocess, lock) has an explicit timeout. No unbounded waits.
22
+ - Retry only idempotent operations. Use backoff with a hard attempt/time cap — no tight retry loops, no retry storms against a struggling dependency.
23
+ - Write paths that may be retried are idempotent (idempotency key, upsert, or dedup) so a retry can't double-apply.
24
+
25
+ ## Cleanup & partial failure
26
+ - The failure path releases what the success path acquired: connections, file handles, locks, temp files, transactions. Prefer finally/defer/with-style guarantees over manual unwind.
27
+ - Validate inputs before mutating state where cheap; otherwise make partial mutation recoverable. Don't leave half-written state on error.
28
+ - Batch/loop work decides explicitly: fail-fast or collect-and-report. Don't let one bad item silently drop the rest or mask which items failed.
29
+
30
+ ## Verification bar (must hold before commit)
31
+ - No silent swallow: every catch handles, rethrows with cause, or escalates — verified, not assumed.
32
+ - Every new outbound/IO call has a timeout; every retry has backoff and a cap; retried writes are idempotent.
33
+ - Failure paths free all acquired resources; no leaked handles, locks, or open transactions.
34
+ - Caller-facing errors are actionable and leak no internals; batch work reports partial failures.
35
+ - Error and retry paths are covered, not just the happy path — see [np-test-strategy].
36
+ - Failures and retries are observable (logged/metered with cause and context) — see [np-observability].
37
+ - Error shapes and status codes returned across an API boundary are consistent — see [np-api-design].