nubos-pilot 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (273)
  1. package/agents/np-ai-researcher.md +140 -0
  2. package/agents/np-code-fixer.md +363 -0
  3. package/agents/np-code-reviewer.md +351 -0
  4. package/agents/np-domain-researcher.md +136 -0
  5. package/agents/np-eval-auditor.md +167 -0
  6. package/agents/np-eval-planner.md +153 -0
  7. package/agents/np-executor.md +72 -0
  8. package/agents/np-framework-selector.md +171 -0
  9. package/agents/np-nyquist-auditor.md +185 -0
  10. package/agents/np-plan-checker.md +165 -0
  11. package/agents/np-planner.md +199 -0
  12. package/agents/np-researcher.md +150 -0
  13. package/agents/np-security-auditor.md +206 -0
  14. package/agents/np-ui-auditor.md +369 -0
  15. package/agents/np-ui-checker.md +192 -0
  16. package/agents/np-ui-researcher.md +324 -0
  17. package/agents/np-verifier.md +79 -0
  18. package/bin/check-coverage.cjs +40 -0
  19. package/bin/check-workflows.cjs +171 -0
  20. package/bin/check-workflows.test.cjs +208 -0
  21. package/bin/install.js +500 -0
  22. package/bin/np-tools/_commands.cjs +70 -0
  23. package/bin/np-tools/add-tests.cjs +171 -0
  24. package/bin/np-tools/add-tests.test.cjs +122 -0
  25. package/bin/np-tools/add-todo.cjs +108 -0
  26. package/bin/np-tools/add-todo.test.cjs +112 -0
  27. package/bin/np-tools/agent-skills.cjs +14 -0
  28. package/bin/np-tools/agent-skills.test.cjs +42 -0
  29. package/bin/np-tools/ai-integration-phase.cjs +109 -0
  30. package/bin/np-tools/ai-integration-phase.test.cjs +123 -0
  31. package/bin/np-tools/askuser.cjs +53 -0
  32. package/bin/np-tools/askuser.test.cjs +49 -0
  33. package/bin/np-tools/autonomous.cjs +69 -0
  34. package/bin/np-tools/autonomous.test.cjs +74 -0
  35. package/bin/np-tools/checkpoint.cjs +101 -0
  36. package/bin/np-tools/checkpoint.test.cjs +119 -0
  37. package/bin/np-tools/code-review.cjs +133 -0
  38. package/bin/np-tools/code-review.test.cjs +96 -0
  39. package/bin/np-tools/commit-task.cjs +120 -0
  40. package/bin/np-tools/commit-task.test.cjs +160 -0
  41. package/bin/np-tools/commit.cjs +103 -0
  42. package/bin/np-tools/commit.test.cjs +93 -0
  43. package/bin/np-tools/config.cjs +101 -0
  44. package/bin/np-tools/config.test.cjs +71 -0
  45. package/bin/np-tools/discuss-phase-power.cjs +265 -0
  46. package/bin/np-tools/discuss-phase-power.test.cjs +242 -0
  47. package/bin/np-tools/discuss-phase.cjs +132 -0
  48. package/bin/np-tools/discuss-phase.test.cjs +148 -0
  49. package/bin/np-tools/dispatch.cjs +116 -0
  50. package/bin/np-tools/doctor.cjs +242 -0
  51. package/bin/np-tools/eval-review.cjs +116 -0
  52. package/bin/np-tools/eval-review.test.cjs +123 -0
  53. package/bin/np-tools/execute-phase.cjs +182 -0
  54. package/bin/np-tools/execute-phase.test.cjs +116 -0
  55. package/bin/np-tools/execute-plan.cjs +124 -0
  56. package/bin/np-tools/execute-plan.test.cjs +82 -0
  57. package/bin/np-tools/help.cjs +28 -0
  58. package/bin/np-tools/help.test.cjs +29 -0
  59. package/bin/np-tools/init-dispatch.test.cjs +91 -0
  60. package/bin/np-tools/metrics.cjs +97 -0
  61. package/bin/np-tools/metrics.test.cjs +188 -0
  62. package/bin/np-tools/new-milestone.cjs +288 -0
  63. package/bin/np-tools/new-milestone.test.cjs +166 -0
  64. package/bin/np-tools/new-project.cjs +284 -0
  65. package/bin/np-tools/new-project.test.cjs +165 -0
  66. package/bin/np-tools/next.cjs +7 -0
  67. package/bin/np-tools/next.test.cjs +30 -0
  68. package/bin/np-tools/park.cjs +48 -0
  69. package/bin/np-tools/park.test.cjs +50 -0
  70. package/bin/np-tools/pause-work.cjs +24 -0
  71. package/bin/np-tools/pause-work.test.cjs +74 -0
  72. package/bin/np-tools/phase.cjs +71 -0
  73. package/bin/np-tools/phase.test.cjs +81 -0
  74. package/bin/np-tools/plan-diff.cjs +57 -0
  75. package/bin/np-tools/plan-diff.test.cjs +134 -0
  76. package/bin/np-tools/plan-milestone-gaps.cjs +115 -0
  77. package/bin/np-tools/plan-milestone-gaps.test.cjs +122 -0
  78. package/bin/np-tools/plan-phase.cjs +350 -0
  79. package/bin/np-tools/plan-phase.test.cjs +263 -0
  80. package/bin/np-tools/progress.cjs +7 -0
  81. package/bin/np-tools/progress.test.cjs +44 -0
  82. package/bin/np-tools/queue.cjs +213 -0
  83. package/bin/np-tools/research-phase.cjs +144 -0
  84. package/bin/np-tools/research-phase.test.cjs +154 -0
  85. package/bin/np-tools/reset-slice.cjs +17 -0
  86. package/bin/np-tools/reset-slice.test.cjs +96 -0
  87. package/bin/np-tools/resolve-model.cjs +110 -0
  88. package/bin/np-tools/resolve-model.test.cjs +200 -0
  89. package/bin/np-tools/resume-work.cjs +76 -0
  90. package/bin/np-tools/resume-work.test.cjs +91 -0
  91. package/bin/np-tools/skip.cjs +48 -0
  92. package/bin/np-tools/skip.test.cjs +66 -0
  93. package/bin/np-tools/slug.cjs +34 -0
  94. package/bin/np-tools/slug.test.cjs +46 -0
  95. package/bin/np-tools/state.cjs +16 -0
  96. package/bin/np-tools/state.test.cjs +40 -0
  97. package/bin/np-tools/stats.cjs +151 -0
  98. package/bin/np-tools/stats.test.cjs +118 -0
  99. package/bin/np-tools/triage.cjs +128 -0
  100. package/bin/np-tools/ui-phase.cjs +108 -0
  101. package/bin/np-tools/ui-phase.test.cjs +121 -0
  102. package/bin/np-tools/ui-review.cjs +108 -0
  103. package/bin/np-tools/ui-review.test.cjs +120 -0
  104. package/bin/np-tools/undo-task.cjs +31 -0
  105. package/bin/np-tools/undo-task.test.cjs +117 -0
  106. package/bin/np-tools/undo.cjs +43 -0
  107. package/bin/np-tools/undo.test.cjs +120 -0
  108. package/bin/np-tools/unpark.cjs +48 -0
  109. package/bin/np-tools/unpark.test.cjs +50 -0
  110. package/bin/np-tools/verify-work.cjs +186 -0
  111. package/bin/np-tools/verify-work.test.cjs +97 -0
  112. package/docs/adr/0001-no-daemon-invariant.md +82 -0
  113. package/docs/adr/0002-zero-runtime-dependencies.md +90 -0
  114. package/docs/adr/0003-max-six-unit-types.md +85 -0
  115. package/docs/adr/0004-atomic-commit-per-unit.md +102 -0
  116. package/docs/adr/0005-three-orthogonal-file-trees.md +98 -0
  117. package/docs/adr/0006-yaml-dependency-amendment.md +60 -0
  118. package/docs/adr/README.md +27 -0
  119. package/docs/agent-frontmatter-schema.md +84 -0
  120. package/docs/phase-artifact-schemas.md +292 -0
  121. package/docs/phase-directory-layout.md +82 -0
  122. package/lib/__tests__/README.md +1 -0
  123. package/lib/agents.cjs +98 -0
  124. package/lib/agents.test.cjs +286 -0
  125. package/lib/askuser.cjs +36 -0
  126. package/lib/askuser.test.cjs +310 -0
  127. package/lib/checkpoint.cjs +135 -0
  128. package/lib/checkpoint.test.cjs +184 -0
  129. package/lib/core.cjs +165 -0
  130. package/lib/core.test.cjs +405 -0
  131. package/lib/fixtures/README.md +1 -0
  132. package/lib/fixtures/phase-tree/README.md +1 -0
  133. package/lib/fixtures/plans/cycle/PLAN.md +16 -0
  134. package/lib/fixtures/plans/cycle/tasks/T-01.md +20 -0
  135. package/lib/fixtures/plans/cycle/tasks/T-02.md +20 -0
  136. package/lib/fixtures/plans/cycle/tasks/T-03.md +20 -0
  137. package/lib/fixtures/plans/linear/PLAN.md +16 -0
  138. package/lib/fixtures/plans/linear/tasks/T-01.md +20 -0
  139. package/lib/fixtures/plans/linear/tasks/T-02.md +20 -0
  140. package/lib/fixtures/plans/linear/tasks/T-03.md +20 -0
  141. package/lib/fixtures/plans/parallel/PLAN.md +16 -0
  142. package/lib/fixtures/plans/parallel/tasks/T-01.md +20 -0
  143. package/lib/fixtures/plans/parallel/tasks/T-02.md +20 -0
  144. package/lib/fixtures/plans/parallel/tasks/T-03.md +20 -0
  145. package/lib/fixtures/plans/wave-conflict/PLAN.md +16 -0
  146. package/lib/fixtures/plans/wave-conflict/tasks/T-01.md +20 -0
  147. package/lib/fixtures/plans/wave-conflict/tasks/T-02.md +20 -0
  148. package/lib/fixtures/roadmap/ROADMAP-malformed.md +3 -0
  149. package/lib/fixtures/roadmap/ROADMAP-minimal.md +51 -0
  150. package/lib/fixtures/roadmap/roadmap-malformed.yaml +7 -0
  151. package/lib/fixtures/roadmap/roadmap-minimal.yaml +40 -0
  152. package/lib/fixtures/roadmap/roadmap-ten-phases.yaml +101 -0
  153. package/lib/fixtures/templates/phase-context.md +6 -0
  154. package/lib/fixtures/templates/plan-skeleton.md +6 -0
  155. package/lib/frontmatter.cjs +251 -0
  156. package/lib/frontmatter.test.cjs +177 -0
  157. package/lib/gaps.cjs +197 -0
  158. package/lib/gaps.test.cjs +200 -0
  159. package/lib/git.cjs +207 -0
  160. package/lib/git.test.cjs +305 -0
  161. package/lib/install/agents-md.cjs +77 -0
  162. package/lib/install/backup.cjs +70 -0
  163. package/lib/install/codex-toml.cjs +440 -0
  164. package/lib/install/managed-block.cjs +30 -0
  165. package/lib/install/manifest.cjs +148 -0
  166. package/lib/install/mcp-writer.cjs +127 -0
  167. package/lib/install/runtime-detect.cjs +44 -0
  168. package/lib/install/staging.cjs +149 -0
  169. package/lib/metrics-aggregate.cjs +229 -0
  170. package/lib/metrics-aggregate.test.cjs +192 -0
  171. package/lib/metrics.cjs +120 -0
  172. package/lib/metrics.test.cjs +182 -0
  173. package/lib/model-aliases.regression.test.cjs +16 -0
  174. package/lib/model-profiles.cjs +42 -0
  175. package/lib/model-profiles.test.cjs +61 -0
  176. package/lib/next.cjs +236 -0
  177. package/lib/next.test.cjs +194 -0
  178. package/lib/phase.cjs +95 -0
  179. package/lib/phase.test.cjs +189 -0
  180. package/lib/plan-checker-contract.test.cjs +72 -0
  181. package/lib/plan-diff.cjs +173 -0
  182. package/lib/plan-diff.test.cjs +217 -0
  183. package/lib/plan.cjs +85 -0
  184. package/lib/plan.test.cjs +263 -0
  185. package/lib/progress.cjs +95 -0
  186. package/lib/progress.test.cjs +116 -0
  187. package/lib/researcher-contract.test.cjs +61 -0
  188. package/lib/roadmap-render.cjs +206 -0
  189. package/lib/roadmap-render.test.cjs +121 -0
  190. package/lib/roadmap.cjs +416 -0
  191. package/lib/roadmap.test.cjs +371 -0
  192. package/lib/runtime/_contract.test.cjs +61 -0
  193. package/lib/runtime/_readline.cjs +119 -0
  194. package/lib/runtime/_readline.test.cjs +126 -0
  195. package/lib/runtime/claude.cjs +48 -0
  196. package/lib/runtime/claude.test.cjs +101 -0
  197. package/lib/runtime/codex.cjs +35 -0
  198. package/lib/runtime/codex.test.cjs +114 -0
  199. package/lib/runtime/gemini.cjs +35 -0
  200. package/lib/runtime/gemini.test.cjs +109 -0
  201. package/lib/runtime/index.cjs +49 -0
  202. package/lib/runtime/index.test.cjs +181 -0
  203. package/lib/runtime/opencode.cjs +35 -0
  204. package/lib/runtime/opencode.test.cjs +124 -0
  205. package/lib/state.cjs +205 -0
  206. package/lib/state.test.cjs +264 -0
  207. package/lib/surface-audit.test.cjs +46 -0
  208. package/lib/tasks.cjs +327 -0
  209. package/lib/tasks.test.cjs +389 -0
  210. package/lib/template.cjs +66 -0
  211. package/lib/template.test.cjs +159 -0
  212. package/lib/undo.cjs +179 -0
  213. package/lib/undo.test.cjs +261 -0
  214. package/lib/verify.cjs +116 -0
  215. package/lib/verify.test.cjs +187 -0
  216. package/np-tools.cjs +303 -0
  217. package/package.json +39 -0
  218. package/templates/AI-SPEC.md +90 -0
  219. package/templates/CONTEXT.md +32 -0
  220. package/templates/PLAN.md +69 -0
  221. package/templates/PROJECT.md +60 -0
  222. package/templates/REQUIREMENTS.md +38 -0
  223. package/templates/SECURITY.md +61 -0
  224. package/templates/UI-SPEC.md +64 -0
  225. package/templates/VALIDATION.md +76 -0
  226. package/templates/claude/payload/README.md +11 -0
  227. package/templates/opencode/opencode.json +6 -0
  228. package/templates/opencode/payload/AGENTS.md +9 -0
  229. package/workflows/add-backlog.md +212 -0
  230. package/workflows/add-tests.md +69 -0
  231. package/workflows/add-todo.md +222 -0
  232. package/workflows/ai-integration-phase.md +230 -0
  233. package/workflows/autonomous.md +94 -0
  234. package/workflows/cleanup.md +325 -0
  235. package/workflows/code-review-fix.md +435 -0
  236. package/workflows/code-review.md +447 -0
  237. package/workflows/discuss-phase-assumptions.md +269 -0
  238. package/workflows/discuss-phase-power.md +139 -0
  239. package/workflows/discuss-phase.md +386 -0
  240. package/workflows/dispatch.md +9 -0
  241. package/workflows/doctor.md +10 -0
  242. package/workflows/eval-review.md +243 -0
  243. package/workflows/execute-phase.md +142 -0
  244. package/workflows/execute-plan.md +82 -0
  245. package/workflows/help.md +8 -0
  246. package/workflows/new-milestone.md +166 -0
  247. package/workflows/new-project.md +213 -0
  248. package/workflows/next.md +8 -0
  249. package/workflows/note.md +244 -0
  250. package/workflows/park.md +29 -0
  251. package/workflows/pause-work.md +34 -0
  252. package/workflows/plan-milestone-gaps.md +233 -0
  253. package/workflows/plan-phase.md +351 -0
  254. package/workflows/progress.md +8 -0
  255. package/workflows/queue.md +9 -0
  256. package/workflows/research-phase.md +327 -0
  257. package/workflows/reset-slice.md +39 -0
  258. package/workflows/resume-work.md +79 -0
  259. package/workflows/review.md +489 -0
  260. package/workflows/secure-phase.md +209 -0
  261. package/workflows/session-report.md +243 -0
  262. package/workflows/skip.md +29 -0
  263. package/workflows/state.md +7 -0
  264. package/workflows/stats.md +170 -0
  265. package/workflows/thread.md +214 -0
  266. package/workflows/triage.md +9 -0
  267. package/workflows/ui-phase.md +246 -0
  268. package/workflows/ui-review.md +222 -0
  269. package/workflows/undo-task.md +42 -0
  270. package/workflows/undo.md +55 -0
  271. package/workflows/unpark.md +29 -0
  272. package/workflows/validate-phase.md +231 -0
  273. package/workflows/verify-work.md +83 -0
@@ -0,0 +1,192 @@
1
+ const fs = require('node:fs');
2
+ const os = require('node:os');
3
+ const path = require('node:path');
4
+ const { test } = require('node:test');
5
+ const assert = require('node:assert/strict');
6
+
7
+ const agg = require('./metrics-aggregate.cjs');
8
+ const { aggregatePhase, aggregateSession, _readJsonlLines } = agg;
9
+
10
+ const _sandboxes = [];
11
+
12
/**
 * Create a throwaway project root under the OS temp directory that already
 * contains an empty `.nubos-pilot/metrics` directory, and record it in
 * `_sandboxes` so the afterEach hook can delete it.
 *
 * @returns {string} Path of the freshly created sandbox root.
 */
function makeSandbox() {
  const prefix = path.join(os.tmpdir(), 'np-metrics-agg-');
  const sandboxRoot = fs.mkdtempSync(prefix);
  const metricsDir = path.join(sandboxRoot, '.nubos-pilot', 'metrics');
  fs.mkdirSync(metricsDir, { recursive: true });
  _sandboxes.push(sandboxRoot);
  return sandboxRoot;
}
18
+
19
/**
 * Write a JSONL fixture into `<root>/.nubos-pilot/metrics/<name>`.
 *
 * String entries are written verbatim (which lets a test inject a malformed
 * line); every other entry is JSON-serialised. Lines are joined with '\n'
 * and the file always ends with a trailing '\n'.
 *
 * @param {string} root - Sandbox root directory.
 * @param {string} name - File name inside the metrics directory.
 * @param {Array<object|string>} records - Entries to write, one per line.
 * @returns {string} Path of the written file.
 */
function writeJsonl(root, name, records) {
  const target = path.join(root, '.nubos-pilot', 'metrics', name);
  const serialised = [];
  for (const entry of records) {
    serialised.push(typeof entry === 'string' ? entry : JSON.stringify(entry));
  }
  fs.writeFileSync(target, `${serialised.join('\n')}\n`);
  return target;
}
25
+
26
// After every test, remove each sandbox registered in `_sandboxes`, newest
// first, leaving the registry empty. Removal is best-effort: a directory that
// cannot be deleted must not fail the test that created it.
test.afterEach(() => {
  const pending = _sandboxes.splice(0).reverse();
  for (const dir of pending) {
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch {
      // Deliberately ignored: temp-dir cleanup is best-effort.
    }
  }
});
31
+
32
/**
 * Build a metrics record fixture for the `claude` runtime (numeric token
 * counts), with any field replaced by `overrides`.
 *
 * @param {object} [overrides] - Fields to replace or add on top of the defaults.
 * @returns {object} A new record object.
 */
function claudeRec(overrides) {
  const defaults = {
    agent: 'np-executor',
    tier: 'sonnet',
    resolved_model: 'claude-sonnet-4-6',
    phase: '10',
    plan: '10-01',
    task: '10-01-T01',
    started_at: '2026-04-17T10:00:00Z',
    ended_at: '2026-04-17T10:00:01Z',
    duration_ms: 1000,
    tokens_in: 100,
    tokens_out: 50,
    retry_count: 0,
    status: 'ok',
    runtime: 'claude',
    error: null,
  };
  return { ...defaults, ...overrides };
}
43
+
44
/**
 * Build a metrics record fixture for the `codex` runtime, whose token
 * counts are null, with any field replaced by `overrides`.
 *
 * @param {object} [overrides] - Fields to replace or add on top of the defaults.
 * @returns {object} A new record object.
 */
function codexRec(overrides) {
  const defaults = {
    agent: 'np-executor',
    tier: 'sonnet',
    resolved_model: 'claude-sonnet-4-6',
    phase: '10',
    plan: '10-01',
    task: '10-01-T02',
    started_at: '2026-04-17T10:00:00Z',
    ended_at: '2026-04-17T10:00:01Z',
    duration_ms: 1000,
    tokens_in: null,
    tokens_out: null,
    retry_count: 0,
    status: 'ok',
    runtime: 'codex',
    error: null,
  };
  return { ...defaults, ...overrides };
}
55
+
56
// AGG-1: with no metrics files in the sandbox, aggregatePhase must still
// return a fully shaped summary whose counters are 0, whose token totals and
// timestamps are null, and whose buckets are empty.
test('AGG-1: aggregatePhase on empty dir returns zero-shape', async () => {
  const summary = await aggregatePhase('10', { cwd: makeSandbox() });

  assert.equal(summary.phase, '10');
  assert.equal(summary.record_count, 0);
  assert.equal(summary.total_tokens_in, null);
  assert.equal(summary.total_tokens_out, null);
  assert.deepEqual(summary.avg_duration_ms_by_tier, {});
  assert.deepEqual(summary.avg_duration_ms_by_agent, {});
  assert.equal(summary.retry_count_sum, 0);
  assert.equal(summary.error_count, 0);
  assert.equal(summary.error_rate, 0);
  assert.deepEqual(summary.agents_seen, []);
  assert.equal(summary.first_record_at, null);
  assert.equal(summary.last_record_at, null);
});
72
+
73
// AGG-2: when only some records carry token counts, the totals cover just
// those records and the summary is flagged as partial.
test('AGG-2: mixed claude + codex records → partial_tokens true; sum only claude tokens', async () => {
  const sandbox = makeSandbox();
  const withTokens = [
    claudeRec({ tokens_in: 100, tokens_out: 50 }),
    claudeRec({ tokens_in: 200, tokens_out: 75 }),
  ];
  const withoutTokens = [codexRec(), codexRec(), codexRec()];
  writeJsonl(sandbox, 'phase-10.jsonl', [...withTokens, ...withoutTokens]);

  const summary = await aggregatePhase('10', { cwd: sandbox });

  assert.equal(summary.record_count, 5);
  assert.equal(summary.total_tokens_in, 300);
  assert.equal(summary.total_tokens_out, 125);
  assert.equal(summary.partial_tokens, true);
});

// AGG-3: when no record carries token counts, the totals stay null and the
// summary is not flagged as partial.
test('AGG-3: all-codex records → total_tokens_in null, partial_tokens false', async () => {
  const sandbox = makeSandbox();
  const fiveCodex = Array.from({ length: 5 }, () => codexRec());
  writeJsonl(sandbox, 'phase-10.jsonl', fiveCodex);

  const summary = await aggregatePhase('10', { cwd: sandbox });

  assert.equal(summary.record_count, 5);
  assert.equal(summary.total_tokens_in, null);
  assert.equal(summary.total_tokens_out, null);
  assert.equal(summary.partial_tokens, false);
});
98
+
99
// AGG-4: two of the four records have a status other than 'ok', so the
// expected error count is 2 and the expected rate is 2 / 4.
test('AGG-4: error_rate = error_count / record_count', async () => {
  const sandbox = makeSandbox();
  const statuses = ['ok', 'error', 'ok', 'timeout'];
  writeJsonl(sandbox, 'phase-10.jsonl', statuses.map((status) => claudeRec({ status })));

  const summary = await aggregatePhase('10', { cwd: sandbox });

  assert.equal(summary.record_count, 4);
  assert.equal(summary.error_count, 2);
  assert.equal(summary.error_rate, 0.5);
});

// AGG-5: durations are averaged per tier and per agent; three opus/alpha
// records (100, 200, 300) average to 200, the single haiku/beta record to 500.
test('AGG-5: avg_duration_ms buckets by tier and agent', async () => {
  const sandbox = makeSandbox();
  const alphaRuns = [100, 200, 300].map(
    (duration_ms) => claudeRec({ tier: 'opus', agent: 'alpha', duration_ms }),
  );
  const betaRun = claudeRec({ tier: 'haiku', agent: 'beta', duration_ms: 500 });
  writeJsonl(sandbox, 'phase-10.jsonl', [...alphaRuns, betaRun]);

  const summary = await aggregatePhase('10', { cwd: sandbox });

  assert.equal(summary.avg_duration_ms_by_tier.opus, 200);
  assert.equal(summary.avg_duration_ms_by_tier.haiku, 500);
  assert.equal(summary.avg_duration_ms_by_agent.alpha, 200);
  assert.equal(summary.avg_duration_ms_by_agent.beta, 500);
  assert.deepEqual(summary.agents_seen, ['alpha', 'beta']);
});
128
+
129
// AGG-6: only the two records started after the 10:00 cut-off may be counted,
// and the cut-off itself is echoed back as `since_iso`.
test('AGG-6: aggregateSession filters records below sinceIso', async () => {
  const sandbox = makeSandbox();
  const cutoff = '2026-04-17T10:00:00Z';
  const startTimes = ['2026-04-17T08:00:00Z', '2026-04-17T11:00:00Z', '2026-04-17T12:00:00Z'];
  writeJsonl(sandbox, 'phase-10.jsonl', startTimes.map((started_at) => claudeRec({ started_at })));

  const summary = await aggregateSession(cutoff, { cwd: sandbox });

  assert.equal(summary.record_count, 2);
  assert.equal(summary.since_iso, '2026-04-17T10:00:00Z');
});

// AGG-7: with no cut-off, every phase file plus meta.jsonl contributes, and
// each source is reported under its own `by_phase` key.
test('AGG-7: aggregateSession reads all phase-*.jsonl + meta.jsonl', async () => {
  const sandbox = makeSandbox();
  const fixtures = {
    'phase-09.jsonl': [claudeRec({ phase: '09' })],
    'phase-10.jsonl': [claudeRec({ phase: '10' }), claudeRec({ phase: '10' })],
    'meta.jsonl': [claudeRec({ phase: null })],
  };
  for (const [fileName, records] of Object.entries(fixtures)) {
    writeJsonl(sandbox, fileName, records);
  }

  const summary = await aggregateSession(null, { cwd: sandbox });

  assert.ok(summary.by_phase['09']);
  assert.ok(summary.by_phase['10']);
  assert.ok(summary.by_phase['meta']);
  assert.equal(summary.by_phase['10'].record_count, 2);
  assert.equal(summary.record_count, 4);
  assert.deepEqual(summary.phases_touched.sort(), ['09', '10', 'meta']);
});
154
+
155
// AGG-8: a corrupt line between valid ones must be skipped with a warning on
// stderr rather than aborting the aggregation.
test('AGG-8: malformed line logs warning to stderr and skips, valid lines parse', async () => {
  const sandbox = makeSandbox();
  const target = path.join(sandbox, '.nubos-pilot', 'metrics', 'phase-10.jsonl');
  const fileLines = [
    JSON.stringify(claudeRec()),
    '{not valid json',
    JSON.stringify(claudeRec()),
    JSON.stringify(claudeRec()),
  ];
  fs.writeFileSync(target, `${fileLines.join('\n')}\n`);

  // Temporarily replace stderr.write to capture the warning text; the
  // finally block guarantees the writer is put back even if the call rejects.
  const restoreWrite = process.stderr.write.bind(process.stderr);
  let captured = '';
  process.stderr.write = (chunk) => {
    captured += chunk;
    return true;
  };

  let summary;
  try {
    summary = await aggregatePhase('10', { cwd: sandbox });
  } finally {
    process.stderr.write = restoreWrite;
  }

  assert.equal(summary.record_count, 3);
  assert.match(captured, /skipping malformed JSONL/);
});
177
+
178
// AGG-9: a phase id containing path separators must be rejected with the
// `metrics-invalid-phase` error code.
test('AGG-9: path-traversal phase rejected with metrics-invalid-phase', async () => {
  const sandbox = makeSandbox();
  const isInvalidPhaseError = (err) =>
    err && err.name === 'NubosPilotError' && err.code === 'metrics-invalid-phase';

  await assert.rejects(
    () => aggregatePhase('../etc/passwd', { cwd: sandbox }),
    isInvalidPhaseError,
  );
});

// READJL-1: reading a file that does not exist resolves normally and never
// invokes the per-record callback.
test('READJL-1: _readJsonlLines on missing file resolves without error', async () => {
  const sandbox = makeSandbox();
  const absentFile = path.join(sandbox, '.nubos-pilot', 'metrics', 'absent.jsonl');
  const seen = [];

  await _readJsonlLines(absentFile, (record) => seen.push(record));

  assert.equal(seen.length, 0);
});
@@ -0,0 +1,120 @@
1
+ const fs = require('node:fs');
2
+ const os = require('node:os');
3
+ const path = require('node:path');
4
+ const { NubosPilotError, findProjectRoot } = require('./core.cjs');
5
+
6
// Maximum number of UTF-16 code units kept from an error message before an
// ellipsis is appended.
const MAX_ERROR_MESSAGE = 300;

// Maximum number of UTF-16 code units kept from an error code.
const MAX_ERROR_CODE = 40;

// Upper bound, in UTF-8 bytes, for one JSON-serialised record; buildRecord
// trims error.message until the record fits.
// NOTE(review): presumably 511 so that record + newline stays within a
// 512-byte PIPE_BUF — confirm against the design notes.
const MAX_RECORD_BYTES = 511;

// Every field of a stored metrics record, in serialisation order.
const SCHEMA_FIELDS = [
  'agent',
  'tier',
  'resolved_model',
  'phase',
  'plan',
  'task',
  'started_at',
  'ended_at',
  'duration_ms',
  'tokens_in',
  'tokens_out',
  'retry_count',
  'status',
  'runtime',
  'error',
];

// Fields a caller must supply to buildRecord; the remaining schema fields are
// derived or defaulted there.
const REQUIRED_INPUT_FIELDS = [
  'agent',
  'tier',
  'resolved_model',
  'phase',
  'plan',
  'task',
  'started_at',
  'ended_at',
  'status',
  'runtime',
];

// Phase ids become part of a file name, so only letters, digits, '.', '_'
// and '-' are accepted (no path separators).
const SAFE_PHASE_RE = /^[A-Za-z0-9._-]+$/;
28
+
29
/**
 * Clip `text` to at most `limit` UTF-16 code units without cutting a
 * surrogate pair in half (which would leave a lone high surrogate behind).
 *
 * @param {string} text - Text to clip.
 * @param {number} limit - Maximum number of code units to keep (>= 1).
 * @returns {string} `text` itself when short enough, otherwise its clipped prefix.
 */
function _clipText(text, limit) {
  if (text.length <= limit) return text;
  const lastKept = text.charCodeAt(limit - 1);
  // A high surrogate in the last kept slot means its low half lies past the
  // limit, so drop the dangling half as well.
  const end = lastKept >= 0xd800 && lastKept <= 0xdbff ? limit - 1 : limit;
  return text.slice(0, end);
}

/**
 * Reduce an error-like object to the bounded `{ code, message }` shape that
 * is stored in a metrics record.
 *
 * - `code` falls back to 'unknown' when it is not a string and is clipped to
 *   MAX_ERROR_CODE code units.
 * - `message` falls back to '' when it is not a string; when longer than
 *   MAX_ERROR_MESSAGE code units it is clipped and suffixed with '…'.
 *
 * @param {*} err - Error-like value; anything that is not a non-null object yields null.
 * @returns {{code: string, message: string}|null} The bounded error, or null.
 */
function _truncateError(err) {
  if (!err || typeof err !== 'object') return null;

  const rawCode = typeof err.code === 'string' ? err.code : 'unknown';
  const code = _clipText(rawCode, MAX_ERROR_CODE);

  const rawMessage = typeof err.message === 'string' ? err.message : '';
  if (rawMessage.length <= MAX_ERROR_MESSAGE) {
    return { code, message: rawMessage };
  }
  return { code, message: _clipText(rawMessage, MAX_ERROR_MESSAGE) + '…' };
}
39
+
40
/**
 * Compute the elapsed time between two timestamps.
 *
 * @param {string} startedAt - Start timestamp, parsed with Date.parse.
 * @param {string} endedAt - End timestamp, parsed with Date.parse.
 * @returns {number} Elapsed milliseconds; 0 when either timestamp cannot be
 *   parsed or when the end precedes the start.
 */
function _durationMs(startedAt, endedAt) {
  const [begin, finish] = [startedAt, endedAt].map((stamp) => Date.parse(stamp));
  if (Number.isNaN(begin) || Number.isNaN(finish)) return 0;
  const elapsed = finish - begin;
  return elapsed > 0 ? elapsed : 0;
}
46
+
47
/**
 * Validate raw dispatch data and normalise it into a metrics record.
 *
 * A required field counts as missing only when it is `undefined` or `null`.
 * An empty-string `phase` is therefore accepted; appendRecord routes such
 * records to `meta.jsonl`.
 *
 * Normalisation rules:
 * - identity, timestamp, status and runtime fields are coerced with String();
 * - `duration_ms` is derived from `started_at` / `ended_at`;
 * - `tokens_in` / `tokens_out` are kept only for runtime 'claude' and only
 *   when numeric, otherwise null;
 * - `retry_count` defaults to 0 when not numeric;
 * - `error` is null for status 'ok', otherwise the bounded form of `input.error`;
 * - when the serialised record exceeds MAX_RECORD_BYTES, `error.message` is
 *   shortened one character at a time (keeping a trailing '…') until it fits
 *   or the message is empty.
 *
 * @param {object} [args] - Raw input; see REQUIRED_INPUT_FIELDS for mandatory keys.
 * @returns {object} The normalised record.
 * @throws {NubosPilotError} Code 'metrics-invalid-record', with
 *   `details.missing` listing the absent fields.
 */
function buildRecord(args) {
  const input = args || {};

  // `== null` matches exactly undefined and null, so '' (and 0) pass through.
  const missing = REQUIRED_INPUT_FIELDS.filter((field) => input[field] == null);
  if (missing.length) {
    throw new NubosPilotError(
      'metrics-invalid-record',
      'metrics.buildRecord missing required fields: ' + missing.join(', '),
      { missing },
    );
  }

  const isClaude = input.runtime === 'claude';
  const record = {
    agent: String(input.agent),
    tier: String(input.tier),
    resolved_model: String(input.resolved_model),
    phase: String(input.phase),
    plan: String(input.plan),
    task: String(input.task),
    started_at: String(input.started_at),
    ended_at: String(input.ended_at),
    duration_ms: _durationMs(input.started_at, input.ended_at),
    tokens_in: isClaude && typeof input.tokens_in === 'number' ? input.tokens_in : null,
    tokens_out: isClaude && typeof input.tokens_out === 'number' ? input.tokens_out : null,
    retry_count: typeof input.retry_count === 'number' ? input.retry_count : 0,
    status: String(input.status),
    runtime: String(input.runtime),
    error: input.status === 'ok' ? null : _truncateError(input.error),
  };

  // Byte budget: the character cap applied to the message does not bound the
  // UTF-8 size of the whole record, so keep trimming the message until the
  // serialised form fits.
  if (record.error && typeof record.error.message === 'string') {
    while (Buffer.byteLength(JSON.stringify(record), 'utf8') > MAX_RECORD_BYTES) {
      const msg = record.error.message;
      const base = msg.endsWith('…') ? msg.slice(0, -1) : msg;
      // Nothing left to trim: give up rather than loop forever.
      if (base.length === 0) { record.error.message = ''; break; }
      record.error.message = base.slice(0, base.length - 1) + '…';
    }
  }

  return record;
}
95
+
96
/**
 * Append one record as a single JSON line to the project's metrics log.
 *
 * The project root is located from `opts.cwd` (or the process working
 * directory) and `<root>/.nubos-pilot/metrics` is created if needed. Records
 * whose phase is null, undefined or '' go to `meta.jsonl`; all others go to
 * `phase-<phase>.jsonl`.
 *
 * @param {object} record - Record to serialise; only `record.phase` is inspected.
 * @param {{cwd?: string}} [opts] - Optional directory to start the root lookup from.
 * @returns {string} Path of the file the line was appended to.
 * @throws {NubosPilotError} Code 'metrics-invalid-phase' when a non-empty
 *   phase contains characters outside SAFE_PHASE_RE.
 */
function appendRecord(record, opts) {
  const startDir = (opts && opts.cwd) || process.cwd();
  const metricsDir = path.join(findProjectRoot(startDir), '.nubos-pilot', 'metrics');
  fs.mkdirSync(metricsDir, { recursive: true });

  const phase = record.phase == null ? '' : String(record.phase);
  const isMeta = phase === '';

  // The phase id is embedded in the file name, so reject anything unsafe.
  if (!isMeta && !SAFE_PHASE_RE.test(phase)) {
    throw new NubosPilotError(
      'metrics-invalid-phase',
      'metrics.appendRecord phase must match /^[A-Za-z0-9._-]+$/: ' + phase,
      { phase },
    );
  }

  const fileName = isMeta ? 'meta.jsonl' : 'phase-' + phase + '.jsonl';
  const target = path.join(metricsDir, fileName);
  fs.appendFileSync(target, JSON.stringify(record) + os.EOL);
  return target;
}
119
+
120
+ module.exports = { appendRecord, buildRecord, MAX_ERROR_MESSAGE, SCHEMA_FIELDS };
@@ -0,0 +1,182 @@
1
+ const fs = require('node:fs');
2
+ const os = require('node:os');
3
+ const path = require('node:path');
4
+ const { test } = require('node:test');
5
+ const assert = require('node:assert/strict');
6
+
7
+ const metrics = require('./metrics.cjs');
8
+ const { appendRecord, buildRecord, MAX_ERROR_MESSAGE, SCHEMA_FIELDS } = metrics;
9
+
10
+ const _sandboxes = [];
11
+
12
/**
 * Create a throwaway project root under the OS temp directory containing an
 * empty `.nubos-pilot` directory, and record it in `_sandboxes` so the
 * afterEach hook can delete it. The `metrics` subdirectory is intentionally
 * not created here.
 *
 * @returns {string} Path of the freshly created sandbox root.
 */
function makeSandbox() {
  const prefix = path.join(os.tmpdir(), 'np-metrics-');
  const sandboxRoot = fs.mkdtempSync(prefix);
  const markerDir = path.join(sandboxRoot, '.nubos-pilot');
  fs.mkdirSync(markerDir, { recursive: true });
  _sandboxes.push(sandboxRoot);
  return sandboxRoot;
}
18
+
19
// After every test, remove each sandbox registered in `_sandboxes`, newest
// first, leaving the registry empty. Removal is best-effort: a directory that
// cannot be deleted must not fail the test that created it.
test.afterEach(() => {
  const pending = _sandboxes.splice(0).reverse();
  for (const dir of pending) {
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch {
      // Deliberately ignored: temp-dir cleanup is best-effort.
    }
  }
});
24
+
25
/**
 * Build a complete, valid buildRecord input for the `claude` runtime, with
 * any field replaced by `overrides`.
 *
 * @param {object} [overrides] - Fields to replace or add on top of the defaults.
 * @returns {object} A new argument object.
 */
function validClaudeArgs(overrides) {
  const defaults = {
    agent: 'np-executor',
    tier: 'sonnet',
    resolved_model: 'claude-sonnet-4-6',
    phase: '09',
    plan: '09-01',
    task: '09-01-T02',
    started_at: '2026-04-16T14:30:12.123Z',
    ended_at: '2026-04-16T14:31:08.987Z',
    tokens_in: 3421,
    tokens_out: 812,
    retry_count: 0,
    status: 'ok',
    runtime: 'claude',
    error: null,
  };
  return { ...defaults, ...overrides };
}
43
+
44
// MET-1: the exported schema is pinned field-for-field and in order, so any
// addition, removal or reordering fails loudly.
test('MET-1: SCHEMA_FIELDS equals D-08 15-field list in exact order', () => {
  const expectedFields = [
    'agent', 'tier', 'resolved_model',
    'phase', 'plan', 'task',
    'started_at', 'ended_at', 'duration_ms',
    'tokens_in', 'tokens_out',
    'retry_count', 'status', 'runtime', 'error',
  ];
  assert.deepEqual(SCHEMA_FIELDS, expectedFields);
  assert.equal(SCHEMA_FIELDS.length, 15);
});

// MET-2: the message character cap is part of the public contract.
test('MET-2: MAX_ERROR_MESSAGE equals 300', () => {
  assert.equal(MAX_ERROR_MESSAGE, 300);
});
58
+
59
// MET-3: a fully valid claude payload comes back with every input field
// preserved, a duration derived from the two timestamps, and a null error.
test('MET-3: buildRecord with valid claude payload returns complete D-08 record', () => {
  const startedAt = '2026-04-16T14:30:12.123Z';
  const endedAt = '2026-04-16T14:31:08.987Z';
  const built = buildRecord(validClaudeArgs());

  assert.equal(built.agent, 'np-executor');
  assert.equal(built.tier, 'sonnet');
  assert.equal(built.resolved_model, 'claude-sonnet-4-6');
  assert.equal(built.phase, '09');
  assert.equal(built.plan, '09-01');
  assert.equal(built.task, '09-01-T02');
  assert.equal(built.started_at, startedAt);
  assert.equal(built.ended_at, endedAt);
  assert.equal(built.duration_ms, Date.parse(endedAt) - Date.parse(startedAt));
  assert.equal(built.tokens_in, 3421);
  assert.equal(built.tokens_out, 812);
  assert.equal(built.retry_count, 0);
  assert.equal(built.status, 'ok');
  assert.equal(built.runtime, 'claude');
  assert.equal(built.error, null, 'error must be null when status=ok');
});
78
+
79
// MET-4: a short error passes through unchanged as { code, message }.
test('MET-4: buildRecord with status=error preserves error object shape', () => {
  const failure = { code: 'X', message: 'm' };
  const built = buildRecord(validClaudeArgs({ status: 'error', error: failure }));
  assert.deepEqual(built.error, { code: 'X', message: 'm' });
});

// MET-5: an overlong message is cut to a prefix of the original plus a
// single trailing ellipsis.
test('MET-5: buildRecord truncates overlong error.message to <=300 chars + ellipsis', () => {
  const overlong = { code: 'X', message: 'a'.repeat(1000) };
  const { message } = buildRecord(validClaudeArgs({ status: 'error', error: overlong })).error;

  assert.ok(
    message.length <= 301,
    'char-cap: 300 content chars + 1 ellipsis; stricter byte-budget may cut further',
  );
  assert.ok(message.endsWith('…'), 'truncated message ends with ellipsis');
  assert.equal(message[0], 'a', 'kept content is the prefix of original message');

  const keptContent = message.slice(0, -1);
  assert.ok(
    [...keptContent].every((ch) => ch === 'a'),
    'all kept characters are the original prefix',
  );
});
100
+
101
// MET-6: token counts supplied for a non-claude runtime are discarded.
test('MET-6: buildRecord runtime=codex nulls tokens_in/tokens_out (D-09)', () => {
  const codexArgs = validClaudeArgs({ runtime: 'codex', tokens_in: 9999, tokens_out: 9999 });
  const built = buildRecord(codexArgs);

  assert.equal(built.tokens_in, null, 'tokens_in must be null for non-claude runtime');
  assert.equal(built.tokens_out, null, 'tokens_out must be null for non-claude runtime');
  assert.equal(built.runtime, 'codex');
});

// MET-7: empty input is rejected with a typed error that names the missing fields.
test('MET-7: buildRecord({}) throws metrics-invalid-record with missing[] detail', () => {
  let caught = null;
  try {
    buildRecord({});
  } catch (err) {
    caught = err;
  }

  assert.ok(caught, 'expected throw on empty input');
  assert.equal(caught.name, 'NubosPilotError');
  assert.equal(caught.code, 'metrics-invalid-record');
  assert.ok(Array.isArray(caught.details.missing), 'details.missing must be array');

  const mustBeReported = ['agent', 'tier', 'phase', 'status', 'runtime'];
  for (const field of mustBeReported) {
    assert.ok(caught.details.missing.includes(field), 'missing[] must include ' + field);
  }
});
119
+
120
// MET-8: a record with phase '09' lands in phase-09.jsonl as exactly one
// line that parses back to the original record.
test('MET-8: appendRecord routes phase=09 to phase-09.jsonl (one line, parseable)', () => {
  const sandbox = makeSandbox();
  const original = buildRecord(validClaudeArgs());
  appendRecord(original, { cwd: sandbox });

  const phaseFile = path.join(sandbox, '.nubos-pilot', 'metrics', 'phase-09.jsonl');
  assert.ok(fs.existsSync(phaseFile), 'phase-09.jsonl must exist');

  const content = fs.readFileSync(phaseFile, 'utf-8');
  const nonEmptyLines = content.split(os.EOL).filter(Boolean);
  assert.equal(nonEmptyLines.length, 1);

  const roundTripped = JSON.parse(nonEmptyLines[0]);
  assert.equal(roundTripped.phase, '09');
  assert.equal(roundTripped.task, '09-01-T02');
  assert.deepEqual(roundTripped, original, 'round-trip JSON.parse must equal original record');
});
133
+
134
// MET-9: records with an empty phase are collected in meta.jsonl and never
// create a phase-*.jsonl file.
test('MET-9: appendRecord with phase="" routes to meta.jsonl (not phase-*.jsonl)', () => {
  const sandbox = makeSandbox();
  const metricsDir = path.join(sandbox, '.nubos-pilot', 'metrics');
  const first = buildRecord(validClaudeArgs({ phase: '' }));
  const second = buildRecord(validClaudeArgs({ phase: '', task: '09-01-T03' }));
  appendRecord(first, { cwd: sandbox });
  appendRecord(second, { cwd: sandbox });

  const metaFile = path.join(sandbox, '.nubos-pilot', 'metrics', 'meta.jsonl');
  assert.ok(fs.existsSync(metaFile), 'meta.jsonl must exist');

  const metaLines = fs.readFileSync(metaFile, 'utf-8').split(os.EOL).filter(Boolean);
  assert.equal(metaLines.length, 2, 'two appends must produce exactly two lines in meta.jsonl');

  const phaseFiles = fs.readdirSync(metricsDir).filter((entry) => entry.startsWith('phase-'));
  assert.equal(phaseFiles.length, 0, 'no phase-*.jsonl files should exist when phase=""');
});

// MET-10: consecutive appends to the same phase stay on separate lines, in
// call order, each parseable on its own.
test('MET-10: two sequential appendRecord calls produce two independently-parseable lines', () => {
  const sandbox = makeSandbox();
  const taskIds = ['09-01-T01', '09-01-T02'];
  const [firstRecord, secondRecord] = taskIds.map((task) => buildRecord(validClaudeArgs({ task })));
  appendRecord(firstRecord, { cwd: sandbox });
  appendRecord(secondRecord, { cwd: sandbox });

  const phaseFile = path.join(sandbox, '.nubos-pilot', 'metrics', 'phase-09.jsonl');
  const nonEmptyLines = fs.readFileSync(phaseFile, 'utf-8').split(os.EOL).filter(Boolean);
  assert.equal(nonEmptyLines.length, 2);

  const parsedFirst = JSON.parse(nonEmptyLines[0]);
  const parsedSecond = JSON.parse(nonEmptyLines[1]);
  assert.equal(parsedFirst.task, '09-01-T01');
  assert.equal(parsedSecond.task, '09-01-T02');
});
162
+
163
// MET-11: the largest record buildRecord can emit must stay under 512 bytes.
// The budget is a byte budget, so measure the UTF-8 encoding rather than
// String#length: the '…' truncation marker is one UTF-16 code unit but three
// UTF-8 bytes, which a length check would under-count.
test('MET-11: worst-case record size under macOS PIPE_BUF=512 (Pitfall 1)', () => {
  const worst = buildRecord(validClaudeArgs({
    status: 'error',
    error: { code: 'a'.repeat(50), message: 'a'.repeat(1000) },
  }));
  const serialisedBytes = Buffer.byteLength(JSON.stringify(worst), 'utf8');
  assert.ok(
    serialisedBytes < 512,
    'Worst-case record is ' + serialisedBytes + ' bytes; must stay < 512 for macOS PIPE_BUF',
  );
});
174
+
175
// MET-12: newlines inside an error message must be escaped by serialisation,
// otherwise one record would span several JSONL lines.
test('MET-12: serialised record has no raw newline bytes even with multiline error message', () => {
  const multiline = { code: 'E', message: 'line1\nline2\nline3' };
  const built = buildRecord(validClaudeArgs({ status: 'error', error: multiline }));
  const newlineAt = JSON.stringify(built).indexOf('\n');
  assert.equal(newlineAt, -1, 'JSON-serialised record must have no raw newline bytes');
});
@@ -0,0 +1,16 @@
1
+ const { test } = require('node:test');
2
+ const assert = require('node:assert/strict');
3
+
4
+ const { MODEL_ALIAS_MAP } = require('./model-profiles.cjs');
5
+
6
// MAR-1: snapshot gate on MODEL_ALIAS_MAP. Any change to the alias literals
// must be mirrored here, otherwise this test fails with the release-checklist
// message below.
test('MAR-1: MODEL_ALIAS_MAP snapshot — fail loud on silent alias bump (D-17)', () => {
  const pinnedAliases = {
    opus: 'claude-opus-4-7',
    sonnet: 'claude-sonnet-4-6',
    haiku: 'claude-haiku-4-5',
  };
  const regressionNotice =
    'Regression gate: MODEL_ALIAS_MAP changed. Release checklist REQUIRES running this test on pre-bump main and confirming the failure message BEFORE updating the alias map. If this failure is expected, bump the literals in BOTH model-profiles.cjs AND this test in a single commit citing the Anthropic release URL.';

  assert.deepEqual(MODEL_ALIAS_MAP, pinnedAliases, regressionNotice);
});
@@ -0,0 +1,42 @@
1
+ const { NubosPilotError } = require('./core.cjs');
2
+
3
// Lookup table used by resolve(): outer key is the tier, inner key is the
// profile, and the value is the alias to use. Every `inherit` cell is the
// empty string.
const TIER_PROFILE_MATRIX = {
  opus: {
    quality: 'opus',
    balanced: 'opus',
    budget: 'sonnet',
    inherit: '',
  },
  sonnet: {
    quality: 'sonnet',
    balanced: 'sonnet',
    budget: 'haiku',
    inherit: '',
  },
  haiku: {
    quality: 'sonnet',
    balanced: 'haiku',
    budget: 'haiku',
    inherit: '',
  },
};

// Short alias -> full model identifier string.
const MODEL_ALIAS_MAP = {
  opus: 'claude-opus-4-7',
  sonnet: 'claude-sonnet-4-6',
  haiku: 'claude-haiku-4-5',
};

// Accepted `tier` arguments for resolve(); the order is also the order shown
// in the invalid-tier error message.
const VALID_TIERS = [
  'haiku',
  'sonnet',
  'opus',
];

// Accepted `profile` arguments for resolve(); the order is also the order
// shown in the invalid-profile error message.
const VALID_PROFILES = [
  'quality',
  'balanced',
  'budget',
  'inherit',
];
17
+
18
/**
 * Look up the alias for a tier/profile pair in TIER_PROFILE_MATRIX.
 *
 * The tier is validated before the profile, so a call with both arguments
 * invalid reports `invalid-tier`.
 *
 * @param {string} tier - One of VALID_TIERS.
 * @param {string} profile - One of VALID_PROFILES.
 * @returns {string} The matrix cell for the pair; the empty string for the
 *   `inherit` profile.
 * @throws {NubosPilotError} `invalid-tier` or `invalid-profile`, with
 *   `{ got, allowed }` details, when an argument is not in its allowed list.
 */
function resolve(tier, profile) {
  const tierKnown = VALID_TIERS.includes(tier);
  if (!tierKnown) {
    // Hand out a copy so error consumers cannot mutate the module constant.
    const details = { got: tier, allowed: [...VALID_TIERS] };
    throw new NubosPilotError('invalid-tier', `tier must be one of ${VALID_TIERS.join('/')}`, details);
  }

  const profileKnown = VALID_PROFILES.includes(profile);
  if (!profileKnown) {
    const details = { got: profile, allowed: [...VALID_PROFILES] };
    throw new NubosPilotError('invalid-profile', `profile must be one of ${VALID_PROFILES.join('/')}`, details);
  }

  const row = TIER_PROFILE_MATRIX[tier];
  return row[profile];
}
35
+
36
+ module.exports = {
37
+ TIER_PROFILE_MATRIX,
38
+ MODEL_ALIAS_MAP,
39
+ VALID_TIERS,
40
+ VALID_PROFILES,
41
+ resolve,
42
+ };
@@ -0,0 +1,61 @@
1
+ const { test } = require('node:test');
2
+ const assert = require('node:assert/strict');
3
+
4
+ const mp = require('./model-profiles.cjs');
5
+ const { TIER_PROFILE_MATRIX, MODEL_ALIAS_MAP, VALID_TIERS, VALID_PROFILES, resolve } = mp;
6
+
7
// MP-1: pins the full tier x profile table (3 tiers, 4 profiles each).
test('MP-1: TIER_PROFILE_MATRIX matches D-01 3x4 shape', () => {
  const expectedMatrix = {
    opus: { quality: 'opus', balanced: 'opus', budget: 'sonnet', inherit: '' },
    sonnet: { quality: 'sonnet', balanced: 'sonnet', budget: 'haiku', inherit: '' },
    haiku: { quality: 'sonnet', balanced: 'haiku', budget: 'haiku', inherit: '' },
  };

  assert.deepEqual(TIER_PROFILE_MATRIX, expectedMatrix);
});
14
+
15
// MP-2: pins the alias -> model identifier literals.
test('MP-2: MODEL_ALIAS_MAP matches D-04 literals', () => {
  const expectedAliases = {
    opus: 'claude-opus-4-7',
    sonnet: 'claude-sonnet-4-6',
    haiku: 'claude-haiku-4-5',
  };

  assert.deepEqual(MODEL_ALIAS_MAP, expectedAliases);
});
22
+
23
// MP-3: the tier list is pinned including its order.
test('MP-3: VALID_TIERS deepEquals [haiku, sonnet, opus]', () => {
  const expectedTiers = ['haiku', 'sonnet', 'opus'];
  assert.deepEqual(VALID_TIERS, expectedTiers);
});
26
+
27
// MP-4: the profile list is pinned including its order.
test('MP-4: VALID_PROFILES deepEquals [quality, balanced, budget, inherit]', () => {
  const expectedProfiles = ['quality', 'balanced', 'budget', 'inherit'];
  assert.deepEqual(VALID_PROFILES, expectedProfiles);
});
30
+
31
// MP-5: spot-checks resolve() against a sample of matrix cells, including the
// haiku/quality cell, which maps up to 'sonnet'.
test('MP-5: resolve() returns correct alias per matrix cell', () => {
  // [tier, profile, expected alias]
  const samples = [
    ['opus', 'balanced', 'opus'],
    ['haiku', 'budget', 'haiku'],
    ['sonnet', 'budget', 'haiku'],
    ['opus', 'budget', 'sonnet'],
    ['haiku', 'quality', 'sonnet'],
  ];

  for (const [tier, profile, expected] of samples) {
    assert.equal(resolve(tier, profile), expected);
  }
});
38
+
39
// MP-6: the `inherit` profile resolves to the empty string for every tier.
test('MP-6: resolve() returns empty string for inherit profile (D-03)', () => {
  for (const tier of ['opus', 'haiku', 'sonnet']) {
    assert.equal(resolve(tier, 'inherit'), '');
  }
});
44
+
45
// MP-7: an unrecognised tier must raise a NubosPilotError with code
// 'invalid-tier' and the offending value echoed in details.got.
test('MP-7: resolve() throws NubosPilotError(invalid-tier) on unknown tier', () => {
  assert.throws(
    () => resolve('gpt-4', 'balanced'),
    (err) => {
      assert.equal(err.name, 'NubosPilotError');
      assert.equal(err.code, 'invalid-tier');
      assert.equal(err.details.got, 'gpt-4');
      return true; // a validator passed to assert.throws must return true to accept the error
    },
  );
});
53
+
54
// MP-8: an unrecognised profile (with a valid tier) must raise a
// NubosPilotError with code 'invalid-profile' and the offending value echoed
// in details.got.
test('MP-8: resolve() throws NubosPilotError(invalid-profile) on unknown profile', () => {
  assert.throws(
    () => resolve('opus', 'eco'),
    (err) => {
      assert.equal(err.name, 'NubosPilotError');
      assert.equal(err.code, 'invalid-profile');
      assert.equal(err.details.got, 'eco');
      return true; // a validator passed to assert.throws must return true to accept the error
    },
  );
});