nemoris 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223)
  1. package/.env.example +49 -0
  2. package/LICENSE +21 -0
  3. package/README.md +209 -0
  4. package/SECURITY.md +119 -0
  5. package/bin/nemoris +46 -0
  6. package/config/agents/agent.toml.example +28 -0
  7. package/config/agents/default.toml +22 -0
  8. package/config/agents/orchestrator.toml +18 -0
  9. package/config/delivery.toml +73 -0
  10. package/config/embeddings.toml +5 -0
  11. package/config/identity/default-purpose.md +1 -0
  12. package/config/identity/default-soul.md +3 -0
  13. package/config/identity/orchestrator-purpose.md +1 -0
  14. package/config/identity/orchestrator-soul.md +1 -0
  15. package/config/improvement-targets.toml +15 -0
  16. package/config/jobs/heartbeat-check.toml +30 -0
  17. package/config/jobs/memory-rollup.toml +46 -0
  18. package/config/jobs/workspace-health.toml +63 -0
  19. package/config/mcp.toml +16 -0
  20. package/config/output-contracts.toml +17 -0
  21. package/config/peers.toml +32 -0
  22. package/config/peers.toml.example +32 -0
  23. package/config/policies/memory-default.toml +10 -0
  24. package/config/policies/memory-heartbeat.toml +5 -0
  25. package/config/policies/memory-ops.toml +10 -0
  26. package/config/policies/tools-heartbeat-minimal.toml +8 -0
  27. package/config/policies/tools-interactive-safe.toml +8 -0
  28. package/config/policies/tools-ops-bounded.toml +8 -0
  29. package/config/policies/tools-orchestrator.toml +7 -0
  30. package/config/providers/anthropic.toml +15 -0
  31. package/config/providers/ollama.toml +5 -0
  32. package/config/providers/openai-codex.toml +9 -0
  33. package/config/providers/openrouter.toml +5 -0
  34. package/config/router.toml +22 -0
  35. package/config/runtime.toml +114 -0
  36. package/config/skills/self-improvement.toml +15 -0
  37. package/config/skills/telegram-onboarding-spec.md +240 -0
  38. package/config/skills/workspace-monitor.toml +15 -0
  39. package/config/task-router.toml +42 -0
  40. package/install.sh +50 -0
  41. package/package.json +90 -0
  42. package/src/auth/auth-profiles.js +169 -0
  43. package/src/auth/openai-codex-oauth.js +285 -0
  44. package/src/battle.js +449 -0
  45. package/src/cli/help.js +265 -0
  46. package/src/cli/output-filter.js +49 -0
  47. package/src/cli/runtime-control.js +704 -0
  48. package/src/cli-main.js +2763 -0
  49. package/src/cli.js +78 -0
  50. package/src/config/loader.js +332 -0
  51. package/src/config/schema-validator.js +214 -0
  52. package/src/config/toml-lite.js +8 -0
  53. package/src/daemon/action-handlers.js +71 -0
  54. package/src/daemon/healing-tick.js +87 -0
  55. package/src/daemon/health-probes.js +90 -0
  56. package/src/daemon/notifier.js +57 -0
  57. package/src/daemon/nurse.js +218 -0
  58. package/src/daemon/repair-log.js +106 -0
  59. package/src/daemon/rule-staging.js +90 -0
  60. package/src/daemon/rules.js +29 -0
  61. package/src/daemon/telegram-commands.js +54 -0
  62. package/src/daemon/updater.js +85 -0
  63. package/src/jobs/job-runner.js +78 -0
  64. package/src/mcp/consumer.js +129 -0
  65. package/src/memory/active-recall.js +171 -0
  66. package/src/memory/backend-manager.js +97 -0
  67. package/src/memory/backends/file-backend.js +38 -0
  68. package/src/memory/backends/qmd-backend.js +219 -0
  69. package/src/memory/embedding-guards.js +24 -0
  70. package/src/memory/embedding-index.js +118 -0
  71. package/src/memory/embedding-service.js +179 -0
  72. package/src/memory/file-index.js +177 -0
  73. package/src/memory/memory-signature.js +5 -0
  74. package/src/memory/memory-store.js +648 -0
  75. package/src/memory/retrieval-planner.js +66 -0
  76. package/src/memory/scoring.js +145 -0
  77. package/src/memory/simhash.js +78 -0
  78. package/src/memory/sqlite-active-store.js +824 -0
  79. package/src/memory/write-policy.js +36 -0
  80. package/src/onboarding/aliases.js +33 -0
  81. package/src/onboarding/auth/api-key.js +224 -0
  82. package/src/onboarding/auth/ollama-detect.js +42 -0
  83. package/src/onboarding/clack-prompter.js +77 -0
  84. package/src/onboarding/doctor.js +530 -0
  85. package/src/onboarding/lock.js +42 -0
  86. package/src/onboarding/model-catalog.js +344 -0
  87. package/src/onboarding/phases/auth.js +589 -0
  88. package/src/onboarding/phases/build.js +130 -0
  89. package/src/onboarding/phases/choose.js +82 -0
  90. package/src/onboarding/phases/detect.js +98 -0
  91. package/src/onboarding/phases/hatch.js +216 -0
  92. package/src/onboarding/phases/identity.js +79 -0
  93. package/src/onboarding/phases/ollama.js +345 -0
  94. package/src/onboarding/phases/scaffold.js +99 -0
  95. package/src/onboarding/phases/telegram.js +377 -0
  96. package/src/onboarding/phases/validate.js +204 -0
  97. package/src/onboarding/phases/verify.js +206 -0
  98. package/src/onboarding/platform.js +482 -0
  99. package/src/onboarding/status-bar.js +95 -0
  100. package/src/onboarding/templates.js +794 -0
  101. package/src/onboarding/toml-writer.js +38 -0
  102. package/src/onboarding/tui.js +250 -0
  103. package/src/onboarding/uninstall.js +153 -0
  104. package/src/onboarding/wizard.js +499 -0
  105. package/src/providers/anthropic.js +168 -0
  106. package/src/providers/base.js +247 -0
  107. package/src/providers/circuit-breaker.js +136 -0
  108. package/src/providers/ollama.js +163 -0
  109. package/src/providers/openai-codex.js +149 -0
  110. package/src/providers/openrouter.js +136 -0
  111. package/src/providers/registry.js +36 -0
  112. package/src/providers/router.js +16 -0
  113. package/src/runtime/bootstrap-cache.js +47 -0
  114. package/src/runtime/capabilities-prompt.js +25 -0
  115. package/src/runtime/completion-ping.js +99 -0
  116. package/src/runtime/config-validator.js +121 -0
  117. package/src/runtime/context-ledger.js +360 -0
  118. package/src/runtime/cutover-readiness.js +42 -0
  119. package/src/runtime/daemon.js +729 -0
  120. package/src/runtime/delivery-ack.js +195 -0
  121. package/src/runtime/delivery-adapters/local-file.js +41 -0
  122. package/src/runtime/delivery-adapters/openclaw-cli.js +94 -0
  123. package/src/runtime/delivery-adapters/openclaw-peer.js +98 -0
  124. package/src/runtime/delivery-adapters/shadow.js +13 -0
  125. package/src/runtime/delivery-adapters/standalone-http.js +98 -0
  126. package/src/runtime/delivery-adapters/telegram.js +104 -0
  127. package/src/runtime/delivery-adapters/tui.js +128 -0
  128. package/src/runtime/delivery-manager.js +807 -0
  129. package/src/runtime/delivery-store.js +168 -0
  130. package/src/runtime/dependency-health.js +118 -0
  131. package/src/runtime/envelope.js +114 -0
  132. package/src/runtime/evaluation.js +1089 -0
  133. package/src/runtime/exec-approvals.js +216 -0
  134. package/src/runtime/executor.js +500 -0
  135. package/src/runtime/failure-ping.js +67 -0
  136. package/src/runtime/flows.js +83 -0
  137. package/src/runtime/guards.js +45 -0
  138. package/src/runtime/handoff.js +51 -0
  139. package/src/runtime/identity-cache.js +28 -0
  140. package/src/runtime/improvement-engine.js +109 -0
  141. package/src/runtime/improvement-harness.js +581 -0
  142. package/src/runtime/input-sanitiser.js +72 -0
  143. package/src/runtime/interaction-contract.js +347 -0
  144. package/src/runtime/lane-readiness.js +226 -0
  145. package/src/runtime/migration.js +323 -0
  146. package/src/runtime/model-resolution.js +78 -0
  147. package/src/runtime/network.js +64 -0
  148. package/src/runtime/notification-store.js +97 -0
  149. package/src/runtime/notifier.js +256 -0
  150. package/src/runtime/orchestrator.js +53 -0
  151. package/src/runtime/orphan-reaper.js +41 -0
  152. package/src/runtime/output-contract-schema.js +139 -0
  153. package/src/runtime/output-contract-validator.js +439 -0
  154. package/src/runtime/peer-readiness.js +69 -0
  155. package/src/runtime/peer-registry.js +133 -0
  156. package/src/runtime/pilot-status.js +108 -0
  157. package/src/runtime/prompt-builder.js +261 -0
  158. package/src/runtime/provider-attempt.js +582 -0
  159. package/src/runtime/report-fallback.js +71 -0
  160. package/src/runtime/result-normalizer.js +183 -0
  161. package/src/runtime/retention.js +74 -0
  162. package/src/runtime/review.js +244 -0
  163. package/src/runtime/route-job.js +15 -0
  164. package/src/runtime/run-store.js +38 -0
  165. package/src/runtime/schedule.js +88 -0
  166. package/src/runtime/scheduler-state.js +434 -0
  167. package/src/runtime/scheduler.js +656 -0
  168. package/src/runtime/session-compactor.js +182 -0
  169. package/src/runtime/session-search.js +155 -0
  170. package/src/runtime/slack-inbound.js +249 -0
  171. package/src/runtime/ssrf.js +102 -0
  172. package/src/runtime/status-aggregator.js +330 -0
  173. package/src/runtime/task-contract.js +140 -0
  174. package/src/runtime/task-packet.js +107 -0
  175. package/src/runtime/task-router.js +140 -0
  176. package/src/runtime/telegram-inbound.js +1565 -0
  177. package/src/runtime/token-counter.js +134 -0
  178. package/src/runtime/token-estimator.js +59 -0
  179. package/src/runtime/tool-loop.js +200 -0
  180. package/src/runtime/transport-server.js +311 -0
  181. package/src/runtime/tui-server.js +411 -0
  182. package/src/runtime/ulid.js +44 -0
  183. package/src/security/ssrf-check.js +197 -0
  184. package/src/setup.js +369 -0
  185. package/src/shadow/bridge.js +303 -0
  186. package/src/skills/loader.js +84 -0
  187. package/src/tools/catalog.json +49 -0
  188. package/src/tools/cli-delegate.js +44 -0
  189. package/src/tools/mcp-client.js +106 -0
  190. package/src/tools/micro/cancel-task.js +6 -0
  191. package/src/tools/micro/complete-task.js +6 -0
  192. package/src/tools/micro/fail-task.js +6 -0
  193. package/src/tools/micro/http-fetch.js +74 -0
  194. package/src/tools/micro/index.js +36 -0
  195. package/src/tools/micro/lcm-recall.js +60 -0
  196. package/src/tools/micro/list-dir.js +17 -0
  197. package/src/tools/micro/list-skills.js +46 -0
  198. package/src/tools/micro/load-skill.js +38 -0
  199. package/src/tools/micro/memory-search.js +45 -0
  200. package/src/tools/micro/read-file.js +11 -0
  201. package/src/tools/micro/session-search.js +54 -0
  202. package/src/tools/micro/shell-exec.js +43 -0
  203. package/src/tools/micro/trigger-job.js +79 -0
  204. package/src/tools/micro/web-search.js +58 -0
  205. package/src/tools/micro/workspace-paths.js +39 -0
  206. package/src/tools/micro/write-file.js +14 -0
  207. package/src/tools/micro/write-memory.js +41 -0
  208. package/src/tools/registry.js +348 -0
  209. package/src/tools/tool-result-contract.js +36 -0
  210. package/src/tui/chat.js +835 -0
  211. package/src/tui/renderer.js +175 -0
  212. package/src/tui/socket-client.js +217 -0
  213. package/src/utils/canonical-json.js +29 -0
  214. package/src/utils/compaction.js +30 -0
  215. package/src/utils/env-loader.js +5 -0
  216. package/src/utils/errors.js +80 -0
  217. package/src/utils/fs.js +101 -0
  218. package/src/utils/ids.js +5 -0
  219. package/src/utils/model-context-limits.js +30 -0
  220. package/src/utils/token-budget.js +74 -0
  221. package/src/utils/usage-cost.js +25 -0
  222. package/src/utils/usage-metrics.js +14 -0
  223. package/vendor/smol-toml-1.5.2.tgz +0 -0
@@ -0,0 +1,1089 @@
1
+ import path from "node:path";
2
+ import { RunReviewer } from "./review.js";
3
+ import { Scheduler } from "./scheduler.js";
4
+ import { RunStore } from "./run-store.js";
5
+ import { NotificationStore } from "./notification-store.js";
6
+ import { DeliveryStore } from "./delivery-store.js";
7
+ import { validateOutputContract } from "./output-contract-validator.js";
8
+
9
/**
 * Buckets items by a computed key.
 *
 * Keys keep first-seen order and items keep their input order inside
 * each bucket.
 *
 * @param {Iterable<*>} items - Values to group.
 * @param {(item: *) => *} keyFn - Derives the grouping key for one item.
 * @returns {Map<*, Array<*>>} Map from key to the items that produced it.
 */
function groupBy(items, keyFn) {
  const buckets = new Map();
  for (const entry of items) {
    const bucketKey = keyFn(entry);
    const existing = buckets.get(bucketKey);
    if (existing === undefined) {
      buckets.set(bucketKey, [entry]);
    } else {
      existing.push(entry);
    }
  }
  return buckets;
}
18
+
19
/**
 * Restricts a number to the inclusive range [min, max].
 *
 * @param {number} value - Number to restrict.
 * @param {number} [min=0] - Lower bound.
 * @param {number} [max=1] - Upper bound.
 * @returns {number} The bounded value (NaN stays NaN).
 */
function clamp(value, min = 0, max = 1) {
  const cappedAbove = Math.min(max, value);
  return Math.max(min, cappedAbove);
}
22
+
23
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} values - Numbers to average.
 * @returns {number|null} The mean, or null when the list is empty.
 */
function average(values) {
  const count = values.length;
  if (count === 0) return null;

  let total = 0;
  values.forEach((value) => {
    total += value;
  });
  return total / count;
}
27
+
28
/**
 * Chooses the run that represents a lane: the first run whose mode is
 * "provider", otherwise the first run in the list.
 *
 * @param {Array<object>} runs - Candidate runs.
 * @returns {object|null} The chosen run, or null when there are none.
 */
function choosePrimaryV2Run(runs) {
  if (!runs.length) return null;
  const providerRun = runs.find(({ mode }) => mode === "provider");
  return providerRun || runs[0];
}
32
+
33
/**
 * Picks the first live match that has a latest run; failing that, the
 * first match in the list.
 *
 * @param {Array<object>} liveMatches - Candidate live matches.
 * @returns {object|null} The preferred match, or null when the list is empty.
 */
function pickBestLiveMatch(liveMatches) {
  const withRun = liveMatches.find((candidate) => candidate.latestRun);
  if (withRun) return withRun;
  return liveMatches[0] || null;
}
36
+
37
/**
 * Collapses every run of whitespace (including newlines) to a single
 * space and strips leading/trailing whitespace.
 *
 * Falsy inputs (null, undefined, 0, "", false) yield an empty string.
 *
 * @param {*} value - Value to stringify and flatten.
 * @returns {string} Single-line text.
 */
function normalizeText(value) {
  const words = String(value || "")
    .split(/\s+/)
    .filter(Boolean);
  return words.join(" ");
}
42
+
43
/**
 * Removes a single Markdown code fence that wraps the whole text.
 *
 * The fence may carry one of the language tags json, markdown, md or
 * text (case-insensitive). Text that is not entirely wrapped in a fence
 * is returned trimmed but otherwise untouched.
 *
 * @param {*} text - Possibly fenced text; falsy values become "".
 * @returns {string} The fence contents, or the trimmed input.
 */
function stripOuterFence(text) {
  const trimmed = String(text || "").trim();
  const fence = /^```(?:json|markdown|md|text)?\s*([\s\S]*?)\s*```$/i.exec(trimmed);
  if (!fence) return trimmed;
  return fence[1].trim();
}
48
+
49
/**
 * Attempts to read text as a JSON object.
 *
 * The text is unfenced first; only content that starts with "{" and
 * ends with "}" is handed to the JSON parser.
 *
 * @param {*} text - Candidate text, optionally wrapped in a code fence.
 * @returns {*} The parsed value, or null when the text is not
 *   object-shaped or is not valid JSON.
 */
function parseJsonCandidate(text) {
  const body = stripOuterFence(text);
  const looksLikeObject = body.startsWith("{") && body.endsWith("}");
  if (!looksLikeObject) return null;

  try {
    return JSON.parse(body);
  } catch {
    // Invalid JSON is an expected outcome for free-form output; the caller
    // treats null as "render this as plain text".
    return null;
  }
}
61
+
62
/**
 * Turns a section key into a display heading.
 *
 * Underscores and hyphens become spaces, a leading run of "#" is
 * dropped, and the first character of every word is upper-cased.
 *
 * @param {*} value - Key or heading text; falsy values become "".
 * @returns {string} The title-cased label.
 */
function titleCase(value) {
  const spaced = String(value || "").replace(/[_-]+/g, " ");
  const withoutHashes = spaced.replace(/^#+\s*/, "");
  const capitalized = withoutHashes.replace(/\b\w/g, (letter) => letter.toUpperCase());
  return capitalized.trim();
}
69
+
70
/**
 * Flattens a value to single-line text, substituting "None" when
 * nothing is left after normalization.
 *
 * @param {*} value - Value to flatten.
 * @returns {string} Non-empty single-line text.
 */
function normalizeInline(value) {
  const flattened = normalizeText(value);
  return flattened === "" ? "None" : flattened;
}
73
+
74
/**
 * Canonicalizes a section name so differently formatted spellings of
 * the same section compare equal.
 *
 * Lower-cases the text, drops a leading run of "#", turns runs of
 * "*", ":", "_" and "-" into spaces, then collapses whitespace.
 *
 * @param {*} value - Section name; falsy values become "".
 * @returns {string} The lookup key.
 */
function normalizeSectionKey(value) {
  const lowered = String(value || "").toLowerCase();
  const withoutHashes = lowered.replace(/^#+\s*/, "");
  const withoutMarkup = withoutHashes.replace(/[*:_-]+/g, " ");
  return withoutMarkup.replace(/\s+/g, " ").trim();
}
82
+
83
/**
 * Renders a run output value as report text.
 *
 * - null/undefined render as "".
 * - Strings that contain a JSON object (optionally fenced) are parsed and
 *   rendered as that object; an `output` property, when present, is
 *   unwrapped and its sibling `summary` takes precedence over the one
 *   passed in. Any other string is returned with its outer fence removed.
 * - Arrays render each item on its own line, skipping empty renders.
 * - Objects render one entry per section. Section order comes from
 *   `contract.requiredSections` when it is non-empty, otherwise from the
 *   object's own key order. The layout is "bullets" or "headings"; any
 *   other `sectionStyle` falls back to plain "Key: value" lines.
 * - Everything else is passed through `String()`.
 *
 * @param {*} value - Value to render.
 * @param {object} [options]
 * @param {object|null} [options.contract=null] - Output contract; the
 *   fields read are `requiredSections`, `format`, `profile.sectionStyle`
 *   and `profile.requireStatus`.
 * @param {string|null} [options.summary=null] - Summary used for the
 *   leading "Status:" line of bulleted output.
 * @returns {string} Rendered text.
 */
function renderValue(value, { contract = null, summary = null } = {}) {
  if (value == null) return "";

  // Section values are flattened to one line by normalizeInline, which
  // stringifies with String(). A nested plain object would therefore render
  // as "[object Object]", so serialize those as compact JSON first. Arrays
  // keep their comma-joined form; primitives and non-plain objects are
  // passed through untouched.
  const toInlineSource = (entry) => {
    if (entry === null || typeof entry !== "object") return entry;
    if (Array.isArray(entry)) {
      return entry.map((item) => toInlineSource(item)).join(",");
    }
    const proto = Object.getPrototypeOf(entry);
    if (proto !== Object.prototype && proto !== null) return entry;
    try {
      return JSON.stringify(entry) ?? "";
    } catch {
      // Cyclic or otherwise unserializable objects: fall back to the tag
      // string rather than failing the whole render.
      return Object.prototype.toString.call(entry);
    }
  };

  if (typeof value === "string") {
    const parsed = parseJsonCandidate(value);
    if (parsed && typeof parsed === "object") {
      if (parsed.output !== undefined) {
        return renderValue(parsed.output, {
          contract,
          summary: parsed.summary || summary
        });
      }

      return renderValue(parsed, { contract, summary });
    }

    return stripOuterFence(value);
  }

  if (Array.isArray(value)) {
    return value
      .map((item) => renderValue(item, { contract, summary }))
      .filter(Boolean)
      .join("\n");
  }

  if (typeof value === "object") {
    const entries = Object.entries(value);
    const sectionOrder = contract?.requiredSections?.length
      ? contract.requiredSections
      : entries.map(([key]) => key);
    // Keyed by normalized name so "Next Steps", "next_steps" and
    // "## Next Steps" all resolve to the same entry.
    const valueMap = new Map(entries.map(([key, entryValue]) => [normalizeSectionKey(key), entryValue]));
    const sectionStyle = contract?.profile?.sectionStyle || (contract?.format === "structured_rollup" ? "headings" : "bullets");
    const requireStatus = contract?.profile?.requireStatus ?? (contract?.format === "bulleted_briefing");
    const sectionText = (section) => normalizeInline(toInlineSource(valueMap.get(normalizeSectionKey(section))));

    if (sectionStyle === "bullets") {
      const lines = [];
      if (requireStatus && summary) lines.push(`Status: ${normalizeInline(summary)}`);
      for (const section of sectionOrder) {
        lines.push(`- ${titleCase(section)}: ${sectionText(section)}`);
      }
      return lines.join("\n");
    }

    if (sectionStyle === "headings") {
      const lines = [];
      for (const section of sectionOrder) {
        lines.push(`## ${titleCase(section)}`);
        lines.push(`- ${sectionText(section)}`);
        lines.push("");
      }
      return lines.join("\n").trim();
    }

    return entries
      .map(([key, entryValue]) => `${titleCase(key)}: ${normalizeInline(toInlineSource(entryValue))}`)
      .join("\n");
  }

  return String(value);
}
144
+
145
/**
 * Produces the report text for a V2 run.
 *
 * The output is taken from `run.result.output`, then `run.output`, and
 * finally the summary (from `run.result.summary` or `run.summary`).
 *
 * @param {object|null} run - Run record.
 * @param {object|null} [contract=null] - Output contract passed to the renderer.
 * @returns {string} Rendered text, or "" when there is no run.
 */
function extractRunText(run, contract = null) {
  if (!run) return "";

  const { result } = run;
  const summary = result?.summary || run.summary || "";
  const payload = result?.output ?? run.output ?? summary;
  return renderValue(payload, { contract, summary });
}
151
+
152
/**
 * Produces the report text for a live run from its summary, falling
 * back to its error text.
 *
 * @param {object|null} run - Live run record.
 * @returns {string} Rendered text, or "" when there is no run.
 */
function extractLiveText(run) {
  return run ? renderValue(run.summary || run.error || "") : "";
}
156
+
157
/**
 * Scores report text with a set of heuristic penalties.
 *
 * Starts at 1 and subtracts a fixed amount for each signal found:
 * very short text (< 40 chars), bracketed placeholder tokens, apology or
 * failure wording, code fences, timeout wording, and repeated numbered
 * lines. Empty text scores 0 immediately.
 *
 * @param {*} text - Report text to inspect.
 * @returns {{score: number, findings: string[]}} Score clamped to the
 *   default range of `clamp` and rounded to 4 decimals, plus one finding
 *   per signal that fired.
 */
function qualitySignals(text) {
  const raw = String(text || "");
  const normalized = normalizeText(text);
  const lower = normalized.toLowerCase();
  const findings = [];
  let score = 1;

  if (!normalized) {
    findings.push("Output is empty.");
    return { score: 0, findings };
  }

  if (normalized.length < 40) {
    findings.push("Output is very short and may not carry enough signal.");
    score -= 0.18;
  }

  if (/\[(title|link|summary|sub)\]/i.test(normalized)) {
    findings.push("Output still contains placeholder text.");
    score -= 0.4;
  }

  if (/i('| a)?m sorry|does not support|cannot complete|failed to/i.test(lower)) {
    findings.push("Output contains apology or tool-failure language.");
    score -= 0.28;
  }

  if (/```/.test(raw)) {
    findings.push("Output is wrapped in a code block instead of a direct report.");
    score -= 0.12;
  }

  if (/timed out|timeout/i.test(lower)) {
    findings.push("Output references a timeout or stalled execution.");
    score -= 0.35;
  }

  // The detector works line by line, so it must see the raw text: the
  // normalized form has every newline collapsed to a space, which would
  // leave a single line and make this check impossible to trigger.
  const repeatedLinePenalty = detectRepeatedLinePenalty(raw);
  if (repeatedLinePenalty > 0) {
    findings.push("Output repeats nearly identical numbered lines.");
    score -= repeatedLinePenalty;
  }

  return {
    score: clamp(Number(score.toFixed(4))),
    findings
  };
}
205
+
206
/**
 * Computes a penalty for numbered lists whose entries mostly repeat.
 *
 * Only lines that start with "<digits>." are considered, and the input
 * is split on "\n", so it must still contain its line breaks. The
 * numbering is stripped and the rest compared case-insensitively.
 *
 * @param {string} text - Newline-separated text.
 * @returns {number} 0 when there are fewer than three numbered lines or
 *   fewer than half are duplicates; otherwise the duplicate ratio times
 *   0.25, rounded to 4 decimals.
 */
function detectRepeatedLinePenalty(text) {
  const numberedBodies = [];
  for (const rawLine of text.split("\n")) {
    const line = rawLine.trim();
    if (line && /^\d+\./.test(line)) {
      numberedBodies.push(line.replace(/^\d+\.\s*/, "").toLowerCase());
    }
  }

  if (numberedBodies.length < 3) return 0;

  const distinctCount = new Set(numberedBodies).size;
  const duplicateRatio = 1 - distinctCount / numberedBodies.length;
  return duplicateRatio < 0.5 ? 0 : Number((duplicateRatio * 0.25).toFixed(4));
}
221
+
222
/**
 * Grows a set of notification file paths to include every file that was
 * generated, directly or transitively, by a notification already in the
 * set.
 *
 * The notifications are rescanned until a full pass adds nothing new.
 *
 * @param {Iterable<string>} baseFiles - Starting file paths.
 * @param {Array<object>} notifications - Records with `filePath` and an
 *   optional `generatedNotificationFiles` list.
 * @returns {Set<string>} The starting paths plus all reachable paths.
 */
function expandRelatedNotificationFiles(baseFiles, notifications) {
  const related = new Set(baseFiles);
  let grew;
  do {
    grew = false;
    for (const notification of notifications) {
      if (!related.has(notification.filePath)) continue;
      const children = notification.generatedNotificationFiles || [];
      for (const child of children) {
        if (!related.has(child)) {
          related.add(child);
          grew = true;
        }
      }
    }
  } while (grew);
  return related;
}
238
+
239
/**
 * Aggregates recent live runs into counts, averages and findings.
 *
 * @param {Array<object>} liveRuns - Live run records; the fields read are
 *   `status`, `durationMs`, `usage.total_tokens`, `summary` and `error`.
 * @returns {object} Run counts per status, failure rate (errors / runs,
 *   4 decimals), rounded average duration and token usage, average
 *   quality score (4 decimals) and a list of findings. With no runs,
 *   counts are 0, the rate and averages are null, and a single "no
 *   history" finding is returned.
 */
function evaluateLiveHistory(liveRuns) {
  const runCount = liveRuns.length;
  if (runCount === 0) {
    return {
      runCount: 0,
      okCount: 0,
      errorCount: 0,
      skippedCount: 0,
      failureRate: null,
      avgDurationMs: null,
      avgTotalTokens: null,
      avgQualityScore: null,
      findings: ["No recent live history available."]
    };
  }

  const countWithStatus = (status) => liveRuns.filter((run) => run.status === status).length;
  const finiteOnly = (numbers) => numbers.filter((number) => Number.isFinite(number));

  const okCount = countWithStatus("ok");
  const errorCount = countWithStatus("error");
  const skippedCount = countWithStatus("skipped");
  const failureRate = errorCount / runCount;

  const durations = finiteOnly(liveRuns.map((run) => run.durationMs));
  const tokens = finiteOnly(liveRuns.map((run) => run.usage?.total_tokens));
  const qualityScores = finiteOnly(
    liveRuns.map((run) => qualitySignals(run.summary || run.error || "").score)
  );

  const findings = [];
  if (failureRate >= 0.34) {
    findings.push("Live cron lane is failing often.");
  }
  if (skippedCount > 0) {
    findings.push("Live cron history includes intentional skips.");
  }
  if (tokens.some((count) => count > 12000)) {
    findings.push("Live cron lane shows high token usage spikes.");
  }

  const avgDurationMs = durations.length ? Math.round(average(durations)) : null;
  const avgTotalTokens = tokens.length ? Math.round(average(tokens)) : null;
  const avgQualityScore = qualityScores.length ? Number(average(qualityScores).toFixed(4)) : null;

  return {
    runCount,
    okCount,
    errorCount,
    skippedCount,
    failureRate: Number(failureRate.toFixed(4)),
    avgDurationMs,
    avgTotalTokens,
    avgQualityScore,
    findings
  };
}
281
+
282
/**
 * Maps a run status to a numeric score.
 *
 * @param {*} status - Run status.
 * @returns {number} 1 for "ok", 0.6 for "skipped", 0 for "error" and
 *   0.5 for anything else.
 */
function scoreStatus(status) {
  switch (status) {
    case "ok":
      return 1;
    case "skipped":
      return 0.6;
    case "error":
      return 0;
    default:
      return 0.5;
  }
}
288
+
289
/**
 * Builds the comparison rubric for one lane: component scores, an
 * overall score, findings and live-history metrics.
 *
 * The overall score is the mean of the non-null components, rounded to
 * 4 decimals (0 when every component is null). For a dry run (a V2 run
 * whose mode is not "provider") output quality, contract adherence and
 * field-level quality are excluded, both from the score and from the
 * findings.
 *
 * @param {object} params
 * @param {object|null} params.v2Run - V2 run to evaluate, if any.
 * @param {Array<object>} params.liveHistory - Live runs; index 0 is
 *   treated as the latest.
 * @param {*} params.matchedLiveJob - Truthy when a live analogue is mapped.
 * @param {object|null} [params.outputContract=null] - Contract used when
 *   rendering the V2 output.
 * @param {object|null} [params.contractCheck=null] - Contract validation
 *   result; `satisfiedRatio` and `fieldScores` are read.
 * @returns {{overallScore: number, dryRunExcluded: boolean,
 *   components: object, findings: string[], metrics: object}}
 */
function buildRubric({ v2Run, liveHistory, matchedLiveJob, outputContract = null, contractCheck = null }) {
  const findings = [];
  const v2Text = extractRunText(v2Run, outputContract);
  const v2Quality = qualitySignals(v2Text);
  const latestLive = liveHistory[0] || null;
  const latestLiveQuality = qualitySignals(extractLiveText(latestLive));
  const history = evaluateLiveHistory(liveHistory);
  const isDryRun = v2Run != null && v2Run.mode !== "provider";

  // A provider run is fully comparable; any other artifact only partially.
  const comparisonReadiness = v2Run
    ? v2Run.mode === "provider"
      ? 1
      : 0.45
    : 0;
  const comparisonCoverage = matchedLiveJob ? 1 : 0.2;
  const contractAdherence = contractCheck ? contractCheck.satisfiedRatio : null;
  if (!v2Run) findings.push("No V2 run exists for this lane yet.");
  else if (isDryRun) findings.push("Only a dry-run V2 artifact exists; output quality and contract adherence are excluded from the rubric score.");
  if (!matchedLiveJob) findings.push("Comparison coverage is low because no live cron analogue is mapped.");

  if (!isDryRun && v2Quality.findings.length) findings.push(...v2Quality.findings.map((item) => `V2: ${item}`));
  // Quality findings only describe a live run that exists. Without one the
  // scorer reports "Output is empty.", which would wrongly blame a run that
  // never happened; the history findings already cover missing history.
  if (latestLive && latestLiveQuality.findings.length) {
    findings.push(...latestLiveQuality.findings.map((item) => `Live: ${item}`));
  }
  if (history.findings.length) findings.push(...history.findings);

  const fieldScores = contractCheck?.fieldScores || null;
  if (!isDryRun && fieldScores) {
    if (fieldScores.weakFields.length) {
      findings.push(`Field-level quality is low for: ${fieldScores.weakFields.join(", ")}`);
    }
    if (fieldScores.placeholderFields.length) {
      findings.push(`Placeholder text detected in: ${fieldScores.placeholderFields.join(", ")}`);
    }
  }

  if (!matchedLiveJob) {
    findings.push("No live cron analogue matched for this V2 job.");
  }

  // A V2 run that carries a result counts as "ok" regardless of its status.
  const statusAlignment =
    latestLive && v2Run
      ? Number((scoreStatus(v2Run.result ? "ok" : v2Run.status) * scoreStatus(latestLive.status)).toFixed(4))
      : null;
  const liveReliability = history.failureRate == null ? null : Number((1 - history.failureRate).toFixed(4));

  const fieldLevelQuality = !isDryRun && fieldScores ? fieldScores.averageScore : null;

  // Null components are dropped so they neither help nor hurt the mean.
  const components = [
    isDryRun ? null : v2Quality.score,
    comparisonReadiness,
    comparisonCoverage,
    isDryRun ? null : contractAdherence,
    fieldLevelQuality,
    liveReliability,
    statusAlignment
  ].filter((value) => value != null);
  const overallScore = components.length ? Number(average(components).toFixed(4)) : 0;

  return {
    overallScore,
    dryRunExcluded: isDryRun,
    components: {
      v2OutputQuality: isDryRun ? null : v2Quality.score,
      comparisonReadiness,
      comparisonCoverage,
      contractAdherence: isDryRun ? null : contractAdherence,
      fieldLevelQuality,
      liveReliability,
      statusAlignment
    },
    findings: uniqueStrings(findings),
    metrics: {
      liveRunCount: history.runCount,
      liveOkCount: history.okCount,
      liveErrorCount: history.errorCount,
      liveSkippedCount: history.skippedCount,
      liveFailureRate: history.failureRate,
      liveAvgDurationMs: history.avgDurationMs,
      liveAvgTotalTokens: history.avgTotalTokens,
      liveAvgQualityScore: history.avgQualityScore,
      latestLiveQualityScore: latestLive ? latestLiveQuality.score : null
    }
  };
}
372
+
373
/**
 * Removes falsy entries and duplicates, keeping first-seen order.
 *
 * @param {Array<*>} items - Values to deduplicate.
 * @returns {Array<*>} Distinct truthy values.
 */
function uniqueStrings(items) {
  const seen = new Set();
  for (const item of items) {
    if (item) seen.add(item);
  }
  return Array.from(seen);
}
376
+
377
/**
 * Reduces a V2 run record to the fields used in reports.
 *
 * Summary and output prefer the values under `run.result` and fall back
 * to the top-level ones; absent values are reported as null.
 *
 * @param {object|null} run - Run record.
 * @returns {object|null} The summary object, or null when there is no run.
 */
function summarizeV2Run(run) {
  if (!run) return null;

  const { result } = run;
  const retrievedMemoryCount = run.retrievedMemory?.length || 0;

  return {
    timestamp: run.timestamp,
    mode: run.mode || null,
    providerId: run.providerId || null,
    modelId: run.modelId || null,
    summary: result?.summary || run.summary || null,
    output: result?.output || run.output || null,
    retrievalMeta: run.retrievalMeta || null,
    retrievedMemoryCount,
    interaction: summarizeInteraction(run),
    fallback: run.fallback || null
  };
}
392
+
393
/**
 * Flattens a run's interaction contract and lifecycle flags into one
 * plain object.
 *
 * Requirement flags come from `run.interaction`; the yield, follow-up,
 * handoff and delivery fields come from the run itself. Missing flags
 * default to false and missing values to null.
 *
 * @param {object|null} run - Run record.
 * @returns {object|null} The flattened view, or null when the run has no
 *   interaction contract.
 */
function summarizeInteraction(run) {
  const interaction = run?.interaction || null;
  if (!interaction) return null;

  // A truthy interaction implies the run itself exists from here on.
  const { ack, completion, handoff } = interaction;
  const flag = (name) => run[name] || false;
  const valueOrNull = (name) => run[name] || null;

  return {
    ackRequired: ack?.required ?? false,
    completionRequired: completion?.required ?? false,
    handoffRequired: handoff?.required ?? false,
    yielded: flag("yielded"),
    yieldSignal: valueOrNull("yieldSignal"),
    followUpState: valueOrNull("followUpState"),
    followUpQueued: flag("followUpQueued"),
    followUpConsumed: flag("followUpConsumed"),
    followUpTarget: valueOrNull("followUpTarget"),
    followUpCompleted: flag("followUpCompleted"),
    followUpExpired: flag("followUpExpired"),
    followUpEscalated: flag("followUpEscalated"),
    followUpEscalationFilePath: valueOrNull("followUpEscalationFilePath"),
    handoffTarget: handoff?.target || null,
    suggestedPeerCount: handoff?.suggestions?.length || 0,
    handoffChosenPeerId: valueOrNull("handoffChosenPeerId"),
    handoffChosenBy: valueOrNull("handoffChosenBy"),
    handoffDelivered: flag("handoffDelivered"),
    handoffDeliveryState: valueOrNull("handoffDeliveryState"),
    deliveryDeduped: flag("deliveryDeduped"),
    deliveryRetried: flag("deliveryRetried"),
    deliveryUncertain: flag("deliveryUncertain")
  };
}
421
+
422
/**
 * Summarizes the memory retrieved for a run: per-source counts,
 * embedding freshness counts, embedding query diagnostics and findings.
 *
 * Retrieval metadata is read from `run.retrievalMeta`, falling back to
 * `run.plan.packet.layers.retrievalMeta`. Both the empty and non-empty
 * results expose the same set of keys, including `embeddingHealth`.
 *
 * @param {object|null} run - Run record; `retrievedMemory` is the list of
 *   retrieved items.
 * @returns {object} Counts, `embeddingQueryMode` (default
 *   "lexical_only"), `embeddingError`, `embeddingHealth`, `findings` and
 *   the normalized `items`. With no retrieved memory all item counts are
 *   0 and `failedEmbeddingCount` comes from `embeddingHealth.failedCount`.
 */
function analyzeRetrieval(run) {
  const retrievalMeta = run?.retrievalMeta || run?.plan?.packet?.layers?.retrievalMeta || null;
  const embeddingHealth = retrievalMeta?.embeddingHealth || null;
  const embeddingQueryMode = retrievalMeta?.embeddingQueryMode || "lexical_only";
  const embeddingError = retrievalMeta?.embeddingError || embeddingHealth?.lastError || null;

  if (!run?.retrievedMemory?.length) {
    return {
      memoryCount: 0,
      lexicalCount: 0,
      semanticCount: 0,
      qmdCount: 0,
      freshEmbeddingCount: 0,
      staleEmbeddingCount: 0,
      missingEmbeddingCount: 0,
      // No items to inspect, so the only failure signal is the health record.
      failedEmbeddingCount: embeddingHealth?.failedCount || 0,
      embeddingQueryMode,
      embeddingError,
      embeddingHealth,
      findings: ["No retrieved memory was captured for this run."],
      items: []
    };
  }

  const items = run.retrievedMemory.map((item) => ({
    entryId: item.entryId || null,
    title: item.title || null,
    sourceBackend: item.sourceBackend || "file",
    candidateSource: item.candidateSource || "indexed",
    lexicalScore: item.lexicalScore ?? 0,
    embeddingSimilarity: item.embeddingSimilarity ?? 0,
    embeddingFreshness: item.embeddingFreshness || "missing",
    retrievalSources: item.retrievalSources || []
  }));

  const countWhere = (predicate) => items.filter(predicate).length;
  const countFreshness = (state) => countWhere((item) => item.embeddingFreshness === state);

  const lexicalCount = countWhere((item) => item.retrievalSources.includes("lexical"));
  const semanticCount = countWhere((item) => item.retrievalSources.includes("semantic"));
  const qmdCount = countWhere((item) => item.retrievalSources.includes("qmd") || item.sourceBackend === "qmd");
  const freshEmbeddingCount = countFreshness("fresh");
  const staleEmbeddingCount = countFreshness("stale");
  const missingEmbeddingCount = countFreshness("missing");
  const failedEmbeddingCount = countFreshness("failed");

  const findings = [];
  if (semanticCount === 0) findings.push("No semantic retrieval candidates reached the final packet.");
  if (staleEmbeddingCount > 0) findings.push("Some retrieved items have stale embeddings.");
  if (freshEmbeddingCount === 0 && missingEmbeddingCount > 0) findings.push("Retrieved file memory is falling back to non-embedded retrieval.");
  if (embeddingError) findings.push("Embedding query failed and retrieval fell back to lexical mode.");

  return {
    memoryCount: items.length,
    lexicalCount,
    semanticCount,
    qmdCount,
    freshEmbeddingCount,
    staleEmbeddingCount,
    missingEmbeddingCount,
    failedEmbeddingCount,
    embeddingQueryMode,
    embeddingError,
    embeddingHealth,
    findings,
    items
  };
}
484
+
485
/**
 * Condenses interaction lifecycle flags and findings into one diagnosis.
 *
 * Missing stages are computed first: "ack" when required but not queued;
 * "completion" and "handoff" when required, not queued and the run did
 * not yield. The first matching rule then decides the result:
 *
 *  1. no findings                          -> healthy (whatever the
 *                                             missing stages are)
 *  2. any missing stage                    -> missing_required_notifications
 *  3. yielded, follow-up queued, unconsumed -> follow_up_incomplete
 *  4. delivery evidence required, unhealthy -> missing_delivery_evidence
 *  5. handoff queued but not delivered      -> handoff_incomplete
 *  6. delivery uncertain                    -> delivery_uncertain (warning)
 *  7. otherwise                             -> interaction_findings_present
 *                                             (warning)
 *
 * Rules 2-5 report status "action_required".
 *
 * @param {object} flags - Lifecycle flags plus `findings`, the list of
 *   findings collected by the caller.
 * @returns {{status: string, code: string|null, summary: string,
 *   missingStages: string[], findings: string[]}}
 */
function summarizeInteractionDiagnosis({
  ackRequired,
  ackQueued,
  completionRequired,
  completionQueued,
  handoffRequired,
  handoffQueued,
  yielded,
  followUpQueued,
  followUpConsumed,
  handoffDelivered,
  deliveryEvidenceRequired,
  deliveryEvidenceHealthy,
  deliveryUncertain,
  findings
}) {
  const missingStages = [];
  if (ackRequired && !ackQueued) {
    missingStages.push("ack");
  }
  if (completionRequired && !completionQueued && !yielded) {
    missingStages.push("completion");
  }
  if (handoffRequired && !handoffQueued && !yielded) {
    missingStages.push("handoff");
  }

  // Every outcome shares the same shape; only the healthy one blanks the findings.
  const diagnose = (status, code, summary, reported = findings) => ({
    status,
    code,
    summary,
    missingStages,
    findings: reported
  });

  if (!findings?.length) {
    return diagnose("healthy", null, "Interaction lifecycle evidence is complete.", []);
  }

  if (missingStages.length) {
    return diagnose(
      "action_required",
      "missing_required_notifications",
      `Required interaction notifications are missing: ${missingStages.join(", ")}.`
    );
  }

  if (yielded && followUpQueued && !followUpConsumed) {
    return diagnose("action_required", "follow_up_incomplete", "Yielded follow-up is still pending or incomplete.");
  }

  if (deliveryEvidenceRequired && !deliveryEvidenceHealthy) {
    return diagnose(
      "action_required",
      "missing_delivery_evidence",
      "Required pingback notifications do not have delivery receipts yet."
    );
  }

  if (handoffRequired && handoffQueued && !handoffDelivered) {
    return diagnose("action_required", "handoff_incomplete", "Handoff was queued but not fully delivered.");
  }

  if (deliveryUncertain) {
    return diagnose("warning", "delivery_uncertain", "Interaction lifecycle completed with uncertain delivery evidence.");
  }

  return diagnose(
    "warning",
    "interaction_findings_present",
    findings[0] || "Interaction lifecycle has unresolved findings."
  );
}
574
+
575
/**
 * Gather interaction-lifecycle evidence for a run from its queued
 * notifications and delivery receipts.
 *
 * @param {object|null} run - Run record; only `run.interaction` is read.
 * @param {Array<object>} [notifications=[]] - Notification records, matched by `stage`.
 * @param {Array<object>} [deliveries=[]] - Delivery receipts, matched by `notificationFilePath`.
 * @returns {object} Flat evidence record plus the `diagnosis` returned by
 *   `summarizeInteractionDiagnosis` and the ordered `findings` list.
 */
function analyzeInteraction(run, notifications = [], deliveries = []) {
  const interaction = run?.interaction || null;
  const findings = [];

  // Small local helpers so each piece of evidence reads as one line.
  const note = (condition, message) => {
    if (condition) findings.push(message);
  };
  const isRequired = (section) => interaction?.[section]?.required ?? false;
  const hasStage = (stage) => notifications.some((item) => item.stage === stage);
  const firstOfStage = (stage) => notifications.find((item) => item.stage === stage) || null;

  // What the interaction config asked for.
  const ackRequired = isRequired("ack");
  const completionRequired = isRequired("completion");
  const handoffRequired = isRequired("handoff");
  const yielded = isRequired("yield");
  const yieldSignal = interaction?.yield?.signal || null;

  // What was actually queued.
  const ackQueued = hasStage("ack");
  const completionQueued = hasStage("completion");
  const handoffQueued = hasStage("handoff");
  const followUpQueued = hasStage("follow_up");
  const followUpNotification = firstOfStage("follow_up");
  const handoffNotification = firstOfStage("handoff");

  // Handoff state: an explicit state wins, otherwise "pending" while a choice is awaited.
  const handoffPendingChoice = handoffNotification?.status === "awaiting_choice";
  let handoffState = handoffNotification?.handoffState || null;
  if (!handoffState && handoffPendingChoice) {
    handoffState = "pending";
  }
  const handoffChosenPeerId = handoffNotification?.chosenPeer?.peerId || null;
  const handoffChosen = handoffChosenPeerId !== null;
  const handoffChosenBy = handoffNotification?.chosenBy || null;

  let handoffDelivery = null;
  if (handoffNotification) {
    handoffDelivery =
      deliveries.find((item) => item.notificationFilePath === handoffNotification.filePath) || null;
  }
  const handoffDelivered = handoffDelivery !== null;
  const handoffDeliveryState = handoffDelivery?.delivery?.status || null;

  // Follow-up (yield) state.
  const followUpState = followUpNotification?.followUpState || followUpNotification?.yieldState || null;
  const followUpConsumed = ["status", "yieldState", "followUpState"].some(
    (field) => followUpNotification?.[field] === "consumed"
  );
  const followUpTarget = followUpNotification?.targetSurface || interaction?.yield?.targetSurface || null;
  const generatedFiles = new Set(followUpNotification?.generatedNotificationFiles || []);
  const isGenerated = (item) => generatedFiles.has(item.notificationFilePath);
  const followUpCompleted = deliveries.some(isGenerated);
  const followUpExpired = followUpState === "expired";
  const followUpEscalated = followUpState === "escalated";

  // Delivery receipts.
  const deliveryStates = deliveries.map((item) => item.delivery?.status || item.stage || "unknown");
  const deliveryDeduped = deliveryStates.some((state) => state === "duplicate_prevented");
  const deliveryRetried = deliveries.some((item) => Number(item.attempt) > 1);
  const deliveryUncertain = deliveryStates.some(
    (state) => state === "delivery_uncertain" || state === "uncertain"
  );

  // A yielded run may carry completion/handoff inside the follow-up payload instead.
  const yieldedWithFollowUp = yielded && followUpQueued;
  const yieldedCompletionHandled = yieldedWithFollowUp && Boolean(followUpNotification?.payload?.completion);
  const yieldedHandoffHandled = yieldedWithFollowUp && Boolean(followUpNotification?.payload?.handoff);

  // For yielded runs, receipts generated by the follow-up are not counted as visible.
  const visibleDeliveryCount = yielded
    ? deliveries.filter((item) => !isGenerated(item)).length
    : deliveries.length;
  const deliveryEvidenceRequired = !yielded && Boolean(ackQueued || completionQueued || handoffQueued);
  const deliveryEvidenceHealthy = !deliveryEvidenceRequired || visibleDeliveryCount > 0;

  // Findings are appended in a fixed order; callers surface them verbatim.
  note(ackRequired && !ackQueued, "Required ack notification was not queued.");
  note(
    completionRequired && !completionQueued && !yieldedCompletionHandled,
    "Required completion notification was not queued."
  );
  note(handoffRequired && !handoffQueued && !yieldedHandoffHandled, "Configured handoff was not queued.");
  note(yielded && !followUpQueued, "Run yielded but no follow-up payload was persisted.");
  note(followUpQueued && !followUpConsumed, "Follow-up payload was persisted but never consumed.");
  note(followUpExpired, "Follow-up expired before it was consumed.");
  note(followUpEscalated, "Follow-up expired and was escalated to the operator.");
  note(handoffPendingChoice, "Handoff is awaiting an explicit peer choice.");
  note(handoffState === "expired", "Handoff expired without an operator choice.");
  note(handoffState === "escalated", "Handoff expired and was escalated to the operator.");
  if (handoffState === "blocked") {
    findings.push(`Handoff is blocked${handoffNotification?.blockedReason ? `: ${handoffNotification.blockedReason}` : "."}`);
  }
  note(
    handoffQueued && !handoffPendingChoice && !handoffChosen,
    "Handoff was queued but no peer choice was recorded."
  );
  note(handoffChosen && !handoffDelivered, "Handoff was promoted to a peer but not delivered yet.");
  if (handoffDeliveryState && /error|blocked/i.test(handoffDeliveryState)) {
    findings.push(`Handoff delivery did not complete cleanly: ${handoffDeliveryState}.`);
  }
  note(!deliveryEvidenceHealthy, "No delivery receipts exist yet for queued interaction notifications.");
  note(deliveryDeduped, "A duplicate delivery attempt was prevented by dedupe policy.");
  note(deliveryUncertain, "A delivery attempt was marked uncertain and will not be blindly retried.");

  const diagnosis = summarizeInteractionDiagnosis({
    ackRequired,
    ackQueued,
    completionRequired,
    completionQueued,
    handoffRequired,
    handoffQueued,
    yielded,
    followUpQueued,
    followUpConsumed,
    handoffDelivered,
    deliveryEvidenceRequired,
    deliveryEvidenceHealthy,
    deliveryUncertain,
    findings
  });

  return {
    ackRequired,
    ackQueued,
    completionRequired,
    completionQueued,
    handoffRequired,
    handoffQueued,
    yielded,
    yieldSignal,
    followUpState,
    followUpQueued,
    followUpConsumed,
    followUpTarget,
    followUpCompleted,
    followUpExpired,
    followUpEscalated,
    followUpEscalationFilePath: followUpNotification?.escalationNotificationFilePath || null,
    handoffPendingChoice,
    handoffState,
    handoffExpired: handoffState === "expired",
    handoffEscalated: handoffState === "escalated",
    handoffBlocked: handoffState === "blocked",
    handoffChosen,
    handoffChosenPeerId,
    handoffChosenBy,
    handoffDelivered,
    handoffDeliveryState,
    handoffBlockedReason: handoffNotification?.blockedReason || null,
    deliveryEvidenceRequired,
    deliveryEvidenceHealthy,
    deliveryReceiptCount: visibleDeliveryCount,
    deliveryStates,
    deliveryDeduped,
    deliveryRetried,
    deliveryUncertain,
    diagnosis,
    findings
  };
}
697
+
698
/**
 * Reduce a live cron run record to the fields used in evaluation reports.
 *
 * @param {object|null|undefined} run - Live run record.
 * @returns {object|null} `null` when `run` is falsy; otherwise a summary in
 *   which every absent field is normalized to `null`.
 */
function summarizeLiveRun(run) {
  if (!run) {
    return null;
  }

  const { ts, status, summary, error, durationMs, provider, model, usage } = run;
  return {
    ts: ts || null,
    status: status || null,
    summary: summary || null,
    error: error || null,
    // `??` rather than `||` so a duration of 0 is preserved.
    durationMs: durationMs ?? null,
    provider: provider || null,
    model: model || null,
    usage: usage || null
  };
}
711
+
712
// Lowercase keywords searched for as substrings of an output's text; used as
// a fallback section list when no explicit section keys can be extracted.
const SECTION_HINTS = [
  "calendar", "weather",
  "projects", "project",
  "inbox", "issues", "backlog",
  "update", "summary", "next actions",
  "alerts", "status"
];
726
+
727
// Singular-to-plural aliases applied after a section key has been normalized.
const SECTION_ALIASES = new Map(
  Object.entries({
    project: "projects",
    "next action": "next actions"
  })
);
731
+
732
/**
 * Normalize a section label and map known aliases onto their canonical key.
 *
 * @param {*} value - Raw section label, passed to `normalizeSectionKey`.
 * @returns {*} The alias target from `SECTION_ALIASES` when one exists and is
 *   truthy, otherwise the normalized key itself.
 */
function canonicalSectionKey(value) {
  const key = normalizeSectionKey(value);
  const alias = SECTION_ALIASES.get(key);
  return alias ? alias : key;
}
736
+
737
/**
 * Pull distinct section keys out of an output's lines, in first-seen order.
 *
 * Each line is tried against the label patterns in order (markdown heading,
 * fully bold line, bulleted "label: value", plain "label: value"). The first
 * pattern whose capture canonicalizes to a key of at least two characters
 * decides the line; later patterns are not consulted for that line.
 *
 * @param {string[]} nonEmptyLines - Trimmed, non-empty lines.
 * @returns {string[]} Unique canonical section keys in order of appearance.
 */
function extractSectionKeys(nonEmptyLines) {
  const labelPatterns = [
    /^#{1,6}\s+(.+)$/,
    /^\*\*([^*]+)\*\*$/,
    /^[-*]\s+\**([^:*]+?)\**:\s+.+$/,
    /^([^:]{2,40}):\s+.+$/
  ];

  // Returns the first usable key for a line, or null when no pattern yields one.
  const firstUsableKey = (line) => {
    for (const pattern of labelPatterns) {
      const hit = line.match(pattern);
      if (!hit) continue;
      const candidate = canonicalSectionKey(hit[1]);
      if (!candidate || candidate.length < 2) continue;
      return candidate;
    }
    return null;
  };

  // A Set keeps insertion order, so it doubles as the de-duplicated result list.
  const orderedKeys = new Set();
  for (const line of nonEmptyLines) {
    const key = firstUsableKey(line);
    if (key !== null) {
      orderedKeys.add(key);
    }
  }

  return [...orderedKeys];
}
762
+
763
/**
 * Choose which section list of an analyzed output to compare on.
 *
 * @param {{sectionKeys: string[], sectionHints: string[]}} structure - Analyzed output structure.
 * @returns {string[]} `sectionKeys` when it is non-empty, otherwise `sectionHints`.
 */
function comparableSections(structure) {
  const { sectionKeys } = structure;
  if (sectionKeys.length) {
    return sectionKeys;
  }
  return structure.sectionHints;
}
766
+
767
/**
 * Compute structural statistics for a piece of report text.
 *
 * @param {*} text - Output text; falsy values are treated as the empty string.
 * @returns {object} Counts of characters, non-empty lines, headings, bullets,
 *   numbered items and code fences, plus extracted section keys, matched
 *   section hints, and table/emoji flags.
 */
function analyzeOutputStructure(text) {
  const raw = String(text || "");
  const trimmed = raw.trim();

  const nonEmptyLines = [];
  for (const rawLine of raw.split("\n")) {
    const line = rawLine.trim();
    if (line) {
      nonEmptyLines.push(line);
    }
  }

  const countLines = (predicate) =>
    nonEmptyLines.reduce((total, line) => (predicate(line) ? total + 1 : total), 0);
  const isHeading = (line) => /^#{1,6}\s/.test(line) || /^\*\*[^*]+\*\*/.test(line);
  const isBullet = (line) => /^[-*]\s/.test(line);
  const isNumbered = (line) => /^\d+\.\s/.test(line);

  const lowered = trimmed.toLowerCase();
  const fenceMarkers = raw.match(/```/g);
  const sectionKeys = extractSectionKeys(nonEmptyLines);

  return {
    charCount: trimmed.length,
    lineCount: nonEmptyLines.length,
    headingCount: countLines(isHeading),
    bulletCount: countLines(isBullet),
    numberedCount: countLines(isNumbered),
    // Fence markers come in open/close pairs, so halve the marker count.
    codeFenceCount: (fenceMarkers ? fenceMarkers.length : 0) / 2,
    sectionKeys,
    sectionHints: SECTION_HINTS.filter((hint) => lowered.includes(hint)),
    hasMarkdownTable: /\|.+\|/.test(raw),
    hasEmoji: /\p{Extended_Pictographic}/u.test(raw)
  };
}
788
+
789
/**
 * Compare the structure of a V2 run's output against a live run's output.
 *
 * @param {object|null} v2Run - V2 run; its text comes from `extractRunText`.
 * @param {object|null} liveRun - Live run; its text comes from `extractLiveText`.
 * @param {object|null} [contract=null] - Output contract forwarded to `extractRunText`.
 * @returns {{v2: object, live: object, alignment: object, findings: string[]}}
 *   Both structural analyses, their section/count deltas, and readable findings.
 */
function buildOutputDiff(v2Run, liveRun, contract = null) {
  const v2Text = extractRunText(v2Run, contract);
  const liveText = extractLiveText(liveRun);
  const v2 = analyzeOutputStructure(v2Text);
  const live = analyzeOutputStructure(liveText);

  const v2Sections = comparableSections(v2);
  const liveSections = comparableSections(live);
  const inV2 = new Set(v2Sections);
  const inLive = new Set(liveSections);
  const sharedSections = v2Sections.filter((section) => inLive.has(section));
  const missingFromV2 = liveSections.filter((section) => !inV2.has(section));
  const extraInV2 = v2Sections.filter((section) => !inLive.has(section));

  // Bullets and numbered items together count as "list structure".
  const listItems = (structure) => structure.bulletCount + structure.numberedCount;

  const findings = [];
  if (liveText) {
    if (missingFromV2.length) {
      findings.push(`V2 is missing live sections: ${missingFromV2.join(", ")}.`);
    }
    if (listItems(v2) < listItems(live)) {
      findings.push("V2 output is less structured than the live report.");
    }
    if (v2.charCount > 0 && live.charCount > 0) {
      const verbosityRatio = Number((v2.charCount / live.charCount).toFixed(4));
      if (verbosityRatio < 0.55) {
        findings.push("V2 output is much shorter than the live report.");
      }
      if (verbosityRatio > 1.8) {
        findings.push("V2 output is much longer than the live report.");
      }
    }
  } else {
    findings.push("No live output is available for structural comparison.");
  }

  const alignment = {
    sharedSections,
    missingFromV2,
    extraInV2,
    bulletDelta: listItems(v2) - listItems(live),
    headingDelta: v2.headingCount - live.headingCount,
    charDelta: v2.charCount - live.charCount
  };

  return { v2, live, alignment, findings };
}
829
+
830
/**
 * Validate the V2 and live outputs against an output contract and merge the
 * two validation results into one assessment.
 *
 * @param {object|null} contract - Output contract; when falsy nothing is assessed.
 * @param {object|null} v2Run - V2 run; `result.output` is preferred over `output`.
 * @param {object|null} liveRun - Live run; `summary` is preferred over `error`.
 * @returns {object|null} `null` without a contract; otherwise the contract's
 *   format/sections/style hints together with per-side missing and empty
 *   sections, parsed payloads, field scores, and findings prefixed with
 *   "V2: " or "Live: ".
 */
function assessOutputContract(contract, v2Run, liveRun) {
  if (!contract) return null;

  const v2Validation = validateOutputContract(contract, v2Run?.result?.output ?? v2Run?.output ?? "");
  const liveValidation = validateOutputContract(contract, liveRun?.summary || liveRun?.error || "");

  // A validation result may exist without a `findings` array; treat that as
  // "no findings" instead of throwing on `.map` of undefined.
  const labelFindings = (validation, label) =>
    (validation?.findings || []).map((item) => `${label}: ${item}`);

  const missingFromV2 = v2Validation?.missingSections || [];
  const missingFromLive = liveValidation?.missingSections || [];
  const findings = [
    ...labelFindings(v2Validation, "V2"),
    ...labelFindings(liveValidation, "Live")
  ];

  return {
    format: contract.format || null,
    requiredSections: contract.requiredSections || [],
    styleHints: contract.styleHints || [],
    satisfiedRatio: v2Validation?.satisfiedRatio ?? 0,
    missingFromV2,
    missingFromLive,
    emptyInV2: v2Validation?.emptySections || [],
    emptyInLive: liveValidation?.emptySections || [],
    parsedV2: v2Validation?.parsed || null,
    parsedLive: liveValidation?.parsed || null,
    fieldScores: v2Validation?.fieldScores || null,
    findings
  };
}
857
+
858
+ export { buildRubric };
859
+
860
/**
 * Builds evaluation reports that compare recorded V2 job runs with their
 * closest live cron analogues, and can persist those reports.
 */
export class Evaluator {
  /**
   * @param {object} roots
   * @param {string} roots.projectRoot - Forwarded to the Scheduler.
   * @param {string} roots.liveRoot - Forwarded to the Scheduler.
   * @param {string} roots.stateRoot - Root for the reviewer and for the
   *   "evaluations", "notifications" and "deliveries" stores.
   */
  constructor({ projectRoot, liveRoot, stateRoot }) {
    this.liveRoot = liveRoot;
    this.stateRoot = stateRoot;
    this.reviewer = new RunReviewer({ stateRoot });
    this.scheduler = new Scheduler({ projectRoot, liveRoot, stateRoot });
    this.evalStore = new RunStore({ rootDir: path.join(stateRoot, "evaluations") });
    this.notificationStore = new NotificationStore({ rootDir: path.join(stateRoot, "notifications") });
    this.deliveryStore = new DeliveryStore({ rootDir: path.join(stateRoot, "deliveries") });
  }

  /**
   * Collect the notifications and delivery receipts that belong to a run.
   *
   * @param {object|null} run - Run whose `notificationFiles` seed the lookup.
   * @param {object|null} [review=null] - Review whose `recentNotifications`
   *   are reused before falling back to the notification store.
   * @returns {Promise<{notificationFiles: Set<string>, notifications: object[], deliveries: object[]}>}
   *   Related notification files, notifications sorted by timestamp, and the
   *   delivery receipts that reference those files.
   */
  async resolveInteractionArtifacts(run, review = null) {
    const reviewNotifications = review?.recentNotifications || [];
    const baseFiles = run?.notificationFiles || [];
    const relatedNotificationFiles = expandRelatedNotificationFiles(
      baseFiles,
      reviewNotifications
    );
    const knownNotifications = reviewNotifications.filter((item) => relatedNotificationFiles.has(item.filePath));

    // Only files the review did not already supply need to be read from the store.
    const knownFilePaths = new Set(knownNotifications.map((item) => item.filePath));
    const missingNotificationFiles = [...relatedNotificationFiles].filter(
      (filePath) => !knownFilePaths.has(filePath)
    );
    const loadedNotifications = await this.notificationStore.getNotifications(missingNotificationFiles);
    const notifications = [...knownNotifications, ...loadedNotifications].sort((a, b) =>
      String(a.timestamp || "").localeCompare(String(b.timestamp || ""))
    );

    // Expand again: freshly loaded notifications may reference further files.
    const expandedNotificationFiles = expandRelatedNotificationFiles(baseFiles, notifications);

    // Load all delivery receipts once for deterministic interaction evidence.
    const allDeliveries = await this.deliveryStore.listAll();
    const deliveries = allDeliveries.filter((item) => expandedNotificationFiles.has(item.notificationFilePath));
    return {
      notificationFiles: expandedNotificationFiles,
      notifications,
      deliveries
    };
  }

  /**
   * Evaluate every job that has recent runs.
   *
   * @param {number} [limit=20] - Review window passed to the reviewer.
   * @returns {Promise<{jobs: object[], recentRuns: object[], scheduler: *}>}
   *   Job reports sorted by ascending rubric score, the runs considered
   *   (jobs whose id ends in "-comparison" are excluded), and the reviewer's
   *   scheduler state.
   */
  async evaluate(limit = 20) {
    const [review, comparisons] = await Promise.all([
      this.reviewer.review(limit),
      this.scheduler.compareJobs()
    ]);

    const recentRuns = review.recentRuns.filter((run) => !run.jobId.endsWith("-comparison"));
    const groups = Array.from(groupBy(recentRuns, (run) => run.jobId).entries());

    // Load the runtime once and share it, rather than once per job. Skipped
    // when there is nothing to evaluate, so no extra load is introduced.
    const runtime = groups.length ? await this.scheduler.loadRuntime() : null;

    const jobs = await Promise.all(
      groups.map(([jobId, runs]) => this.evaluateJob(jobId, {
        runs,
        review,
        comparisons,
        runtime
      }))
    );

    return {
      jobs: [...jobs].sort((a, b) => a.rubric.overallScore - b.rubric.overallScore),
      recentRuns,
      scheduler: review.scheduler
    };
  }

  /**
   * Build the evaluation report for one job.
   *
   * @param {string} jobId - Job to evaluate.
   * @param {object} [options={}]
   * @param {object[]} [options.runs] - Pre-selected runs for the job.
   * @param {object} [options.review] - Pre-computed review; loaded when absent.
   * @param {object[]} [options.comparisons] - Pre-computed job comparisons; loaded when absent.
   * @param {object} [options.runtime] - Pre-loaded scheduler runtime; loaded when absent.
   * @param {number} [options.limit] - Review window used when `review` is absent (default 20).
   * @returns {Promise<object>} The job report.
   */
  async evaluateJob(jobId, options = {}) {
    const runtime = options.runtime || (await this.scheduler.loadRuntime());
    const jobConfig = runtime.jobs[jobId] || null;
    const review = options.review || (await this.reviewer.review(options.limit ?? 20));
    const comparisons = options.comparisons || (await this.scheduler.compareJobs());
    const runs = options.runs || review.recentRuns.filter((run) => run.jobId === jobId && !run.jobId.endsWith("-comparison"));
    const schedulerState = review.scheduler.find((item) => item.jobId === jobId) || null;
    const comparison = comparisons.find((item) => item.v2JobId === jobId) || null;
    const liveMatches = await Promise.all(
      (comparison?.closestLiveJobs || []).map(async (match) => {
        // The two history reads are independent, so issue them together.
        const [recentRuns, latestRuns] = await Promise.all([
          this.scheduler.bridge.loadCronRunHistory(match.id, 5),
          this.scheduler.bridge.loadCronRunHistory(match.id, 1)
        ]);
        return {
          ...match,
          recentRuns,
          latestRun: latestRuns[0] || null
        };
      })
    );

    const selectedRun = choosePrimaryV2Run(runs);
    // Interaction evidence is taken from the first listed run, falling back
    // to the primary run when the list is empty.
    const interactionRun = runs[0] || selectedRun;
    const selectedLiveMatch = pickBestLiveMatch(liveMatches);
    const liveHistory = selectedLiveMatch?.recentRuns || [];
    const outputContract = normalizeOutputContract(jobConfig?.outputContract || null);
    const outputDiff = buildOutputDiff(selectedRun, selectedLiveMatch?.latestRun || null, outputContract);
    const contractCheck = assessOutputContract(outputContract, selectedRun, selectedLiveMatch?.latestRun || null);
    const retrieval = analyzeRetrieval(selectedRun);
    const interactionArtifacts = await this.resolveInteractionArtifacts(interactionRun, review);
    const interaction = analyzeInteraction(interactionRun, interactionArtifacts.notifications, interactionArtifacts.deliveries);
    const rubric = buildRubric({
      v2Run: selectedRun,
      liveHistory,
      matchedLiveJob: selectedLiveMatch,
      outputContract,
      contractCheck
    });

    return {
      jobId,
      schedulerState,
      maintenance: review?.maintenance || null,
      latestRun: runs[0] || null,
      primaryRun: summarizeV2Run(selectedRun),
      interactionRunTimestamp: interactionRun?.timestamp || null,
      runCount: runs.length,
      modeCounts: countModes(runs),
      liveMatches: liveMatches.map((match) => ({
        id: match.id,
        name: match.name,
        description: match.description || null,
        enabled: match.enabled,
        schedule: match.schedule,
        kind: match.kind || null,
        lastStatus: match.lastStatus || null,
        latestRun: summarizeLiveRun(match.latestRun)
      })),
      selectedLiveMatch: selectedLiveMatch
        ? {
            id: selectedLiveMatch.id,
            name: selectedLiveMatch.name,
            latestRun: summarizeLiveRun(selectedLiveMatch.latestRun)
          }
        : null,
      retrieval,
      interaction,
      outputContract,
      contractCheck,
      outputDiff,
      rubric,
      comparisonNotes: buildComparisonNotes({
        jobId,
        selectedRun,
        selectedLiveMatch,
        schedulerState,
        maintenance: review?.maintenance || null,
        rubric,
        outputDiff,
        contractCheck,
        retrieval,
        interaction
      }),
      interactionArtifacts: {
        notificationCount: interactionArtifacts.notifications.length,
        deliveryCount: interactionArtifacts.deliveries.length,
        notificationFileCount: interactionArtifacts.notificationFiles.size
      }
    };
  }

  /**
   * Evaluate a job and save the report through the evaluation store.
   *
   * @param {string} jobId - Job to evaluate.
   * @param {object} [options={}] - Same options as `evaluateJob`.
   * @returns {Promise<object>} The saved artifact (timestamp, kind, report
   *   fields) together with the `filePath` returned by the store.
   */
  async evaluateAndPersistJob(jobId, options = {}) {
    const report = await this.evaluateJob(jobId, options);
    const artifact = {
      timestamp: new Date().toISOString(),
      kind: "job-evaluation",
      ...report
    };
    const filePath = await this.evalStore.saveRun(jobId, artifact);
    return {
      filePath,
      ...artifact
    };
  }
}
1023
+
1024
/**
 * Tally runs by their `mode`.
 *
 * @param {Array<object>} runs - Run records.
 * @returns {Object<string, number>} Count per mode; runs without a truthy
 *   `mode` are counted under "unknown".
 */
function countModes(runs) {
  const tally = {};
  runs.forEach((run) => {
    const mode = run.mode || "unknown";
    const previous = tally[mode] || 0;
    tally[mode] = previous + 1;
  });
  return tally;
}
1031
+
1032
/**
 * Assemble the de-duplicated list of human-readable notes for a job report.
 *
 * Notes cover, in order: V2 run/scheduler state, report-fallback outcome,
 * daemon maintenance, the live cron match, then the findings from the rubric,
 * output diff, contract check, retrieval analysis and interaction analysis.
 *
 * @param {object} input
 * @param {string} input.jobId - Job being reported.
 * @param {object|null} input.selectedRun - Primary V2 run.
 * @param {object|null} input.selectedLiveMatch - Closest live cron job, if any.
 * @param {object|null} input.schedulerState - Scheduler state for the job.
 * @param {object|null} input.maintenance - Daemon maintenance summary.
 * @param {object} input.rubric - Rubric result; `findings` is read.
 * @param {object} input.outputDiff - Output diff; `findings` is read.
 * @param {object|null} input.contractCheck - Contract assessment.
 * @param {object|null} [input.retrieval=null] - Retrieval analysis.
 * @param {object|null} [input.interaction=null] - Interaction analysis.
 * @returns {string[]} Notes passed through `uniqueStrings`.
 */
function buildComparisonNotes({
  jobId,
  selectedRun,
  selectedLiveMatch,
  schedulerState,
  maintenance,
  rubric,
  outputDiff,
  contractCheck,
  retrieval = null,
  interaction = null
}) {
  const notes = [];
  if (!selectedRun) notes.push("No V2 runs recorded.");
  if (schedulerState?.lastStatus === "ok") notes.push("Latest V2 scheduler state is healthy.");

  const fallback = selectedRun?.fallback;
  if (fallback?.attempted && fallback?.success) {
    notes.push(
      `Run used report fallback from ${fallback.sourceLane || "local"} to ${fallback.finalSourceLane || "remote"} after ${fallback.trigger || "failure"}.`
    );
  } else if (fallback?.attempted && !fallback?.success) {
    notes.push(`Report fallback was attempted but did not succeed${fallback?.fallbackError ? `: ${fallback.fallbackError}` : "."}`);
  } else if (fallback?.allowed === false && fallback?.trigger) {
    notes.push(`Report fallback was blocked after ${fallback.trigger}${fallback?.blockedReason ? `: ${fallback.blockedReason}` : "."}`);
  }

  if (maintenance?.wal?.some((item) => item.action && item.action !== "none" && item.action !== "error")) {
    notes.push("Daemon maintenance recently performed WAL checkpoint work.");
  }
  if (maintenance?.handoffs?.expiredCount > 0) {
    notes.push(`Daemon maintenance expired or escalated ${maintenance.handoffs.expiredCount} pending handoff(s).`);
  }

  // Every findings source is guarded the same way on both return paths, so a
  // missing list never throws and no source is dropped on either path.
  const rubricFindings = rubric?.findings || [];
  const allOutputDiffFindings = outputDiff?.findings || [];
  const contractFindings = contractCheck?.findings || [];
  const retrievalFindings = retrieval?.findings || [];
  const interactionFindings = interaction?.findings || [];

  if (!selectedLiveMatch) {
    notes.push("No live cron analogue matched.");
    return uniqueStrings([
      ...notes,
      ...rubricFindings,
      ...allOutputDiffFindings,
      ...contractFindings,
      ...retrievalFindings,
      ...interactionFindings
    ]);
  }

  const latestLive = selectedLiveMatch.latestRun;
  notes.push(`Closest live cron match: ${selectedLiveMatch.name || selectedLiveMatch.id}.`);
  if (latestLive?.status) notes.push(`Latest live status: ${latestLive.status}.`);
  if (latestLive?.provider) notes.push(`Latest live provider: ${latestLive.provider}.`);
  if (latestLive?.durationMs != null) notes.push(`Latest live duration: ${latestLive.durationMs}ms.`);

  // For memory-rollup, once the contract is (almost) fully satisfied, the
  // "less structured" / "much shorter" diff findings are suppressed.
  const suppressShapeFindings =
    jobId === "memory-rollup" && Number(contractCheck?.satisfiedRatio || 0) >= 0.99;
  const outputDiffFindings = suppressShapeFindings
    ? allOutputDiffFindings.filter((item) => !/less structured|much shorter/i.test(item))
    : allOutputDiffFindings;

  return uniqueStrings([
    ...notes,
    ...rubricFindings,
    ...outputDiffFindings,
    ...contractFindings,
    ...retrievalFindings,
    ...interactionFindings
  ]);
}
1080
+
1081
/**
 * Reduce a job's output contract to its four known fields, with defaults.
 *
 * @param {object|null|undefined} contract - Raw contract from the job config.
 * @returns {{format: *, requiredSections: *, styleHints: *, profile: *}|null}
 *   `null` when no contract is given; otherwise missing `format`/`profile`
 *   become `null` and missing section/hint lists become empty arrays.
 */
function normalizeOutputContract(contract) {
  if (!contract) {
    return null;
  }

  const { format, requiredSections, styleHints, profile } = contract;
  return {
    format: format || null,
    requiredSections: requiredSections || [],
    styleHints: styleHints || [],
    profile: profile || null
  };
}