nemoris 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. package/.env.example +49 -49
  2. package/LICENSE +21 -21
  3. package/README.md +209 -209
  4. package/SECURITY.md +59 -119
  5. package/bin/nemoris +46 -46
  6. package/config/agents/agent.toml.example +28 -28
  7. package/config/agents/content.toml +23 -0
  8. package/config/agents/default.toml +22 -22
  9. package/config/agents/heartbeat.toml +35 -0
  10. package/config/agents/iris.toml +23 -0
  11. package/config/agents/lab.toml +23 -0
  12. package/config/agents/main.toml +45 -0
  13. package/config/agents/nemo.toml +21 -0
  14. package/config/agents/ops.toml +38 -0
  15. package/config/agents/orchestrator.toml +18 -18
  16. package/config/agents/revenue.toml +23 -0
  17. package/config/agents/testyboo.toml +19 -0
  18. package/config/delivery.toml +73 -73
  19. package/config/embeddings.toml +5 -5
  20. package/config/identity/content-purpose.md +11 -0
  21. package/config/identity/content-soul.md +45 -0
  22. package/config/identity/default-purpose.md +1 -1
  23. package/config/identity/default-soul.md +3 -3
  24. package/config/identity/heartbeat-purpose.md +9 -0
  25. package/config/identity/heartbeat-soul.md +16 -0
  26. package/config/identity/iris-purpose.md +17 -0
  27. package/config/identity/iris-soul.md +68 -0
  28. package/config/identity/lab-purpose.md +10 -0
  29. package/config/identity/lab-soul.md +38 -0
  30. package/config/identity/main-purpose.md +17 -0
  31. package/config/identity/main-soul.md +66 -0
  32. package/config/identity/main-user.md +22 -0
  33. package/config/identity/ops-purpose.md +9 -0
  34. package/config/identity/ops-soul.md +16 -0
  35. package/config/identity/orchestrator-purpose.md +1 -1
  36. package/config/identity/orchestrator-soul.md +1 -1
  37. package/config/identity/revenue-purpose.md +9 -0
  38. package/config/identity/revenue-soul.md +41 -0
  39. package/config/identity/testyboo-purpose.md +13 -0
  40. package/config/identity/testyboo-soul.md +20 -0
  41. package/config/improvement-targets.toml +15 -15
  42. package/config/jobs/heartbeat-check.toml +30 -30
  43. package/config/jobs/memory-rollup.toml +46 -46
  44. package/config/jobs/workspace-health.toml +63 -63
  45. package/config/mcp.toml +16 -16
  46. package/config/output-contracts.toml +17 -17
  47. package/config/peers.toml +32 -32
  48. package/config/peers.toml.example +32 -32
  49. package/config/policies/memory-default.toml +10 -10
  50. package/config/policies/memory-heartbeat.toml +5 -5
  51. package/config/policies/memory-ops.toml +10 -10
  52. package/config/policies/tools-heartbeat-minimal.toml +8 -8
  53. package/config/policies/tools-interactive-safe.toml +8 -8
  54. package/config/policies/tools-ops-bounded.toml +8 -8
  55. package/config/policies/tools-orchestrator.toml +7 -7
  56. package/config/providers/anthropic.toml +15 -15
  57. package/config/providers/ollama.toml +5 -5
  58. package/config/providers/openai-codex.toml +9 -9
  59. package/config/providers/openrouter.toml +5 -5
  60. package/config/router.toml +22 -22
  61. package/config/runtime.toml +114 -114
  62. package/config/skills/self-improvement.toml +15 -15
  63. package/config/skills/telegram-onboarding-spec.md +240 -240
  64. package/config/skills/workspace-monitor.toml +15 -15
  65. package/config/task-router.toml +42 -42
  66. package/install.sh +50 -50
  67. package/package.json +91 -90
  68. package/src/auth/auth-profiles.js +169 -169
  69. package/src/auth/openai-codex-oauth.js +285 -285
  70. package/src/battle.js +449 -449
  71. package/src/cli/help.js +265 -265
  72. package/src/cli/output-filter.js +49 -49
  73. package/src/cli/runtime-control.js +704 -704
  74. package/src/cli-main.js +2763 -2763
  75. package/src/cli.js +78 -78
  76. package/src/config/loader.js +332 -332
  77. package/src/config/schema-validator.js +214 -214
  78. package/src/config/toml-lite.js +8 -8
  79. package/src/daemon/action-handlers.js +71 -71
  80. package/src/daemon/healing-tick.js +87 -87
  81. package/src/daemon/health-probes.js +90 -90
  82. package/src/daemon/notifier.js +57 -57
  83. package/src/daemon/nurse.js +218 -218
  84. package/src/daemon/repair-log.js +106 -106
  85. package/src/daemon/rule-staging.js +90 -90
  86. package/src/daemon/rules.js +29 -29
  87. package/src/daemon/telegram-commands.js +54 -54
  88. package/src/daemon/updater.js +85 -85
  89. package/src/jobs/job-runner.js +78 -78
  90. package/src/mcp/consumer.js +129 -129
  91. package/src/memory/active-recall.js +171 -171
  92. package/src/memory/backend-manager.js +97 -97
  93. package/src/memory/backends/file-backend.js +38 -38
  94. package/src/memory/backends/qmd-backend.js +219 -219
  95. package/src/memory/embedding-guards.js +24 -24
  96. package/src/memory/embedding-index.js +118 -118
  97. package/src/memory/embedding-service.js +179 -179
  98. package/src/memory/file-index.js +177 -177
  99. package/src/memory/memory-signature.js +5 -5
  100. package/src/memory/memory-store.js +648 -648
  101. package/src/memory/retrieval-planner.js +66 -66
  102. package/src/memory/scoring.js +145 -145
  103. package/src/memory/simhash.js +78 -78
  104. package/src/memory/sqlite-active-store.js +824 -824
  105. package/src/memory/write-policy.js +36 -36
  106. package/src/onboarding/aliases.js +33 -33
  107. package/src/onboarding/auth/api-key.js +224 -224
  108. package/src/onboarding/auth/ollama-detect.js +42 -42
  109. package/src/onboarding/clack-prompter.js +77 -77
  110. package/src/onboarding/doctor.js +530 -530
  111. package/src/onboarding/lock.js +42 -42
  112. package/src/onboarding/model-catalog.js +344 -344
  113. package/src/onboarding/phases/auth.js +576 -589
  114. package/src/onboarding/phases/build.js +130 -130
  115. package/src/onboarding/phases/choose.js +82 -82
  116. package/src/onboarding/phases/detect.js +98 -98
  117. package/src/onboarding/phases/hatch.js +216 -216
  118. package/src/onboarding/phases/identity.js +79 -79
  119. package/src/onboarding/phases/ollama.js +345 -345
  120. package/src/onboarding/phases/scaffold.js +99 -99
  121. package/src/onboarding/phases/telegram.js +377 -377
  122. package/src/onboarding/phases/validate.js +204 -204
  123. package/src/onboarding/phases/verify.js +206 -206
  124. package/src/onboarding/platform.js +482 -482
  125. package/src/onboarding/status-bar.js +95 -95
  126. package/src/onboarding/templates.js +794 -794
  127. package/src/onboarding/toml-writer.js +38 -38
  128. package/src/onboarding/tui.js +250 -250
  129. package/src/onboarding/uninstall.js +153 -153
  130. package/src/onboarding/wizard.js +516 -499
  131. package/src/providers/anthropic.js +168 -168
  132. package/src/providers/base.js +247 -247
  133. package/src/providers/circuit-breaker.js +136 -136
  134. package/src/providers/ollama.js +163 -163
  135. package/src/providers/openai-codex.js +149 -149
  136. package/src/providers/openrouter.js +136 -136
  137. package/src/providers/registry.js +36 -36
  138. package/src/providers/router.js +16 -16
  139. package/src/runtime/bootstrap-cache.js +47 -47
  140. package/src/runtime/capabilities-prompt.js +25 -25
  141. package/src/runtime/completion-ping.js +99 -99
  142. package/src/runtime/config-validator.js +121 -121
  143. package/src/runtime/context-ledger.js +360 -360
  144. package/src/runtime/cutover-readiness.js +42 -42
  145. package/src/runtime/daemon.js +729 -729
  146. package/src/runtime/delivery-ack.js +195 -195
  147. package/src/runtime/delivery-adapters/local-file.js +41 -41
  148. package/src/runtime/delivery-adapters/openclaw-cli.js +94 -94
  149. package/src/runtime/delivery-adapters/openclaw-peer.js +98 -98
  150. package/src/runtime/delivery-adapters/shadow.js +13 -13
  151. package/src/runtime/delivery-adapters/standalone-http.js +98 -98
  152. package/src/runtime/delivery-adapters/telegram.js +104 -104
  153. package/src/runtime/delivery-adapters/tui.js +128 -128
  154. package/src/runtime/delivery-manager.js +807 -807
  155. package/src/runtime/delivery-store.js +168 -168
  156. package/src/runtime/dependency-health.js +118 -118
  157. package/src/runtime/envelope.js +114 -114
  158. package/src/runtime/evaluation.js +1089 -1089
  159. package/src/runtime/exec-approvals.js +216 -216
  160. package/src/runtime/executor.js +500 -500
  161. package/src/runtime/failure-ping.js +67 -67
  162. package/src/runtime/flows.js +83 -83
  163. package/src/runtime/guards.js +45 -45
  164. package/src/runtime/handoff.js +51 -51
  165. package/src/runtime/identity-cache.js +28 -28
  166. package/src/runtime/improvement-engine.js +109 -109
  167. package/src/runtime/improvement-harness.js +581 -581
  168. package/src/runtime/input-sanitiser.js +72 -72
  169. package/src/runtime/interaction-contract.js +347 -347
  170. package/src/runtime/lane-readiness.js +226 -226
  171. package/src/runtime/migration.js +323 -323
  172. package/src/runtime/model-resolution.js +78 -78
  173. package/src/runtime/network.js +64 -64
  174. package/src/runtime/notification-store.js +97 -97
  175. package/src/runtime/notifier.js +256 -256
  176. package/src/runtime/orchestrator.js +53 -53
  177. package/src/runtime/orphan-reaper.js +41 -41
  178. package/src/runtime/output-contract-schema.js +139 -139
  179. package/src/runtime/output-contract-validator.js +439 -439
  180. package/src/runtime/peer-readiness.js +69 -69
  181. package/src/runtime/peer-registry.js +133 -133
  182. package/src/runtime/pilot-status.js +108 -108
  183. package/src/runtime/prompt-builder.js +261 -261
  184. package/src/runtime/provider-attempt.js +582 -582
  185. package/src/runtime/report-fallback.js +71 -71
  186. package/src/runtime/result-normalizer.js +183 -183
  187. package/src/runtime/retention.js +74 -74
  188. package/src/runtime/review.js +244 -244
  189. package/src/runtime/route-job.js +15 -15
  190. package/src/runtime/run-store.js +38 -38
  191. package/src/runtime/schedule.js +88 -88
  192. package/src/runtime/scheduler-state.js +434 -434
  193. package/src/runtime/scheduler.js +656 -656
  194. package/src/runtime/session-compactor.js +182 -182
  195. package/src/runtime/session-search.js +155 -155
  196. package/src/runtime/slack-inbound.js +249 -249
  197. package/src/runtime/ssrf.js +102 -102
  198. package/src/runtime/status-aggregator.js +330 -330
  199. package/src/runtime/task-contract.js +140 -140
  200. package/src/runtime/task-packet.js +107 -107
  201. package/src/runtime/task-router.js +140 -140
  202. package/src/runtime/telegram-inbound.js +1565 -1565
  203. package/src/runtime/token-counter.js +134 -134
  204. package/src/runtime/token-estimator.js +59 -59
  205. package/src/runtime/tool-loop.js +200 -200
  206. package/src/runtime/transport-server.js +311 -311
  207. package/src/runtime/tui-server.js +411 -411
  208. package/src/runtime/ulid.js +44 -44
  209. package/src/security/ssrf-check.js +197 -197
  210. package/src/setup.js +369 -369
  211. package/src/shadow/bridge.js +303 -303
  212. package/src/skills/loader.js +84 -84
  213. package/src/tools/catalog.json +49 -49
  214. package/src/tools/cli-delegate.js +44 -44
  215. package/src/tools/mcp-client.js +106 -106
  216. package/src/tools/micro/cancel-task.js +6 -6
  217. package/src/tools/micro/complete-task.js +6 -6
  218. package/src/tools/micro/fail-task.js +6 -6
  219. package/src/tools/micro/http-fetch.js +74 -74
  220. package/src/tools/micro/index.js +36 -36
  221. package/src/tools/micro/lcm-recall.js +60 -60
  222. package/src/tools/micro/list-dir.js +17 -17
  223. package/src/tools/micro/list-skills.js +46 -46
  224. package/src/tools/micro/load-skill.js +38 -38
  225. package/src/tools/micro/memory-search.js +45 -45
  226. package/src/tools/micro/read-file.js +11 -11
  227. package/src/tools/micro/session-search.js +54 -54
  228. package/src/tools/micro/shell-exec.js +43 -43
  229. package/src/tools/micro/trigger-job.js +79 -79
  230. package/src/tools/micro/web-search.js +58 -58
  231. package/src/tools/micro/workspace-paths.js +39 -39
  232. package/src/tools/micro/write-file.js +14 -14
  233. package/src/tools/micro/write-memory.js +41 -41
  234. package/src/tools/registry.js +348 -348
  235. package/src/tools/tool-result-contract.js +36 -36
  236. package/src/tui/chat.js +835 -835
  237. package/src/tui/renderer.js +175 -175
  238. package/src/tui/socket-client.js +217 -217
  239. package/src/utils/canonical-json.js +29 -29
  240. package/src/utils/compaction.js +30 -30
  241. package/src/utils/env-loader.js +5 -5
  242. package/src/utils/errors.js +80 -80
  243. package/src/utils/fs.js +101 -101
  244. package/src/utils/ids.js +5 -5
  245. package/src/utils/model-context-limits.js +30 -30
  246. package/src/utils/token-budget.js +74 -74
  247. package/src/utils/usage-cost.js +25 -25
  248. package/src/utils/usage-metrics.js +14 -14
@@ -1,581 +1,581 @@
1
- import path from "node:path";
2
- import { ConfigLoader } from "../config/loader.js";
3
- import { RunStore } from "./run-store.js";
4
- import { listFilesRecursive, readJson } from "../utils/fs.js";
5
- import { classifyRuntimeFailure } from "./report-fallback.js";
6
-
7
/**
 * Recursively merges `override` onto `base` and returns a new value; neither
 * input is mutated.
 *
 * Rules, in order:
 *  - If either side is an array, arrays are never merged element-wise: a copy
 *    of the override array wins, otherwise a copy of the base array is returned.
 *  - If `base` is not a non-null object, `override` wins unless it is undefined.
 *  - If `override` is not a non-null object, it wins unless it is undefined, in
 *    which case a shallow copy of `base` is returned.
 *  - Otherwise own enumerable keys of `override` are merged key by key.
 *
 * @param {*} base - The value providing defaults.
 * @param {*} override - The value whose entries take precedence.
 * @returns {*} The merged value.
 */
function _deepMerge(base, override) {
  if (Array.isArray(base) || Array.isArray(override)) {
    return Array.isArray(override) ? [...override] : Array.isArray(base) ? [...base] : [];
  }

  if (!base || typeof base !== "object") {
    return override === undefined ? base : override;
  }

  if (!override || typeof override !== "object") {
    return override === undefined ? { ...base } : override;
  }

  const merged = { ...base };
  for (const [key, value] of Object.entries(override)) {
    // An own "__proto__" key (e.g. produced by JSON.parse) would go through the
    // prototype setter on assignment and swap the prototype of `merged`.
    if (key === "__proto__") {
      continue;
    }
    // Only own properties of `base` count as "already present"; the `in`
    // operator would also match inherited names such as "constructor".
    const baseHasKey = Object.prototype.hasOwnProperty.call(base, key);
    merged[key] = baseHasKey ? _deepMerge(base[key], value) : value;
  }
  return merged;
}
26
-
27
/**
 * Builds the descriptor for a named improvement variant.
 *
 * Each descriptor has the shape `{ id, description, overrides }`. Every variant
 * except "report_model_bump" inherits `target.defaultModelOverride` (or null)
 * as its `modelOverride`.
 *
 * @param {object} target - Improvement target; only `defaultModelOverride` is read.
 * @param {string} variantId - One of "baseline", "focus_concrete",
 *   "retrieval_lexical_heavy", "retrieval_embedding_heavy", "report_model_bump".
 * @returns {{id: string, description: string, overrides: object}} The variant descriptor.
 * @throws {Error} When `variantId` is not a known variant.
 */
function buildVariant(target, variantId) {
  // Resolved lazily so variants that never read the target do not touch it.
  const inheritedModel = () => target.defaultModelOverride || null;

  const factories = new Map([
    [
      "baseline",
      () => ({
        id: "baseline",
        description: "Run the lane with its current guidance and routing defaults.",
        overrides: { modelOverride: inheritedModel() }
      })
    ],
    [
      "focus_concrete",
      () => ({
        id: "focus_concrete",
        description: "Push the report to be more concrete, operator-facing, and less generic.",
        overrides: {
          modelOverride: inheritedModel(),
          reportGuidanceOverride: {
            focus: ["concrete evidence", "explicit operator signal", "succinct useful status"],
            qualityChecks: ["avoid generic reassurance", "prefer named facts over abstractions", "state None explicitly"],
            avoid: ["vague encouragement", "boilerplate filler"]
          }
        }
      })
    ],
    [
      "retrieval_lexical_heavy",
      () => ({
        id: "retrieval_lexical_heavy",
        description: "Bias retrieval toward lexical match signal for clearer grounded reports.",
        overrides: {
          modelOverride: inheritedModel(),
          retrievalBlendOverride: {
            lexicalWeight: 0.48,
            embeddingWeight: 0.2,
            recencyWeight: 0.14,
            salienceWeight: 0.12,
            typeWeight: 0.04,
            semanticRescueBonus: 0.04
          }
        }
      })
    ],
    [
      "retrieval_embedding_heavy",
      () => ({
        id: "retrieval_embedding_heavy",
        description: "Bias retrieval toward embedding similarity when vectors are fresh and available.",
        overrides: {
          modelOverride: inheritedModel(),
          retrievalBlendOverride: {
            lexicalWeight: 0.26,
            embeddingWeight: 0.42,
            recencyWeight: 0.12,
            salienceWeight: 0.12,
            typeWeight: 0.04,
            semanticRescueBonus: 0.08
          }
        }
      })
    ],
    [
      "report_model_bump",
      () => ({
        id: "report_model_bump",
        description: "Try the manual-bump local report model for richer structure.",
        overrides: { modelOverride: "ollama/qwen3:14b" }
      })
    ]
  ]);

  const factory = factories.get(variantId);
  if (!factory) {
    throw new Error(`Unknown improvement variant: ${variantId}`);
  }
  return factory();
}
94
-
95
/**
 * Reduces a full evaluation record to the fields stored in improvement artifacts.
 *
 * Missing rubric values become null, missing embedding counters become 0, and
 * a missing `retrieval` section is reported as null.
 *
 * @param {object} evaluation - Evaluation record; reads `filePath`, `rubric`,
 *   `retrieval`, `interaction` and `comparisonNotes`.
 * @returns {object} The condensed evaluation summary.
 */
function summarizeEvaluation(evaluation) {
  const components = evaluation.rubric?.components;
  const source = evaluation.retrieval;

  let retrieval = null;
  if (source) {
    retrieval = {
      memoryCount: source.memoryCount,
      lexicalCount: source.lexicalCount,
      semanticCount: source.semanticCount,
      qmdCount: source.qmdCount,
      embeddingQueryMode: source.embeddingQueryMode || null,
      freshEmbeddingCount: source.freshEmbeddingCount || 0,
      staleEmbeddingCount: source.staleEmbeddingCount || 0,
      missingEmbeddingCount: source.missingEmbeddingCount || 0,
      failedEmbeddingCount: source.failedEmbeddingCount || 0,
      embeddingError: source.embeddingError || null
    };
  }

  return {
    filePath: evaluation.filePath,
    overallScore: evaluation.rubric?.overallScore ?? null,
    contractAdherence: components?.contractAdherence ?? null,
    v2OutputQuality: components?.v2OutputQuality ?? null,
    retrieval,
    interaction: evaluation.interaction || null,
    findings: evaluation.comparisonNotes || []
  };
}
119
-
120
/**
 * Converts a snake_case target id to camelCase.
 *
 * Falsy input is treated as an empty string. Only an underscore followed by a
 * lowercase ASCII letter is rewritten; everything else is left untouched.
 *
 * @param {*} targetId - Target id in either snake_case or camelCase.
 * @returns {string} The camelCase id.
 */
function normalizeTargetId(targetId) {
  const raw = String(targetId || "");
  const upperCaseCaptured = (_whole, letter) => letter.toUpperCase();
  return raw.replace(/_([a-z])/g, upperCaseCaptured);
}
123
-
124
/**
 * Condenses a thrown value into the `error` section of a failed-run artifact.
 *
 * Anything can be thrown in JavaScript, so every property read is null-safe.
 * For values without a `message` (strings, null, undefined) the message falls
 * back to `String(error)`, so callers always get a defined message.
 *
 * @param {*} error - The caught value, normally an Error that may carry
 *   `runFile` and `fallback` properties.
 * @returns {{message: string, runFile: *, classification: *, fallback: *}}
 *   The failure summary; `classification` is whatever `classifyRuntimeFailure`
 *   returns for the message.
 */
function summarizeFailure(error) {
  // `??` rather than `||` so an Error with an empty message keeps reporting "".
  const message = error?.message ?? String(error);
  return {
    message,
    runFile: error?.runFile || null,
    classification: classifyRuntimeFailure(message || ""),
    fallback: error?.fallback || null
  };
}
132
-
133
/**
 * Builds the `skip` section of a skipped-run artifact.
 *
 * `details` is copied in after `reason`, so a `reason` key inside `details`
 * takes precedence over the first argument.
 *
 * @param {*} reason - Why the run was skipped.
 * @param {object} [details={}] - Extra fields to attach to the summary.
 * @returns {object} A new object containing `reason` plus every entry of `details`.
 */
function summarizeSkip(reason, details = {}) {
  const summary = { reason };
  return Object.assign(summary, details);
}
139
-
140
/**
 * Compares two evaluation summaries and reports whether the candidate improved.
 *
 * A finding is recorded for each of overall score, contract adherence and
 * output quality when both sides have a value and they differ. Further
 * findings are added when the candidate has more semantic retrieval hits or
 * more embedding failures than the baseline.
 *
 * `improved` is true only when both overall scores exist, the candidate's is
 * strictly higher, contract adherence did not drop (missing values count as
 * 0), and the embedding failure count did not rise.
 *
 * @param {object|null|undefined} baseline - Baseline evaluation summary.
 * @param {object|null|undefined} candidate - Candidate evaluation summary.
 * @returns {{improved: boolean, baseline: *, candidate: *, findings: string[]}}
 *   The verdict, the two inputs as given, and the findings.
 */
function compareEvaluations(baseline, candidate) {
  const findings = [];

  // Records a note only when both values are present and actually differ.
  const noteTrend = (before, after, betterNote, worseNote) => {
    if (before == null || after == null) {
      return;
    }
    if (after > before) {
      findings.push(betterNote);
    } else if (after < before) {
      findings.push(worseNote);
    }
  };

  const scoreBefore = baseline?.overallScore ?? null;
  const scoreAfter = candidate?.overallScore ?? null;
  noteTrend(
    scoreBefore,
    scoreAfter,
    "Candidate improved overall deterministic eval score.",
    "Candidate regressed on overall deterministic eval score."
  );

  const contractBefore = baseline?.contractAdherence ?? null;
  const contractAfter = candidate?.contractAdherence ?? null;
  noteTrend(
    contractBefore,
    contractAfter,
    "Candidate improved contract adherence.",
    "Candidate regressed on contract adherence."
  );

  noteTrend(
    baseline?.v2OutputQuality ?? null,
    candidate?.v2OutputQuality ?? null,
    "Candidate improved output-quality signals.",
    "Candidate regressed on output-quality signals."
  );

  const semanticBefore = baseline?.retrieval?.semanticCount ?? 0;
  const semanticAfter = candidate?.retrieval?.semanticCount ?? 0;
  if (semanticAfter > semanticBefore) {
    findings.push("Candidate surfaced more semantic retrieval hits.");
  }

  const failuresBefore = baseline?.retrieval?.failedEmbeddingCount || 0;
  const failuresAfter = candidate?.retrieval?.failedEmbeddingCount || 0;
  if (failuresAfter > failuresBefore) {
    findings.push("Candidate increased embedding failure count.");
  }

  const scoreRose = scoreAfter != null && scoreBefore != null && scoreAfter > scoreBefore;
  const contractHeld = (contractAfter ?? 0) >= (contractBefore ?? 0);
  const failuresHeld = failuresAfter <= failuresBefore;

  return {
    improved: scoreRose && contractHeld && failuresHeld,
    baseline,
    candidate,
    findings
  };
}
182
-
183
/**
 * Creates the `embedding` section of an improvement artifact with a fixed set
 * of keys. Omitted or undefined fields fall back to null (`rebuilt` to false).
 *
 * @param {object} [summaryInput={}] - Optional fields.
 * @param {*} [summaryInput.readiness=null] - Embedding readiness report.
 * @param {*} [summaryInput.health=null] - Embedding health snapshot.
 * @param {*} [summaryInput.rebuilt=false] - Whether embeddings were rebuilt.
 * @param {*} [summaryInput.blockedReason=null] - Why the variant was blocked, if it was.
 * @param {*} [summaryInput.queryMode=null] - Embedding query mode that was used.
 * @returns {{readiness: *, health: *, rebuilt: *, blockedReason: *, queryMode: *}}
 *   The embedding summary.
 */
function buildEmbeddingSummary(summaryInput = {}) {
  const {
    readiness = null,
    health = null,
    rebuilt = false,
    blockedReason = null,
    queryMode = null
  } = summaryInput;

  const summary = { readiness, health, rebuilt, blockedReason, queryMode };
  return summary;
}
192
-
193
- export class ImprovementHarness {
194
- constructor({ projectRoot, stateRoot, executor, evaluator }) {
195
- this.projectRoot = projectRoot;
196
- this.stateRoot = stateRoot;
197
- this.executor = executor;
198
- this.evaluator = evaluator;
199
- this.loader = new ConfigLoader({ rootDir: path.join(projectRoot, "config") });
200
- this.runStore = new RunStore({ rootDir: path.join(stateRoot, "improvements") });
201
- }
202
-
203
- async findLatestVariantArtifact(targetId, variantId, options = {}) {
204
- const targetDir = path.join(this.stateRoot, "improvements", normalizeTargetId(targetId));
205
- const files = (await listFilesRecursive(targetDir)).filter((filePath) => filePath.endsWith(".json")).sort().reverse();
206
-
207
- for (const filePath of files) {
208
- const artifact = await readJson(filePath, null);
209
- if (!artifact) continue;
210
- if (artifact.targetId !== normalizeTargetId(targetId)) continue;
211
- if (artifact.variant?.id !== variantId) continue;
212
- if (options.successfulOnly && !artifact.ok) continue;
213
- return {
214
- filePath,
215
- ...artifact
216
- };
217
- }
218
-
219
- return null;
220
- }
221
-
222
- async listTargets() {
223
- const config = await this.loader.loadAll();
224
- return Object.entries(config.improvementTargets || {}).map(([targetId, target]) => ({
225
- id: targetId,
226
- jobId: target.jobId,
227
- defaultMode: target.defaultMode || "provider",
228
- defaultModelOverride: target.defaultModelOverride || null,
229
- defaultTimeoutMs: target.defaultTimeoutMs || null,
230
- allowedKnobs: target.allowedKnobs || [],
231
- recommendedVariants: target.recommendedVariants || []
232
- }));
233
- }
234
-
235
- async getTarget(targetId) {
236
- const config = await this.loader.loadAll();
237
- const resolvedTargetId = normalizeTargetId(targetId);
238
- const target = config.improvementTargets?.[resolvedTargetId];
239
- if (!target) {
240
- throw new Error(`Unknown improvement target: ${targetId}`);
241
- }
242
- return {
243
- id: resolvedTargetId,
244
- ...target
245
- };
246
- }
247
-
248
- async checkVariantPrerequisites(target, variant) {
249
- if (variant.id !== "retrieval_embedding_heavy") {
250
- return null;
251
- }
252
-
253
- const runtime = await this.executor.scheduler.loadRuntime();
254
- const job = runtime.jobs?.[target.jobId];
255
- const agentId = job?.agentId;
256
- if (!agentId) {
257
- return {
258
- blocked: true,
259
- reason: "Could not resolve agent for embedding-heavy variant."
260
- };
261
- }
262
-
263
- const health = await this.executor.scheduler.memoryStore.getEmbeddingHealth(agentId, {
264
- embeddingIndex: this.executor.scheduler.embeddingIndex,
265
- probe: false
266
- });
267
-
268
- if ((health.embeddingHealth?.freshCount || 0) > 0) {
269
- return null;
270
- }
271
-
272
- const embeddingService = this.executor.scheduler.embeddingIndex?.embeddingService || null;
273
- if (!embeddingService) {
274
- return {
275
- blocked: true,
276
- reason: "No embedding service is configured for this runtime.",
277
- health
278
- };
279
- }
280
-
281
- const readiness = await embeddingService.getReadiness();
282
- if (readiness?.ready) {
283
- try {
284
- await this.executor.scheduler.memoryStore.rebuildEmbeddings(agentId, {
285
- embeddingIndex: this.executor.scheduler.embeddingIndex
286
- });
287
- const refreshed = await this.executor.scheduler.memoryStore.getEmbeddingHealth(agentId, {
288
- embeddingIndex: this.executor.scheduler.embeddingIndex,
289
- probe: false
290
- });
291
- if ((refreshed.embeddingHealth?.freshCount || 0) > 0) {
292
- return {
293
- blocked: false,
294
- rebuilt: true,
295
- health: refreshed,
296
- readiness
297
- };
298
- }
299
- return {
300
- blocked: true,
301
- reason: "Embeddings were rebuildable but no fresh vectors were produced for this lane.",
302
- health: refreshed,
303
- readiness,
304
- rebuilt: true
305
- };
306
- } catch (error) {
307
- const refreshed = await this.executor.scheduler.memoryStore.getEmbeddingHealth(agentId, {
308
- embeddingIndex: this.executor.scheduler.embeddingIndex,
309
- probe: false
310
- });
311
- return {
312
- blocked: true,
313
- reason: error?.message || String(error),
314
- health: refreshed,
315
- readiness,
316
- rebuilt: true
317
- };
318
- }
319
- }
320
-
321
- return {
322
- blocked: true,
323
- reason: readiness?.reason || "Embeddings are unavailable or degraded for this lane.",
324
- health,
325
- readiness
326
- };
327
- }
328
-
329
- async runVariant(targetId, variantId = "baseline", options = {}) {
330
- const target = await this.getTarget(targetId);
331
- const variant = buildVariant(target, variantId);
332
- const mode = target.defaultMode || "provider";
333
- let embedding = buildEmbeddingSummary();
334
- let artifact;
335
- try {
336
- const prerequisiteFailure = await this.checkVariantPrerequisites(target, variant);
337
- embedding = buildEmbeddingSummary({
338
- readiness: prerequisiteFailure?.readiness || null,
339
- health: prerequisiteFailure?.health?.embeddingHealth || null,
340
- rebuilt: Boolean(prerequisiteFailure?.rebuilt),
341
- blockedReason: prerequisiteFailure?.blocked ? prerequisiteFailure.reason : null
342
- });
343
- if (prerequisiteFailure?.blocked) {
344
- artifact = {
345
- timestamp: new Date().toISOString(),
346
- targetId: target.id,
347
- jobId: target.jobId,
348
- variant,
349
- ok: false,
350
- skipped: true,
351
- embedding,
352
- skip: summarizeSkip(prerequisiteFailure.reason, {
353
- embeddingHealth: prerequisiteFailure.health?.embeddingHealth || null,
354
- embeddingReadiness: prerequisiteFailure.readiness || null
355
- })
356
- };
357
- const filePath = await this.runStore.saveRun(target.id, artifact);
358
- return {
359
- filePath,
360
- ...artifact
361
- };
362
- }
363
-
364
- const run = await this.executor.executeJob(target.jobId, {
365
- mode,
366
- shadowImport: true,
367
- modelOverride: variant.overrides.modelOverride || null,
368
- reportGuidanceOverride: variant.overrides.reportGuidanceOverride || null,
369
- retrievalBlendOverride: variant.overrides.retrievalBlendOverride || null,
370
- providerTimeoutMs: target.defaultTimeoutMs || null,
371
- allowReportFallback: options.allowReportFallback === true
372
- });
373
- const evaluation = await this.evaluator.evaluateAndPersistJob(target.jobId);
374
- artifact = {
375
- timestamp: new Date().toISOString(),
376
- targetId: target.id,
377
- jobId: target.jobId,
378
- variant,
379
- ok: true,
380
- embedding: buildEmbeddingSummary({
381
- readiness: embedding.readiness,
382
- health: evaluation.retrieval?.embeddingHealth || embedding.health,
383
- rebuilt: embedding.rebuilt,
384
- blockedReason: null,
385
- queryMode: evaluation.retrieval?.embeddingQueryMode || null
386
- }),
387
- run: {
388
- filePath: run.filePath,
389
- mode: run.mode,
390
- providerId: run.providerId,
391
- modelId: run.modelId,
392
- routingDecision: run.routingDecision,
393
- preflight: run.preflight,
394
- summary: run.result?.summary || null,
395
- fallback: run.fallback || null
396
- },
397
- evaluation: summarizeEvaluation(evaluation)
398
- };
399
- } catch (error) {
400
- artifact = {
401
- timestamp: new Date().toISOString(),
402
- targetId: target.id,
403
- jobId: target.jobId,
404
- variant,
405
- ok: false,
406
- embedding,
407
- error: summarizeFailure(error)
408
- };
409
- }
410
- const filePath = await this.runStore.saveRun(target.id, artifact);
411
- return {
412
- filePath,
413
- ...artifact
414
- };
415
- }
416
-
417
- async compareVariants(targetId, baselineVariantId = "baseline", candidateVariantId, options = {}) {
418
- if (!candidateVariantId) {
419
- throw new Error("compareVariants requires a candidate variant id.");
420
- }
421
- const baseline = (await this.findLatestVariantArtifact(targetId, baselineVariantId, { successfulOnly: true })) ||
422
- (await this.runVariant(targetId, baselineVariantId, options));
423
- const candidate = await this.runVariant(targetId, candidateVariantId, options);
424
- let comparisonResult;
425
- if (!baseline.ok || !candidate.ok) {
426
- comparisonResult = {
427
- improved: false,
428
- baseline: baseline.ok ? baseline.evaluation : null,
429
- candidate: candidate.ok ? candidate.evaluation : null,
430
- findings: [
431
- ...(!baseline.ok
432
- ? [baseline.skipped ? `Baseline variant skipped: ${baseline.skip.reason}` : `Baseline variant failed: ${baseline.error.message}`]
433
- : []),
434
- ...(!candidate.ok
435
- ? [candidate.skipped ? `Candidate variant skipped: ${candidate.skip.reason}` : `Candidate variant failed: ${candidate.error.message}`]
436
- : [])
437
- ]
438
- };
439
- } else {
440
- comparisonResult = compareEvaluations(baseline.evaluation, candidate.evaluation);
441
- if (
442
- candidateVariantId === "retrieval_embedding_heavy" &&
443
- (
444
- candidate.evaluation?.retrieval?.embeddingQueryMode === "lexical_fallback" ||
445
- (candidate.evaluation?.retrieval?.freshEmbeddingCount || 0) === 0
446
- )
447
- ) {
448
- comparisonResult.improved = false;
449
- comparisonResult.findings.push(
450
- "Candidate could not exercise embedding-heavy retrieval because embeddings were unavailable or degraded."
451
- );
452
- }
453
- }
454
- const comparison = {
455
- timestamp: new Date().toISOString(),
456
- targetId: baseline.targetId,
457
- baselineVariantId,
458
- candidateVariantId,
459
- fallback: {
460
- baseline: baseline.run?.fallback || baseline.error?.fallback || null,
461
- candidate: candidate.run?.fallback || candidate.error?.fallback || null
462
- },
463
- embedding: {
464
- baseline: baseline.embedding || null,
465
- candidate: candidate.embedding || null
466
- },
467
- comparison: comparisonResult
468
- };
469
- const filePath = await this.runStore.saveRun(`${baseline.targetId}-comparison`, comparison);
470
- return {
471
- filePath,
472
- ...comparison
473
- };
474
- }
475
-
476
- async repairAndCompare(targetId, candidateVariantId, baselineVariantId = "baseline", options = {}) {
477
- if (!candidateVariantId) {
478
- throw new Error("repairAndCompare requires a candidate variant id.");
479
- }
480
-
481
- const target = await this.getTarget(targetId);
482
- const candidateVariant = buildVariant(target, candidateVariantId);
483
- const prerequisite = await this.checkVariantPrerequisites(target, candidateVariant);
484
-
485
- if (prerequisite?.blocked) {
486
- const artifact = {
487
- timestamp: new Date().toISOString(),
488
- targetId: target.id,
489
- baselineVariantId,
490
- candidateVariantId,
491
- repaired: false,
492
- blocked: true,
493
- embedding: buildEmbeddingSummary({
494
- readiness: prerequisite.readiness || null,
495
- health: prerequisite.health?.embeddingHealth || null,
496
- rebuilt: Boolean(prerequisite.rebuilt),
497
- blockedReason: prerequisite.reason
498
- }),
499
- findings: [`Repair blocked: ${prerequisite.reason}`]
500
- };
501
- const filePath = await this.runStore.saveRun(`${target.id}-repair`, artifact);
502
- return {
503
- filePath,
504
- ...artifact
505
- };
506
- }
507
-
508
- const rerunBaseline = Boolean(prerequisite?.rebuilt);
509
- const baseline = rerunBaseline
510
- ? await this.runVariant(targetId, baselineVariantId, options)
511
- : (await this.findLatestVariantArtifact(targetId, baselineVariantId, { successfulOnly: true })) ||
512
- (await this.runVariant(targetId, baselineVariantId, options));
513
- const candidate = await this.runVariant(targetId, candidateVariantId, options);
514
-
515
- let comparisonResult;
516
- if (!baseline.ok || !candidate.ok) {
517
- comparisonResult = {
518
- improved: false,
519
- baseline: baseline.ok ? baseline.evaluation : null,
520
- candidate: candidate.ok ? candidate.evaluation : null,
521
- findings: [
522
- ...(!baseline.ok
523
- ? [baseline.skipped ? `Baseline variant skipped: ${baseline.skip.reason}` : `Baseline variant failed: ${baseline.error.message}`]
524
- : []),
525
- ...(!candidate.ok
526
- ? [candidate.skipped ? `Candidate variant skipped: ${candidate.skip.reason}` : `Candidate variant failed: ${candidate.error.message}`]
527
- : [])
528
- ]
529
- };
530
- } else {
531
- comparisonResult = compareEvaluations(baseline.evaluation, candidate.evaluation);
532
- if (
533
- candidateVariantId === "retrieval_embedding_heavy" &&
534
- (
535
- candidate.evaluation?.retrieval?.embeddingQueryMode === "lexical_fallback" ||
536
- (candidate.evaluation?.retrieval?.freshEmbeddingCount || 0) === 0
537
- )
538
- ) {
539
- comparisonResult.improved = false;
540
- comparisonResult.findings.push(
541
- "Candidate could not exercise embedding-heavy retrieval because embeddings were unavailable or degraded."
542
- );
543
- }
544
- }
545
-
546
- const artifact = {
547
- timestamp: new Date().toISOString(),
548
- targetId: target.id,
549
- baselineVariantId,
550
- candidateVariantId,
551
- repaired: Boolean(prerequisite?.rebuilt),
552
- blocked: false,
553
- fallback: {
554
- baseline: baseline.run?.fallback || baseline.error?.fallback || null,
555
- candidate: candidate.run?.fallback || candidate.error?.fallback || null
556
- },
557
- embedding: {
558
- readiness: prerequisite?.readiness || null,
559
- health: candidate.embedding?.health || prerequisite?.health?.embeddingHealth || null,
560
- rebuilt: Boolean(prerequisite?.rebuilt),
561
- blockedReason: null,
562
- queryMode: candidate.embedding?.queryMode || null
563
- },
564
- baseline: {
565
- filePath: baseline.filePath,
566
- evaluationFile: baseline.evaluation?.filePath || null
567
- },
568
- candidate: {
569
- filePath: candidate.filePath,
570
- evaluationFile: candidate.evaluation?.filePath || null,
571
- status: candidate.ok ? "evaluated" : candidate.skipped ? "skipped" : candidate.error?.classification || "failed"
572
- },
573
- comparison: comparisonResult
574
- };
575
- const filePath = await this.runStore.saveRun(`${target.id}-repair`, artifact);
576
- return {
577
- filePath,
578
- ...artifact
579
- };
580
- }
581
- }
1
+ import path from "node:path";
2
+ import { ConfigLoader } from "../config/loader.js";
3
+ import { RunStore } from "./run-store.js";
4
+ import { listFilesRecursive, readJson } from "../utils/fs.js";
5
+ import { classifyRuntimeFailure } from "./report-fallback.js";
6
+
7
/**
 * Recursively merge `override` onto `base` without mutating either input.
 *
 * Rules:
 * - If either side is an array, arrays are replaced rather than merged: a copy
 *   of `override` when it is an array, otherwise a copy of `base`.
 * - A non-object `base` yields `override` (or `base` when `override` is undefined).
 * - A non-object `override` replaces `base` (or yields a shallow copy of `base`
 *   when `override` is undefined).
 * - Otherwise keys are merged; keys present on both sides recurse.
 *
 * Only own properties of `base` count as "present", and an own `__proto__` key
 * in `override` (as produced by JSON.parse) is ignored so a merged config can
 * never swap the prototype of the result.
 *
 * @param {*} base - Default value.
 * @param {*} override - Value layered on top of `base`.
 * @returns {*} The merged value.
 */
function _deepMerge(base, override) {
  if (Array.isArray(base) || Array.isArray(override)) {
    return Array.isArray(override) ? [...override] : Array.isArray(base) ? [...base] : [];
  }

  if (!base || typeof base !== "object") {
    return override === undefined ? base : override;
  }

  if (!override || typeof override !== "object") {
    return override === undefined ? { ...base } : override;
  }

  const merged = { ...base };
  for (const [key, value] of Object.entries(override)) {
    // Assigning merged["__proto__"] would invoke the prototype setter.
    if (key === "__proto__") {
      continue;
    }
    // `key in base` would also match inherited members such as "toString".
    const presentOnBase = Object.prototype.hasOwnProperty.call(base, key);
    merged[key] = presentOnBase ? _deepMerge(base[key], value) : value;
  }
  return merged;
}
26
+
27
/**
 * Build the descriptor for a named improvement variant.
 *
 * Every variant except "report_model_bump" carries the target's
 * `defaultModelOverride` (or null) as its model override; the retrieval and
 * guidance variants add their own override payload on top.
 *
 * @param {object} target - Improvement target; only `defaultModelOverride` is read.
 * @param {string} variantId - One of the known variant ids.
 * @returns {{id: string, description: string, overrides: object}} Fresh descriptor.
 * @throws {Error} When `variantId` is not a known variant.
 */
function buildVariant(target, variantId) {
  // Evaluated lazily so variants that ignore the target never touch it.
  const withTargetModel = (extra = {}) => ({
    modelOverride: target.defaultModelOverride || null,
    ...extra
  });

  // Factories (not literals) so each call returns fresh objects and arrays.
  const recipes = new Map([
    [
      "baseline",
      () => ({
        description: "Run the lane with its current guidance and routing defaults.",
        overrides: withTargetModel()
      })
    ],
    [
      "focus_concrete",
      () => ({
        description: "Push the report to be more concrete, operator-facing, and less generic.",
        overrides: withTargetModel({
          reportGuidanceOverride: {
            focus: ["concrete evidence", "explicit operator signal", "succinct useful status"],
            qualityChecks: ["avoid generic reassurance", "prefer named facts over abstractions", "state None explicitly"],
            avoid: ["vague encouragement", "boilerplate filler"]
          }
        })
      })
    ],
    [
      "retrieval_lexical_heavy",
      () => ({
        description: "Bias retrieval toward lexical match signal for clearer grounded reports.",
        overrides: withTargetModel({
          retrievalBlendOverride: {
            lexicalWeight: 0.48,
            embeddingWeight: 0.2,
            recencyWeight: 0.14,
            salienceWeight: 0.12,
            typeWeight: 0.04,
            semanticRescueBonus: 0.04
          }
        })
      })
    ],
    [
      "retrieval_embedding_heavy",
      () => ({
        description: "Bias retrieval toward embedding similarity when vectors are fresh and available.",
        overrides: withTargetModel({
          retrievalBlendOverride: {
            lexicalWeight: 0.26,
            embeddingWeight: 0.42,
            recencyWeight: 0.12,
            salienceWeight: 0.12,
            typeWeight: 0.04,
            semanticRescueBonus: 0.08
          }
        })
      })
    ],
    [
      "report_model_bump",
      () => ({
        description: "Try the manual-bump local report model for richer structure.",
        overrides: {
          modelOverride: "ollama/qwen3:14b"
        }
      })
    ]
  ]);

  const recipe = recipes.get(variantId);
  if (!recipe) {
    throw new Error(`Unknown improvement variant: ${variantId}`);
  }
  return { id: variantId, ...recipe() };
}
94
+
95
/**
 * Reduce a persisted evaluation to the fields stored in improvement artifacts.
 *
 * Missing rubric values become null, missing embedding counters become 0, and
 * a missing retrieval section becomes null.
 *
 * @param {object} evaluation - Evaluation record; must be a non-null object.
 * @returns {object} Compact summary (scores, retrieval counters, interaction, findings).
 */
function summarizeEvaluation(evaluation) {
  const { filePath, rubric, retrieval, interaction, comparisonNotes } = evaluation;
  const components = rubric?.components;

  let retrievalSummary = null;
  if (retrieval) {
    retrievalSummary = {
      memoryCount: retrieval.memoryCount,
      lexicalCount: retrieval.lexicalCount,
      semanticCount: retrieval.semanticCount,
      qmdCount: retrieval.qmdCount,
      embeddingQueryMode: retrieval.embeddingQueryMode || null,
      freshEmbeddingCount: retrieval.freshEmbeddingCount || 0,
      staleEmbeddingCount: retrieval.staleEmbeddingCount || 0,
      missingEmbeddingCount: retrieval.missingEmbeddingCount || 0,
      failedEmbeddingCount: retrieval.failedEmbeddingCount || 0,
      embeddingError: retrieval.embeddingError || null
    };
  }

  return {
    filePath,
    overallScore: rubric?.overallScore ?? null,
    contractAdherence: components?.contractAdherence ?? null,
    v2OutputQuality: components?.v2OutputQuality ?? null,
    retrieval: retrievalSummary,
    interaction: interaction || null,
    findings: comparisonNotes || []
  };
}
119
+
120
/**
 * Convert a snake_case target id to camelCase.
 *
 * Each "_" followed by a lowercase ASCII letter is replaced by that letter in
 * upper case; falsy input yields an empty string.
 *
 * @param {*} targetId - Raw target id.
 * @returns {string} Normalized id.
 */
function normalizeTargetId(targetId) {
  const raw = targetId ? String(targetId) : "";
  return raw.replace(/_([a-z])/g, (segment) => segment.charAt(1).toUpperCase());
}
123
+
124
/**
 * Convert a thrown value into the failure record stored on a run artifact.
 *
 * Accepts anything that can be thrown: for values without a `message`
 * (strings, null, undefined) the stringified value is used as the message,
 * so the summary never throws and never records an `undefined` message.
 *
 * @param {*} error - The caught value, usually an Error.
 * @returns {{message: *, runFile: *, classification: *, fallback: *}} Failure summary;
 *   `classification` is whatever classifyRuntimeFailure returns for the message.
 */
function summarizeFailure(error) {
  const message = error?.message ?? String(error);
  return {
    message,
    runFile: error?.runFile || null,
    classification: classifyRuntimeFailure(message || ""),
    fallback: error?.fallback || null
  };
}
132
+
133
/**
 * Build the `skip` record for a variant that was not run.
 *
 * @param {*} reason - Why the variant was skipped.
 * @param {object} [details={}] - Extra fields spread after `reason`; a
 *   `reason` key in `details` therefore wins.
 * @returns {object} `{ reason, ...details }`.
 */
function summarizeSkip(reason, details = {}) {
  const summary = { reason, ...details };
  return summary;
}
139
+
140
/**
 * Compare two evaluation summaries and decide whether the candidate improved.
 *
 * The candidate counts as improved only when both overall scores are present,
 * the candidate's is strictly higher, contract adherence did not drop
 * (missing values count as 0), and the embedding failure count did not grow.
 *
 * @param {object|null} baseline - Baseline evaluation summary.
 * @param {object|null} candidate - Candidate evaluation summary.
 * @returns {{improved: boolean, baseline: *, candidate: *, findings: string[]}}
 */
function compareEvaluations(baseline, candidate) {
  const findings = [];

  // Records a note only when both values exist and actually differ.
  const noteShift = (before, after, gainNote, lossNote) => {
    if (before == null || after == null) {
      return;
    }
    if (after > before) {
      findings.push(gainNote);
    } else if (after < before) {
      findings.push(lossNote);
    }
  };

  const scoreBefore = baseline?.overallScore ?? null;
  const scoreAfter = candidate?.overallScore ?? null;
  const contractBefore = baseline?.contractAdherence ?? null;
  const contractAfter = candidate?.contractAdherence ?? null;
  const failuresBefore = baseline?.retrieval?.failedEmbeddingCount || 0;
  const failuresAfter = candidate?.retrieval?.failedEmbeddingCount || 0;

  noteShift(
    scoreBefore,
    scoreAfter,
    "Candidate improved overall deterministic eval score.",
    "Candidate regressed on overall deterministic eval score."
  );
  noteShift(
    contractBefore,
    contractAfter,
    "Candidate improved contract adherence.",
    "Candidate regressed on contract adherence."
  );
  noteShift(
    baseline?.v2OutputQuality ?? null,
    candidate?.v2OutputQuality ?? null,
    "Candidate improved output-quality signals.",
    "Candidate regressed on output-quality signals."
  );

  if ((candidate?.retrieval?.semanticCount ?? 0) > (baseline?.retrieval?.semanticCount ?? 0)) {
    findings.push("Candidate surfaced more semantic retrieval hits.");
  }
  if (failuresAfter > failuresBefore) {
    findings.push("Candidate increased embedding failure count.");
  }

  let improved = false;
  if (scoreBefore != null && scoreAfter != null && scoreAfter > scoreBefore) {
    improved = (contractAfter ?? 0) >= (contractBefore ?? 0) && failuresAfter <= failuresBefore;
  }

  return { improved, baseline, candidate, findings };
}
182
+
183
/**
 * Assemble the embedding section recorded on run and repair artifacts.
 *
 * Any field that is omitted (or undefined) falls back to its default:
 * null for everything except `rebuilt`, which defaults to false.
 *
 * @param {object} [fields={}] - Optional readiness, health, rebuilt, blockedReason, queryMode.
 * @returns {{readiness: *, health: *, rebuilt: *, blockedReason: *, queryMode: *}}
 */
function buildEmbeddingSummary(fields = {}) {
  const {
    readiness = null,
    health = null,
    rebuilt = false,
    blockedReason = null,
    queryMode = null
  } = fields;
  return { readiness, health, rebuilt, blockedReason, queryMode };
}
192
+
193
/**
 * Describe why a variant artifact could not be compared.
 *
 * @param {string} label - "Baseline" or "Candidate"; prefixes the finding.
 * @param {object} outcome - Artifact returned by runVariant.
 * @returns {string[]} Empty when the outcome is ok, otherwise one finding.
 */
function describeVariantShortfall(label, outcome) {
  if (outcome.ok) {
    return [];
  }
  return [
    outcome.skipped
      ? `${label} variant skipped: ${outcome.skip.reason}`
      : `${label} variant failed: ${outcome.error.message}`
  ];
}

/**
 * Build the comparison section shared by compareVariants and repairAndCompare.
 *
 * @param {object} baseline - Baseline variant artifact.
 * @param {object} candidate - Candidate variant artifact.
 * @param {string} candidateVariantId - Id of the candidate variant.
 * @returns {{improved: boolean, baseline: *, candidate: *, findings: string[]}}
 */
function resolveVariantComparison(baseline, candidate, candidateVariantId) {
  if (!baseline.ok || !candidate.ok) {
    return {
      improved: false,
      baseline: baseline.ok ? baseline.evaluation : null,
      candidate: candidate.ok ? candidate.evaluation : null,
      findings: [
        ...describeVariantShortfall("Baseline", baseline),
        ...describeVariantShortfall("Candidate", candidate)
      ]
    };
  }

  const result = compareEvaluations(baseline.evaluation, candidate.evaluation);
  const retrieval = candidate.evaluation?.retrieval;
  const embeddingsUnused =
    retrieval?.embeddingQueryMode === "lexical_fallback" || (retrieval?.freshEmbeddingCount || 0) === 0;
  if (candidateVariantId === "retrieval_embedding_heavy" && embeddingsUnused) {
    // A score gain is not credited to the embedding-heavy variant when the run did not use fresh embeddings.
    result.improved = false;
    result.findings.push(
      "Candidate could not exercise embedding-heavy retrieval because embeddings were unavailable or degraded."
    );
  }
  return result;
}

/**
 * Runs improvement variants of a job, evaluates them, and stores the resulting
 * run / comparison / repair artifacts under `<stateRoot>/improvements`.
 */
export class ImprovementHarness {
  /**
   * @param {object} deps
   * @param {string} deps.projectRoot - Project root; configuration is loaded from its `config` directory.
   * @param {string} deps.stateRoot - State root; artifacts go to its `improvements` directory.
   * @param {object} deps.executor - Provides `executeJob` and a `scheduler`.
   * @param {object} deps.evaluator - Provides `evaluateAndPersistJob`.
   */
  constructor({ projectRoot, stateRoot, executor, evaluator }) {
    this.projectRoot = projectRoot;
    this.stateRoot = stateRoot;
    this.executor = executor;
    this.evaluator = evaluator;
    this.loader = new ConfigLoader({ rootDir: path.join(projectRoot, "config") });
    this.runStore = new RunStore({ rootDir: path.join(stateRoot, "improvements") });
  }

  /**
   * Find the most recent stored artifact for a target/variant pair.
   *
   * Files are visited in reverse lexicographic path order.
   * NOTE(review): this presumably relies on run file names sorting chronologically - confirm against RunStore.
   *
   * @param {string} targetId - Target id (normalized before use).
   * @param {string} variantId - Variant id to match against `artifact.variant.id`.
   * @param {{successfulOnly?: boolean}} [options={}] - When `successfulOnly` is set, non-ok artifacts are ignored.
   * @returns {Promise<object|null>} The artifact plus its `filePath`, or null.
   */
  async findLatestVariantArtifact(targetId, variantId, options = {}) {
    const resolvedTargetId = normalizeTargetId(targetId);
    const targetDir = path.join(this.stateRoot, "improvements", resolvedTargetId);
    const files = (await listFilesRecursive(targetDir))
      .filter((filePath) => filePath.endsWith(".json"))
      .sort()
      .reverse();

    for (const filePath of files) {
      const artifact = await readJson(filePath, null);
      if (!artifact) continue;
      if (artifact.targetId !== resolvedTargetId) continue;
      if (artifact.variant?.id !== variantId) continue;
      if (options.successfulOnly && !artifact.ok) continue;
      return {
        filePath,
        ...artifact
      };
    }

    return null;
  }

  /**
   * List the configured improvement targets with defaults filled in.
   *
   * @returns {Promise<object[]>} One entry per key of `config.improvementTargets`.
   */
  async listTargets() {
    const config = await this.loader.loadAll();
    return Object.entries(config.improvementTargets || {}).map(([targetId, target]) => ({
      id: targetId,
      jobId: target.jobId,
      defaultMode: target.defaultMode || "provider",
      defaultModelOverride: target.defaultModelOverride || null,
      defaultTimeoutMs: target.defaultTimeoutMs || null,
      allowedKnobs: target.allowedKnobs || [],
      recommendedVariants: target.recommendedVariants || []
    }));
  }

  /**
   * Load one improvement target by id.
   *
   * @param {string} targetId - Target id; snake_case ids are normalized to camelCase.
   * @returns {Promise<object>} The target config with `id` set to the normalized id
   *   (a configured `id` field, if any, takes precedence because it is spread last).
   * @throws {Error} When the target is not configured.
   */
  async getTarget(targetId) {
    const config = await this.loader.loadAll();
    const resolvedTargetId = normalizeTargetId(targetId);
    const target = config.improvementTargets?.[resolvedTargetId];
    if (!target) {
      throw new Error(`Unknown improvement target: ${targetId}`);
    }
    return {
      id: resolvedTargetId,
      ...target
    };
  }

  /**
   * Verify that a variant can run, rebuilding embeddings when possible.
   *
   * Only "retrieval_embedding_heavy" has prerequisites. For it, the result is:
   * - null when fresh embeddings already exist;
   * - `{ blocked: false, rebuilt: true, ... }` when a rebuild produced fresh vectors;
   * - `{ blocked: true, reason, ... }` otherwise.
   *
   * @param {object} target - Target config; `jobId` is used to resolve the agent.
   * @param {object} variant - Variant descriptor; only `id` is read.
   * @returns {Promise<object|null>} Prerequisite outcome, or null when nothing blocks the run.
   */
  async checkVariantPrerequisites(target, variant) {
    if (variant.id !== "retrieval_embedding_heavy") {
      return null;
    }

    const scheduler = this.executor.scheduler;
    const runtime = await scheduler.loadRuntime();
    const agentId = runtime.jobs?.[target.jobId]?.agentId;
    if (!agentId) {
      return {
        blocked: true,
        reason: "Could not resolve agent for embedding-heavy variant."
      };
    }

    // Scheduler members are read at call time, after loadRuntime has completed.
    const readHealth = () =>
      scheduler.memoryStore.getEmbeddingHealth(agentId, {
        embeddingIndex: scheduler.embeddingIndex,
        probe: false
      });

    const health = await readHealth();
    if ((health.embeddingHealth?.freshCount || 0) > 0) {
      return null;
    }

    const embeddingService = scheduler.embeddingIndex?.embeddingService || null;
    if (!embeddingService) {
      return {
        blocked: true,
        reason: "No embedding service is configured for this runtime.",
        health
      };
    }

    const readiness = await embeddingService.getReadiness();
    if (!readiness?.ready) {
      return {
        blocked: true,
        reason: readiness?.reason || "Embeddings are unavailable or degraded for this lane.",
        health,
        readiness
      };
    }

    try {
      await scheduler.memoryStore.rebuildEmbeddings(agentId, {
        embeddingIndex: scheduler.embeddingIndex
      });
      const refreshed = await readHealth();
      if ((refreshed.embeddingHealth?.freshCount || 0) > 0) {
        return {
          blocked: false,
          rebuilt: true,
          health: refreshed,
          readiness
        };
      }
      return {
        blocked: true,
        reason: "Embeddings were rebuildable but no fresh vectors were produced for this lane.",
        health: refreshed,
        readiness,
        rebuilt: true
      };
    } catch (error) {
      // Report the rebuild failure even if the follow-up health read also
      // fails; in that case keep the snapshot taken before the rebuild.
      let latestHealth = health;
      try {
        latestHealth = await readHealth();
      } catch {
        // Deliberately ignored: the original rebuild error is the useful reason.
      }
      return {
        blocked: true,
        reason: error?.message || String(error),
        health: latestHealth,
        readiness,
        rebuilt: true
      };
    }
  }

  /**
   * Run one variant of a target, evaluate it, and persist the artifact.
   *
   * Execution and evaluation errors are captured into an `ok: false` artifact
   * rather than thrown; unmet prerequisites produce a `skipped: true` artifact.
   * Errors from resolving the target/variant or from saving the artifact propagate.
   *
   * @param {string} targetId - Improvement target id.
   * @param {string} [variantId="baseline"] - Variant to run.
   * @param {{allowReportFallback?: boolean}} [options={}] - Fallback is enabled only when strictly `true`.
   * @returns {Promise<object>} The saved artifact plus its `filePath`.
   */
  async runVariant(targetId, variantId = "baseline", options = {}) {
    const target = await this.getTarget(targetId);
    const variant = buildVariant(target, variantId);
    const mode = target.defaultMode || "provider";
    let embedding = buildEmbeddingSummary();
    let artifact;
    try {
      const prerequisite = await this.checkVariantPrerequisites(target, variant);
      embedding = buildEmbeddingSummary({
        readiness: prerequisite?.readiness || null,
        health: prerequisite?.health?.embeddingHealth || null,
        rebuilt: Boolean(prerequisite?.rebuilt),
        blockedReason: prerequisite?.blocked ? prerequisite.reason : null
      });

      if (prerequisite?.blocked) {
        artifact = {
          timestamp: new Date().toISOString(),
          targetId: target.id,
          jobId: target.jobId,
          variant,
          ok: false,
          skipped: true,
          embedding,
          skip: summarizeSkip(prerequisite.reason, {
            embeddingHealth: prerequisite.health?.embeddingHealth || null,
            embeddingReadiness: prerequisite.readiness || null
          })
        };
      } else {
        const run = await this.executor.executeJob(target.jobId, {
          mode,
          shadowImport: true,
          modelOverride: variant.overrides.modelOverride || null,
          reportGuidanceOverride: variant.overrides.reportGuidanceOverride || null,
          retrievalBlendOverride: variant.overrides.retrievalBlendOverride || null,
          providerTimeoutMs: target.defaultTimeoutMs || null,
          allowReportFallback: options.allowReportFallback === true
        });
        const evaluation = await this.evaluator.evaluateAndPersistJob(target.jobId);
        artifact = {
          timestamp: new Date().toISOString(),
          targetId: target.id,
          jobId: target.jobId,
          variant,
          ok: true,
          embedding: buildEmbeddingSummary({
            readiness: embedding.readiness,
            health: evaluation.retrieval?.embeddingHealth || embedding.health,
            rebuilt: embedding.rebuilt,
            blockedReason: null,
            queryMode: evaluation.retrieval?.embeddingQueryMode || null
          }),
          run: {
            filePath: run.filePath,
            mode: run.mode,
            providerId: run.providerId,
            modelId: run.modelId,
            routingDecision: run.routingDecision,
            preflight: run.preflight,
            summary: run.result?.summary || null,
            fallback: run.fallback || null
          },
          evaluation: summarizeEvaluation(evaluation)
        };
      }
    } catch (error) {
      artifact = {
        timestamp: new Date().toISOString(),
        targetId: target.id,
        jobId: target.jobId,
        variant,
        ok: false,
        embedding,
        error: summarizeFailure(error)
      };
    }

    // Saved outside the try so a store failure is not recorded as a variant failure.
    const filePath = await this.runStore.saveRun(target.id, artifact);
    return {
      filePath,
      ...artifact
    };
  }

  /**
   * Compare a candidate variant against a baseline and persist the comparison.
   *
   * The latest successful stored baseline artifact is reused when one exists;
   * the candidate is always run fresh.
   *
   * @param {string} targetId - Improvement target id.
   * @param {string} [baselineVariantId="baseline"] - Reference variant.
   * @param {string} candidateVariantId - Variant under test; required.
   * @param {object} [options={}] - Forwarded unchanged to runVariant.
   * @returns {Promise<object>} The saved comparison plus its `filePath`.
   * @throws {Error} When no candidate variant id is supplied.
   */
  async compareVariants(targetId, baselineVariantId = "baseline", candidateVariantId, options = {}) {
    if (!candidateVariantId) {
      throw new Error("compareVariants requires a candidate variant id.");
    }
    const baseline =
      (await this.findLatestVariantArtifact(targetId, baselineVariantId, { successfulOnly: true })) ||
      (await this.runVariant(targetId, baselineVariantId, options));
    const candidate = await this.runVariant(targetId, candidateVariantId, options);
    const comparisonResult = resolveVariantComparison(baseline, candidate, candidateVariantId);

    const comparison = {
      timestamp: new Date().toISOString(),
      targetId: baseline.targetId,
      baselineVariantId,
      candidateVariantId,
      fallback: {
        baseline: baseline.run?.fallback || baseline.error?.fallback || null,
        candidate: candidate.run?.fallback || candidate.error?.fallback || null
      },
      embedding: {
        baseline: baseline.embedding || null,
        candidate: candidate.embedding || null
      },
      comparison: comparisonResult
    };
    const filePath = await this.runStore.saveRun(`${baseline.targetId}-comparison`, comparison);
    return {
      filePath,
      ...comparison
    };
  }

  /**
   * Check (and, when possible, repair) the candidate's prerequisites, then
   * compare it against the baseline and persist a "repair" artifact.
   *
   * When the prerequisites are blocked nothing is run and a `blocked: true`
   * artifact is saved. When embeddings were rebuilt the baseline is re-run
   * instead of reusing a stored artifact.
   *
   * @param {string} targetId - Improvement target id.
   * @param {string} candidateVariantId - Variant under test; required.
   * @param {string} [baselineVariantId="baseline"] - Reference variant.
   * @param {object} [options={}] - Forwarded unchanged to runVariant.
   * @returns {Promise<object>} The saved artifact plus its `filePath`.
   * @throws {Error} When no candidate variant id is supplied.
   */
  async repairAndCompare(targetId, candidateVariantId, baselineVariantId = "baseline", options = {}) {
    if (!candidateVariantId) {
      throw new Error("repairAndCompare requires a candidate variant id.");
    }

    const target = await this.getTarget(targetId);
    const candidateVariant = buildVariant(target, candidateVariantId);
    const prerequisite = await this.checkVariantPrerequisites(target, candidateVariant);

    if (prerequisite?.blocked) {
      const blockedArtifact = {
        timestamp: new Date().toISOString(),
        targetId: target.id,
        baselineVariantId,
        candidateVariantId,
        repaired: false,
        blocked: true,
        embedding: buildEmbeddingSummary({
          readiness: prerequisite.readiness || null,
          health: prerequisite.health?.embeddingHealth || null,
          rebuilt: Boolean(prerequisite.rebuilt),
          blockedReason: prerequisite.reason
        }),
        findings: [`Repair blocked: ${prerequisite.reason}`]
      };
      const blockedPath = await this.runStore.saveRun(`${target.id}-repair`, blockedArtifact);
      return {
        filePath: blockedPath,
        ...blockedArtifact
      };
    }

    const rebuilt = Boolean(prerequisite?.rebuilt);
    const baseline = rebuilt
      ? await this.runVariant(targetId, baselineVariantId, options)
      : (await this.findLatestVariantArtifact(targetId, baselineVariantId, { successfulOnly: true })) ||
        (await this.runVariant(targetId, baselineVariantId, options));
    const candidate = await this.runVariant(targetId, candidateVariantId, options);
    const comparisonResult = resolveVariantComparison(baseline, candidate, candidateVariantId);

    const artifact = {
      timestamp: new Date().toISOString(),
      targetId: target.id,
      baselineVariantId,
      candidateVariantId,
      repaired: rebuilt,
      blocked: false,
      fallback: {
        baseline: baseline.run?.fallback || baseline.error?.fallback || null,
        candidate: candidate.run?.fallback || candidate.error?.fallback || null
      },
      embedding: {
        readiness: prerequisite?.readiness || null,
        health: candidate.embedding?.health || prerequisite?.health?.embeddingHealth || null,
        rebuilt,
        blockedReason: null,
        queryMode: candidate.embedding?.queryMode || null
      },
      baseline: {
        filePath: baseline.filePath,
        evaluationFile: baseline.evaluation?.filePath || null
      },
      candidate: {
        filePath: candidate.filePath,
        evaluationFile: candidate.evaluation?.filePath || null,
        status: candidate.ok ? "evaluated" : candidate.skipped ? "skipped" : candidate.error?.classification || "failed"
      },
      comparison: comparisonResult
    };
    const filePath = await this.runStore.saveRun(`${target.id}-repair`, artifact);
    return {
      filePath,
      ...artifact
    };
  }
}