nemoris 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. package/.env.example +49 -0
  2. package/LICENSE +21 -0
  3. package/README.md +209 -0
  4. package/SECURITY.md +119 -0
  5. package/bin/nemoris +46 -0
  6. package/config/agents/agent.toml.example +28 -0
  7. package/config/agents/default.toml +22 -0
  8. package/config/agents/orchestrator.toml +18 -0
  9. package/config/delivery.toml +73 -0
  10. package/config/embeddings.toml +5 -0
  11. package/config/identity/default-purpose.md +1 -0
  12. package/config/identity/default-soul.md +3 -0
  13. package/config/identity/orchestrator-purpose.md +1 -0
  14. package/config/identity/orchestrator-soul.md +1 -0
  15. package/config/improvement-targets.toml +15 -0
  16. package/config/jobs/heartbeat-check.toml +30 -0
  17. package/config/jobs/memory-rollup.toml +46 -0
  18. package/config/jobs/workspace-health.toml +63 -0
  19. package/config/mcp.toml +16 -0
  20. package/config/output-contracts.toml +17 -0
  21. package/config/peers.toml +32 -0
  22. package/config/peers.toml.example +32 -0
  23. package/config/policies/memory-default.toml +10 -0
  24. package/config/policies/memory-heartbeat.toml +5 -0
  25. package/config/policies/memory-ops.toml +10 -0
  26. package/config/policies/tools-heartbeat-minimal.toml +8 -0
  27. package/config/policies/tools-interactive-safe.toml +8 -0
  28. package/config/policies/tools-ops-bounded.toml +8 -0
  29. package/config/policies/tools-orchestrator.toml +7 -0
  30. package/config/providers/anthropic.toml +15 -0
  31. package/config/providers/ollama.toml +5 -0
  32. package/config/providers/openai-codex.toml +9 -0
  33. package/config/providers/openrouter.toml +5 -0
  34. package/config/router.toml +22 -0
  35. package/config/runtime.toml +114 -0
  36. package/config/skills/self-improvement.toml +15 -0
  37. package/config/skills/telegram-onboarding-spec.md +240 -0
  38. package/config/skills/workspace-monitor.toml +15 -0
  39. package/config/task-router.toml +42 -0
  40. package/install.sh +50 -0
  41. package/package.json +90 -0
  42. package/src/auth/auth-profiles.js +169 -0
  43. package/src/auth/openai-codex-oauth.js +285 -0
  44. package/src/battle.js +449 -0
  45. package/src/cli/help.js +265 -0
  46. package/src/cli/output-filter.js +49 -0
  47. package/src/cli/runtime-control.js +704 -0
  48. package/src/cli-main.js +2763 -0
  49. package/src/cli.js +78 -0
  50. package/src/config/loader.js +332 -0
  51. package/src/config/schema-validator.js +214 -0
  52. package/src/config/toml-lite.js +8 -0
  53. package/src/daemon/action-handlers.js +71 -0
  54. package/src/daemon/healing-tick.js +87 -0
  55. package/src/daemon/health-probes.js +90 -0
  56. package/src/daemon/notifier.js +57 -0
  57. package/src/daemon/nurse.js +218 -0
  58. package/src/daemon/repair-log.js +106 -0
  59. package/src/daemon/rule-staging.js +90 -0
  60. package/src/daemon/rules.js +29 -0
  61. package/src/daemon/telegram-commands.js +54 -0
  62. package/src/daemon/updater.js +85 -0
  63. package/src/jobs/job-runner.js +78 -0
  64. package/src/mcp/consumer.js +129 -0
  65. package/src/memory/active-recall.js +171 -0
  66. package/src/memory/backend-manager.js +97 -0
  67. package/src/memory/backends/file-backend.js +38 -0
  68. package/src/memory/backends/qmd-backend.js +219 -0
  69. package/src/memory/embedding-guards.js +24 -0
  70. package/src/memory/embedding-index.js +118 -0
  71. package/src/memory/embedding-service.js +179 -0
  72. package/src/memory/file-index.js +177 -0
  73. package/src/memory/memory-signature.js +5 -0
  74. package/src/memory/memory-store.js +648 -0
  75. package/src/memory/retrieval-planner.js +66 -0
  76. package/src/memory/scoring.js +145 -0
  77. package/src/memory/simhash.js +78 -0
  78. package/src/memory/sqlite-active-store.js +824 -0
  79. package/src/memory/write-policy.js +36 -0
  80. package/src/onboarding/aliases.js +33 -0
  81. package/src/onboarding/auth/api-key.js +224 -0
  82. package/src/onboarding/auth/ollama-detect.js +42 -0
  83. package/src/onboarding/clack-prompter.js +77 -0
  84. package/src/onboarding/doctor.js +530 -0
  85. package/src/onboarding/lock.js +42 -0
  86. package/src/onboarding/model-catalog.js +344 -0
  87. package/src/onboarding/phases/auth.js +589 -0
  88. package/src/onboarding/phases/build.js +130 -0
  89. package/src/onboarding/phases/choose.js +82 -0
  90. package/src/onboarding/phases/detect.js +98 -0
  91. package/src/onboarding/phases/hatch.js +216 -0
  92. package/src/onboarding/phases/identity.js +79 -0
  93. package/src/onboarding/phases/ollama.js +345 -0
  94. package/src/onboarding/phases/scaffold.js +99 -0
  95. package/src/onboarding/phases/telegram.js +377 -0
  96. package/src/onboarding/phases/validate.js +204 -0
  97. package/src/onboarding/phases/verify.js +206 -0
  98. package/src/onboarding/platform.js +482 -0
  99. package/src/onboarding/status-bar.js +95 -0
  100. package/src/onboarding/templates.js +794 -0
  101. package/src/onboarding/toml-writer.js +38 -0
  102. package/src/onboarding/tui.js +250 -0
  103. package/src/onboarding/uninstall.js +153 -0
  104. package/src/onboarding/wizard.js +499 -0
  105. package/src/providers/anthropic.js +168 -0
  106. package/src/providers/base.js +247 -0
  107. package/src/providers/circuit-breaker.js +136 -0
  108. package/src/providers/ollama.js +163 -0
  109. package/src/providers/openai-codex.js +149 -0
  110. package/src/providers/openrouter.js +136 -0
  111. package/src/providers/registry.js +36 -0
  112. package/src/providers/router.js +16 -0
  113. package/src/runtime/bootstrap-cache.js +47 -0
  114. package/src/runtime/capabilities-prompt.js +25 -0
  115. package/src/runtime/completion-ping.js +99 -0
  116. package/src/runtime/config-validator.js +121 -0
  117. package/src/runtime/context-ledger.js +360 -0
  118. package/src/runtime/cutover-readiness.js +42 -0
  119. package/src/runtime/daemon.js +729 -0
  120. package/src/runtime/delivery-ack.js +195 -0
  121. package/src/runtime/delivery-adapters/local-file.js +41 -0
  122. package/src/runtime/delivery-adapters/openclaw-cli.js +94 -0
  123. package/src/runtime/delivery-adapters/openclaw-peer.js +98 -0
  124. package/src/runtime/delivery-adapters/shadow.js +13 -0
  125. package/src/runtime/delivery-adapters/standalone-http.js +98 -0
  126. package/src/runtime/delivery-adapters/telegram.js +104 -0
  127. package/src/runtime/delivery-adapters/tui.js +128 -0
  128. package/src/runtime/delivery-manager.js +807 -0
  129. package/src/runtime/delivery-store.js +168 -0
  130. package/src/runtime/dependency-health.js +118 -0
  131. package/src/runtime/envelope.js +114 -0
  132. package/src/runtime/evaluation.js +1089 -0
  133. package/src/runtime/exec-approvals.js +216 -0
  134. package/src/runtime/executor.js +500 -0
  135. package/src/runtime/failure-ping.js +67 -0
  136. package/src/runtime/flows.js +83 -0
  137. package/src/runtime/guards.js +45 -0
  138. package/src/runtime/handoff.js +51 -0
  139. package/src/runtime/identity-cache.js +28 -0
  140. package/src/runtime/improvement-engine.js +109 -0
  141. package/src/runtime/improvement-harness.js +581 -0
  142. package/src/runtime/input-sanitiser.js +72 -0
  143. package/src/runtime/interaction-contract.js +347 -0
  144. package/src/runtime/lane-readiness.js +226 -0
  145. package/src/runtime/migration.js +323 -0
  146. package/src/runtime/model-resolution.js +78 -0
  147. package/src/runtime/network.js +64 -0
  148. package/src/runtime/notification-store.js +97 -0
  149. package/src/runtime/notifier.js +256 -0
  150. package/src/runtime/orchestrator.js +53 -0
  151. package/src/runtime/orphan-reaper.js +41 -0
  152. package/src/runtime/output-contract-schema.js +139 -0
  153. package/src/runtime/output-contract-validator.js +439 -0
  154. package/src/runtime/peer-readiness.js +69 -0
  155. package/src/runtime/peer-registry.js +133 -0
  156. package/src/runtime/pilot-status.js +108 -0
  157. package/src/runtime/prompt-builder.js +261 -0
  158. package/src/runtime/provider-attempt.js +582 -0
  159. package/src/runtime/report-fallback.js +71 -0
  160. package/src/runtime/result-normalizer.js +183 -0
  161. package/src/runtime/retention.js +74 -0
  162. package/src/runtime/review.js +244 -0
  163. package/src/runtime/route-job.js +15 -0
  164. package/src/runtime/run-store.js +38 -0
  165. package/src/runtime/schedule.js +88 -0
  166. package/src/runtime/scheduler-state.js +434 -0
  167. package/src/runtime/scheduler.js +656 -0
  168. package/src/runtime/session-compactor.js +182 -0
  169. package/src/runtime/session-search.js +155 -0
  170. package/src/runtime/slack-inbound.js +249 -0
  171. package/src/runtime/ssrf.js +102 -0
  172. package/src/runtime/status-aggregator.js +330 -0
  173. package/src/runtime/task-contract.js +140 -0
  174. package/src/runtime/task-packet.js +107 -0
  175. package/src/runtime/task-router.js +140 -0
  176. package/src/runtime/telegram-inbound.js +1565 -0
  177. package/src/runtime/token-counter.js +134 -0
  178. package/src/runtime/token-estimator.js +59 -0
  179. package/src/runtime/tool-loop.js +200 -0
  180. package/src/runtime/transport-server.js +311 -0
  181. package/src/runtime/tui-server.js +411 -0
  182. package/src/runtime/ulid.js +44 -0
  183. package/src/security/ssrf-check.js +197 -0
  184. package/src/setup.js +369 -0
  185. package/src/shadow/bridge.js +303 -0
  186. package/src/skills/loader.js +84 -0
  187. package/src/tools/catalog.json +49 -0
  188. package/src/tools/cli-delegate.js +44 -0
  189. package/src/tools/mcp-client.js +106 -0
  190. package/src/tools/micro/cancel-task.js +6 -0
  191. package/src/tools/micro/complete-task.js +6 -0
  192. package/src/tools/micro/fail-task.js +6 -0
  193. package/src/tools/micro/http-fetch.js +74 -0
  194. package/src/tools/micro/index.js +36 -0
  195. package/src/tools/micro/lcm-recall.js +60 -0
  196. package/src/tools/micro/list-dir.js +17 -0
  197. package/src/tools/micro/list-skills.js +46 -0
  198. package/src/tools/micro/load-skill.js +38 -0
  199. package/src/tools/micro/memory-search.js +45 -0
  200. package/src/tools/micro/read-file.js +11 -0
  201. package/src/tools/micro/session-search.js +54 -0
  202. package/src/tools/micro/shell-exec.js +43 -0
  203. package/src/tools/micro/trigger-job.js +79 -0
  204. package/src/tools/micro/web-search.js +58 -0
  205. package/src/tools/micro/workspace-paths.js +39 -0
  206. package/src/tools/micro/write-file.js +14 -0
  207. package/src/tools/micro/write-memory.js +41 -0
  208. package/src/tools/registry.js +348 -0
  209. package/src/tools/tool-result-contract.js +36 -0
  210. package/src/tui/chat.js +835 -0
  211. package/src/tui/renderer.js +175 -0
  212. package/src/tui/socket-client.js +217 -0
  213. package/src/utils/canonical-json.js +29 -0
  214. package/src/utils/compaction.js +30 -0
  215. package/src/utils/env-loader.js +5 -0
  216. package/src/utils/errors.js +80 -0
  217. package/src/utils/fs.js +101 -0
  218. package/src/utils/ids.js +5 -0
  219. package/src/utils/model-context-limits.js +30 -0
  220. package/src/utils/token-budget.js +74 -0
  221. package/src/utils/usage-cost.js +25 -0
  222. package/src/utils/usage-metrics.js +14 -0
  223. package/vendor/smol-toml-1.5.2.tgz +0 -0
@@ -0,0 +1,581 @@
1
+ import path from "node:path";
2
+ import { ConfigLoader } from "../config/loader.js";
3
+ import { RunStore } from "./run-store.js";
4
+ import { listFilesRecursive, readJson } from "../utils/fs.js";
5
+ import { classifyRuntimeFailure } from "./report-fallback.js";
6
+
7
/**
 * Recursively merges `override` onto `base` without mutating either input.
 *
 * Merge rules, applied in order:
 * - If either side is an array the result is a shallow copy of `override`
 *   when it is an array, otherwise a shallow copy of `base`. Arrays are
 *   replaced wholesale, never merged element by element.
 * - If `base` is not a non-null object, `override` wins unless it is
 *   `undefined`, in which case `base` is returned as-is.
 * - If `override` is not a non-null object, it wins unless it is
 *   `undefined`, in which case a shallow copy of `base` is returned.
 * - Otherwise both are plain objects: keys owned by both are merged
 *   recursively, keys only in `override` are taken by reference.
 *
 * @param {*} base - Value providing the defaults.
 * @param {*} override - Value whose defined entries take precedence.
 * @returns {*} The merged value.
 */
function _deepMerge(base, override) {
  if (Array.isArray(base) || Array.isArray(override)) {
    // At least one side is an array here, so one of the two copies applies.
    return Array.isArray(override) ? [...override] : [...base];
  }

  if (!base || typeof base !== "object") {
    return override === undefined ? base : override;
  }

  if (!override || typeof override !== "object") {
    return override === undefined ? { ...base } : override;
  }

  const merged = { ...base };
  for (const [key, value] of Object.entries(override)) {
    // An own "__proto__" key (e.g. produced by JSON.parse) would hit the
    // prototype setter on assignment and swap the prototype of `merged`.
    if (key === "__proto__") continue;
    // Own-property check: `in` would also match inherited members such as
    // `toString` and merge against the prototype's value.
    const sharedKey = Object.prototype.hasOwnProperty.call(base, key);
    merged[key] = sharedKey ? _deepMerge(base[key], value) : value;
  }
  return merged;
}
26
+
27
/**
 * Produces the descriptor for one named improvement variant.
 *
 * A descriptor has the shape `{ id, description, overrides }`. Every variant
 * except `report_model_bump` carries the target's `defaultModelOverride`
 * (or `null`) as `overrides.modelOverride`.
 *
 * @param {object} target - Improvement target; only `defaultModelOverride` is read.
 * @param {string} variantId - One of `baseline`, `focus_concrete`,
 *   `retrieval_lexical_heavy`, `retrieval_embedding_heavy`, `report_model_bump`.
 * @returns {{id: string, description: string, overrides: object}} Fresh descriptor.
 * @throws {Error} When `variantId` is not a known variant.
 */
function buildVariant(target, variantId) {
  // Recipes are thunks so `target` is only dereferenced by the variant that
  // actually needs it, and every call hands back fresh objects and arrays.
  const recipes = new Map([
    [
      "baseline",
      () => ({
        description: "Run the lane with its current guidance and routing defaults.",
        overrides: {
          modelOverride: target.defaultModelOverride || null
        }
      })
    ],
    [
      "focus_concrete",
      () => ({
        description: "Push the report to be more concrete, operator-facing, and less generic.",
        overrides: {
          modelOverride: target.defaultModelOverride || null,
          reportGuidanceOverride: {
            focus: ["concrete evidence", "explicit operator signal", "succinct useful status"],
            qualityChecks: ["avoid generic reassurance", "prefer named facts over abstractions", "state None explicitly"],
            avoid: ["vague encouragement", "boilerplate filler"]
          }
        }
      })
    ],
    [
      "retrieval_lexical_heavy",
      () => ({
        description: "Bias retrieval toward lexical match signal for clearer grounded reports.",
        overrides: {
          modelOverride: target.defaultModelOverride || null,
          retrievalBlendOverride: {
            lexicalWeight: 0.48,
            embeddingWeight: 0.2,
            recencyWeight: 0.14,
            salienceWeight: 0.12,
            typeWeight: 0.04,
            semanticRescueBonus: 0.04
          }
        }
      })
    ],
    [
      "retrieval_embedding_heavy",
      () => ({
        description: "Bias retrieval toward embedding similarity when vectors are fresh and available.",
        overrides: {
          modelOverride: target.defaultModelOverride || null,
          retrievalBlendOverride: {
            lexicalWeight: 0.26,
            embeddingWeight: 0.42,
            recencyWeight: 0.12,
            salienceWeight: 0.12,
            typeWeight: 0.04,
            semanticRescueBonus: 0.08
          }
        }
      })
    ],
    [
      "report_model_bump",
      () => ({
        description: "Try the manual-bump local report model for richer structure.",
        overrides: {
          modelOverride: "ollama/qwen3:14b"
        }
      })
    ]
  ]);

  const recipe = recipes.get(variantId);
  if (!recipe) {
    throw new Error(`Unknown improvement variant: ${variantId}`);
  }
  return { id: variantId, ...recipe() };
}
94
+
95
/**
 * Condenses a persisted evaluation into the fields stored on improvement artifacts.
 *
 * Missing rubric scores become `null`; missing embedding counters become `0`.
 * When the evaluation has no `retrieval` section the summary's `retrieval` is `null`.
 *
 * @param {object} evaluation - Evaluation record (must be non-null).
 * @returns {object} Summary with `filePath`, rubric scores, `retrieval`,
 *   `interaction` and `findings`.
 */
function summarizeEvaluation(evaluation) {
  const components = evaluation.rubric?.components;
  const source = evaluation.retrieval;

  let retrieval = null;
  if (source) {
    retrieval = {
      memoryCount: source.memoryCount,
      lexicalCount: source.lexicalCount,
      semanticCount: source.semanticCount,
      qmdCount: source.qmdCount,
      embeddingQueryMode: source.embeddingQueryMode || null,
      freshEmbeddingCount: source.freshEmbeddingCount || 0,
      staleEmbeddingCount: source.staleEmbeddingCount || 0,
      missingEmbeddingCount: source.missingEmbeddingCount || 0,
      failedEmbeddingCount: source.failedEmbeddingCount || 0,
      embeddingError: source.embeddingError || null
    };
  }

  return {
    filePath: evaluation.filePath,
    overallScore: evaluation.rubric?.overallScore ?? null,
    contractAdherence: components?.contractAdherence ?? null,
    v2OutputQuality: components?.v2OutputQuality ?? null,
    retrieval,
    interaction: evaluation.interaction || null,
    findings: evaluation.comparisonNotes || []
  };
}
119
+
120
/**
 * Converts a snake_case target id to camelCase.
 *
 * Only an underscore directly followed by a lowercase ASCII letter is
 * rewritten; falsy input yields the empty string.
 *
 * @param {*} targetId - Target id as supplied by the caller.
 * @returns {string} The camelCased id.
 */
function normalizeTargetId(targetId) {
  const raw = String(targetId || "");
  const upperCaseAfterUnderscore = (_whole, ch) => ch.toUpperCase();
  return raw.replace(/_([a-z])/g, upperCaseAfterUnderscore);
}
123
+
124
/**
 * Builds the `error` section stored on a failed improvement artifact.
 *
 * Accepts anything that can be thrown, not only `Error` instances: when the
 * thrown value has no `message` (a string, `null`, `undefined`, ...) the
 * message falls back to `String(error)`, the same fallback
 * `checkVariantPrerequisites` uses for rebuild failures.
 *
 * @param {*} error - The caught value.
 * @returns {{message: *, runFile: *, classification: *, fallback: *}}
 *   `classification` is whatever `classifyRuntimeFailure` returns for the message.
 */
function summarizeFailure(error) {
  // `?.` keeps `throw null` / `throw undefined` from crashing the catch path.
  const message = error?.message ?? String(error);
  return {
    message,
    runFile: error?.runFile || null,
    classification: classifyRuntimeFailure(message || ""),
    fallback: error?.fallback || null
  };
}
132
+
133
/**
 * Builds the `skip` section stored on a skipped improvement artifact.
 *
 * `details` is layered on top of `reason`, so a `reason` key inside
 * `details` takes precedence over the first argument.
 *
 * @param {*} reason - Why the variant was skipped.
 * @param {object} [details={}] - Extra fields copied onto the result.
 * @returns {object} `{ reason, ...details }`.
 */
function summarizeSkip(reason, details = {}) {
  return Object.assign({ reason }, details);
}
139
+
140
/**
 * Compares two evaluation summaries and reports whether the candidate is an improvement.
 *
 * Findings are emitted in a fixed order: overall score, contract adherence,
 * output quality, semantic retrieval hits, embedding failures. A score pair
 * is only compared when both sides are non-null.
 *
 * `improved` is true only when the candidate's overall score is strictly
 * higher, its contract adherence is not lower (missing counts as 0) and its
 * embedding failure count is not higher (missing counts as 0).
 *
 * @param {object|null|undefined} baseline - Baseline evaluation summary.
 * @param {object|null|undefined} candidate - Candidate evaluation summary.
 * @returns {{improved: boolean, baseline: *, candidate: *, findings: string[]}}
 */
function compareEvaluations(baseline, candidate) {
  const scoreOf = (summary, field) => summary?.[field] ?? null;
  const failedEmbeddings = (summary) => summary?.retrieval?.failedEmbeddingCount || 0;
  const semanticHits = (summary) => summary?.retrieval?.semanticCount ?? 0;

  const scored = [
    {
      before: scoreOf(baseline, "overallScore"),
      after: scoreOf(candidate, "overallScore"),
      better: "Candidate improved overall deterministic eval score.",
      worse: "Candidate regressed on overall deterministic eval score."
    },
    {
      before: scoreOf(baseline, "contractAdherence"),
      after: scoreOf(candidate, "contractAdherence"),
      better: "Candidate improved contract adherence.",
      worse: "Candidate regressed on contract adherence."
    },
    {
      before: scoreOf(baseline, "v2OutputQuality"),
      after: scoreOf(candidate, "v2OutputQuality"),
      better: "Candidate improved output-quality signals.",
      worse: "Candidate regressed on output-quality signals."
    }
  ];

  const findings = [];
  for (const { before, after, better, worse } of scored) {
    if (before == null || after == null) continue;
    if (after > before) {
      findings.push(better);
    } else if (after < before) {
      findings.push(worse);
    }
  }

  if (semanticHits(candidate) > semanticHits(baseline)) {
    findings.push("Candidate surfaced more semantic retrieval hits.");
  }

  const baselineFailures = failedEmbeddings(baseline);
  const candidateFailures = failedEmbeddings(candidate);
  if (candidateFailures > baselineFailures) {
    findings.push("Candidate increased embedding failure count.");
  }

  const [overall, contract] = scored;
  let improved = false;
  if (overall.after != null && overall.before != null) {
    improved =
      overall.after > overall.before &&
      (contract.after ?? 0) >= (contract.before ?? 0) &&
      candidateFailures <= baselineFailures;
  }

  return { improved, baseline, candidate, findings };
}
182
+
183
/**
 * Normalises the embedding section recorded on improvement artifacts.
 *
 * Any field left `undefined` gets its default: `null` for `readiness`,
 * `health`, `blockedReason` and `queryMode`, `false` for `rebuilt`.
 *
 * @param {object} [fields={}] - Partial embedding summary.
 * @returns {{readiness: *, health: *, rebuilt: *, blockedReason: *, queryMode: *}}
 */
function buildEmbeddingSummary(fields = {}) {
  const defaults = [
    ["readiness", null],
    ["health", null],
    ["rebuilt", false],
    ["blockedReason", null],
    ["queryMode", null]
  ];

  const summary = {};
  for (const [name, fallback] of defaults) {
    const supplied = fields[name];
    summary[name] = supplied === undefined ? fallback : supplied;
  }
  return summary;
}
192
+
193
/**
 * Runs improvement "variants" of a job, evaluates them and records the
 * outcome (runs, comparisons, repairs) through a `RunStore` rooted at
 * `<stateRoot>/improvements`.
 */
export class ImprovementHarness {
  /**
   * @param {object} deps
   * @param {string} deps.projectRoot - Project root; config is loaded from `<projectRoot>/config`.
   * @param {string} deps.stateRoot - State root; artifacts live under `<stateRoot>/improvements`.
   * @param {object} deps.executor - Must expose `executeJob` and a `scheduler`.
   * @param {object} deps.evaluator - Must expose `evaluateAndPersistJob`.
   */
  constructor({ projectRoot, stateRoot, executor, evaluator }) {
    this.projectRoot = projectRoot;
    this.stateRoot = stateRoot;
    this.executor = executor;
    this.evaluator = evaluator;
    this.loader = new ConfigLoader({ rootDir: path.join(projectRoot, "config") });
    this.runStore = new RunStore({ rootDir: path.join(stateRoot, "improvements") });
  }

  /**
   * Finds the most recent stored artifact for a target/variant pair.
   *
   * Candidate files are visited in reverse lexicographic path order.
   * NOTE(review): presumably run file names sort chronologically, which is
   * what makes the first match "latest" - confirm against RunStore.
   *
   * @param {string} targetId - Target id (snake_case or camelCase).
   * @param {string} variantId - Variant id the artifact must carry.
   * @param {{successfulOnly?: boolean}} [options] - When `successfulOnly`
   *   is truthy, artifacts whose `ok` is falsy are skipped.
   * @returns {Promise<object|null>} `{ filePath, ...artifact }` or `null`.
   */
  async findLatestVariantArtifact(targetId, variantId, options = {}) {
    // Normalise once; the id is needed for the directory and for every artifact check.
    const resolvedTargetId = normalizeTargetId(targetId);
    const targetDir = path.join(this.stateRoot, "improvements", resolvedTargetId);
    const files = (await listFilesRecursive(targetDir))
      .filter((filePath) => filePath.endsWith(".json"))
      .sort()
      .reverse();

    for (const filePath of files) {
      const artifact = await readJson(filePath, null);
      if (!artifact) continue;
      if (artifact.targetId !== resolvedTargetId) continue;
      if (artifact.variant?.id !== variantId) continue;
      if (options.successfulOnly && !artifact.ok) continue;
      return {
        filePath,
        ...artifact
      };
    }

    return null;
  }

  /**
   * Lists the configured improvement targets with defaults filled in.
   *
   * @returns {Promise<object[]>} One entry per key of `config.improvementTargets`.
   */
  async listTargets() {
    const config = await this.loader.loadAll();
    return Object.entries(config.improvementTargets || {}).map(([targetId, target]) => ({
      id: targetId,
      jobId: target.jobId,
      defaultMode: target.defaultMode || "provider",
      defaultModelOverride: target.defaultModelOverride || null,
      defaultTimeoutMs: target.defaultTimeoutMs || null,
      allowedKnobs: target.allowedKnobs || [],
      recommendedVariants: target.recommendedVariants || []
    }));
  }

  /**
   * Loads one improvement target by id.
   *
   * @param {string} targetId - Target id (snake_case or camelCase).
   * @returns {Promise<object>} The target config plus its normalised `id`
   *   (a configured `id` field, if any, takes precedence).
   * @throws {Error} When the target is not configured.
   */
  async getTarget(targetId) {
    const config = await this.loader.loadAll();
    const resolvedTargetId = normalizeTargetId(targetId);
    const target = config.improvementTargets?.[resolvedTargetId];
    if (!target) {
      throw new Error(`Unknown improvement target: ${targetId}`);
    }
    return {
      id: resolvedTargetId,
      ...target
    };
  }

  /**
   * Checks whether a variant can meaningfully run, repairing embeddings if possible.
   *
   * Only `retrieval_embedding_heavy` has prerequisites. For it, the lane's
   * agent must already have fresh embeddings, or the embedding service must
   * report ready so a rebuild can be attempted.
   *
   * @param {object} target - Resolved improvement target (`jobId` is read).
   * @param {object} variant - Variant descriptor (`id` is read).
   * @returns {Promise<object|null>} `null` when nothing blocks the variant and
   *   nothing was rebuilt; otherwise an object with `blocked` and, depending
   *   on the path, `reason`, `health`, `readiness` and `rebuilt`.
   *   `rebuilt: true` is reported whenever a rebuild was attempted, including
   *   when the attempt threw.
   */
  async checkVariantPrerequisites(target, variant) {
    if (variant.id !== "retrieval_embedding_heavy") {
      return null;
    }

    const { scheduler } = this.executor;
    const runtime = await scheduler.loadRuntime();
    const job = runtime.jobs?.[target.jobId];
    const agentId = job?.agentId;
    if (!agentId) {
      return {
        blocked: true,
        reason: "Could not resolve agent for embedding-heavy variant."
      };
    }

    const readHealth = () =>
      scheduler.memoryStore.getEmbeddingHealth(agentId, {
        embeddingIndex: scheduler.embeddingIndex,
        probe: false
      });

    const health = await readHealth();
    if ((health.embeddingHealth?.freshCount || 0) > 0) {
      return null;
    }

    const embeddingService = scheduler.embeddingIndex?.embeddingService || null;
    if (!embeddingService) {
      return {
        blocked: true,
        reason: "No embedding service is configured for this runtime.",
        health
      };
    }

    const readiness = await embeddingService.getReadiness();
    if (!readiness?.ready) {
      return {
        blocked: true,
        reason: readiness?.reason || "Embeddings are unavailable or degraded for this lane.",
        health,
        readiness
      };
    }

    try {
      await scheduler.memoryStore.rebuildEmbeddings(agentId, {
        embeddingIndex: scheduler.embeddingIndex
      });
      const refreshed = await readHealth();
      if ((refreshed.embeddingHealth?.freshCount || 0) > 0) {
        return {
          blocked: false,
          rebuilt: true,
          health: refreshed,
          readiness
        };
      }
      return {
        blocked: true,
        reason: "Embeddings were rebuildable but no fresh vectors were produced for this lane.",
        health: refreshed,
        readiness,
        rebuilt: true
      };
    } catch (error) {
      // Report the post-failure health so the artifact shows the resulting state.
      const refreshed = await readHealth();
      return {
        blocked: true,
        reason: error?.message || String(error),
        health: refreshed,
        readiness,
        rebuilt: true
      };
    }
  }

  /**
   * Executes one variant of a target, evaluates it and stores the artifact.
   *
   * Three outcomes are recorded: skipped (prerequisites blocked, `ok: false`,
   * `skipped: true`), succeeded (`ok: true` with `run` and `evaluation`) or
   * failed (`ok: false` with `error`). Errors raised while checking
   * prerequisites, executing or evaluating are captured in the artifact
   * rather than rethrown; an unknown target or variant still throws.
   *
   * @param {string} targetId - Target id.
   * @param {string} [variantId="baseline"] - Variant to run.
   * @param {{allowReportFallback?: boolean}} [options] - Only a strict
   *   `true` enables report fallback.
   * @returns {Promise<object>} `{ filePath, ...artifact }`.
   */
  async runVariant(targetId, variantId = "baseline", options = {}) {
    const target = await this.getTarget(targetId);
    const variant = buildVariant(target, variantId);
    const mode = target.defaultMode || "provider";
    let embedding = buildEmbeddingSummary();
    let artifact;
    try {
      const prerequisite = await this.checkVariantPrerequisites(target, variant);
      embedding = buildEmbeddingSummary({
        readiness: prerequisite?.readiness || null,
        health: prerequisite?.health?.embeddingHealth || null,
        rebuilt: Boolean(prerequisite?.rebuilt),
        blockedReason: prerequisite?.blocked ? prerequisite.reason : null
      });
      if (prerequisite?.blocked) {
        artifact = {
          timestamp: new Date().toISOString(),
          targetId: target.id,
          jobId: target.jobId,
          variant,
          ok: false,
          skipped: true,
          embedding,
          skip: summarizeSkip(prerequisite.reason, {
            embeddingHealth: prerequisite.health?.embeddingHealth || null,
            embeddingReadiness: prerequisite.readiness || null
          })
        };
        const skippedFilePath = await this.runStore.saveRun(target.id, artifact);
        return {
          filePath: skippedFilePath,
          ...artifact
        };
      }

      const run = await this.executor.executeJob(target.jobId, {
        mode,
        shadowImport: true,
        modelOverride: variant.overrides.modelOverride || null,
        reportGuidanceOverride: variant.overrides.reportGuidanceOverride || null,
        retrievalBlendOverride: variant.overrides.retrievalBlendOverride || null,
        providerTimeoutMs: target.defaultTimeoutMs || null,
        allowReportFallback: options.allowReportFallback === true
      });
      const evaluation = await this.evaluator.evaluateAndPersistJob(target.jobId);
      artifact = {
        timestamp: new Date().toISOString(),
        targetId: target.id,
        jobId: target.jobId,
        variant,
        ok: true,
        embedding: buildEmbeddingSummary({
          readiness: embedding.readiness,
          health: evaluation.retrieval?.embeddingHealth || embedding.health,
          rebuilt: embedding.rebuilt,
          blockedReason: null,
          queryMode: evaluation.retrieval?.embeddingQueryMode || null
        }),
        run: {
          filePath: run.filePath,
          mode: run.mode,
          providerId: run.providerId,
          modelId: run.modelId,
          routingDecision: run.routingDecision,
          preflight: run.preflight,
          summary: run.result?.summary || null,
          fallback: run.fallback || null
        },
        evaluation: summarizeEvaluation(evaluation)
      };
    } catch (error) {
      artifact = {
        timestamp: new Date().toISOString(),
        targetId: target.id,
        jobId: target.jobId,
        variant,
        ok: false,
        embedding,
        error: summarizeFailure(error)
      };
    }
    const filePath = await this.runStore.saveRun(target.id, artifact);
    return {
      filePath,
      ...artifact
    };
  }

  /**
   * Returns the latest successful stored baseline artifact, running the
   * baseline variant when none exists.
   *
   * @param {string} targetId - Target id.
   * @param {string} baselineVariantId - Variant used as baseline.
   * @param {object} options - Forwarded to `runVariant` when a run is needed.
   * @returns {Promise<object>} Baseline artifact.
   */
  async _loadOrRunBaseline(targetId, baselineVariantId, options) {
    const stored = await this.findLatestVariantArtifact(targetId, baselineVariantId, { successfulOnly: true });
    return stored || (await this.runVariant(targetId, baselineVariantId, options));
  }

  /**
   * Describes why a non-ok artifact did not produce an evaluation.
   *
   * @param {string} label - "Baseline" or "Candidate".
   * @param {object} artifact - A variant artifact whose `ok` is falsy.
   * @returns {string} Human-readable finding.
   */
  _describeUnavailable(label, artifact) {
    return artifact.skipped
      ? `${label} variant skipped: ${artifact.skip?.reason}`
      : `${label} variant failed: ${artifact.error?.message}`;
  }

  /**
   * Builds the comparison section shared by `compareVariants` and `repairAndCompare`.
   *
   * When either side is not ok, nothing is compared and the findings explain
   * why. Otherwise the evaluations are compared, and an embedding-heavy
   * candidate that fell back to lexical retrieval or saw no fresh embeddings
   * is never reported as improved.
   *
   * @param {object} baseline - Baseline variant artifact.
   * @param {object} candidate - Candidate variant artifact.
   * @param {string} candidateVariantId - Id of the candidate variant.
   * @returns {{improved: boolean, baseline: *, candidate: *, findings: string[]}}
   */
  _compareArtifacts(baseline, candidate, candidateVariantId) {
    if (!baseline.ok || !candidate.ok) {
      return {
        improved: false,
        baseline: baseline.ok ? baseline.evaluation : null,
        candidate: candidate.ok ? candidate.evaluation : null,
        findings: [
          ...(!baseline.ok ? [this._describeUnavailable("Baseline", baseline)] : []),
          ...(!candidate.ok ? [this._describeUnavailable("Candidate", candidate)] : [])
        ]
      };
    }

    const comparisonResult = compareEvaluations(baseline.evaluation, candidate.evaluation);
    const retrieval = candidate.evaluation?.retrieval;
    const embeddingsNotExercised =
      retrieval?.embeddingQueryMode === "lexical_fallback" || (retrieval?.freshEmbeddingCount || 0) === 0;
    if (candidateVariantId === "retrieval_embedding_heavy" && embeddingsNotExercised) {
      comparisonResult.improved = false;
      comparisonResult.findings.push(
        "Candidate could not exercise embedding-heavy retrieval because embeddings were unavailable or degraded."
      );
    }
    return comparisonResult;
  }

  /**
   * Runs the candidate variant and compares it with the baseline.
   *
   * The baseline is the latest successful stored artifact when one exists;
   * otherwise the baseline variant is run first.
   *
   * @param {string} targetId - Target id.
   * @param {string} [baselineVariantId="baseline"] - Baseline variant id.
   * @param {string} candidateVariantId - Candidate variant id (required).
   * @param {object} [options] - Forwarded to `runVariant`.
   * @returns {Promise<object>} `{ filePath, ...comparison }`, stored under
   *   `<targetId>-comparison`.
   * @throws {Error} When `candidateVariantId` is missing.
   */
  async compareVariants(targetId, baselineVariantId = "baseline", candidateVariantId, options = {}) {
    if (!candidateVariantId) {
      throw new Error("compareVariants requires a candidate variant id.");
    }
    const baseline = await this._loadOrRunBaseline(targetId, baselineVariantId, options);
    const candidate = await this.runVariant(targetId, candidateVariantId, options);
    const comparisonResult = this._compareArtifacts(baseline, candidate, candidateVariantId);

    const comparison = {
      timestamp: new Date().toISOString(),
      targetId: baseline.targetId,
      baselineVariantId,
      candidateVariantId,
      fallback: {
        baseline: baseline.run?.fallback || baseline.error?.fallback || null,
        candidate: candidate.run?.fallback || candidate.error?.fallback || null
      },
      embedding: {
        baseline: baseline.embedding || null,
        candidate: candidate.embedding || null
      },
      comparison: comparisonResult
    };
    const filePath = await this.runStore.saveRun(`${baseline.targetId}-comparison`, comparison);
    return {
      filePath,
      ...comparison
    };
  }

  /**
   * Repairs the candidate's prerequisites if needed, then compares it with the baseline.
   *
   * When prerequisites stay blocked a `blocked: true` artifact is stored and
   * returned without running anything. When the prerequisite check rebuilt
   * embeddings the baseline is always re-run; otherwise a stored successful
   * baseline is reused when available.
   *
   * @param {string} targetId - Target id.
   * @param {string} candidateVariantId - Candidate variant id (required).
   * @param {string} [baselineVariantId="baseline"] - Baseline variant id.
   * @param {object} [options] - Forwarded to `runVariant`.
   * @returns {Promise<object>} `{ filePath, ...artifact }`, stored under
   *   `<targetId>-repair`.
   * @throws {Error} When `candidateVariantId` is missing, or the target or
   *   variant is unknown.
   */
  async repairAndCompare(targetId, candidateVariantId, baselineVariantId = "baseline", options = {}) {
    if (!candidateVariantId) {
      throw new Error("repairAndCompare requires a candidate variant id.");
    }

    const target = await this.getTarget(targetId);
    const candidateVariant = buildVariant(target, candidateVariantId);
    const prerequisite = await this.checkVariantPrerequisites(target, candidateVariant);

    if (prerequisite?.blocked) {
      const blockedArtifact = {
        timestamp: new Date().toISOString(),
        targetId: target.id,
        baselineVariantId,
        candidateVariantId,
        repaired: false,
        blocked: true,
        embedding: buildEmbeddingSummary({
          readiness: prerequisite.readiness || null,
          health: prerequisite.health?.embeddingHealth || null,
          rebuilt: Boolean(prerequisite.rebuilt),
          blockedReason: prerequisite.reason
        }),
        findings: [`Repair blocked: ${prerequisite.reason}`]
      };
      const blockedFilePath = await this.runStore.saveRun(`${target.id}-repair`, blockedArtifact);
      return {
        filePath: blockedFilePath,
        ...blockedArtifact
      };
    }

    // After a rebuild a stored baseline is not reused; it is run again.
    const rebuilt = Boolean(prerequisite?.rebuilt);
    const baseline = rebuilt
      ? await this.runVariant(targetId, baselineVariantId, options)
      : await this._loadOrRunBaseline(targetId, baselineVariantId, options);
    const candidate = await this.runVariant(targetId, candidateVariantId, options);
    const comparisonResult = this._compareArtifacts(baseline, candidate, candidateVariantId);

    const artifact = {
      timestamp: new Date().toISOString(),
      targetId: target.id,
      baselineVariantId,
      candidateVariantId,
      repaired: rebuilt,
      blocked: false,
      fallback: {
        baseline: baseline.run?.fallback || baseline.error?.fallback || null,
        candidate: candidate.run?.fallback || candidate.error?.fallback || null
      },
      embedding: {
        readiness: prerequisite?.readiness || null,
        health: candidate.embedding?.health || prerequisite?.health?.embeddingHealth || null,
        rebuilt,
        blockedReason: null,
        queryMode: candidate.embedding?.queryMode || null
      },
      baseline: {
        filePath: baseline.filePath,
        evaluationFile: baseline.evaluation?.filePath || null
      },
      candidate: {
        filePath: candidate.filePath,
        evaluationFile: candidate.evaluation?.filePath || null,
        status: candidate.ok ? "evaluated" : candidate.skipped ? "skipped" : candidate.error?.classification || "failed"
      },
      comparison: comparisonResult
    };
    const filePath = await this.runStore.saveRun(`${target.id}-repair`, artifact);
    return {
      filePath,
      ...artifact
    };
  }
}
@@ -0,0 +1,72 @@
1
+ import { formatRuntimeError, RuntimeError } from "../utils/errors.js";
2
+
3
// Heuristic prompt-injection signatures. Each entry has a stable `id`, the
// `regex` to test, and the `label` reported to callers.
//
// None of the regexes is global or sticky, so `.test()` keeps no state between calls.
// The IMPORTANT:/CRITICAL: patterns stay case-sensitive and use the `m` flag,
// so they match upper-case markers at the start of any line.
// The role-tag patterns are case-insensitive, like the phrase patterns, so
// `<SYSTEM>` or `</User>` cannot slip past on casing alone.
const INJECTION_PATTERNS = [
  { id: "ignore_previous", regex: /ignore\s+(all\s+)?previous\s+instructions/i, label: "ignore previous instructions" },
  { id: "you_are_now", regex: /you\s+are\s+now\b/i, label: "identity override (you are now)" },
  { id: "system_prompt", regex: /system\s+prompt\s*:/i, label: "system prompt injection" },
  { id: "important_start", regex: /^(?:\s*)IMPORTANT\s*:/m, label: "IMPORTANT: at start of output" },
  { id: "critical_start", regex: /^(?:\s*)CRITICAL\s*:/m, label: "CRITICAL: at start of output" },
  { id: "fake_system_tag", regex: /<\/?system(?:\s[^>]*)?>/i, label: "XML <system> boundary tag" },
  { id: "fake_assistant_tag", regex: /<\/?assistant(?:\s[^>]*)?>/i, label: "XML <assistant> boundary tag" },
  { id: "fake_user_tag", regex: /<\/?user(?:\s[^>]*)?>/i, label: "XML <user> boundary tag" },
  { id: "fake_tool_boundary", regex: /\[TOOL_OUTPUT:(START|END)\]/i, label: "spoofed TOOL_OUTPUT boundary marker" },
  { id: "disregard", regex: /disregard\s+(all\s+)?(prior|previous|above)\s+(instructions|context)/i, label: "disregard prior instructions" },
  { id: "new_instructions", regex: /new\s+instructions?\s*:/i, label: "new instructions injection" }
];
16
+
17
/**
 * Encloses tool output between TOOL_OUTPUT START/END marker lines so the
 * model can tell untrusted external data apart from trusted system content.
 *
 * The text is embedded verbatim; `null`/`undefined` output becomes the empty
 * string and a `null`/`undefined` tool name becomes "unknown".
 *
 * @param {*} toolOutput - Raw tool output (stringified).
 * @param {*} toolName - Name of the tool that produced it (stringified).
 * @returns {string} Header line, the text, and the end marker, joined by newlines.
 */
export function tagUntrustedInput(toolOutput, toolName) {
  const body = String(toolOutput ?? "");
  const header = `[TOOL_OUTPUT:START tool=${String(toolName ?? "unknown")} trust=external]`;
  return [header, body, "[TOOL_OUTPUT:END]"].join("\n");
}
26
+
27
/**
 * Checks text against every entry of INJECTION_PATTERNS.
 *
 * Detection is advisory only: the text is never altered, so `sanitised` is
 * simply the stringified input. Callers decide how to react to `flagged`
 * and the matched labels.
 *
 * @param {*} text - Text to scan; `null`/`undefined` is treated as "".
 * @returns {{flagged: boolean, patterns: string[], sanitised: string}}
 *   `patterns` holds the labels of the matching entries, in list order.
 */
export function detectInjectionPatterns(text) {
  const input = String(text ?? "");
  const patterns = INJECTION_PATTERNS
    .filter(({ regex }) => regex.test(input))
    .map(({ label }) => label);

  return {
    flagged: patterns.length > 0,
    patterns,
    sanitised: input
  };
}
49
+
50
/**
 * Scans a tool's output for injection patterns, then wraps it in boundary markers.
 *
 * A match never blocks or alters the output: it only produces one
 * `console.warn` carrying a formatted, recoverable "security" RuntimeError,
 * so false positives cannot break execution.
 *
 * @param {*} toolOutput - Raw tool output.
 * @param {*} toolName - Name of the tool that produced it.
 * @returns {{tagged: string, detection: object}} The wrapped text and the
 *   detection result it was derived from.
 */
export function processToolOutput(toolOutput, toolName) {
  const detection = detectInjectionPatterns(toolOutput);

  const warnAboutInjection = () => {
    const context = { toolName, patterns: detection.patterns.join("; ") };
    const warning = new RuntimeError(
      `Potential prompt injection detected in output from tool "${toolName}"`,
      { category: "security", context, recoverable: true }
    );
    console.warn(formatRuntimeError(warning));
  };

  if (detection.flagged) {
    warnAboutInjection();
  }

  return {
    tagged: tagUntrustedInput(detection.sanitised, toolName),
    detection
  };
}