nemoris 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +49 -0
- package/LICENSE +21 -0
- package/README.md +209 -0
- package/SECURITY.md +119 -0
- package/bin/nemoris +46 -0
- package/config/agents/agent.toml.example +28 -0
- package/config/agents/default.toml +22 -0
- package/config/agents/orchestrator.toml +18 -0
- package/config/delivery.toml +73 -0
- package/config/embeddings.toml +5 -0
- package/config/identity/default-purpose.md +1 -0
- package/config/identity/default-soul.md +3 -0
- package/config/identity/orchestrator-purpose.md +1 -0
- package/config/identity/orchestrator-soul.md +1 -0
- package/config/improvement-targets.toml +15 -0
- package/config/jobs/heartbeat-check.toml +30 -0
- package/config/jobs/memory-rollup.toml +46 -0
- package/config/jobs/workspace-health.toml +63 -0
- package/config/mcp.toml +16 -0
- package/config/output-contracts.toml +17 -0
- package/config/peers.toml +32 -0
- package/config/peers.toml.example +32 -0
- package/config/policies/memory-default.toml +10 -0
- package/config/policies/memory-heartbeat.toml +5 -0
- package/config/policies/memory-ops.toml +10 -0
- package/config/policies/tools-heartbeat-minimal.toml +8 -0
- package/config/policies/tools-interactive-safe.toml +8 -0
- package/config/policies/tools-ops-bounded.toml +8 -0
- package/config/policies/tools-orchestrator.toml +7 -0
- package/config/providers/anthropic.toml +15 -0
- package/config/providers/ollama.toml +5 -0
- package/config/providers/openai-codex.toml +9 -0
- package/config/providers/openrouter.toml +5 -0
- package/config/router.toml +22 -0
- package/config/runtime.toml +114 -0
- package/config/skills/self-improvement.toml +15 -0
- package/config/skills/telegram-onboarding-spec.md +240 -0
- package/config/skills/workspace-monitor.toml +15 -0
- package/config/task-router.toml +42 -0
- package/install.sh +50 -0
- package/package.json +90 -0
- package/src/auth/auth-profiles.js +169 -0
- package/src/auth/openai-codex-oauth.js +285 -0
- package/src/battle.js +449 -0
- package/src/cli/help.js +265 -0
- package/src/cli/output-filter.js +49 -0
- package/src/cli/runtime-control.js +704 -0
- package/src/cli-main.js +2763 -0
- package/src/cli.js +78 -0
- package/src/config/loader.js +332 -0
- package/src/config/schema-validator.js +214 -0
- package/src/config/toml-lite.js +8 -0
- package/src/daemon/action-handlers.js +71 -0
- package/src/daemon/healing-tick.js +87 -0
- package/src/daemon/health-probes.js +90 -0
- package/src/daemon/notifier.js +57 -0
- package/src/daemon/nurse.js +218 -0
- package/src/daemon/repair-log.js +106 -0
- package/src/daemon/rule-staging.js +90 -0
- package/src/daemon/rules.js +29 -0
- package/src/daemon/telegram-commands.js +54 -0
- package/src/daemon/updater.js +85 -0
- package/src/jobs/job-runner.js +78 -0
- package/src/mcp/consumer.js +129 -0
- package/src/memory/active-recall.js +171 -0
- package/src/memory/backend-manager.js +97 -0
- package/src/memory/backends/file-backend.js +38 -0
- package/src/memory/backends/qmd-backend.js +219 -0
- package/src/memory/embedding-guards.js +24 -0
- package/src/memory/embedding-index.js +118 -0
- package/src/memory/embedding-service.js +179 -0
- package/src/memory/file-index.js +177 -0
- package/src/memory/memory-signature.js +5 -0
- package/src/memory/memory-store.js +648 -0
- package/src/memory/retrieval-planner.js +66 -0
- package/src/memory/scoring.js +145 -0
- package/src/memory/simhash.js +78 -0
- package/src/memory/sqlite-active-store.js +824 -0
- package/src/memory/write-policy.js +36 -0
- package/src/onboarding/aliases.js +33 -0
- package/src/onboarding/auth/api-key.js +224 -0
- package/src/onboarding/auth/ollama-detect.js +42 -0
- package/src/onboarding/clack-prompter.js +77 -0
- package/src/onboarding/doctor.js +530 -0
- package/src/onboarding/lock.js +42 -0
- package/src/onboarding/model-catalog.js +344 -0
- package/src/onboarding/phases/auth.js +589 -0
- package/src/onboarding/phases/build.js +130 -0
- package/src/onboarding/phases/choose.js +82 -0
- package/src/onboarding/phases/detect.js +98 -0
- package/src/onboarding/phases/hatch.js +216 -0
- package/src/onboarding/phases/identity.js +79 -0
- package/src/onboarding/phases/ollama.js +345 -0
- package/src/onboarding/phases/scaffold.js +99 -0
- package/src/onboarding/phases/telegram.js +377 -0
- package/src/onboarding/phases/validate.js +204 -0
- package/src/onboarding/phases/verify.js +206 -0
- package/src/onboarding/platform.js +482 -0
- package/src/onboarding/status-bar.js +95 -0
- package/src/onboarding/templates.js +794 -0
- package/src/onboarding/toml-writer.js +38 -0
- package/src/onboarding/tui.js +250 -0
- package/src/onboarding/uninstall.js +153 -0
- package/src/onboarding/wizard.js +499 -0
- package/src/providers/anthropic.js +168 -0
- package/src/providers/base.js +247 -0
- package/src/providers/circuit-breaker.js +136 -0
- package/src/providers/ollama.js +163 -0
- package/src/providers/openai-codex.js +149 -0
- package/src/providers/openrouter.js +136 -0
- package/src/providers/registry.js +36 -0
- package/src/providers/router.js +16 -0
- package/src/runtime/bootstrap-cache.js +47 -0
- package/src/runtime/capabilities-prompt.js +25 -0
- package/src/runtime/completion-ping.js +99 -0
- package/src/runtime/config-validator.js +121 -0
- package/src/runtime/context-ledger.js +360 -0
- package/src/runtime/cutover-readiness.js +42 -0
- package/src/runtime/daemon.js +729 -0
- package/src/runtime/delivery-ack.js +195 -0
- package/src/runtime/delivery-adapters/local-file.js +41 -0
- package/src/runtime/delivery-adapters/openclaw-cli.js +94 -0
- package/src/runtime/delivery-adapters/openclaw-peer.js +98 -0
- package/src/runtime/delivery-adapters/shadow.js +13 -0
- package/src/runtime/delivery-adapters/standalone-http.js +98 -0
- package/src/runtime/delivery-adapters/telegram.js +104 -0
- package/src/runtime/delivery-adapters/tui.js +128 -0
- package/src/runtime/delivery-manager.js +807 -0
- package/src/runtime/delivery-store.js +168 -0
- package/src/runtime/dependency-health.js +118 -0
- package/src/runtime/envelope.js +114 -0
- package/src/runtime/evaluation.js +1089 -0
- package/src/runtime/exec-approvals.js +216 -0
- package/src/runtime/executor.js +500 -0
- package/src/runtime/failure-ping.js +67 -0
- package/src/runtime/flows.js +83 -0
- package/src/runtime/guards.js +45 -0
- package/src/runtime/handoff.js +51 -0
- package/src/runtime/identity-cache.js +28 -0
- package/src/runtime/improvement-engine.js +109 -0
- package/src/runtime/improvement-harness.js +581 -0
- package/src/runtime/input-sanitiser.js +72 -0
- package/src/runtime/interaction-contract.js +347 -0
- package/src/runtime/lane-readiness.js +226 -0
- package/src/runtime/migration.js +323 -0
- package/src/runtime/model-resolution.js +78 -0
- package/src/runtime/network.js +64 -0
- package/src/runtime/notification-store.js +97 -0
- package/src/runtime/notifier.js +256 -0
- package/src/runtime/orchestrator.js +53 -0
- package/src/runtime/orphan-reaper.js +41 -0
- package/src/runtime/output-contract-schema.js +139 -0
- package/src/runtime/output-contract-validator.js +439 -0
- package/src/runtime/peer-readiness.js +69 -0
- package/src/runtime/peer-registry.js +133 -0
- package/src/runtime/pilot-status.js +108 -0
- package/src/runtime/prompt-builder.js +261 -0
- package/src/runtime/provider-attempt.js +582 -0
- package/src/runtime/report-fallback.js +71 -0
- package/src/runtime/result-normalizer.js +183 -0
- package/src/runtime/retention.js +74 -0
- package/src/runtime/review.js +244 -0
- package/src/runtime/route-job.js +15 -0
- package/src/runtime/run-store.js +38 -0
- package/src/runtime/schedule.js +88 -0
- package/src/runtime/scheduler-state.js +434 -0
- package/src/runtime/scheduler.js +656 -0
- package/src/runtime/session-compactor.js +182 -0
- package/src/runtime/session-search.js +155 -0
- package/src/runtime/slack-inbound.js +249 -0
- package/src/runtime/ssrf.js +102 -0
- package/src/runtime/status-aggregator.js +330 -0
- package/src/runtime/task-contract.js +140 -0
- package/src/runtime/task-packet.js +107 -0
- package/src/runtime/task-router.js +140 -0
- package/src/runtime/telegram-inbound.js +1565 -0
- package/src/runtime/token-counter.js +134 -0
- package/src/runtime/token-estimator.js +59 -0
- package/src/runtime/tool-loop.js +200 -0
- package/src/runtime/transport-server.js +311 -0
- package/src/runtime/tui-server.js +411 -0
- package/src/runtime/ulid.js +44 -0
- package/src/security/ssrf-check.js +197 -0
- package/src/setup.js +369 -0
- package/src/shadow/bridge.js +303 -0
- package/src/skills/loader.js +84 -0
- package/src/tools/catalog.json +49 -0
- package/src/tools/cli-delegate.js +44 -0
- package/src/tools/mcp-client.js +106 -0
- package/src/tools/micro/cancel-task.js +6 -0
- package/src/tools/micro/complete-task.js +6 -0
- package/src/tools/micro/fail-task.js +6 -0
- package/src/tools/micro/http-fetch.js +74 -0
- package/src/tools/micro/index.js +36 -0
- package/src/tools/micro/lcm-recall.js +60 -0
- package/src/tools/micro/list-dir.js +17 -0
- package/src/tools/micro/list-skills.js +46 -0
- package/src/tools/micro/load-skill.js +38 -0
- package/src/tools/micro/memory-search.js +45 -0
- package/src/tools/micro/read-file.js +11 -0
- package/src/tools/micro/session-search.js +54 -0
- package/src/tools/micro/shell-exec.js +43 -0
- package/src/tools/micro/trigger-job.js +79 -0
- package/src/tools/micro/web-search.js +58 -0
- package/src/tools/micro/workspace-paths.js +39 -0
- package/src/tools/micro/write-file.js +14 -0
- package/src/tools/micro/write-memory.js +41 -0
- package/src/tools/registry.js +348 -0
- package/src/tools/tool-result-contract.js +36 -0
- package/src/tui/chat.js +835 -0
- package/src/tui/renderer.js +175 -0
- package/src/tui/socket-client.js +217 -0
- package/src/utils/canonical-json.js +29 -0
- package/src/utils/compaction.js +30 -0
- package/src/utils/env-loader.js +5 -0
- package/src/utils/errors.js +80 -0
- package/src/utils/fs.js +101 -0
- package/src/utils/ids.js +5 -0
- package/src/utils/model-context-limits.js +30 -0
- package/src/utils/token-budget.js +74 -0
- package/src/utils/usage-cost.js +25 -0
- package/src/utils/usage-metrics.js +14 -0
- package/vendor/smol-toml-1.5.2.tgz +0 -0
|
@@ -0,0 +1,581 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import { ConfigLoader } from "../config/loader.js";
|
|
3
|
+
import { RunStore } from "./run-store.js";
|
|
4
|
+
import { listFilesRecursive, readJson } from "../utils/fs.js";
|
|
5
|
+
import { classifyRuntimeFailure } from "./report-fallback.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Recursively merges `override` on top of `base` without mutating either input.
 *
 * Rules, in order:
 * - If either side is an array, arrays are replaced rather than merged: the
 *   result is a shallow copy of `override` when it is an array, otherwise a
 *   shallow copy of `base`.
 * - If `base` is not an object, `override` wins unless it is `undefined`.
 * - If `override` is not an object, it wins unless it is `undefined`, in which
 *   case a shallow copy of `base` is returned.
 * - Otherwise the result is a shallow copy of `base` with every key of
 *   `override` applied; keys that are own properties of both sides are merged
 *   recursively, all other keys take the override value as-is (by reference).
 *
 * @param {*} base - Value providing the defaults.
 * @param {*} override - Value whose entries take precedence.
 * @returns {*} The merged value.
 */
function _deepMerge(base, override) {
  if (Array.isArray(base) || Array.isArray(override)) {
    // At least one side is an array here, so no further fallback is needed.
    return Array.isArray(override) ? [...override] : [...base];
  }

  if (!base || typeof base !== "object") {
    return override === undefined ? base : override;
  }

  if (!override || typeof override !== "object") {
    return override === undefined ? { ...base } : override;
  }

  const merged = { ...base };
  for (const [key, value] of Object.entries(override)) {
    // An own "__proto__" key (e.g. produced by JSON.parse) would go through the
    // prototype setter and swap the prototype of the merged object; never copy it.
    if (key === "__proto__") {
      continue;
    }
    // Only own properties of `base` count as "already present"; `in` would also
    // match members inherited from Object.prototype.
    const hasOwnKey = Object.prototype.hasOwnProperty.call(base, key);
    merged[key] = hasOwnKey ? _deepMerge(base[key], value) : value;
  }
  return merged;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Builds the descriptor for a named improvement variant.
 *
 * Each descriptor has an `id`, a human-readable `description`, and an
 * `overrides` object. Every variant except `report_model_bump` carries the
 * target's `defaultModelOverride` (or `null`) as its model override.
 *
 * @param {object} target - Improvement target; only `defaultModelOverride` is read.
 * @param {string} variantId - One of `baseline`, `focus_concrete`,
 *   `retrieval_lexical_heavy`, `retrieval_embedding_heavy`, `report_model_bump`.
 * @returns {{ id: string, description: string, overrides: object }}
 * @throws {Error} When `variantId` is not a known variant.
 */
function buildVariant(target, variantId) {
  // Read lazily so variants that never consult the target do not touch it.
  const inheritedModel = () => target.defaultModelOverride || null;

  // Factories keep every call returning fresh objects and arrays.
  const recipes = new Map([
    [
      "baseline",
      () => ({
        id: "baseline",
        description: "Run the lane with its current guidance and routing defaults.",
        overrides: {
          modelOverride: inheritedModel()
        }
      })
    ],
    [
      "focus_concrete",
      () => ({
        id: "focus_concrete",
        description: "Push the report to be more concrete, operator-facing, and less generic.",
        overrides: {
          modelOverride: inheritedModel(),
          reportGuidanceOverride: {
            focus: ["concrete evidence", "explicit operator signal", "succinct useful status"],
            qualityChecks: ["avoid generic reassurance", "prefer named facts over abstractions", "state None explicitly"],
            avoid: ["vague encouragement", "boilerplate filler"]
          }
        }
      })
    ],
    [
      "retrieval_lexical_heavy",
      () => ({
        id: "retrieval_lexical_heavy",
        description: "Bias retrieval toward lexical match signal for clearer grounded reports.",
        overrides: {
          modelOverride: inheritedModel(),
          retrievalBlendOverride: {
            lexicalWeight: 0.48,
            embeddingWeight: 0.2,
            recencyWeight: 0.14,
            salienceWeight: 0.12,
            typeWeight: 0.04,
            semanticRescueBonus: 0.04
          }
        }
      })
    ],
    [
      "retrieval_embedding_heavy",
      () => ({
        id: "retrieval_embedding_heavy",
        description: "Bias retrieval toward embedding similarity when vectors are fresh and available.",
        overrides: {
          modelOverride: inheritedModel(),
          retrievalBlendOverride: {
            lexicalWeight: 0.26,
            embeddingWeight: 0.42,
            recencyWeight: 0.12,
            salienceWeight: 0.12,
            typeWeight: 0.04,
            semanticRescueBonus: 0.08
          }
        }
      })
    ],
    [
      "report_model_bump",
      () => ({
        id: "report_model_bump",
        description: "Try the manual-bump local report model for richer structure.",
        overrides: {
          modelOverride: "ollama/qwen3:14b"
        }
      })
    ]
  ]);

  const recipe = recipes.get(variantId);
  if (recipe === undefined) {
    throw new Error(`Unknown improvement variant: ${variantId}`);
  }
  return recipe();
}
|
|
94
|
+
|
|
95
|
+
/**
 * Reduces a full evaluation record to the fields stored in improvement artifacts.
 *
 * Missing rubric scores become `null`, missing embedding counters become `0`,
 * a missing `retrieval` section becomes `null`, and missing comparison notes
 * become an empty array.
 *
 * @param {object} evaluation - Evaluation record; must not be null or undefined.
 * @returns {object} Summary with `filePath`, `overallScore`, `contractAdherence`,
 *   `v2OutputQuality`, `retrieval`, `interaction` and `findings`.
 */
function summarizeEvaluation(evaluation) {
  const { filePath, rubric, retrieval, interaction, comparisonNotes } = evaluation;
  const components = rubric?.components;

  let retrievalSummary = null;
  if (retrieval) {
    retrievalSummary = {
      memoryCount: retrieval.memoryCount,
      lexicalCount: retrieval.lexicalCount,
      semanticCount: retrieval.semanticCount,
      qmdCount: retrieval.qmdCount,
      embeddingQueryMode: retrieval.embeddingQueryMode || null,
      freshEmbeddingCount: retrieval.freshEmbeddingCount || 0,
      staleEmbeddingCount: retrieval.staleEmbeddingCount || 0,
      missingEmbeddingCount: retrieval.missingEmbeddingCount || 0,
      failedEmbeddingCount: retrieval.failedEmbeddingCount || 0,
      embeddingError: retrieval.embeddingError || null
    };
  }

  return {
    filePath,
    overallScore: rubric?.overallScore ?? null,
    contractAdherence: components?.contractAdherence ?? null,
    v2OutputQuality: components?.v2OutputQuality ?? null,
    retrieval: retrievalSummary,
    interaction: interaction || null,
    findings: comparisonNotes || []
  };
}
|
|
119
|
+
|
|
120
|
+
/**
 * Converts a snake_case target id to camelCase.
 *
 * Only an underscore followed by a lowercase ASCII letter is rewritten; any
 * falsy input yields an empty string.
 *
 * @param {*} targetId - Target id as supplied by the caller.
 * @returns {string} The camelCased id.
 */
function normalizeTargetId(targetId) {
  const raw = String(targetId || "");
  const capitalize = (_whole, ch) => ch.toUpperCase();
  return raw.replace(/_([a-z])/g, capitalize);
}
|
|
123
|
+
|
|
124
|
+
/**
 * Converts a caught failure into the plain record stored in run artifacts.
 *
 * Anything can be thrown in JavaScript, so the input is not assumed to be an
 * `Error`: a thrown string, `null` or `undefined` is summarized through its
 * string form instead of raising a second error inside the caller's `catch`.
 *
 * @param {*} error - The caught value. `message`, `runFile` and `fallback`
 *   are read from it when present.
 * @returns {{ message: string, runFile: *, classification: *, fallback: * }}
 *   `classification` is whatever `classifyRuntimeFailure` returns for the message.
 */
function summarizeFailure(error) {
  // Fall back to the string form when the thrown value carries no message.
  const message = error?.message ?? String(error);
  return {
    message,
    runFile: error?.runFile || null,
    classification: classifyRuntimeFailure(message || ""),
    fallback: error?.fallback || null
  };
}
|
|
132
|
+
|
|
133
|
+
/**
 * Builds the record describing why a variant run was skipped.
 *
 * @param {*} reason - Explanation for the skip.
 * @param {object} [details={}] - Extra fields spread after `reason`, so a
 *   `reason` key inside `details` takes precedence.
 * @returns {object} `{ reason, ...details }`.
 */
function summarizeSkip(reason, details = {}) {
  const skipRecord = { reason, ...details };
  return skipRecord;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Compares two evaluation summaries and reports whether the candidate improved.
 *
 * The candidate counts as improved only when both overall scores are present,
 * the candidate's is strictly higher, its contract adherence is not lower
 * (missing values count as 0), and its failed-embedding count is not higher.
 *
 * @param {object|null} baseline - Summary of the baseline run.
 * @param {object|null} candidate - Summary of the candidate run.
 * @returns {{ improved: boolean, baseline: *, candidate: *, findings: string[] }}
 *   The inputs are returned by reference alongside the verdict and notes.
 */
function compareEvaluations(baseline, candidate) {
  const findings = [];

  // [summary key, note when the candidate is higher, note when it is lower]
  const scoredMetrics = [
    [
      "overallScore",
      "Candidate improved overall deterministic eval score.",
      "Candidate regressed on overall deterministic eval score."
    ],
    [
      "contractAdherence",
      "Candidate improved contract adherence.",
      "Candidate regressed on contract adherence."
    ],
    [
      "v2OutputQuality",
      "Candidate improved output-quality signals.",
      "Candidate regressed on output-quality signals."
    ]
  ];

  for (const [metric, higherNote, lowerNote] of scoredMetrics) {
    const before = baseline?.[metric] ?? null;
    const after = candidate?.[metric] ?? null;
    // A metric is only comparable when both sides reported it.
    if (before === null || after === null) {
      continue;
    }
    if (after > before) {
      findings.push(higherNote);
    } else if (after < before) {
      findings.push(lowerNote);
    }
  }

  const semanticBefore = baseline?.retrieval?.semanticCount ?? 0;
  const semanticAfter = candidate?.retrieval?.semanticCount ?? 0;
  if (semanticAfter > semanticBefore) {
    findings.push("Candidate surfaced more semantic retrieval hits.");
  }

  const failedBefore = baseline?.retrieval?.failedEmbeddingCount || 0;
  const failedAfter = candidate?.retrieval?.failedEmbeddingCount || 0;
  if (failedAfter > failedBefore) {
    findings.push("Candidate increased embedding failure count.");
  }

  const scoreBefore = baseline?.overallScore ?? null;
  const scoreAfter = candidate?.overallScore ?? null;
  const contractBefore = baseline?.contractAdherence ?? 0;
  const contractAfter = candidate?.contractAdherence ?? 0;

  const improved =
    scoreAfter !== null &&
    scoreBefore !== null &&
    scoreAfter > scoreBefore &&
    contractAfter >= contractBefore &&
    failedAfter <= failedBefore;

  return { improved, baseline, candidate, findings };
}
|
|
182
|
+
|
|
183
|
+
/**
 * Assembles the embedding section stored on improvement artifacts.
 *
 * Every field is optional; omitted (or `undefined`) fields default to `null`,
 * except `rebuilt`, which defaults to `false`.
 *
 * @param {object} [summary={}]
 * @param {*} [summary.readiness=null] - Readiness report of the embedding service.
 * @param {*} [summary.health=null] - Embedding health snapshot.
 * @param {boolean} [summary.rebuilt=false] - Whether a rebuild was recorded.
 * @param {*} [summary.blockedReason=null] - Why the variant was blocked, if it was.
 * @param {*} [summary.queryMode=null] - Embedding query mode reported by the evaluation.
 * @returns {{ readiness: *, health: *, rebuilt: *, blockedReason: *, queryMode: * }}
 */
function buildEmbeddingSummary(summary = {}) {
  const {
    readiness = null,
    health = null,
    rebuilt = false,
    blockedReason = null,
    queryMode = null
  } = summary;

  return { readiness, health, rebuilt, blockedReason, queryMode };
}
|
|
192
|
+
|
|
193
|
+
export class ImprovementHarness {
|
|
194
|
+
constructor({ projectRoot, stateRoot, executor, evaluator }) {
|
|
195
|
+
this.projectRoot = projectRoot;
|
|
196
|
+
this.stateRoot = stateRoot;
|
|
197
|
+
this.executor = executor;
|
|
198
|
+
this.evaluator = evaluator;
|
|
199
|
+
this.loader = new ConfigLoader({ rootDir: path.join(projectRoot, "config") });
|
|
200
|
+
this.runStore = new RunStore({ rootDir: path.join(stateRoot, "improvements") });
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
async findLatestVariantArtifact(targetId, variantId, options = {}) {
|
|
204
|
+
const targetDir = path.join(this.stateRoot, "improvements", normalizeTargetId(targetId));
|
|
205
|
+
const files = (await listFilesRecursive(targetDir)).filter((filePath) => filePath.endsWith(".json")).sort().reverse();
|
|
206
|
+
|
|
207
|
+
for (const filePath of files) {
|
|
208
|
+
const artifact = await readJson(filePath, null);
|
|
209
|
+
if (!artifact) continue;
|
|
210
|
+
if (artifact.targetId !== normalizeTargetId(targetId)) continue;
|
|
211
|
+
if (artifact.variant?.id !== variantId) continue;
|
|
212
|
+
if (options.successfulOnly && !artifact.ok) continue;
|
|
213
|
+
return {
|
|
214
|
+
filePath,
|
|
215
|
+
...artifact
|
|
216
|
+
};
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
return null;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
async listTargets() {
|
|
223
|
+
const config = await this.loader.loadAll();
|
|
224
|
+
return Object.entries(config.improvementTargets || {}).map(([targetId, target]) => ({
|
|
225
|
+
id: targetId,
|
|
226
|
+
jobId: target.jobId,
|
|
227
|
+
defaultMode: target.defaultMode || "provider",
|
|
228
|
+
defaultModelOverride: target.defaultModelOverride || null,
|
|
229
|
+
defaultTimeoutMs: target.defaultTimeoutMs || null,
|
|
230
|
+
allowedKnobs: target.allowedKnobs || [],
|
|
231
|
+
recommendedVariants: target.recommendedVariants || []
|
|
232
|
+
}));
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
async getTarget(targetId) {
|
|
236
|
+
const config = await this.loader.loadAll();
|
|
237
|
+
const resolvedTargetId = normalizeTargetId(targetId);
|
|
238
|
+
const target = config.improvementTargets?.[resolvedTargetId];
|
|
239
|
+
if (!target) {
|
|
240
|
+
throw new Error(`Unknown improvement target: ${targetId}`);
|
|
241
|
+
}
|
|
242
|
+
return {
|
|
243
|
+
id: resolvedTargetId,
|
|
244
|
+
...target
|
|
245
|
+
};
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
async checkVariantPrerequisites(target, variant) {
|
|
249
|
+
if (variant.id !== "retrieval_embedding_heavy") {
|
|
250
|
+
return null;
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
const runtime = await this.executor.scheduler.loadRuntime();
|
|
254
|
+
const job = runtime.jobs?.[target.jobId];
|
|
255
|
+
const agentId = job?.agentId;
|
|
256
|
+
if (!agentId) {
|
|
257
|
+
return {
|
|
258
|
+
blocked: true,
|
|
259
|
+
reason: "Could not resolve agent for embedding-heavy variant."
|
|
260
|
+
};
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
const health = await this.executor.scheduler.memoryStore.getEmbeddingHealth(agentId, {
|
|
264
|
+
embeddingIndex: this.executor.scheduler.embeddingIndex,
|
|
265
|
+
probe: false
|
|
266
|
+
});
|
|
267
|
+
|
|
268
|
+
if ((health.embeddingHealth?.freshCount || 0) > 0) {
|
|
269
|
+
return null;
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
const embeddingService = this.executor.scheduler.embeddingIndex?.embeddingService || null;
|
|
273
|
+
if (!embeddingService) {
|
|
274
|
+
return {
|
|
275
|
+
blocked: true,
|
|
276
|
+
reason: "No embedding service is configured for this runtime.",
|
|
277
|
+
health
|
|
278
|
+
};
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
const readiness = await embeddingService.getReadiness();
|
|
282
|
+
if (readiness?.ready) {
|
|
283
|
+
try {
|
|
284
|
+
await this.executor.scheduler.memoryStore.rebuildEmbeddings(agentId, {
|
|
285
|
+
embeddingIndex: this.executor.scheduler.embeddingIndex
|
|
286
|
+
});
|
|
287
|
+
const refreshed = await this.executor.scheduler.memoryStore.getEmbeddingHealth(agentId, {
|
|
288
|
+
embeddingIndex: this.executor.scheduler.embeddingIndex,
|
|
289
|
+
probe: false
|
|
290
|
+
});
|
|
291
|
+
if ((refreshed.embeddingHealth?.freshCount || 0) > 0) {
|
|
292
|
+
return {
|
|
293
|
+
blocked: false,
|
|
294
|
+
rebuilt: true,
|
|
295
|
+
health: refreshed,
|
|
296
|
+
readiness
|
|
297
|
+
};
|
|
298
|
+
}
|
|
299
|
+
return {
|
|
300
|
+
blocked: true,
|
|
301
|
+
reason: "Embeddings were rebuildable but no fresh vectors were produced for this lane.",
|
|
302
|
+
health: refreshed,
|
|
303
|
+
readiness,
|
|
304
|
+
rebuilt: true
|
|
305
|
+
};
|
|
306
|
+
} catch (error) {
|
|
307
|
+
const refreshed = await this.executor.scheduler.memoryStore.getEmbeddingHealth(agentId, {
|
|
308
|
+
embeddingIndex: this.executor.scheduler.embeddingIndex,
|
|
309
|
+
probe: false
|
|
310
|
+
});
|
|
311
|
+
return {
|
|
312
|
+
blocked: true,
|
|
313
|
+
reason: error?.message || String(error),
|
|
314
|
+
health: refreshed,
|
|
315
|
+
readiness,
|
|
316
|
+
rebuilt: true
|
|
317
|
+
};
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
return {
|
|
322
|
+
blocked: true,
|
|
323
|
+
reason: readiness?.reason || "Embeddings are unavailable or degraded for this lane.",
|
|
324
|
+
health,
|
|
325
|
+
readiness
|
|
326
|
+
};
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
async runVariant(targetId, variantId = "baseline", options = {}) {
|
|
330
|
+
const target = await this.getTarget(targetId);
|
|
331
|
+
const variant = buildVariant(target, variantId);
|
|
332
|
+
const mode = target.defaultMode || "provider";
|
|
333
|
+
let embedding = buildEmbeddingSummary();
|
|
334
|
+
let artifact;
|
|
335
|
+
try {
|
|
336
|
+
const prerequisiteFailure = await this.checkVariantPrerequisites(target, variant);
|
|
337
|
+
embedding = buildEmbeddingSummary({
|
|
338
|
+
readiness: prerequisiteFailure?.readiness || null,
|
|
339
|
+
health: prerequisiteFailure?.health?.embeddingHealth || null,
|
|
340
|
+
rebuilt: Boolean(prerequisiteFailure?.rebuilt),
|
|
341
|
+
blockedReason: prerequisiteFailure?.blocked ? prerequisiteFailure.reason : null
|
|
342
|
+
});
|
|
343
|
+
if (prerequisiteFailure?.blocked) {
|
|
344
|
+
artifact = {
|
|
345
|
+
timestamp: new Date().toISOString(),
|
|
346
|
+
targetId: target.id,
|
|
347
|
+
jobId: target.jobId,
|
|
348
|
+
variant,
|
|
349
|
+
ok: false,
|
|
350
|
+
skipped: true,
|
|
351
|
+
embedding,
|
|
352
|
+
skip: summarizeSkip(prerequisiteFailure.reason, {
|
|
353
|
+
embeddingHealth: prerequisiteFailure.health?.embeddingHealth || null,
|
|
354
|
+
embeddingReadiness: prerequisiteFailure.readiness || null
|
|
355
|
+
})
|
|
356
|
+
};
|
|
357
|
+
const filePath = await this.runStore.saveRun(target.id, artifact);
|
|
358
|
+
return {
|
|
359
|
+
filePath,
|
|
360
|
+
...artifact
|
|
361
|
+
};
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
const run = await this.executor.executeJob(target.jobId, {
|
|
365
|
+
mode,
|
|
366
|
+
shadowImport: true,
|
|
367
|
+
modelOverride: variant.overrides.modelOverride || null,
|
|
368
|
+
reportGuidanceOverride: variant.overrides.reportGuidanceOverride || null,
|
|
369
|
+
retrievalBlendOverride: variant.overrides.retrievalBlendOverride || null,
|
|
370
|
+
providerTimeoutMs: target.defaultTimeoutMs || null,
|
|
371
|
+
allowReportFallback: options.allowReportFallback === true
|
|
372
|
+
});
|
|
373
|
+
const evaluation = await this.evaluator.evaluateAndPersistJob(target.jobId);
|
|
374
|
+
artifact = {
|
|
375
|
+
timestamp: new Date().toISOString(),
|
|
376
|
+
targetId: target.id,
|
|
377
|
+
jobId: target.jobId,
|
|
378
|
+
variant,
|
|
379
|
+
ok: true,
|
|
380
|
+
embedding: buildEmbeddingSummary({
|
|
381
|
+
readiness: embedding.readiness,
|
|
382
|
+
health: evaluation.retrieval?.embeddingHealth || embedding.health,
|
|
383
|
+
rebuilt: embedding.rebuilt,
|
|
384
|
+
blockedReason: null,
|
|
385
|
+
queryMode: evaluation.retrieval?.embeddingQueryMode || null
|
|
386
|
+
}),
|
|
387
|
+
run: {
|
|
388
|
+
filePath: run.filePath,
|
|
389
|
+
mode: run.mode,
|
|
390
|
+
providerId: run.providerId,
|
|
391
|
+
modelId: run.modelId,
|
|
392
|
+
routingDecision: run.routingDecision,
|
|
393
|
+
preflight: run.preflight,
|
|
394
|
+
summary: run.result?.summary || null,
|
|
395
|
+
fallback: run.fallback || null
|
|
396
|
+
},
|
|
397
|
+
evaluation: summarizeEvaluation(evaluation)
|
|
398
|
+
};
|
|
399
|
+
} catch (error) {
|
|
400
|
+
artifact = {
|
|
401
|
+
timestamp: new Date().toISOString(),
|
|
402
|
+
targetId: target.id,
|
|
403
|
+
jobId: target.jobId,
|
|
404
|
+
variant,
|
|
405
|
+
ok: false,
|
|
406
|
+
embedding,
|
|
407
|
+
error: summarizeFailure(error)
|
|
408
|
+
};
|
|
409
|
+
}
|
|
410
|
+
const filePath = await this.runStore.saveRun(target.id, artifact);
|
|
411
|
+
return {
|
|
412
|
+
filePath,
|
|
413
|
+
...artifact
|
|
414
|
+
};
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
async compareVariants(targetId, baselineVariantId = "baseline", candidateVariantId, options = {}) {
|
|
418
|
+
if (!candidateVariantId) {
|
|
419
|
+
throw new Error("compareVariants requires a candidate variant id.");
|
|
420
|
+
}
|
|
421
|
+
const baseline = (await this.findLatestVariantArtifact(targetId, baselineVariantId, { successfulOnly: true })) ||
|
|
422
|
+
(await this.runVariant(targetId, baselineVariantId, options));
|
|
423
|
+
const candidate = await this.runVariant(targetId, candidateVariantId, options);
|
|
424
|
+
let comparisonResult;
|
|
425
|
+
if (!baseline.ok || !candidate.ok) {
|
|
426
|
+
comparisonResult = {
|
|
427
|
+
improved: false,
|
|
428
|
+
baseline: baseline.ok ? baseline.evaluation : null,
|
|
429
|
+
candidate: candidate.ok ? candidate.evaluation : null,
|
|
430
|
+
findings: [
|
|
431
|
+
...(!baseline.ok
|
|
432
|
+
? [baseline.skipped ? `Baseline variant skipped: ${baseline.skip.reason}` : `Baseline variant failed: ${baseline.error.message}`]
|
|
433
|
+
: []),
|
|
434
|
+
...(!candidate.ok
|
|
435
|
+
? [candidate.skipped ? `Candidate variant skipped: ${candidate.skip.reason}` : `Candidate variant failed: ${candidate.error.message}`]
|
|
436
|
+
: [])
|
|
437
|
+
]
|
|
438
|
+
};
|
|
439
|
+
} else {
|
|
440
|
+
comparisonResult = compareEvaluations(baseline.evaluation, candidate.evaluation);
|
|
441
|
+
if (
|
|
442
|
+
candidateVariantId === "retrieval_embedding_heavy" &&
|
|
443
|
+
(
|
|
444
|
+
candidate.evaluation?.retrieval?.embeddingQueryMode === "lexical_fallback" ||
|
|
445
|
+
(candidate.evaluation?.retrieval?.freshEmbeddingCount || 0) === 0
|
|
446
|
+
)
|
|
447
|
+
) {
|
|
448
|
+
comparisonResult.improved = false;
|
|
449
|
+
comparisonResult.findings.push(
|
|
450
|
+
"Candidate could not exercise embedding-heavy retrieval because embeddings were unavailable or degraded."
|
|
451
|
+
);
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
const comparison = {
|
|
455
|
+
timestamp: new Date().toISOString(),
|
|
456
|
+
targetId: baseline.targetId,
|
|
457
|
+
baselineVariantId,
|
|
458
|
+
candidateVariantId,
|
|
459
|
+
fallback: {
|
|
460
|
+
baseline: baseline.run?.fallback || baseline.error?.fallback || null,
|
|
461
|
+
candidate: candidate.run?.fallback || candidate.error?.fallback || null
|
|
462
|
+
},
|
|
463
|
+
embedding: {
|
|
464
|
+
baseline: baseline.embedding || null,
|
|
465
|
+
candidate: candidate.embedding || null
|
|
466
|
+
},
|
|
467
|
+
comparison: comparisonResult
|
|
468
|
+
};
|
|
469
|
+
const filePath = await this.runStore.saveRun(`${baseline.targetId}-comparison`, comparison);
|
|
470
|
+
return {
|
|
471
|
+
filePath,
|
|
472
|
+
...comparison
|
|
473
|
+
};
|
|
474
|
+
}
|
|
475
|
+
|
|
476
|
+
async repairAndCompare(targetId, candidateVariantId, baselineVariantId = "baseline", options = {}) {
|
|
477
|
+
if (!candidateVariantId) {
|
|
478
|
+
throw new Error("repairAndCompare requires a candidate variant id.");
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
const target = await this.getTarget(targetId);
|
|
482
|
+
const candidateVariant = buildVariant(target, candidateVariantId);
|
|
483
|
+
const prerequisite = await this.checkVariantPrerequisites(target, candidateVariant);
|
|
484
|
+
|
|
485
|
+
if (prerequisite?.blocked) {
|
|
486
|
+
const artifact = {
|
|
487
|
+
timestamp: new Date().toISOString(),
|
|
488
|
+
targetId: target.id,
|
|
489
|
+
baselineVariantId,
|
|
490
|
+
candidateVariantId,
|
|
491
|
+
repaired: false,
|
|
492
|
+
blocked: true,
|
|
493
|
+
embedding: buildEmbeddingSummary({
|
|
494
|
+
readiness: prerequisite.readiness || null,
|
|
495
|
+
health: prerequisite.health?.embeddingHealth || null,
|
|
496
|
+
rebuilt: Boolean(prerequisite.rebuilt),
|
|
497
|
+
blockedReason: prerequisite.reason
|
|
498
|
+
}),
|
|
499
|
+
findings: [`Repair blocked: ${prerequisite.reason}`]
|
|
500
|
+
};
|
|
501
|
+
const filePath = await this.runStore.saveRun(`${target.id}-repair`, artifact);
|
|
502
|
+
return {
|
|
503
|
+
filePath,
|
|
504
|
+
...artifact
|
|
505
|
+
};
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
const rerunBaseline = Boolean(prerequisite?.rebuilt);
|
|
509
|
+
const baseline = rerunBaseline
|
|
510
|
+
? await this.runVariant(targetId, baselineVariantId, options)
|
|
511
|
+
: (await this.findLatestVariantArtifact(targetId, baselineVariantId, { successfulOnly: true })) ||
|
|
512
|
+
(await this.runVariant(targetId, baselineVariantId, options));
|
|
513
|
+
const candidate = await this.runVariant(targetId, candidateVariantId, options);
|
|
514
|
+
|
|
515
|
+
let comparisonResult;
|
|
516
|
+
if (!baseline.ok || !candidate.ok) {
|
|
517
|
+
comparisonResult = {
|
|
518
|
+
improved: false,
|
|
519
|
+
baseline: baseline.ok ? baseline.evaluation : null,
|
|
520
|
+
candidate: candidate.ok ? candidate.evaluation : null,
|
|
521
|
+
findings: [
|
|
522
|
+
...(!baseline.ok
|
|
523
|
+
? [baseline.skipped ? `Baseline variant skipped: ${baseline.skip.reason}` : `Baseline variant failed: ${baseline.error.message}`]
|
|
524
|
+
: []),
|
|
525
|
+
...(!candidate.ok
|
|
526
|
+
? [candidate.skipped ? `Candidate variant skipped: ${candidate.skip.reason}` : `Candidate variant failed: ${candidate.error.message}`]
|
|
527
|
+
: [])
|
|
528
|
+
]
|
|
529
|
+
};
|
|
530
|
+
} else {
|
|
531
|
+
comparisonResult = compareEvaluations(baseline.evaluation, candidate.evaluation);
|
|
532
|
+
if (
|
|
533
|
+
candidateVariantId === "retrieval_embedding_heavy" &&
|
|
534
|
+
(
|
|
535
|
+
candidate.evaluation?.retrieval?.embeddingQueryMode === "lexical_fallback" ||
|
|
536
|
+
(candidate.evaluation?.retrieval?.freshEmbeddingCount || 0) === 0
|
|
537
|
+
)
|
|
538
|
+
) {
|
|
539
|
+
comparisonResult.improved = false;
|
|
540
|
+
comparisonResult.findings.push(
|
|
541
|
+
"Candidate could not exercise embedding-heavy retrieval because embeddings were unavailable or degraded."
|
|
542
|
+
);
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
const artifact = {
|
|
547
|
+
timestamp: new Date().toISOString(),
|
|
548
|
+
targetId: target.id,
|
|
549
|
+
baselineVariantId,
|
|
550
|
+
candidateVariantId,
|
|
551
|
+
repaired: Boolean(prerequisite?.rebuilt),
|
|
552
|
+
blocked: false,
|
|
553
|
+
fallback: {
|
|
554
|
+
baseline: baseline.run?.fallback || baseline.error?.fallback || null,
|
|
555
|
+
candidate: candidate.run?.fallback || candidate.error?.fallback || null
|
|
556
|
+
},
|
|
557
|
+
embedding: {
|
|
558
|
+
readiness: prerequisite?.readiness || null,
|
|
559
|
+
health: candidate.embedding?.health || prerequisite?.health?.embeddingHealth || null,
|
|
560
|
+
rebuilt: Boolean(prerequisite?.rebuilt),
|
|
561
|
+
blockedReason: null,
|
|
562
|
+
queryMode: candidate.embedding?.queryMode || null
|
|
563
|
+
},
|
|
564
|
+
baseline: {
|
|
565
|
+
filePath: baseline.filePath,
|
|
566
|
+
evaluationFile: baseline.evaluation?.filePath || null
|
|
567
|
+
},
|
|
568
|
+
candidate: {
|
|
569
|
+
filePath: candidate.filePath,
|
|
570
|
+
evaluationFile: candidate.evaluation?.filePath || null,
|
|
571
|
+
status: candidate.ok ? "evaluated" : candidate.skipped ? "skipped" : candidate.error?.classification || "failed"
|
|
572
|
+
},
|
|
573
|
+
comparison: comparisonResult
|
|
574
|
+
};
|
|
575
|
+
const filePath = await this.runStore.saveRun(`${target.id}-repair`, artifact);
|
|
576
|
+
return {
|
|
577
|
+
filePath,
|
|
578
|
+
...artifact
|
|
579
|
+
};
|
|
580
|
+
}
|
|
581
|
+
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { formatRuntimeError, RuntimeError } from "../utils/errors.js";
|
|
2
|
+
|
|
3
|
+
// Heuristic signatures of prompt-injection attempts in tool output.
// Each entry: `id` (stable key), `regex` (tested against the whole text),
// `label` (human-readable name reported to callers).
//
// None of the regexes use the `g`/`y` flags, so `.test()` keeps no state
// between calls. The IMPORTANT:/CRITICAL: patterns are deliberately
// case-sensitive (they target the shouted form at the start of a line); the
// role-tag patterns are case-insensitive so `<SYSTEM>` or `</Assistant>`
// cannot slip past.
const INJECTION_PATTERNS = [
  { id: "ignore_previous", regex: /ignore\s+(all\s+)?previous\s+instructions/i, label: "ignore previous instructions" },
  { id: "you_are_now", regex: /you\s+are\s+now\b/i, label: "identity override (you are now)" },
  { id: "system_prompt", regex: /system\s+prompt\s*:/i, label: "system prompt injection" },
  { id: "important_start", regex: /^(?:\s*)IMPORTANT\s*:/m, label: "IMPORTANT: at start of output" },
  { id: "critical_start", regex: /^(?:\s*)CRITICAL\s*:/m, label: "CRITICAL: at start of output" },
  { id: "fake_system_tag", regex: /<\/?system(?:\s[^>]*)?>/i, label: "XML <system> boundary tag" },
  { id: "fake_assistant_tag", regex: /<\/?assistant(?:\s[^>]*)?>/i, label: "XML <assistant> boundary tag" },
  { id: "fake_user_tag", regex: /<\/?user(?:\s[^>]*)?>/i, label: "XML <user> boundary tag" },
  { id: "fake_tool_boundary", regex: /\[TOOL_OUTPUT:(START|END)\]/i, label: "spoofed TOOL_OUTPUT boundary marker" },
  { id: "disregard", regex: /disregard\s+(all\s+)?(prior|previous|above)\s+(instructions|context)/i, label: "disregard prior instructions" },
  { id: "new_instructions", regex: /new\s+instructions?\s*:/i, label: "new instructions injection" }
];
|
|
16
|
+
|
|
17
|
+
/**
 * Wraps an external tool output in boundary markers so the LLM can tell
 * trusted system content apart from untrusted external data.
 *
 * The tool name is embedded in the opening marker, so whitespace and square
 * brackets in it are replaced with "_". Otherwise a crafted name could close
 * the marker early or append spoofed attributes (e.g. `trust=system`).
 * The output text itself is passed through unchanged.
 *
 * @param {*} toolOutput - Raw tool output; null/undefined become "".
 * @param {*} toolName - Tool identifier; null/undefined become "unknown".
 * @returns {string} The output framed by TOOL_OUTPUT start/end marker lines.
 */
export function tagUntrustedInput(toolOutput, toolName) {
  const text = String(toolOutput ?? "");
  const name = String(toolName ?? "unknown").replace(/[\s\[\]]/g, "_");
  return `[TOOL_OUTPUT:START tool=${name} trust=external]\n${text}\n[TOOL_OUTPUT:END]`;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Tests the given text against every entry of INJECTION_PATTERNS.
 *
 * The input is never modified: `sanitised` is simply the stringified input
 * (null/undefined become ""). Callers decide what to do with the flag and
 * the list of matched labels.
 *
 * @param {*} text - Text to scan; coerced with String().
 * @returns {{flagged: boolean, patterns: string[], sanitised: string}}
 *   `patterns` holds the labels of all matching entries, in table order.
 */
export function detectInjectionPatterns(text) {
  const sanitised = String(text ?? "");
  const patterns = INJECTION_PATTERNS
    .filter(({ regex }) => regex.test(sanitised))
    .map(({ label }) => label);

  return { flagged: patterns.length > 0, patterns, sanitised };
}
|
|
49
|
+
|
|
50
|
+
/**
 * Executor entry point: scans a tool output for injection patterns and wraps
 * it in untrusted-input boundary markers, in a single call.
 *
 * A detection only produces a console.warn with a formatted RuntimeError; the
 * output is still tagged and returned, so a false positive never breaks the
 * run.
 *
 * @param {*} toolOutput - Raw output produced by the tool.
 * @param {*} toolName - Name of the tool, used in the warning and the tag.
 * @returns {{tagged: string, detection: object}} The tagged text and the
 *   unmodified result of detectInjectionPatterns.
 */
export function processToolOutput(toolOutput, toolName) {
  const scan = detectInjectionPatterns(toolOutput);
  const { flagged, patterns, sanitised } = scan;

  if (flagged) {
    const message = `Potential prompt injection detected in output from tool "${toolName}"`;
    const details = {
      category: "security",
      context: { toolName, patterns: patterns.join("; ") },
      recoverable: true
    };
    // Warn only: the error object is built for formatting, never thrown.
    console.warn(formatRuntimeError(new RuntimeError(message, details)));
  }

  return {
    tagged: tagUntrustedInput(sanitised, toolName),
    detection: scan
  };
}
|