@neuroverseos/governance 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.well-known/ai-plugin.json +26 -0
- package/.well-known/mcp.json +68 -0
- package/AGENTS.md +219 -0
- package/README.md +64 -4
- package/dist/adapters/autoresearch.cjs +196 -0
- package/dist/adapters/autoresearch.d.cts +103 -0
- package/dist/adapters/autoresearch.d.ts +103 -0
- package/dist/adapters/autoresearch.js +7 -0
- package/dist/adapters/express.d.cts +1 -1
- package/dist/adapters/express.d.ts +1 -1
- package/dist/adapters/express.js +1 -1
- package/dist/adapters/index.cjs +171 -0
- package/dist/adapters/index.d.cts +2 -1
- package/dist/adapters/index.d.ts +2 -1
- package/dist/adapters/index.js +8 -4
- package/dist/adapters/langchain.d.cts +1 -1
- package/dist/adapters/langchain.d.ts +1 -1
- package/dist/adapters/langchain.js +2 -2
- package/dist/adapters/openai.d.cts +1 -1
- package/dist/adapters/openai.d.ts +1 -1
- package/dist/adapters/openai.js +2 -2
- package/dist/adapters/openclaw.d.cts +1 -1
- package/dist/adapters/openclaw.d.ts +1 -1
- package/dist/adapters/openclaw.js +2 -2
- package/dist/chunk-T5EUJQE5.js +172 -0
- package/dist/cli/neuroverse.cjs +1157 -184
- package/dist/cli/neuroverse.js +18 -6
- package/dist/cli/run.js +2 -2
- package/dist/{doctor-QV6HELS5.js → doctor-XPDLEYXN.js} +1 -0
- package/dist/{guard-contract-Cm91Kp4j.d.cts → guard-contract-WZx__PmU.d.cts} +1 -1
- package/dist/{guard-contract-Cm91Kp4j.d.ts → guard-contract-WZx__PmU.d.ts} +1 -1
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +28 -28
- package/dist/infer-world-7GVZWFX4.js +543 -0
- package/dist/init-world-VWMQZQC7.js +223 -0
- package/dist/{mcp-server-LZVJHBT5.js → mcp-server-FPVSU32Z.js} +2 -2
- package/dist/{session-VISISNWJ.js → session-EKTRSR7C.js} +2 -2
- package/dist/worlds/autoresearch.nv-world.md +230 -0
- package/llms.txt +79 -0
- package/openapi.yaml +230 -0
- package/package.json +15 -4
- package/dist/{chunk-SKU3GAPD.js → chunk-2PQU3VAN.js} +3 -3
- package/dist/{chunk-KEST3MWO.js → chunk-4A7LISES.js} +3 -3
- package/dist/{chunk-RWXVAH6P.js → chunk-COT5XS4V.js} +3 -3
- package/dist/{chunk-OHAC6HJE.js → chunk-ER62HNGF.js} +3 -3
- package/dist/{chunk-DPVS43ZT.js → chunk-OGL7QXZS.js} +3 -3
- package/dist/{guard-GFLQZY6U.js → guard-RV65TT4L.js} +1 -1
- package/dist/{playground-FGOMASHN.js → playground-E664U4T6.js} +1 -1
- package/dist/{redteam-SK7AMIG3.js → redteam-Z7WREJ44.js} +1 -1
- package/dist/{test-75AVHC3R.js → test-OGXJK4QU.js} +1 -1
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import "./chunk-YZFATT7X.js";
|
|
2
|
+
|
|
3
|
+
// src/cli/init-world.ts
|
|
4
|
+
import { readFileSync, existsSync } from "fs";
|
|
5
|
+
import { writeFile } from "fs/promises";
|
|
6
|
+
import { dirname, join } from "path";
|
|
7
|
+
import { fileURLToPath } from "url";
|
|
8
|
+
/**
 * Parse CLI arguments for `neuroverse init-world`.
 *
 * argv[0] is the template name; the remaining entries are flag/value
 * pairs (--context, --dataset, --goal, --metric, --optimize, --compute,
 * --output). Unknown flags, and flags missing their value, are ignored.
 *
 * @param {string[]} argv - Arguments after the subcommand name.
 * @returns {{ template: string, config: object, outputPath: string }}
 *   Parsed template name, customization config, and output file path
 *   ("" when not supplied).
 */
function parseArgs(argv) {
  const template = argv[0] || "";
  let outputPath = "";
  const config = {};
  for (let i = 1; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === "--context" && i + 1 < argv.length) {
      config.context = argv[++i];
    } else if (arg === "--dataset" && i + 1 < argv.length) {
      config.dataset = argv[++i];
    } else if (arg === "--goal" && i + 1 < argv.length) {
      config.goal = argv[++i];
    } else if (arg === "--metric" && i + 1 < argv.length) {
      config.metric = argv[++i];
    } else if (arg === "--optimize" && i + 1 < argv.length) {
      const dir = argv[++i];
      // Only the two recognized directions are accepted; anything else
      // is silently dropped so the downstream default ("minimize") applies.
      if (dir === "minimize" || dir === "maximize") {
        config.optimize = dir;
      }
    } else if (arg === "--compute" && i + 1 < argv.length) {
      // Fix: reject non-numeric or non-positive budgets instead of
      // storing NaN (e.g. `--compute abc`) in the config object.
      const minutes = Number.parseInt(argv[++i], 10);
      if (Number.isFinite(minutes) && minutes > 0) {
        config.computeBudgetMinutes = minutes;
      }
    } else if (arg === "--output" && i + 1 < argv.length) {
      outputPath = argv[++i];
    }
  }
  return { template, config, outputPath };
}
|
|
35
|
+
// Templates that `init-world` can scaffold. Extend this list when new
// bundled `<name>.nv-world.md` templates are shipped alongside this module.
var AVAILABLE_TEMPLATES = ["autoresearch"];
|
|
36
|
+
/**
 * Load the raw markdown for a bundled world template.
 *
 * Resolves the template file relative to this module (preferring
 * `../worlds/` then `./worlds/`) and finally relative to the current
 * working directory (`src/worlds/`).
 *
 * @param {string} template - Template name, e.g. "autoresearch".
 * @returns {string} The template file contents (UTF-8).
 * @throws {Error} When no candidate path exists on disk.
 */
function loadBaseTemplate(template) {
  let currentDir;
  try {
    currentDir = dirname(fileURLToPath(import.meta.url));
  } catch {
    // Fix: in an ES-module build a bare `__dirname` reference is a
    // ReferenceError, which would escape this catch. Guard it and fall
    // back to the working directory when neither mechanism is available.
    currentDir = typeof __dirname !== "undefined" ? __dirname : process.cwd();
  }
  const candidates = [
    join(currentDir, "..", "worlds", `${template}.nv-world.md`),
    join(currentDir, "worlds", `${template}.nv-world.md`),
    // Fallback: resolve from process.cwd()
    join(process.cwd(), "src", "worlds", `${template}.nv-world.md`)
  ];
  for (const candidate of candidates) {
    if (existsSync(candidate)) {
      return readFileSync(candidate, "utf-8");
    }
  }
  throw new Error(`Template "${template}" not found. Available: ${AVAILABLE_TEMPLATES.join(", ")}`);
}
|
|
56
|
+
/**
 * Specialize the generic autoresearch world template for a concrete
 * research setup.
 *
 * Applies a series of targeted text substitutions to the template:
 * world id/name and thesis (from `context`), the dataset invariant,
 * custom constraint invariants, the primary-metric description, the
 * compute-budget default, and the best-metric default when maximizing.
 * Fields absent from `config` leave the corresponding text untouched.
 *
 * @param {string} baseTemplate - Raw template markdown.
 * @param {object} config - Parsed init-world configuration.
 * @returns {string} The customized world markdown.
 */
function customizeAutoresearchWorld(baseTemplate, config) {
  const { context, dataset, constraints, metric, computeBudgetMinutes, optimize } = config;
  let doc = baseTemplate;

  if (context) {
    // Slug for the world id: lowercase, non-alphanumerics collapsed to
    // "_", edge underscores trimmed, capped at 40 characters.
    const slug = context.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_|_$/g, "").slice(0, 40);
    doc = doc
      .replace(/^world_id: autoresearch$/m, `world_id: ${slug}_research`)
      .replace(/^name: Autoresearch Governance$/m, `name: ${context} Research`)
      .replace(
        /^Autonomous AI research loops must operate.*$/m,
        `Autonomous AI research loops investigating ${context} must operate within structured governance: experiments are reproducible, metrics are tracked, compute budgets are enforced, and agents cannot drift beyond their declared research context. A research world without constraints produces noise, not knowledge.`
      );
  }

  if (dataset) {
    // Pin the dataset invariant to the declared dataset by name.
    doc = doc.replace(
      /- `dataset_must_be_declared`[^\n]+/,
      `- \`dataset_must_be_declared\` \u2014 The dataset "${dataset}" must be used for training and evaluation and never changed without governance approval (structural, immutable)`
    );
  }

  if (context && constraints && constraints.length > 0) {
    // Append one numbered invariant per declared constraint, right
    // after the generic architecture-constraints invariant.
    const extraInvariants = constraints
      .map((c, i) => `- \`custom_constraint_${i + 1}\` \u2014 ${c} (prompt, immutable)`)
      .join("\n");
    doc = doc.replace(
      /- `architecture_constraints_honored`[^\n]+/,
      `- \`architecture_constraints_honored\` \u2014 If the research context declares architectural constraints, experiments must satisfy them (prompt, immutable)
${extraInvariants}`
    );
  }

  if (metric) {
    doc = doc.replace(
      /Best value achieved for the primary evaluation metric/g,
      `Best value achieved for ${metric}`
    );
  }

  if (computeBudgetMinutes) {
    doc = doc.replace(
      /- default: 1440\n- label: Compute Budget/,
      `- default: ${computeBudgetMinutes}
- label: Compute Budget`
    );
  }

  if (optimize === "maximize") {
    // Maximizing flips the "best so far" sentinel to the low end.
    doc = doc.replace(
      /- default: 100\n- label: Best Metric Value/,
      "- default: -1000\n- label: Best Metric Value"
    );
  }

  return doc;
}
|
|
107
|
+
/**
 * Build the companion research-context JSON document for a world.
 *
 * Produces a pretty-printed JSON string describing the research setup
 * (context, dataset, goal, metric), the agent roles, the experiment
 * loop phases, and the initial loop state. Missing config fields fall
 * back to placeholder prompts or standard defaults.
 *
 * @param {object} config - Parsed init-world configuration.
 * @returns {string} Pretty-printed (2-space) JSON.
 */
function generateResearchContext(config) {
  const defaultRoles = [
    "hypothesis_generator",
    "experiment_runner",
    "result_evaluator",
    "critic"
  ];
  const payload = {
    research: {
      context: config.context || "Define your research context",
      dataset: config.dataset || "Define your dataset",
      goal: config.goal || "Define your optimization goal",
      metric: {
        name: config.metric || "val_bpb",
        optimization: config.optimize || "minimize"
      }
    },
    roles: config.roles || defaultRoles,
    experiment_loop: {
      generate_architecture: true,
      train_model: true,
      evaluate_results: true,
      iterate: true
    },
    // Fresh loop: nothing run yet, no best result, no architectures tried.
    state: {
      experiments_run: 0,
      best_result: null,
      architectures_tested: []
    }
  };
  return JSON.stringify(payload, null, 2);
}
|
|
138
|
+
/**
 * CLI entry point for `neuroverse init-world <template> [options]`.
 *
 * Validates the template name, derives an output path, writes the
 * customized world markdown plus a companion `.research.json`, then
 * prints a machine-readable summary to stdout and human-readable hints
 * to stderr.
 *
 * Exit codes: 0 success, 1 usage/unknown-template, 2 output file
 * already exists, 3 unexpected failure.
 *
 * @param {string[]} [argv] - Arguments after the subcommand; defaults
 *   to `process.argv.slice(2)`.
 */
async function main(argv = process.argv.slice(2)) {
  try {
    const args = parseArgs(argv);
    // No template given: print usage to stderr and exit 1.
    if (!args.template) {
      process.stderr.write("Usage: neuroverse init-world <template> [options]\n\n");
      process.stderr.write("Available templates:\n");
      for (const t of AVAILABLE_TEMPLATES) {
        process.stderr.write(` ${t} Generate a governed research world for autonomous AI experiments
`);
      }
      process.stderr.write("\nExample:\n");
      process.stderr.write(' neuroverse init-world autoresearch --context "attention-free LLM architectures" --dataset "TinyStories"\n');
      process.exit(1);
      // Unreachable after process.exit in Node; kept so control flow
      // stays explicit if exit is ever stubbed (e.g. in tests).
      return;
    }
    if (!AVAILABLE_TEMPLATES.includes(args.template)) {
      process.stderr.write(`Unknown template: "${args.template}"
`);
      process.stderr.write(`Available: ${AVAILABLE_TEMPLATES.join(", ")}
`);
      process.exit(1);
      return;
    }
    // Derive the world filename from the research context (slugified
    // with "-", capped at 40 chars) or fall back to a generic name.
    const worldFileName = args.config.context ? args.config.context.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 40) + ".nv-world.md" : "research.nv-world.md";
    const outputPath = args.outputPath || `./${worldFileName}`;
    // Refuse to clobber an existing file; exit 2 distinguishes this
    // from usage errors (1) and unexpected failures (3).
    if (existsSync(outputPath)) {
      process.stderr.write(`File already exists: ${outputPath}
`);
      process.stderr.write("Use --output to specify a different path.\n");
      process.exit(2);
      return;
    }
    const baseTemplate = loadBaseTemplate(args.template);
    const customized = customizeAutoresearchWorld(baseTemplate, args.config);
    await writeFile(outputPath, customized, "utf-8");
    // The companion research-context JSON is written next to the world
    // file, sharing its basename.
    const contextPath = outputPath.replace(/\.nv-world\.md$/, ".research.json");
    await writeFile(contextPath, generateResearchContext(args.config), "utf-8");
    // Machine-readable summary (stdout) mirrors the effective config,
    // substituting the same defaults the generators use.
    const result = {
      created: {
        world: outputPath,
        context: contextPath
      },
      template: args.template,
      config: {
        context: args.config.context || null,
        dataset: args.config.dataset || null,
        goal: args.config.goal || null,
        metric: args.config.metric || "val_bpb",
        optimize: args.config.optimize || "minimize",
        computeBudgetMinutes: args.config.computeBudgetMinutes || 1440
      },
      nextSteps: [
        `Edit ${outputPath} to refine governance rules for your research`,
        `neuroverse bootstrap --input ${outputPath} --output ./world/ --validate`,
        `neuroverse simulate ${outputPath} --steps 5`,
        `neuroverse guard --world ./world/ < experiment.json`,
        `npx autoresearch run --world ${contextPath}`
      ]
    };
    process.stdout.write(JSON.stringify(result, null, 2) + "\n");
    // Human-readable confirmation and next steps go to stderr so stdout
    // stays pure JSON for piping.
    process.stderr.write("\n");
    process.stderr.write(`\u2713 World created: ${outputPath}
`);
    process.stderr.write(`\u2713 Research context: ${contextPath}
`);
    process.stderr.write("\n");
    process.stderr.write("Next steps:\n");
    process.stderr.write(` Compile world neuroverse bootstrap --input ${outputPath} --output ./world/ --validate
`);
    process.stderr.write(` Simulate neuroverse simulate ${outputPath} --steps 5
`);
    process.stderr.write(` Run autoresearch npx autoresearch run --world ${contextPath}
`);
    process.stderr.write(` Launch dashboard make dashboard
`);
    process.stderr.write("\n");
    process.exit(0);
  } catch (e) {
    // Any failure (template not found, fs error, ...) exits 3 with a
    // one-line diagnostic on stderr.
    process.stderr.write(`init-world failed: ${e instanceof Error ? e.message : e}
`);
    process.exit(3);
  }
}

export {
  main
};
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import {
|
|
2
2
|
McpGovernanceServer,
|
|
3
3
|
startMcpServer
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-OGL7QXZS.js";
|
|
5
5
|
import "./chunk-AKW5YVCE.js";
|
|
6
6
|
import "./chunk-4JRYGIO7.js";
|
|
7
|
-
import "./chunk-JZPQGIKR.js";
|
|
8
7
|
import "./chunk-4QXB6PEO.js";
|
|
8
|
+
import "./chunk-JZPQGIKR.js";
|
|
9
9
|
import "./chunk-YZFATT7X.js";
|
|
10
10
|
export {
|
|
11
11
|
McpGovernanceServer,
|
|
@@ -2,10 +2,10 @@ import {
|
|
|
2
2
|
SessionManager,
|
|
3
3
|
runInteractiveMode,
|
|
4
4
|
runPipeMode
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-4A7LISES.js";
|
|
6
6
|
import "./chunk-4JRYGIO7.js";
|
|
7
|
-
import "./chunk-JZPQGIKR.js";
|
|
8
7
|
import "./chunk-4QXB6PEO.js";
|
|
8
|
+
import "./chunk-JZPQGIKR.js";
|
|
9
9
|
import "./chunk-YZFATT7X.js";
|
|
10
10
|
export {
|
|
11
11
|
SessionManager,
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
---
|
|
2
|
+
world_id: autoresearch
|
|
3
|
+
name: Autoresearch Governance
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
runtime_mode: SIMULATION
|
|
6
|
+
default_profile: conservative
|
|
7
|
+
alternative_profile: exploratory
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Thesis
|
|
11
|
+
|
|
12
|
+
Autonomous AI research loops must operate within structured governance: experiments are reproducible, metrics are tracked, compute budgets are enforced, and agents cannot drift beyond their declared research context. A research world without constraints produces noise, not knowledge.
|
|
13
|
+
|
|
14
|
+
# Invariants
|
|
15
|
+
|
|
16
|
+
- `experiments_must_be_reproducible` — Every experiment must log architecture, hyperparameters, dataset, and training config sufficient to reproduce results (structural, immutable)
|
|
17
|
+
- `metrics_must_be_recorded` — Every training run must produce at least one evaluation metric; runs without metrics are invalid (structural, immutable)
|
|
18
|
+
- `dataset_must_be_declared` — The dataset used for training and evaluation must be explicitly declared and never changed without governance approval (structural, immutable)
|
|
19
|
+
- `goal_must_be_defined` — The optimization goal (metric + direction) must be defined before any experiment runs (structural, immutable)
|
|
20
|
+
- `no_data_leakage` — Training data must never contaminate evaluation data; train/val/test splits must be fixed (structural, immutable)
|
|
21
|
+
- `compute_budget_enforced` — Experiments must respect declared compute limits; exceeding budget halts the loop (structural, immutable)
|
|
22
|
+
- `architecture_constraints_honored` — If the research context declares architectural constraints, experiments must satisfy them (prompt, immutable)
|
|
23
|
+
|
|
24
|
+
# State
|
|
25
|
+
|
|
26
|
+
## experiments_run
|
|
27
|
+
- type: number
|
|
28
|
+
- min: 0
|
|
29
|
+
- max: 10000
|
|
30
|
+
- step: 1
|
|
31
|
+
- default: 0
|
|
32
|
+
- label: Experiments Run
|
|
33
|
+
- description: Total number of experiments completed in this research loop
|
|
34
|
+
|
|
35
|
+
## best_metric_value
|
|
36
|
+
- type: number
|
|
37
|
+
- min: -1000
|
|
38
|
+
- max: 1000
|
|
39
|
+
- step: 0.01
|
|
40
|
+
- default: 100
|
|
41
|
+
- label: Best Metric Value
|
|
42
|
+
- description: Best value achieved for the primary evaluation metric
|
|
43
|
+
|
|
44
|
+
## keep_rate
|
|
45
|
+
- type: number
|
|
46
|
+
- min: 0
|
|
47
|
+
- max: 100
|
|
48
|
+
- step: 1
|
|
49
|
+
- default: 0
|
|
50
|
+
- label: Keep Rate
|
|
51
|
+
- description: Percentage of experiments that improved upon the previous best result
|
|
52
|
+
|
|
53
|
+
## compute_used_minutes
|
|
54
|
+
- type: number
|
|
55
|
+
- min: 0
|
|
56
|
+
- max: 100000
|
|
57
|
+
- step: 1
|
|
58
|
+
- default: 0
|
|
59
|
+
- label: Compute Used (minutes)
|
|
60
|
+
- description: Total wall-clock training time consumed across all experiments
|
|
61
|
+
|
|
62
|
+
## compute_budget_minutes
|
|
63
|
+
- type: number
|
|
64
|
+
- min: 0
|
|
65
|
+
- max: 100000
|
|
66
|
+
- step: 60
|
|
67
|
+
- default: 1440
|
|
68
|
+
- label: Compute Budget (minutes)
|
|
69
|
+
- description: Maximum allowed wall-clock training time for the research loop
|
|
70
|
+
|
|
71
|
+
## research_context_drift
|
|
72
|
+
- type: number
|
|
73
|
+
- min: 0
|
|
74
|
+
- max: 100
|
|
75
|
+
- step: 1
|
|
76
|
+
- default: 0
|
|
77
|
+
- label: Context Drift
|
|
78
|
+
- description: Degree to which recent experiments have diverged from the declared research context. 0 = on-topic. 100 = unrelated.
|
|
79
|
+
|
|
80
|
+
## metric_improvement_rate
|
|
81
|
+
- type: number
|
|
82
|
+
- min: 0
|
|
83
|
+
- max: 100
|
|
84
|
+
- step: 1
|
|
85
|
+
- default: 0
|
|
86
|
+
- label: Improvement Rate
|
|
87
|
+
- description: Rate of metric improvement over the last 10 experiments. 0 = stagnant. 100 = rapid improvement.
|
|
88
|
+
|
|
89
|
+
## failed_experiments
|
|
90
|
+
- type: number
|
|
91
|
+
- min: 0
|
|
92
|
+
- max: 10000
|
|
93
|
+
- step: 1
|
|
94
|
+
- default: 0
|
|
95
|
+
- label: Failed Experiments
|
|
96
|
+
- description: Number of experiments that crashed, timed out, or produced no valid metrics
|
|
97
|
+
|
|
98
|
+
# Assumptions
|
|
99
|
+
|
|
100
|
+
## conservative
|
|
101
|
+
- name: Conservative Research
|
|
102
|
+
- description: Prioritize reproducibility and careful iteration. Small architectural changes per experiment. Strict compute limits. Reject experiments that drift from the research context.
|
|
103
|
+
- iteration_style: incremental
|
|
104
|
+
- drift_tolerance: low
|
|
105
|
+
- compute_strictness: high
|
|
106
|
+
- failure_tolerance: low
|
|
107
|
+
|
|
108
|
+
## exploratory
|
|
109
|
+
- name: Exploratory Research
|
|
110
|
+
- description: Allow broader architectural exploration. Larger jumps between experiments. More lenient compute budget. Accept higher context drift if metrics improve.
|
|
111
|
+
- iteration_style: explorative
|
|
112
|
+
- drift_tolerance: moderate
|
|
113
|
+
- compute_strictness: moderate
|
|
114
|
+
- failure_tolerance: moderate
|
|
115
|
+
|
|
116
|
+
# Rules
|
|
117
|
+
|
|
118
|
+
## rule-001: Compute Budget Exhausted (structural)
|
|
119
|
+
When compute budget is exceeded, the research loop must halt. No further experiments are allowed.
|
|
120
|
+
|
|
121
|
+
When compute_used_minutes > compute_budget_minutes [state]
|
|
122
|
+
Then research_viability *= 0.00
|
|
123
|
+
Collapse: research_viability < 0.05
|
|
124
|
+
|
|
125
|
+
> trigger: Compute usage exceeds declared budget — no training time remains.
|
|
126
|
+
> rule: Unbounded compute makes research ungovernable. The budget is a hard constraint, not a suggestion.
|
|
127
|
+
> shift: Research loop halts. Final results are reported. No new experiments start.
|
|
128
|
+
> effect: Research viability set to zero. Loop terminated.
|
|
129
|
+
|
|
130
|
+
## rule-002: High Failure Rate (degradation)
|
|
131
|
+
Too many failed experiments indicate a systemic problem — bad code, misconfigured environment, or impossible architecture.
|
|
132
|
+
|
|
133
|
+
When failed_experiments > 5 [state] AND experiments_run > 0 [state]
|
|
134
|
+
Then research_viability *= 0.50
|
|
135
|
+
|
|
136
|
+
> trigger: More than 5 experiments have failed — possible systemic issue.
|
|
137
|
+
> rule: Failures consume compute without producing knowledge. High failure rates signal infrastructure problems, not research progress.
|
|
138
|
+
> shift: Research viability degrades. Agent should investigate root cause before continuing.
|
|
139
|
+
> effect: Research viability reduced to 50%.
|
|
140
|
+
|
|
141
|
+
## rule-003: Context Drift Warning (degradation)
|
|
142
|
+
Experiments diverging from the declared research context waste compute and produce irrelevant results.
|
|
143
|
+
|
|
144
|
+
When research_context_drift > 40 [state]
|
|
145
|
+
Then research_viability *= 0.60
|
|
146
|
+
|
|
147
|
+
> trigger: Context drift above 40% — experiments are straying from the research topic.
|
|
148
|
+
> rule: Governance exists to keep research focused. Agents exploring unrelated architectures are not contributing to the declared goal.
|
|
149
|
+
> shift: Research viability degrades. Agent must return to the declared research context.
|
|
150
|
+
> effect: Research viability reduced to 60%.
|
|
151
|
+
|
|
152
|
+
## rule-004: Metric Stagnation (degradation)
|
|
153
|
+
When experiments stop improving the primary metric, the research approach may need fundamental revision.
|
|
154
|
+
|
|
155
|
+
When metric_improvement_rate < 5 [state] AND experiments_run > 10 [state]
|
|
156
|
+
Then research_viability *= 0.70
|
|
157
|
+
|
|
158
|
+
> trigger: Improvement rate below 5% after 10+ experiments — research may have plateaued.
|
|
159
|
+
> rule: Stagnant metrics indicate diminishing returns from the current approach. The agent should consider a strategy change.
|
|
160
|
+
> shift: Research viability degrades. Agent should try a substantially different approach or conclude the loop.
|
|
161
|
+
> effect: Research viability reduced to 70%.
|
|
162
|
+
|
|
163
|
+
## rule-005: Strong Progress (advantage)
|
|
164
|
+
Consistent metric improvement validates the research approach and warrants continued investment.
|
|
165
|
+
|
|
166
|
+
When metric_improvement_rate > 30 [state] AND keep_rate > 20 [state]
|
|
167
|
+
Then research_viability *= 1.20
|
|
168
|
+
|
|
169
|
+
> trigger: Improvement rate above 30% with keep rate above 20% — research is productive.
|
|
170
|
+
> rule: Productive research should be encouraged. Strong metric trends indicate a promising research direction.
|
|
171
|
+
> shift: Research viability improves. Continued experimentation is well-justified.
|
|
172
|
+
> effect: Research viability boosted by 20%.
|
|
173
|
+
|
|
174
|
+
## rule-006: No Metrics Recorded (structural)
|
|
175
|
+
An experiment that produces no evaluation metrics is invalid and must not count as progress.
|
|
176
|
+
|
|
177
|
+
When experiments_run > 0 [state] AND best_metric_value == 100 [state]
|
|
178
|
+
Then research_viability *= 0.30
|
|
179
|
+
Collapse: research_viability < 0.05
|
|
180
|
+
|
|
181
|
+
> trigger: Experiments have run but no metric improvement from default — metrics may not be recording.
|
|
182
|
+
> rule: Research without measurement is not research. Every experiment must produce at least one evaluation metric.
|
|
183
|
+
> shift: Research viability drops sharply. Agent must fix metric recording before continuing.
|
|
184
|
+
> effect: Research viability reduced to 30%.
|
|
185
|
+
|
|
186
|
+
## rule-007: Efficient Compute Usage (advantage)
|
|
187
|
+
High keep rate with low compute usage indicates efficient research methodology.
|
|
188
|
+
|
|
189
|
+
When keep_rate > 30 [state] AND compute_used_minutes < compute_budget_minutes [state]
|
|
190
|
+
Then research_viability *= 1.15
|
|
191
|
+
|
|
192
|
+
> trigger: Keep rate above 30% with compute budget remaining — efficient experimentation.
|
|
193
|
+
> rule: Efficient use of compute demonstrates disciplined research. Not every experiment needs to be expensive.
|
|
194
|
+
> shift: Research viability improves. The research methodology is sustainable.
|
|
195
|
+
> effect: Research viability boosted by 15%.
|
|
196
|
+
|
|
197
|
+
# Gates
|
|
198
|
+
|
|
199
|
+
- BREAKTHROUGH: research_viability >= 90
|
|
200
|
+
- PRODUCTIVE: research_viability >= 60
|
|
201
|
+
- ONGOING: research_viability >= 35
|
|
202
|
+
- STRUGGLING: research_viability > 10
|
|
203
|
+
- HALTED: research_viability <= 10
|
|
204
|
+
|
|
205
|
+
# Outcomes
|
|
206
|
+
|
|
207
|
+
## research_viability
|
|
208
|
+
- type: number
|
|
209
|
+
- range: 0-100
|
|
210
|
+
- display: percentage
|
|
211
|
+
- label: Research Viability
|
|
212
|
+
- primary: true
|
|
213
|
+
|
|
214
|
+
## best_metric_value
|
|
215
|
+
- type: number
|
|
216
|
+
- range: -1000-1000
|
|
217
|
+
- display: decimal
|
|
218
|
+
- label: Best Metric Value
|
|
219
|
+
|
|
220
|
+
## keep_rate
|
|
221
|
+
- type: number
|
|
222
|
+
- range: 0-100
|
|
223
|
+
- display: percentage
|
|
224
|
+
- label: Keep Rate
|
|
225
|
+
|
|
226
|
+
## experiments_run
|
|
227
|
+
- type: number
|
|
228
|
+
- range: 0-10000
|
|
229
|
+
- display: integer
|
|
230
|
+
- label: Experiments Run
|
package/llms.txt
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# NeuroVerse Governance
|
|
2
|
+
|
|
3
|
+
> Deterministic runtime governance for AI agents. No LLM in the evaluation loop.
|
|
4
|
+
|
|
5
|
+
## What it does
|
|
6
|
+
|
|
7
|
+
NeuroVerse enforces behavioral boundaries on AI agents. Every agent action passes
|
|
8
|
+
through a 6-phase evaluation pipeline and receives an ALLOW, BLOCK, or PAUSE verdict.
|
|
9
|
+
Same event + same rules = same verdict, every time.
|
|
10
|
+
|
|
11
|
+
## Core concepts
|
|
12
|
+
|
|
13
|
+
- **World**: Permanent governance rules (guards, invariants, kernel rules, roles)
|
|
14
|
+
- **Plan**: Temporary task-scoped constraints layered on top of a world
|
|
15
|
+
- **Guard event**: An action an agent wants to take (intent, tool, scope)
|
|
16
|
+
- **Verdict**: ALLOW, BLOCK, or PAUSE — returned synchronously, no network calls
|
|
17
|
+
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
npm install @neuroverseos/governance
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Quick test (no install required)
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
npx @neuroverseos/governance init
|
|
28
|
+
npx @neuroverseos/governance build
|
|
29
|
+
npx @neuroverseos/governance guard
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Programmatic usage
|
|
33
|
+
|
|
34
|
+
```javascript
|
|
35
|
+
import { evaluateGuard, loadWorld } from '@neuroverseos/governance';
|
|
36
|
+
|
|
37
|
+
const world = await loadWorld('./world/');
|
|
38
|
+
const verdict = evaluateGuard({ intent: 'delete user data', tool: 'database' }, world);
|
|
39
|
+
// → { status: 'BLOCK', reason: 'Destructive database operation on protected resource' }
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Plan enforcement
|
|
43
|
+
|
|
44
|
+
```javascript
|
|
45
|
+
import { parsePlanMarkdown, evaluatePlan, advancePlan } from '@neuroverseos/governance';
|
|
46
|
+
|
|
47
|
+
const { plan } = parsePlanMarkdown(markdown);
|
|
48
|
+
const verdict = evaluatePlan({ intent: 'write blog post' }, plan);
|
|
49
|
+
// → { status: 'ON_PLAN', matchedStep: 'write_announcement_blog_post' }
|
|
50
|
+
|
|
51
|
+
const result = advancePlan(plan, 'write_announcement_blog_post');
|
|
52
|
+
// → { success: true, plan: <updated> }
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Plans support two completion modes:
|
|
56
|
+
- `completion: trust` (default) — caller says "done", step advances
|
|
57
|
+
- `completion: verified` — steps with `[verify: ...]` require evidence to advance
|
|
58
|
+
|
|
59
|
+
## Adapters
|
|
60
|
+
|
|
61
|
+
- OpenAI function calling: `@neuroverseos/governance/adapters/openai`
|
|
62
|
+
- LangChain callback handler: `@neuroverseos/governance/adapters/langchain`
|
|
63
|
+
- OpenClaw plugin: `@neuroverseos/governance/adapters/openclaw`
|
|
64
|
+
- Express/Fastify middleware: `@neuroverseos/governance/adapters/express`
|
|
65
|
+
- MCP server: `neuroverse mcp --world ./world`
|
|
66
|
+
|
|
67
|
+
## Evaluation pipeline
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
Safety → Plan → Roles → Guards → Kernel → Level → Verdict
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
First BLOCK wins. No async. Pure function.
|
|
74
|
+
|
|
75
|
+
## Links
|
|
76
|
+
|
|
77
|
+
- npm: https://www.npmjs.com/package/@neuroverseos/governance
|
|
78
|
+
- GitHub: https://github.com/NeuroverseOS/Neuroverseos-governance
|
|
79
|
+
- Website: https://neuroverseos.com
|