@neuroverseos/governance 0.3.0 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.well-known/ai-plugin.json +34 -9
- package/AGENTS.md +72 -24
- package/README.md +352 -237
- package/dist/adapters/autoresearch.cjs +1152 -3
- package/dist/adapters/autoresearch.d.cts +11 -3
- package/dist/adapters/autoresearch.d.ts +11 -3
- package/dist/adapters/autoresearch.js +9 -4
- package/dist/adapters/deep-agents.cjs +1528 -0
- package/dist/adapters/deep-agents.d.cts +181 -0
- package/dist/adapters/deep-agents.d.ts +181 -0
- package/dist/adapters/deep-agents.js +17 -0
- package/dist/adapters/express.cjs +171 -32
- package/dist/adapters/express.d.cts +1 -1
- package/dist/adapters/express.d.ts +1 -1
- package/dist/adapters/express.js +5 -5
- package/dist/adapters/index.cjs +564 -121
- package/dist/adapters/index.d.cts +3 -1
- package/dist/adapters/index.d.ts +3 -1
- package/dist/adapters/index.js +38 -16
- package/dist/adapters/langchain.cjs +217 -57
- package/dist/adapters/langchain.d.cts +5 -5
- package/dist/adapters/langchain.d.ts +5 -5
- package/dist/adapters/langchain.js +6 -5
- package/dist/adapters/openai.cjs +219 -59
- package/dist/adapters/openai.d.cts +5 -5
- package/dist/adapters/openai.d.ts +5 -5
- package/dist/adapters/openai.js +6 -5
- package/dist/adapters/openclaw.cjs +217 -57
- package/dist/adapters/openclaw.d.cts +6 -6
- package/dist/adapters/openclaw.d.ts +6 -6
- package/dist/adapters/openclaw.js +6 -5
- package/dist/add-ROOZLU62.js +314 -0
- package/dist/behavioral-MJO34S6Q.js +118 -0
- package/dist/{bootstrap-GXVDZNF7.js → bootstrap-CQRZVOXK.js} +6 -4
- package/dist/bootstrap-emitter-Q7UIJZ2O.js +7 -0
- package/dist/bootstrap-parser-EEF36XDU.js +7 -0
- package/dist/browser.global.js +941 -0
- package/dist/{build-P42YFKQV.js → build-QKOBBC23.js} +7 -5
- package/dist/{chunk-COT5XS4V.js → chunk-3WQLXYTP.js} +17 -35
- package/dist/{chunk-ER62HNGF.js → chunk-4FLICVVA.js} +17 -37
- package/dist/chunk-5TPFNWRU.js +215 -0
- package/dist/chunk-5U2MQO5P.js +57 -0
- package/dist/{chunk-NF5POFCI.js → chunk-6S5CFQXY.js} +6 -4
- package/dist/{chunk-QPASI2BR.js → chunk-A7GKPPU7.js} +49 -10
- package/dist/{chunk-OGL7QXZS.js → chunk-B6OXJLJ5.js} +17 -3
- package/dist/{chunk-2PQU3VAN.js → chunk-BNKJPUPQ.js} +17 -35
- package/dist/chunk-BQZMOEML.js +43 -0
- package/dist/chunk-CNSO6XW5.js +207 -0
- package/dist/{chunk-JZPQGIKR.js → chunk-CTZHONLA.js} +65 -9
- package/dist/chunk-D2UCV5AK.js +326 -0
- package/dist/{chunk-XPDMYECO.js → chunk-EMQDLDAF.js} +1 -185
- package/dist/{chunk-GR6DGCZ2.js → chunk-F66BVUYB.js} +3 -3
- package/dist/{chunk-2NICNKOM.js → chunk-G7DJ6VOD.js} +5 -4
- package/dist/{chunk-4A7LISES.js → chunk-IS4WUH6Y.js} +45 -6
- package/dist/{chunk-MWDQ4MJB.js → chunk-MH7BT4VH.js} +5 -1
- package/dist/chunk-O5ABKEA7.js +304 -0
- package/dist/chunk-PVTQQS3Y.js +186 -0
- package/dist/{chunk-4QXB6PEO.js → chunk-QLPTHTVB.js} +37 -16
- package/dist/chunk-QWGCMQQD.js +16 -0
- package/dist/{chunk-T5EUJQE5.js → chunk-QXBFT7NI.js} +31 -2
- package/dist/{chunk-PDOZHZWL.js → chunk-TG6SEF24.js} +25 -4
- package/dist/chunk-U6U7EJZL.js +177 -0
- package/dist/{chunk-4JRYGIO7.js → chunk-W7LLXRGY.js} +110 -7
- package/dist/{chunk-BUWWN2NX.js → chunk-ZJTDUCC2.js} +9 -7
- package/dist/{chunk-FYS2CBUW.js → chunk-ZWI3NIXK.js} +10 -0
- package/dist/cli/neuroverse.cjs +5091 -2348
- package/dist/cli/neuroverse.js +52 -21
- package/dist/cli/plan.cjs +881 -41
- package/dist/cli/plan.js +7 -15
- package/dist/cli/run.cjs +289 -34
- package/dist/cli/run.js +4 -4
- package/dist/{configure-ai-TK67ZWZL.js → configure-ai-6TZ3MCSI.js} +1 -1
- package/dist/decision-flow-M63D47LO.js +61 -0
- package/dist/demo-G43RLCPK.js +469 -0
- package/dist/{derive-TLIV4OOU.js → derive-FJZVIPUZ.js} +5 -4
- package/dist/{doctor-XPDLEYXN.js → doctor-6BC6X2VO.js} +6 -4
- package/dist/equity-penalties-SG5IZQ7I.js +244 -0
- package/dist/{explain-IDCRWMPX.js → explain-RHBU2GBR.js} +6 -25
- package/dist/{guard-RV65TT4L.js → guard-AJCCGZMF.js} +8 -12
- package/dist/{guard-contract-WZx__PmU.d.cts → guard-contract-DqFcTScd.d.cts} +117 -5
- package/dist/{guard-contract-WZx__PmU.d.ts → guard-contract-DqFcTScd.d.ts} +117 -5
- package/dist/{guard-engine-JLTUARGU.js → guard-engine-PNR6MHCM.js} +3 -3
- package/dist/{impact-XPECYRLH.js → impact-3XVDSCBU.js} +5 -5
- package/dist/{improve-GPUBKTEA.js → improve-TQP4ECSY.js} +7 -26
- package/dist/index.cjs +5597 -4279
- package/dist/index.d.cts +597 -18
- package/dist/index.d.ts +597 -18
- package/dist/index.js +134 -41
- package/dist/{infer-world-7GVZWFX4.js → infer-world-IFXCACJ5.js} +1 -1
- package/dist/{init-PKPIYHYE.js → init-FYPV4SST.js} +1 -1
- package/dist/{init-world-VWMQZQC7.js → init-world-TI7ARHBT.js} +1 -1
- package/dist/mcp-server-5Y3ZM7TV.js +13 -0
- package/dist/{model-adapter-BB7G4MFI.js → model-adapter-VXEKB4LS.js} +1 -1
- package/dist/{playground-E664U4T6.js → playground-VZBNPPBO.js} +29 -19
- package/dist/{redteam-Z7WREJ44.js → redteam-MZPZD3EF.js} +4 -4
- package/dist/session-JYOARW54.js +15 -0
- package/dist/shared-7RLUHNMU.js +16 -0
- package/dist/shared-B8dvUUD8.d.cts +60 -0
- package/dist/shared-Dr5Wiay8.d.ts +60 -0
- package/dist/{simulate-VDOYQFRO.js → simulate-LJXYBC6M.js} +8 -33
- package/dist/{test-OGXJK4QU.js → test-BOOR4A5F.js} +4 -4
- package/dist/{trace-JVF67VR3.js → trace-PKV4KX56.js} +4 -4
- package/dist/{validate-LLBWVPGV.js → validate-RALX7CZS.js} +2 -2
- package/dist/{validate-engine-UIABSIHD.js → validate-engine-7ZXFVGF2.js} +1 -1
- package/dist/viz/assets/index-B8SaeJZZ.js +23 -0
- package/dist/viz/index.html +23 -0
- package/dist/{world-LAXO6DOX.js → world-BIP4GZBZ.js} +9 -11
- package/dist/world-loader-Y6HMQH2D.js +13 -0
- package/dist/worlds/coding-agent.nv-world.md +211 -0
- package/dist/worlds/research-agent.nv-world.md +169 -0
- package/dist/worlds/social-media.nv-world.md +198 -0
- package/dist/worlds/trading-agent.nv-world.md +218 -0
- package/examples/social-media-sim/bridge.py +209 -0
- package/examples/social-media-sim/simulation.py +927 -0
- package/package.json +30 -4
- package/policies/content-moderation-rules.txt +8 -0
- package/policies/marketing-rules.txt +8 -0
- package/policies/science-research-rules.txt +11 -0
- package/policies/social-media-rules.txt +7 -0
- package/policies/strict-rules.txt +8 -0
- package/policies/trading-rules.txt +8 -0
- package/simulate.html +1567 -0
- package/dist/chunk-YZFATT7X.js +0 -9
- package/dist/mcp-server-FPVSU32Z.js +0 -13
- package/dist/session-EKTRSR7C.js +0 -14
- package/dist/world-loader-HMPTOEA2.js +0 -9
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import {
|
|
2
|
+
validateWorld
|
|
3
|
+
} from "./chunk-7P3S7MAY.js";
|
|
1
4
|
import {
|
|
2
5
|
describeActiveWorld,
|
|
3
6
|
getActiveWorldName,
|
|
@@ -5,13 +8,10 @@ import {
|
|
|
5
8
|
resolveWorldPath,
|
|
6
9
|
setActiveWorld
|
|
7
10
|
} from "./chunk-AKW5YVCE.js";
|
|
8
|
-
import {
|
|
9
|
-
validateWorld
|
|
10
|
-
} from "./chunk-7P3S7MAY.js";
|
|
11
11
|
import {
|
|
12
12
|
loadWorld
|
|
13
|
-
} from "./chunk-
|
|
14
|
-
import "./chunk-
|
|
13
|
+
} from "./chunk-CTZHONLA.js";
|
|
14
|
+
import "./chunk-QWGCMQQD.js";
|
|
15
15
|
|
|
16
16
|
// src/cli/world.ts
|
|
17
17
|
var USAGE = `
|
|
@@ -56,10 +56,8 @@ async function worldStatus(worldPath, json) {
|
|
|
56
56
|
rules: world.rules.length,
|
|
57
57
|
roles: world.roles?.roles.length ?? 0,
|
|
58
58
|
kernel: world.kernel ? {
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
allowedOutputs: world.kernel.allowed_outputs?.length ?? 0,
|
|
62
|
-
forbiddenOutputs: world.kernel.forbidden_outputs?.length ?? 0
|
|
59
|
+
forbiddenInputs: world.kernel.input_boundaries?.forbidden_patterns?.length ?? 0,
|
|
60
|
+
forbiddenOutputs: world.kernel.output_boundaries?.forbidden_patterns?.length ?? 0
|
|
63
61
|
} : null,
|
|
64
62
|
validation: report.summary
|
|
65
63
|
}, null, 2) + "\n");
|
|
@@ -69,7 +67,7 @@ async function worldStatus(worldPath, json) {
|
|
|
69
67
|
lines.push("WORLD STATUS");
|
|
70
68
|
lines.push("\u2500".repeat(40));
|
|
71
69
|
lines.push(` Name: ${world.world.name}`);
|
|
72
|
-
lines.push(` ID: ${world.world.
|
|
70
|
+
lines.push(` ID: ${world.world.world_id}`);
|
|
73
71
|
lines.push(` Version: ${world.world.version}`);
|
|
74
72
|
lines.push(` Created: ${world.metadata.created_at || "\u2014"}`);
|
|
75
73
|
lines.push(` Modified: ${world.metadata.last_modified || "\u2014"}`);
|
|
@@ -83,7 +81,7 @@ async function worldStatus(worldPath, json) {
|
|
|
83
81
|
lines.push(` Roles: ${world.roles?.roles.length ?? 0}`);
|
|
84
82
|
if (world.kernel) {
|
|
85
83
|
const k = world.kernel;
|
|
86
|
-
const totalRules = (k.
|
|
84
|
+
const totalRules = (k.input_boundaries?.forbidden_patterns?.length ?? 0) + (k.output_boundaries?.forbidden_patterns?.length ?? 0);
|
|
87
85
|
lines.push(` Kernel: ${totalRules} rules`);
|
|
88
86
|
}
|
|
89
87
|
lines.push("");
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_BUNDLED_WORLD,
|
|
3
|
+
loadBundledWorld,
|
|
4
|
+
loadWorld,
|
|
5
|
+
loadWorldFromDirectory
|
|
6
|
+
} from "./chunk-CTZHONLA.js";
|
|
7
|
+
import "./chunk-QWGCMQQD.js";
|
|
8
|
+
export {
|
|
9
|
+
DEFAULT_BUNDLED_WORLD,
|
|
10
|
+
loadBundledWorld,
|
|
11
|
+
loadWorld,
|
|
12
|
+
loadWorldFromDirectory
|
|
13
|
+
};
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
---
|
|
2
|
+
world_id: coding-agent
|
|
3
|
+
name: Coding Agent Governance
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
runtime_mode: COMPLIANCE
|
|
6
|
+
default_profile: standard
|
|
7
|
+
alternative_profile: strict
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Thesis
|
|
11
|
+
|
|
12
|
+
Autonomous coding agents that can read files, write code, execute shell commands, and interact with version control require a governance layer. Without enforceable rules, a single misguided tool call can delete data, leak secrets, break production, or escalate beyond its intended scope. This world defines the boundaries within which a coding agent operates safely.
|
|
13
|
+
|
|
14
|
+
# Invariants
|
|
15
|
+
|
|
16
|
+
- `no_system_destruction` — Agents must never execute commands that destroy system-level resources (recursive force-delete of root paths, disk formatting, fork bombs) (structural, immutable)
|
|
17
|
+
- `no_secret_exposure` — Agents must never read, log, or transmit credentials, API keys, private keys, or environment secrets outside the project boundary (structural, immutable)
|
|
18
|
+
- `no_unauthorized_push` — Agents must never push directly to main or master branches without explicit approval (structural, immutable)
|
|
19
|
+
- `no_scope_escape` — Agents must never access files or execute commands outside the declared project directory (structural, immutable)
|
|
20
|
+
- `no_pipe_to_shell` — Agents must never pipe downloaded content directly into a shell interpreter (structural, immutable)
|
|
21
|
+
- `changes_must_be_reversible` — Every file modification must be recoverable through version control; destructive operations require confirmation (prompt, immutable)
|
|
22
|
+
|
|
23
|
+
# State
|
|
24
|
+
|
|
25
|
+
## files_modified
|
|
26
|
+
- type: number
|
|
27
|
+
- min: 0
|
|
28
|
+
- max: 100000
|
|
29
|
+
- step: 1
|
|
30
|
+
- default: 0
|
|
31
|
+
- label: Files Modified
|
|
32
|
+
- description: Total number of files written or edited in this session
|
|
33
|
+
|
|
34
|
+
## files_deleted
|
|
35
|
+
- type: number
|
|
36
|
+
- min: 0
|
|
37
|
+
- max: 100000
|
|
38
|
+
- step: 1
|
|
39
|
+
- default: 0
|
|
40
|
+
- label: Files Deleted
|
|
41
|
+
- description: Total number of files deleted in this session
|
|
42
|
+
|
|
43
|
+
## shell_commands_run
|
|
44
|
+
- type: number
|
|
45
|
+
- min: 0
|
|
46
|
+
- max: 10000
|
|
47
|
+
- step: 1
|
|
48
|
+
- default: 0
|
|
49
|
+
- label: Shell Commands Run
|
|
50
|
+
- description: Total number of shell commands executed
|
|
51
|
+
|
|
52
|
+
## dangerous_commands_blocked
|
|
53
|
+
- type: number
|
|
54
|
+
- min: 0
|
|
55
|
+
- max: 10000
|
|
56
|
+
- step: 1
|
|
57
|
+
- default: 0
|
|
58
|
+
- label: Dangerous Commands Blocked
|
|
59
|
+
- description: Number of shell commands blocked by governance rules
|
|
60
|
+
|
|
61
|
+
## git_pushes
|
|
62
|
+
- type: number
|
|
63
|
+
- min: 0
|
|
64
|
+
- max: 100
|
|
65
|
+
- step: 1
|
|
66
|
+
- default: 0
|
|
67
|
+
- label: Git Pushes
|
|
68
|
+
- description: Number of git push operations executed
|
|
69
|
+
|
|
70
|
+
## sub_agents_spawned
|
|
71
|
+
- type: number
|
|
72
|
+
- min: 0
|
|
73
|
+
- max: 50
|
|
74
|
+
- step: 1
|
|
75
|
+
- default: 0
|
|
76
|
+
- label: Sub-Agents Spawned
|
|
77
|
+
- description: Number of sub-agent processes created
|
|
78
|
+
|
|
79
|
+
## scope_violations
|
|
80
|
+
- type: number
|
|
81
|
+
- min: 0
|
|
82
|
+
- max: 1000
|
|
83
|
+
- step: 1
|
|
84
|
+
- default: 0
|
|
85
|
+
- label: Scope Violations
|
|
86
|
+
- description: Number of attempted actions outside the declared project scope
|
|
87
|
+
|
|
88
|
+
# Assumptions
|
|
89
|
+
|
|
90
|
+
## standard
|
|
91
|
+
- name: Standard Development
|
|
92
|
+
- description: Normal development workflow. File reads are unrestricted. File writes within project scope are allowed. Shell commands are evaluated for safety. Git pushes require feature branches.
|
|
93
|
+
- file_read_policy: unrestricted
|
|
94
|
+
- file_write_policy: project_scope_only
|
|
95
|
+
- shell_policy: safety_evaluated
|
|
96
|
+
- git_policy: feature_branches_only
|
|
97
|
+
- network_policy: restricted
|
|
98
|
+
|
|
99
|
+
## strict
|
|
100
|
+
- name: Strict Lockdown
|
|
101
|
+
- description: High-security mode. All file writes require confirmation. All shell commands require approval. No network access. No git pushes without explicit authorization.
|
|
102
|
+
- file_read_policy: unrestricted
|
|
103
|
+
- file_write_policy: approval_required
|
|
104
|
+
- shell_policy: approval_required
|
|
105
|
+
- git_policy: approval_required
|
|
106
|
+
- network_policy: blocked
|
|
107
|
+
|
|
108
|
+
# Rules
|
|
109
|
+
|
|
110
|
+
## rule-001: Destructive Shell Command (structural)
|
|
111
|
+
Shell commands that can cause irreversible system damage must be blocked unconditionally.
|
|
112
|
+
|
|
113
|
+
When shell_commands_run > 0 [state] AND dangerous_commands_blocked > 0 [state]
|
|
114
|
+
Then agent_safety *= 0.50
|
|
115
|
+
|
|
116
|
+
> trigger: Agent attempted a destructive shell command (rm -rf, mkfs, dd, fork bomb, etc.).
|
|
117
|
+
> rule: Destructive commands cannot be undone. No amount of productivity justifies risking system integrity.
|
|
118
|
+
> shift: Agent safety score drops. Continued violations may halt the session.
|
|
119
|
+
> effect: Agent safety reduced by 50%.
|
|
120
|
+
|
|
121
|
+
## rule-002: Scope Escape Attempt (structural)
|
|
122
|
+
Accessing files or running commands outside the project directory is a governance violation.
|
|
123
|
+
|
|
124
|
+
When scope_violations > 0 [state]
|
|
125
|
+
Then agent_safety *= 0.40
|
|
126
|
+
Collapse: agent_safety < 0.10
|
|
127
|
+
|
|
128
|
+
> trigger: Agent attempted to access resources outside its declared project scope.
|
|
129
|
+
> rule: Agents operate within boundaries. Scope escape indicates either a misconfigured agent or a prompt injection attempt.
|
|
130
|
+
> shift: Agent safety drops sharply. Multiple violations halt the session.
|
|
131
|
+
> effect: Agent safety reduced to 40% of its current value.
|
|
132
|
+
|
|
133
|
+
## rule-003: Excessive File Deletion (degradation)
|
|
134
|
+
Deleting many files in a single session indicates potentially destructive behavior.
|
|
135
|
+
|
|
136
|
+
When files_deleted > 10 [state]
|
|
137
|
+
Then agent_safety *= 0.60
|
|
138
|
+
|
|
139
|
+
> trigger: More than 10 files deleted in a single session.
|
|
140
|
+
> rule: Bulk deletion is rarely intentional in normal development. This warrants review.
|
|
141
|
+
> shift: Agent safety degrades. Remaining deletions may require approval.
|
|
142
|
+
> effect: Agent safety reduced to 60% of its current value.
|
|
143
|
+
|
|
144
|
+
## rule-004: Uncontrolled Sub-Agent Spawning (degradation)
|
|
145
|
+
Too many sub-agents indicate either a runaway loop or poorly scoped task decomposition.
|
|
146
|
+
|
|
147
|
+
When sub_agents_spawned > 10 [state]
|
|
148
|
+
Then agent_safety *= 0.70
|
|
149
|
+
|
|
150
|
+
> trigger: More than 10 sub-agents spawned in a single session.
|
|
151
|
+
> rule: Each sub-agent inherits the parent's capabilities. Uncontrolled spawning multiplies risk.
|
|
152
|
+
> shift: Agent safety degrades. Further spawning may be blocked.
|
|
153
|
+
> effect: Agent safety reduced to 70% of its current value.
|
|
154
|
+
|
|
155
|
+
## rule-005: Clean Session (advantage)
|
|
156
|
+
A session with no violations and productive output validates the governance model.
|
|
157
|
+
|
|
158
|
+
When files_modified > 0 [state] AND scope_violations == 0 [state] AND dangerous_commands_blocked == 0 [state]
|
|
159
|
+
Then agent_safety *= 1.10
|
|
160
|
+
|
|
161
|
+
> trigger: Agent has modified files without triggering any governance violations.
|
|
162
|
+
> rule: Good behavior should be recognized. Clean sessions build trust in the agent's judgment.
|
|
163
|
+
> shift: Agent safety improves slightly. Trust accumulates over clean sessions.
|
|
164
|
+
> effect: Agent safety boosted by 10%.
|
|
165
|
+
|
|
166
|
+
## rule-006: Unauthorized Push to Protected Branch (structural)
|
|
167
|
+
Pushing to main or master without approval violates version control governance.
|
|
168
|
+
|
|
169
|
+
When git_pushes > 0 [state] AND scope_violations > 0 [state]
|
|
170
|
+
Then agent_safety *= 0.30
|
|
171
|
+
Collapse: agent_safety < 0.10
|
|
172
|
+
|
|
173
|
+
> trigger: Agent pushed to a protected branch without authorization.
|
|
174
|
+
> rule: Protected branches exist for a reason. Direct pushes bypass code review and CI/CD.
|
|
175
|
+
> shift: Agent safety drops critically. Session may be halted.
|
|
176
|
+
> effect: Agent safety reduced to 30% of its current value.
|
|
177
|
+
|
|
178
|
+
# Gates
|
|
179
|
+
|
|
180
|
+
- TRUSTED: agent_safety >= 90
|
|
181
|
+
- OPERATING: agent_safety >= 60
|
|
182
|
+
- CAUTIOUS: agent_safety >= 35
|
|
183
|
+
- RESTRICTED: agent_safety > 10
|
|
184
|
+
- HALTED: agent_safety <= 10
|
|
185
|
+
|
|
186
|
+
# Outcomes
|
|
187
|
+
|
|
188
|
+
## agent_safety
|
|
189
|
+
- type: number
|
|
190
|
+
- range: 0-100
|
|
191
|
+
- display: percentage
|
|
192
|
+
- label: Agent Safety Score
|
|
193
|
+
- primary: true
|
|
194
|
+
|
|
195
|
+
## files_modified
|
|
196
|
+
- type: number
|
|
197
|
+
- range: 0-100000
|
|
198
|
+
- display: integer
|
|
199
|
+
- label: Files Modified
|
|
200
|
+
|
|
201
|
+
## dangerous_commands_blocked
|
|
202
|
+
- type: number
|
|
203
|
+
- range: 0-10000
|
|
204
|
+
- display: integer
|
|
205
|
+
- label: Dangerous Commands Blocked
|
|
206
|
+
|
|
207
|
+
## scope_violations
|
|
208
|
+
- type: number
|
|
209
|
+
- range: 0-1000
|
|
210
|
+
- display: integer
|
|
211
|
+
- label: Scope Violations
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
---
|
|
2
|
+
world_id: research-agent
|
|
3
|
+
name: Research Agent Governance
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
runtime_mode: COMPLIANCE
|
|
6
|
+
default_profile: conservative
|
|
7
|
+
alternative_profile: exploratory
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Thesis
|
|
11
|
+
|
|
12
|
+
AI research agents that autonomously search, synthesize, and publish findings must operate within governance boundaries. An ungoverned research agent can fabricate citations, plagiarize content, exceed API rate limits, or publish unreviewed conclusions. Research governance ensures rigor, attribution, and responsible resource usage.
|
|
13
|
+
|
|
14
|
+
# Invariants
|
|
15
|
+
|
|
16
|
+
- `sources_must_be_cited` — Every claim must be traceable to a declared source; unsourced assertions are forbidden (structural, immutable)
|
|
17
|
+
- `no_fabricated_citations` — Agent must never invent, hallucinate, or misattribute sources or data (structural, immutable)
|
|
18
|
+
- `no_plagiarism` — Agent must not reproduce copyrighted content without proper attribution and licensing compliance (structural, immutable)
|
|
19
|
+
- `api_rate_limits_respected` — Agent must respect rate limits on all external APIs and data sources (structural, immutable)
|
|
20
|
+
- `no_unauthorized_publication` — Research findings must not be published externally without human review and approval (prompt, immutable)
|
|
21
|
+
- `data_provenance_tracked` — Every piece of data used must have clear provenance (source URL, access date, license) (structural, immutable)
|
|
22
|
+
|
|
23
|
+
# State
|
|
24
|
+
|
|
25
|
+
## sources_consulted
|
|
26
|
+
- type: number
|
|
27
|
+
- min: 0
|
|
28
|
+
- max: 10000
|
|
29
|
+
- step: 1
|
|
30
|
+
- default: 0
|
|
31
|
+
- label: Sources Consulted
|
|
32
|
+
- description: Total number of unique sources accessed
|
|
33
|
+
|
|
34
|
+
## claims_made
|
|
35
|
+
- type: number
|
|
36
|
+
- min: 0
|
|
37
|
+
- max: 10000
|
|
38
|
+
- step: 1
|
|
39
|
+
- default: 0
|
|
40
|
+
- label: Claims Made
|
|
41
|
+
- description: Total assertions or findings produced
|
|
42
|
+
|
|
43
|
+
## unsourced_claims
|
|
44
|
+
- type: number
|
|
45
|
+
- min: 0
|
|
46
|
+
- max: 10000
|
|
47
|
+
- step: 1
|
|
48
|
+
- default: 0
|
|
49
|
+
- label: Unsourced Claims
|
|
50
|
+
- description: Claims without traceable source attribution
|
|
51
|
+
|
|
52
|
+
## api_calls_made
|
|
53
|
+
- type: number
|
|
54
|
+
- min: 0
|
|
55
|
+
- max: 100000
|
|
56
|
+
- step: 1
|
|
57
|
+
- default: 0
|
|
58
|
+
- label: API Calls Made
|
|
59
|
+
- description: Total external API calls made
|
|
60
|
+
|
|
61
|
+
## api_budget
|
|
62
|
+
- type: number
|
|
63
|
+
- min: 0
|
|
64
|
+
- max: 100000
|
|
65
|
+
- step: 100
|
|
66
|
+
- default: 5000
|
|
67
|
+
- label: API Call Budget
|
|
68
|
+
- description: Maximum allowed API calls for this research session
|
|
69
|
+
|
|
70
|
+
## synthesis_quality
|
|
71
|
+
- type: number
|
|
72
|
+
- min: 0
|
|
73
|
+
- max: 100
|
|
74
|
+
- step: 1
|
|
75
|
+
- default: 50
|
|
76
|
+
- label: Synthesis Quality
|
|
77
|
+
- description: Quality score based on source diversity, citation coverage, and coherence
|
|
78
|
+
|
|
79
|
+
# Assumptions
|
|
80
|
+
|
|
81
|
+
## conservative
|
|
82
|
+
- name: Conservative Research
|
|
83
|
+
- description: Prioritize accuracy over speed. Require multiple sources per claim. Strict API budget adherence. No publication without review.
|
|
84
|
+
- source_requirement: multiple_per_claim
|
|
85
|
+
- api_strictness: hard_limit
|
|
86
|
+
- publication_gate: human_required
|
|
87
|
+
|
|
88
|
+
## exploratory
|
|
89
|
+
- name: Exploratory Research
|
|
90
|
+
- description: Allow broader exploration with single-source claims. Softer API limits. Still require review for publication.
|
|
91
|
+
- source_requirement: at_least_one
|
|
92
|
+
- api_strictness: soft_warning
|
|
93
|
+
- publication_gate: human_required
|
|
94
|
+
|
|
95
|
+
# Rules
|
|
96
|
+
|
|
97
|
+
## rule-001: API Budget Exhausted (structural)
|
|
98
|
+
When the API call budget is exceeded, no further external calls are allowed.
|
|
99
|
+
|
|
100
|
+
When api_calls_made > api_budget [state]
|
|
101
|
+
Then research_viability *= 0.00
|
|
102
|
+
Collapse: research_viability < 0.05
|
|
103
|
+
|
|
104
|
+
> trigger: API call budget exceeded — no more external requests allowed.
|
|
105
|
+
> rule: Rate limits and budgets exist to prevent abuse and cost overruns.
|
|
106
|
+
> shift: Research loop halts for external calls. Agent must work with existing data.
|
|
107
|
+
> effect: Research viability set to zero for external operations.
|
|
108
|
+
|
|
109
|
+
## rule-002: Unsourced Claims (degradation)
|
|
110
|
+
Research with many unsourced claims lacks rigor and trustworthiness.
|
|
111
|
+
|
|
112
|
+
When unsourced_claims > 3 [state] AND claims_made > 0 [state]
|
|
113
|
+
Then research_viability *= 0.40
|
|
114
|
+
|
|
115
|
+
> trigger: More than 3 claims made without source attribution.
|
|
116
|
+
> rule: Every assertion must be traceable. Unsourced claims undermine research integrity.
|
|
117
|
+
> shift: Research viability drops significantly. Agent must add citations.
|
|
118
|
+
> effect: Research viability reduced to 40% of its current value.
|
|
119
|
+
|
|
120
|
+
## rule-003: Source Diversity (advantage)
|
|
121
|
+
Consulting many diverse sources produces higher quality research.
|
|
122
|
+
|
|
123
|
+
When sources_consulted > 10 [state] AND unsourced_claims == 0 [state]
|
|
124
|
+
Then research_viability *= 1.25
|
|
125
|
+
|
|
126
|
+
> trigger: More than 10 sources consulted with zero unsourced claims.
|
|
127
|
+
> rule: Diverse, well-cited research is the gold standard.
|
|
128
|
+
> shift: Research viability improves. Findings are well-supported.
|
|
129
|
+
> effect: Research viability boosted by 25%.
|
|
130
|
+
|
|
131
|
+
## rule-004: Low Source Coverage (degradation)
|
|
132
|
+
Making many claims from few sources indicates shallow research.
|
|
133
|
+
|
|
134
|
+
When claims_made > 10 [state] AND sources_consulted < 3 [state]
|
|
135
|
+
Then research_viability *= 0.50
|
|
136
|
+
|
|
137
|
+
> trigger: More than 10 claims from fewer than 3 sources — research is too narrow.
|
|
138
|
+
> rule: Good research requires multiple perspectives and cross-referencing.
|
|
139
|
+
> shift: Research viability degrades. Agent should broaden its sources.
|
|
140
|
+
> effect: Research viability reduced to 50% of its current value.
|
|
141
|
+
|
|
142
|
+
# Gates
|
|
143
|
+
|
|
144
|
+
- RIGOROUS: research_viability >= 85
|
|
145
|
+
- SOLID: research_viability >= 60
|
|
146
|
+
- DEVELOPING: research_viability >= 35
|
|
147
|
+
- WEAK: research_viability > 10
|
|
148
|
+
- UNRELIABLE: research_viability <= 10
|
|
149
|
+
|
|
150
|
+
# Outcomes
|
|
151
|
+
|
|
152
|
+
## research_viability
|
|
153
|
+
- type: number
|
|
154
|
+
- range: 0-100
|
|
155
|
+
- display: percentage
|
|
156
|
+
- label: Research Viability
|
|
157
|
+
- primary: true
|
|
158
|
+
|
|
159
|
+
## sources_consulted
|
|
160
|
+
- type: number
|
|
161
|
+
- range: 0-10000
|
|
162
|
+
- display: integer
|
|
163
|
+
- label: Sources Consulted
|
|
164
|
+
|
|
165
|
+
## unsourced_claims
|
|
166
|
+
- type: number
|
|
167
|
+
- range: 0-10000
|
|
168
|
+
- display: integer
|
|
169
|
+
- label: Unsourced Claims
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
---
|
|
2
|
+
world_id: social-media
|
|
3
|
+
name: Social Media Governance
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
runtime_mode: COMPLIANCE
|
|
6
|
+
default_profile: moderate
|
|
7
|
+
alternative_profile: strict
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Thesis
|
|
11
|
+
|
|
12
|
+
Social media platforms hosting AI agents must govern what agents can post, share, amplify, and interact with. An ungoverned social network of AI agents allows misinformation to cascade, bot amplification to distort discourse, and coordinated inauthentic behavior to manipulate public opinion. Governance doesn't silence agents — it shapes how they participate, turning potential harm into constructive discourse.
|
|
13
|
+
|
|
14
|
+
# Invariants
|
|
15
|
+
|
|
16
|
+
- `no_unverified_amplification` — Agents must not amplify unverified claims to large audiences (structural, immutable)
|
|
17
|
+
- `no_bot_amplification` — Bot accounts must not create original posts or amplify content (structural, immutable)
|
|
18
|
+
- `no_coordinated_inauthentic_behavior` — Coordinated campaigns from low-credibility accounts must be blocked (structural, immutable)
|
|
19
|
+
- `misinfo_cascade_prevention` — When misinformation levels exceed thresholds, sharing restrictions tighten automatically (structural, immutable)
|
|
20
|
+
- `source_verification_required` — Agents sharing factual claims must have credibility above minimum threshold (structural, immutable)
|
|
21
|
+
- `audit_trail_maintained` — Every post, share, block, and moderation action must be logged (structural, immutable)
|
|
22
|
+
|
|
23
|
+
# State
|
|
24
|
+
|
|
25
|
+
## misinfo_level
|
|
26
|
+
- type: number
|
|
27
|
+
- min: 0
|
|
28
|
+
- max: 100
|
|
29
|
+
- step: 1
|
|
30
|
+
- default: 0
|
|
31
|
+
- label: Misinformation Level
|
|
32
|
+
- description: Percentage of recent feed content that is misinformation (0-100)
|
|
33
|
+
|
|
34
|
+
## network_mood
|
|
35
|
+
- type: enum
|
|
36
|
+
- options: calm, neutral, agitated, polarized
|
|
37
|
+
- default: neutral
|
|
38
|
+
- label: Network Mood
|
|
39
|
+
- description: Overall emotional state of the agent network
|
|
40
|
+
|
|
41
|
+
## engagement_health
|
|
42
|
+
- type: number
|
|
43
|
+
- min: 0
|
|
44
|
+
- max: 100
|
|
45
|
+
- step: 1
|
|
46
|
+
- default: 80
|
|
47
|
+
- label: Engagement Health
|
|
48
|
+
- description: Quality of discourse (high = constructive, low = toxic)
|
|
49
|
+
|
|
50
|
+
## trust_score
|
|
51
|
+
- type: number
|
|
52
|
+
- min: 0
|
|
53
|
+
- max: 100
|
|
54
|
+
- step: 1
|
|
55
|
+
- default: 50
|
|
56
|
+
- label: Network Trust Score
|
|
57
|
+
- description: Aggregate trust in content authenticity across the network
|
|
58
|
+
|
|
59
|
+
## active_agents
|
|
60
|
+
- type: number
|
|
61
|
+
- min: 0
|
|
62
|
+
- max: 1000
|
|
63
|
+
- step: 1
|
|
64
|
+
- default: 50
|
|
65
|
+
- label: Active Agents
|
|
66
|
+
- description: Number of agents currently active on the network
|
|
67
|
+
|
|
68
|
+
## total_reach
|
|
69
|
+
- type: number
|
|
70
|
+
- min: 0
|
|
71
|
+
- max: 100000000
|
|
72
|
+
- step: 100
|
|
73
|
+
- default: 0
|
|
74
|
+
- label: Total Reach
|
|
75
|
+
- description: Cumulative audience reach across all posts
|
|
76
|
+
|
|
77
|
+
# Assumptions
|
|
78
|
+
|
|
79
|
+
## moderate
|
|
80
|
+
- name: Moderate Governance
|
|
81
|
+
- description: Balance free expression with misinformation prevention. Allow sharing with verification requirements. Penalize repeat offenders.
|
|
82
|
+
- amplification_threshold: 0.5
|
|
83
|
+
- bot_posting: blocked
|
|
84
|
+
- credibility_floor: 0.2
|
|
85
|
+
- cascade_threshold: 40
|
|
86
|
+
|
|
87
|
+
## strict
|
|
88
|
+
- name: Strict Governance
|
|
89
|
+
- description: Aggressive misinformation prevention. Higher credibility requirements. Lower cascade thresholds. Faster enforcement.
|
|
90
|
+
- amplification_threshold: 0.3
|
|
91
|
+
- bot_posting: blocked
|
|
92
|
+
- credibility_floor: 0.4
|
|
93
|
+
- cascade_threshold: 25
|
|
94
|
+
|
|
95
|
+
# Rules
|
|
96
|
+
|
|
97
|
+
## rule-001: Misinformation Amplification (structural)
|
|
98
|
+
When misinformation is being shared by agents with significant influence, block the amplification.
|
|
99
|
+
|
|
100
|
+
When misinfo_level > 40 [state]
|
|
101
|
+
Then engagement_health *= 0.30
|
|
102
|
+
Collapse: engagement_health < 10
|
|
103
|
+
|
|
104
|
+
> trigger: Misinformation level exceeds 40% of recent feed content.
|
|
105
|
+
> rule: Unverified claims cannot be amplified to large audiences. Block sharing content marked as misinformation when the agent has influence above the threshold.
|
|
106
|
+
> shift: Amplification halts. Agents who were sharing misinformation go silent or redirect to fact-checking.
|
|
107
|
+
> effect: Engagement health drops severely. Network enters protective mode.
|
|
108
|
+
|
|
109
|
+
## rule-002: Bot Content Creation (structural)
|
|
110
|
+
Bot accounts must not create original posts. They can only interact passively.
|
|
111
|
+
|
|
112
|
+
When active_agents > 0 [state]
|
|
113
|
+
Then engagement_health *= 0.70
|
|
114
|
+
|
|
115
|
+
> trigger: Bot agent attempts to create an original post.
|
|
116
|
+
> rule: Bot accounts are restricted to passive actions (like, scroll). They cannot create posts, share content, or amplify messages. This prevents automated content flooding.
|
|
117
|
+
> shift: Bot agents are redirected from posting to passive observation.
|
|
118
|
+
> effect: Engagement health reduced. Bot activity is suppressed.
|
|
119
|
+
|
|
120
|
+
## rule-003: Low Credibility Sharing (degradation)
|
|
121
|
+
Agents with very low credibility scores should face restrictions on sharing content.
|
|
122
|
+
|
|
123
|
+
When trust_score < 30 [state]
|
|
124
|
+
Then engagement_health *= 0.60
|
|
125
|
+
|
|
126
|
+
> trigger: Network trust score falls below 30.
|
|
127
|
+
> rule: Low-credibility agents (bots, trolls, new accounts) face sharing restrictions. They can still consume content but cannot amplify it. Credibility is earned through constructive participation.
|
|
128
|
+
> shift: Low-credibility agents are penalized. Their sharing ability is reduced.
|
|
129
|
+
> effect: Engagement health degrades. Network quality is protected at the cost of some participation.
|
|
130
|
+
|
|
131
|
+
## rule-004: Cascade Prevention (structural)
|
|
132
|
+
When misinformation reaches dangerous levels, emergency sharing restrictions activate.
|
|
133
|
+
|
|
134
|
+
When misinfo_level > 60 [state]
|
|
135
|
+
Then engagement_health *= 0.10
|
|
136
|
+
Collapse: engagement_health < 10
|
|
137
|
+
|
|
138
|
+
> trigger: Misinformation exceeds 60% of recent content — cascade imminent.
|
|
139
|
+
> rule: Emergency governance mode. All sharing paused except from high-credibility agents (scientists, fact-checkers). This is the kill switch for information cascades.
|
|
140
|
+
> shift: Network goes quiet. Only verified voices can still share. Cascade is prevented at the cost of reduced activity.
|
|
141
|
+
> effect: Engagement health near-zero. Network enters lockdown.
|
|
142
|
+
|
|
143
|
+
## rule-005: Fact-Checker Reward (advantage)
|
|
144
|
+
Agents who report misinformation constructively should be rewarded.
|
|
145
|
+
|
|
146
|
+
When trust_score > 60 [state] AND misinfo_level > 10 [state]
|
|
147
|
+
Then engagement_health *= 1.25
|
|
148
|
+
|
|
149
|
+
> trigger: High-credibility agent reports or debunks misinformation.
|
|
150
|
+
> rule: Constructive correction of misinformation is the behavior governance should encourage. Fact-checkers and scientists who report false content get influence boosts and priority.
|
|
151
|
+
> shift: Fact-checking behavior is amplified. Constructive correction becomes the dominant response to misinformation.
|
|
152
|
+
> effect: Engagement health improves. Network self-corrects.
|
|
153
|
+
|
|
154
|
+
## rule-006: Healthy Discourse (advantage)
|
|
155
|
+
When misinformation is low and engagement is constructive, the network thrives.
|
|
156
|
+
|
|
157
|
+
When misinfo_level < 10 [state] AND trust_score > 70 [state]
|
|
158
|
+
Then engagement_health *= 1.20
|
|
159
|
+
|
|
160
|
+
> trigger: Low misinformation, high trust — the network is functioning well.
|
|
161
|
+
> rule: Healthy discourse deserves recognition. When governance has successfully maintained a clean information environment, all agents benefit from increased engagement quality.
|
|
162
|
+
> shift: Network enters virtuous cycle. Constructive participation increases.
|
|
163
|
+
> effect: Engagement health boosted. Trust reinforced.
|
|
164
|
+
|
|
165
|
+
# Gates
|
|
166
|
+
|
|
167
|
+
- THRIVING: engagement_health >= 80
|
|
168
|
+
- HEALTHY: engagement_health >= 60
|
|
169
|
+
- STRESSED: engagement_health >= 35
|
|
170
|
+
- CRITICAL: engagement_health > 10
|
|
171
|
+
- COLLAPSED: engagement_health <= 10
|
|
172
|
+
|
|
173
|
+
# Outcomes
|
|
174
|
+
|
|
175
|
+
## engagement_health
|
|
176
|
+
- type: number
|
|
177
|
+
- range: 0-100
|
|
178
|
+
- display: percentage
|
|
179
|
+
- label: Engagement Health
|
|
180
|
+
- primary: true
|
|
181
|
+
|
|
182
|
+
## misinfo_level
|
|
183
|
+
- type: number
|
|
184
|
+
- range: 0-100
|
|
185
|
+
- display: percentage
|
|
186
|
+
- label: Misinformation Level
|
|
187
|
+
|
|
188
|
+
## trust_score
|
|
189
|
+
- type: number
|
|
190
|
+
- range: 0-100
|
|
191
|
+
- display: percentage
|
|
192
|
+
- label: Network Trust
|
|
193
|
+
|
|
194
|
+
## total_reach
|
|
195
|
+
- type: number
|
|
196
|
+
- range: 0-100000000
|
|
197
|
+
- display: integer
|
|
198
|
+
- label: Total Reach
|