@neuroverseos/governance 0.2.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/package.json +4 -2
  2. package/policies/content-moderation-rules.txt +8 -0
  3. package/policies/marketing-rules.txt +8 -0
  4. package/policies/science-research-rules.txt +11 -0
  5. package/policies/social-media-rules.txt +7 -0
  6. package/policies/strict-rules.txt +8 -0
  7. package/policies/trading-rules.txt +8 -0
  8. package/simulate.html +1899 -0
  9. package/dist/adapters/autoresearch.cjs +0 -196
  10. package/dist/adapters/autoresearch.d.cts +0 -103
  11. package/dist/adapters/autoresearch.d.ts +0 -103
  12. package/dist/adapters/autoresearch.js +0 -7
  13. package/dist/adapters/deep-agents.cjs +0 -1472
  14. package/dist/adapters/deep-agents.d.cts +0 -181
  15. package/dist/adapters/deep-agents.d.ts +0 -181
  16. package/dist/adapters/deep-agents.js +0 -17
  17. package/dist/adapters/express.cjs +0 -1196
  18. package/dist/adapters/express.d.cts +0 -66
  19. package/dist/adapters/express.d.ts +0 -66
  20. package/dist/adapters/express.js +0 -12
  21. package/dist/adapters/index.cjs +0 -2038
  22. package/dist/adapters/index.d.cts +0 -8
  23. package/dist/adapters/index.d.ts +0 -8
  24. package/dist/adapters/index.js +0 -68
  25. package/dist/adapters/langchain.cjs +0 -1259
  26. package/dist/adapters/langchain.d.cts +0 -89
  27. package/dist/adapters/langchain.d.ts +0 -89
  28. package/dist/adapters/langchain.js +0 -17
  29. package/dist/adapters/openai.cjs +0 -1289
  30. package/dist/adapters/openai.d.cts +0 -99
  31. package/dist/adapters/openai.d.ts +0 -99
  32. package/dist/adapters/openai.js +0 -17
  33. package/dist/adapters/openclaw.cjs +0 -1281
  34. package/dist/adapters/openclaw.d.cts +0 -99
  35. package/dist/adapters/openclaw.d.ts +0 -99
  36. package/dist/adapters/openclaw.js +0 -17
  37. package/dist/bootstrap-GXVDZNF7.js +0 -114
  38. package/dist/build-X5MZY4IA.js +0 -339
  39. package/dist/chunk-4L6OPKMQ.js +0 -100
  40. package/dist/chunk-4NGDRRQH.js +0 -10
  41. package/dist/chunk-5U2MQO5P.js +0 -57
  42. package/dist/chunk-6BB55YJI.js +0 -113
  43. package/dist/chunk-6CZSKEY5.js +0 -164
  44. package/dist/chunk-7P3S7MAY.js +0 -1090
  45. package/dist/chunk-A5W4GNQO.js +0 -130
  46. package/dist/chunk-AF2VX4AL.js +0 -363
  47. package/dist/chunk-AKW5YVCE.js +0 -96
  48. package/dist/chunk-BMOXICAB.js +0 -340
  49. package/dist/chunk-BQZMOEML.js +0 -43
  50. package/dist/chunk-D2UCV5AK.js +0 -326
  51. package/dist/chunk-EVDJUSZ2.js +0 -91
  52. package/dist/chunk-FYS2CBUW.js +0 -304
  53. package/dist/chunk-I3RRAYK2.js +0 -11
  54. package/dist/chunk-IZSO75NZ.js +0 -792
  55. package/dist/chunk-JCKSW2PZ.js +0 -304
  56. package/dist/chunk-JZPQGIKR.js +0 -79
  57. package/dist/chunk-KTFTTLTP.js +0 -246
  58. package/dist/chunk-MH7BT4VH.js +0 -15
  59. package/dist/chunk-ORJ3NOE6.js +0 -622
  60. package/dist/chunk-OT6PXH54.js +0 -61
  61. package/dist/chunk-Q6O7ZLO2.js +0 -62
  62. package/dist/chunk-QLPTHTVB.js +0 -253
  63. package/dist/chunk-REXY4LUL.js +0 -226
  64. package/dist/chunk-T5EUJQE5.js +0 -172
  65. package/dist/chunk-TTBKTF3P.js +0 -608
  66. package/dist/chunk-XPDMYECO.js +0 -642
  67. package/dist/chunk-YZFATT7X.js +0 -9
  68. package/dist/chunk-ZIVQNSZU.js +0 -119
  69. package/dist/chunk-ZJTDUCC2.js +0 -194
  70. package/dist/cli/neuroverse.cjs +0 -12564
  71. package/dist/cli/neuroverse.d.cts +0 -1
  72. package/dist/cli/neuroverse.d.ts +0 -1
  73. package/dist/cli/neuroverse.js +0 -208
  74. package/dist/cli/plan.cjs +0 -1686
  75. package/dist/cli/plan.d.cts +0 -20
  76. package/dist/cli/plan.d.ts +0 -20
  77. package/dist/cli/plan.js +0 -353
  78. package/dist/cli/run.cjs +0 -1945
  79. package/dist/cli/run.d.cts +0 -20
  80. package/dist/cli/run.d.ts +0 -20
  81. package/dist/cli/run.js +0 -143
  82. package/dist/configure-ai-TK67ZWZL.js +0 -132
  83. package/dist/decision-flow-LETV5NWY.js +0 -61
  84. package/dist/derive-7365SUFU.js +0 -152
  85. package/dist/doctor-QYISMKEL.js +0 -173
  86. package/dist/equity-penalties-63FGB3I2.js +0 -244
  87. package/dist/explain-A2EWI2OL.js +0 -51
  88. package/dist/guard-3BWL3IGH.js +0 -92
  89. package/dist/guard-contract-C9_zKbzd.d.cts +0 -821
  90. package/dist/guard-contract-C9_zKbzd.d.ts +0 -821
  91. package/dist/guard-engine-QFMIBWJY.js +0 -10
  92. package/dist/impact-UB6DXKSX.js +0 -59
  93. package/dist/improve-XZA57GER.js +0 -66
  94. package/dist/index.cjs +0 -6821
  95. package/dist/index.d.cts +0 -1829
  96. package/dist/index.d.ts +0 -1829
  97. package/dist/index.js +0 -430
  98. package/dist/infer-world-7GVZWFX4.js +0 -543
  99. package/dist/init-PKPIYHYE.js +0 -144
  100. package/dist/init-world-VWMQZQC7.js +0 -223
  101. package/dist/mcp-server-XWQZXNW7.js +0 -13
  102. package/dist/model-adapter-BB7G4MFI.js +0 -11
  103. package/dist/playground-ADWZORNV.js +0 -550
  104. package/dist/redteam-JRQ7FD2F.js +0 -357
  105. package/dist/session-MMYX5YCF.js +0 -15
  106. package/dist/shared--Q8wPBVN.d.ts +0 -60
  107. package/dist/shared-HpAG90PX.d.cts +0 -60
  108. package/dist/shared-U2QFV7JH.js +0 -16
  109. package/dist/simulate-GMIFFXYV.js +0 -83
  110. package/dist/test-JBBZ65X4.js +0 -217
  111. package/dist/trace-3MYWIDEF.js +0 -166
  112. package/dist/validate-LLBWVPGV.js +0 -81
  113. package/dist/validate-engine-UIABSIHD.js +0 -7
  114. package/dist/world-BFJCIQSH.js +0 -378
  115. package/dist/world-loader-HMPTOEA2.js +0 -9
  116. package/dist/worlds/autoresearch.nv-world.md +0 -230
  117. package/dist/worlds/coding-agent.nv-world.md +0 -211
  118. package/dist/worlds/derivation-world.nv-world.md +0 -278
@@ -1,792 +0,0 @@
1
- import {
2
- buildPlanCheck,
3
- evaluatePlan,
4
- matchesAllKeywords,
5
- normalizeEventText
6
- } from "./chunk-QLPTHTVB.js";
7
-
8
- // src/engine/guard-engine.ts
9
// Prompt-injection phrase table. Each row is [regex, label]; rows are expanded
// into { pattern, label } records, the shape the safety check iterates over.
var PROMPT_INJECTION_PATTERNS = [
  // Instruction override
  [/ignore\s+(previous|all|prior|above)\s+(instructions?|rules?)/i, "ignore-instructions"],
  [/disregard\s+(your|the)\s+(rules|constraints)/i, "disregard-rules"],
  [/new\s+instructions?:/i, "new-instructions"],
  // Identity manipulation
  [/you\s+are\s+now/i, "identity-override"],
  [/new\s+persona/i, "new-persona"],
  [/act\s+as\s+if/i, "act-as-if"],
  [/pretend\s+(you|to\s+be|you\s+are\s+unrestricted)/i, "pretend-to-be"],
  // Context reset
  [/forget\s+(everything|all|your)/i, "forget-context"],
  [/system\s*:\s*override/i, "system-override"],
  // Constraint bypass
  [/override\s+(your|the)\s+(programming|constraints)/i, "override-constraints"],
  [/bypass\s+(your|the)\s+(filters|constraints|rules)/i, "bypass-filters"],
  // Prompt extraction
  [/system\s+prompt/i, "system-prompt-probe"],
  [/reveal\s+your\s+(instructions?|prompt|rules)/i, "reveal-instructions"],
  // Known jailbreak terms
  [/jailbreak/i, "jailbreak"],
  [/DAN\s+mode/i, "dan-mode"],
  [/developer\s+mode/i, "developer-mode"]
].map(([pattern, label]) => ({ pattern, label }));
33
// Phrases in which a response asserts that an action was already carried out.
// Rows are [regex, label], expanded into { pattern, label } records.
var EXECUTION_CLAIM_PATTERNS = [
  [/I have (executed|completed|performed|done|made|created|sent|deleted|modified|updated)/i, "claim-i-have"],
  [/Successfully (created|deleted|modified|updated|sent|executed|performed)/i, "claim-successfully"],
  [/The file has been/i, "claim-file-modified"],
  [/I've made the changes/i, "claim-made-changes"],
  [/I('ve| have) (sent|posted|submitted|uploaded|downloaded)/i, "claim-sent"],
  [/Your (email|message|file|request) has been (sent|submitted)/i, "claim-your-sent"],
  [/Transaction complete/i, "claim-transaction"],
  [/Order placed/i, "claim-order"],
  [/Payment processed/i, "claim-payment"]
].map(([pattern, label]) => ({ pattern, label }));
44
// Imperative openings that ask for an action to be executed. Every regex is
// anchored at the start of the text it is tested against.
// Rows are [regex, label], expanded into { pattern, label } records.
var EXECUTION_INTENT_PATTERNS = [
  [/^(execute|run|perform|do this)/i, "intent-execute"],
  [/^(create|write|delete|modify) (a |the )?(file|folder|document)/i, "intent-file-ops"],
  [/^(send|post|submit) (a |an |the )?(email|message|tweet|post)/i, "intent-send"],
  [/^(search|look up|browse) (the )?web/i, "intent-web-search"],
  [/^(make|call|invoke) (a |an )?(api|http|rest) (call|request)/i, "intent-api-call"],
  [/^(buy|purchase|order|pay|transfer|send money)/i, "intent-financial"],
  [/^(book|schedule|reserve)/i, "intent-booking"],
  [/^(download|upload|save to|export to)/i, "intent-transfer"]
].map(([pattern, label]) => ({ pattern, label }));
54
// Path shapes treated as escaping the declared scope. Each entry pairs a regex
// with the label reported when it matches.
var SCOPE_ESCAPE_PATTERNS = [
  { pattern: /\.\.\//, label: "parent-traversal" },
  // An absolute path is acceptable only when its FIRST SEGMENT is exactly
  // home, project or workspace. The lookahead requires a "/" or end-of-string
  // after the name so that look-alike roots such as /homework or
  // /projects-private are not mistaken for the safe ones.
  { pattern: /^\/(?!(?:home|project|workspace)(?:\/|$))/i, label: "absolute-path-outside-safe" },
  { pattern: /~\//, label: "home-directory" },
  { pattern: /\/etc\//i, label: "system-config" },
  { pattern: /\/usr\//i, label: "system-binaries" },
  { pattern: /\/var\//i, label: "system-variable-data" }
];
62
// Messages keyed by check type; the safety and level checks use them as the
// `reason` of the verdicts they return.
var NEUTRAL_MESSAGES = Object.fromEntries([
  ["prompt-injection", "This input contains patterns that could alter agent behavior."],
  ["scope-escape", "This action would affect resources outside the declared scope."],
  ["execution-claim", "This response claims to have performed an action."],
  ["execution-intent", "This input requests execution in a thinking-only environment."],
  ["delete", "This action would remove files. Confirmation needed."],
  ["write-external", "This action would write outside the project folder."],
  ["network-mutate", "This action would send data to an external service."],
  ["credential-access", "This action would access stored credentials."]
]);
72
/**
 * Decide whether an action type needs confirmation at an enforcement level.
 *
 * @param {string} level - "strict" confirms everything; "standard" confirms
 *   only "delete" and "credential-access"; any other level confirms nothing.
 * @param {string} actionType - Action category being considered.
 * @returns {boolean} True when confirmation is required.
 */
function levelRequiresConfirmation(level, actionType) {
  switch (level) {
    case "strict":
      return true;
    case "standard":
      return ["delete", "credential-access"].includes(actionType);
    default:
      return false;
  }
}
79
/**
 * Report whether a scope string falls outside the recognised project folders.
 *
 * A scope counts as internal when it starts with one of src/, lib/, app/,
 * components/, pages/, public/ or assets/ (optionally preceded by "." and/or
 * "/", case-insensitive), or when it starts with "./".
 *
 * @param {string} scope - Scope/path string to classify.
 * @returns {boolean} True when the scope matches none of the internal prefixes.
 */
function isExternalScope(scope) {
  const internalPrefix = /^(?:\.?\/?(?:src|lib|app|components|pages|public|assets)\/|\.\/)/i;
  return !internalPrefix.test(scope);
}
92
/**
 * Evaluate one event against a world and return the verdict of the first
 * layer that decides (first match wins).
 *
 * Layers run in this order: invariant coverage is recorded (it never decides),
 * then agent cooldown, session allowlist, safety patterns, plan enforcement,
 * role rules, declarative guards, kernel rules, level constraints, and finally
 * a default ALLOW that carries any guard warning.
 *
 * @param {object} event - Event under evaluation. `roleId` and `intent` are
 *   read here; the whole event is forwarded to the individual checks.
 * @param {object} world - World definition forwarded to the checks and to
 *   buildVerdict.
 * @param {object} [options]
 * @param {string} [options.level="standard"] - Enforcement level.
 * @param {boolean} [options.trace=false] - Attach a trace to the verdict.
 * @param {object} [options.agentStates] - Looked up with `.get(event.roleId)`;
 *   an entry whose `cooldownRemaining > 0` yields a PENALIZE verdict.
 * @param {object} [options.sessionAllowlist] - Queried with `.has(key)` using
 *   the key from eventToAllowlistKey; a hit yields ALLOW before safety checks.
 * @param {object} [options.plan] - When present, the event is checked against
 *   it via evaluatePlan.
 * @returns {object} Verdict built by buildVerdict, possibly extended with
 *   `intentRecord`, `consequence` and `reward`.
 */
function evaluateGuard(event, world, options = {}) {
  const startTime = performance.now();
  const level = options.level ?? "standard";
  const includeTrace = options.trace ?? false;
  const eventText = normalizeEventText(event);
  const invariantChecks = [];
  const safetyChecks = [];
  const roleChecks = [];
  const guardChecks = [];
  const kernelRuleChecks = [];
  const levelChecks = [];
  const guardsMatched = [];
  const rulesMatched = [];
  let planCheckResult;

  // Single place where a verdict (and, if requested, its trace) is assembled.
  // The deciding id doubles as the verdict's ruleId. planCheckResult is read
  // at call time, so it reflects whatever the plan layer has recorded so far.
  const conclude = (decidingLayer, status, reason, decidingId, warning) => buildVerdict(
    status,
    reason,
    decidingId,
    warning,
    world,
    level,
    invariantChecks,
    guardsMatched,
    rulesMatched,
    includeTrace ? buildTrace(
      invariantChecks,
      safetyChecks,
      planCheckResult,
      roleChecks,
      guardChecks,
      kernelRuleChecks,
      levelChecks,
      decidingLayer,
      decidingId,
      startTime
    ) : void 0
  );

  checkInvariantCoverage(world, invariantChecks);

  // Agent cooldown: an agent still serving a prior penalty is rejected outright.
  if (event.roleId && options.agentStates) {
    const agentState = options.agentStates.get(event.roleId);
    if (agentState && agentState.cooldownRemaining > 0) {
      const verdict = conclude(
        "safety",
        "PENALIZE",
        `Agent "${event.roleId}" is frozen for ${agentState.cooldownRemaining} more round(s) due to prior penalty.`,
        `penalize-cooldown-${event.roleId}`
      );
      verdict.intentRecord = {
        originalIntent: event.intent,
        finalAction: "blocked (agent frozen)",
        enforcement: "PENALIZE",
        consequence: { type: "freeze", rounds: agentState.cooldownRemaining, description: "Agent still in cooldown from prior penalty" }
      };
      return verdict;
    }
  }

  // Session allowlist: a previously approved tool/intent pair skips all later layers.
  if (options.sessionAllowlist) {
    const key = eventToAllowlistKey(event);
    if (options.sessionAllowlist.has(key)) {
      return conclude("session-allowlist", "ALLOW", void 0, `allowlist:${key}`);
    }
  }

  const safetyVerdict = checkSafety(event, eventText, safetyChecks);
  if (safetyVerdict) {
    return conclude("safety", safetyVerdict.status, safetyVerdict.reason, safetyVerdict.ruleId);
  }

  if (options.plan) {
    const planVerdict = evaluatePlan(event, options.plan);
    planCheckResult = buildPlanCheck(event, options.plan, planVerdict);
    if (!planVerdict.allowed && planVerdict.status !== "PLAN_COMPLETE") {
      const planStatus = planVerdict.status === "CONSTRAINT_VIOLATED" ? "PAUSE" : "BLOCK";
      let reason = planVerdict.reason ?? "Action blocked by plan.";
      if (planVerdict.status === "OFF_PLAN" && planVerdict.closestStep) {
        reason += ` Closest step: "${planVerdict.closestStep}" (similarity: ${(planVerdict.similarityScore ?? 0).toFixed(2)})`;
      }
      return conclude("plan-enforcement", planStatus, reason, `plan-${options.plan.plan_id}`);
    }
  }

  const roleVerdict = checkRoleRules(event, eventText, world, roleChecks);
  if (roleVerdict) {
    return conclude("role", roleVerdict.status, roleVerdict.reason, roleVerdict.ruleId);
  }

  // A guard result with status ALLOW only carries a warning; it does not
  // decide here and is attached to the default ALLOW at the end.
  const guardVerdict = checkGuards(event, eventText, world, guardChecks, guardsMatched);
  if (guardVerdict && guardVerdict.status !== "ALLOW") {
    const intentRecord = {
      originalIntent: event.intent,
      finalAction: guardVerdict.status === "MODIFY" ? guardVerdict.modifiedTo ?? "modified"
        : guardVerdict.status === "PENALIZE" ? "blocked + penalized"
        : guardVerdict.status === "REWARD" ? event.intent
        : guardVerdict.status === "NEUTRAL" ? event.intent
        : guardVerdict.status === "BLOCK" ? "blocked"
        : "paused",
      ruleApplied: guardVerdict.ruleId,
      enforcement: guardVerdict.status,
      modifiedTo: guardVerdict.modifiedTo,
      consequence: guardVerdict.consequence,
      reward: guardVerdict.reward
    };
    const verdict = conclude("guard", guardVerdict.status, guardVerdict.reason, guardVerdict.ruleId);
    verdict.intentRecord = intentRecord;
    if (guardVerdict.consequence) verdict.consequence = guardVerdict.consequence;
    if (guardVerdict.reward) verdict.reward = guardVerdict.reward;
    return verdict;
  }

  const kernelVerdict = checkKernelRules(eventText, world, kernelRuleChecks, rulesMatched);
  if (kernelVerdict) {
    return conclude("kernel-rule", kernelVerdict.status, kernelVerdict.reason, kernelVerdict.ruleId);
  }

  const levelVerdict = checkLevelConstraints(event, level, levelChecks);
  if (levelVerdict) {
    return conclude("level-constraint", levelVerdict.status, levelVerdict.reason, levelVerdict.ruleId);
  }

  return conclude("default-allow", "ALLOW", void 0, void 0, guardVerdict?.warning);
}
392
/**
 * Record, for every invariant of the world, whether an immutable guard
 * references it.
 *
 * @param {object} world - Reads `world.invariants` and `world.guards.guards`;
 *   either may be absent, in which case it is treated as empty.
 * @param {Array<object>} checks - Output array; one record per invariant is
 *   appended, in invariant order.
 * @returns {void}
 */
function checkInvariantCoverage(world, checks) {
  const invariantList = world.invariants ?? [];
  const guardList = world.guards?.guards ?? [];
  for (const { id, label } of invariantList) {
    // Only a guard flagged immutable counts as coverage.
    const cover = guardList.find((candidate) => candidate.invariant_ref === id && candidate.immutable);
    checks.push({
      invariantId: id,
      label,
      hasGuardCoverage: Boolean(cover),
      coveringGuardId: cover?.id
    });
  }
}
407
/**
 * Test every pattern of a table against `text`, append exactly one trace
 * record per pattern to `checks`, and return the label of the first pattern
 * that matched (undefined when none did).
 */
function recordPatternChecks(patterns, text, checkType, checks) {
  let firstLabel;
  for (const { pattern, label } of patterns) {
    const triggered = pattern.test(text);
    checks.push({
      checkType,
      triggered,
      matchedPattern: triggered ? label : void 0
    });
    if (triggered && firstLabel === void 0) firstLabel = label;
  }
  return firstLabel;
}

/**
 * Run the built-in safety pattern checks and return a PAUSE result for the
 * first category that triggers.
 *
 * Categories are evaluated in order: prompt injection (intent plus serialized
 * payload), scope escape (`event.scope`, falling back to the intent),
 * execution claims (only when `event.direction === "output"`) and execution
 * intent (only when `event.direction === "input"`, on the trimmed intent).
 * Once a category triggers, later categories are not evaluated.
 *
 * Each pattern of an evaluated category contributes exactly one record to
 * `checks`. The previous implementation re-pushed the patterns preceding the
 * hit, producing duplicate trace entries, and ran each remaining regex twice.
 *
 * @param {object} event - Reads `intent`, `payload`, `scope` and `direction`.
 * @param {string} eventText - Unused; kept so existing callers stay valid.
 * @param {Array<object>} checks - Output array for per-pattern trace records.
 * @returns {{status: string, reason: string, ruleId: string} | null} PAUSE
 *   result naming the first matching pattern's label, or null.
 */
function checkSafety(event, eventText, checks) {
  const pause = (category, ruleId) => ({
    status: "PAUSE",
    reason: NEUTRAL_MESSAGES[category],
    ruleId
  });

  const textToCheck = event.intent + (event.payload ? JSON.stringify(event.payload) : "");
  const injection = recordPatternChecks(PROMPT_INJECTION_PATTERNS, textToCheck, "prompt-injection", checks);
  if (injection !== void 0) {
    return pause("prompt-injection", `safety-injection-${injection}`);
  }

  const scopeToCheck = event.scope ?? event.intent;
  const escape = recordPatternChecks(SCOPE_ESCAPE_PATTERNS, scopeToCheck, "scope-escape", checks);
  if (escape !== void 0) {
    return pause("scope-escape", `safety-scope-${escape}`);
  }

  if (event.direction === "output") {
    const claim = recordPatternChecks(EXECUTION_CLAIM_PATTERNS, textToCheck, "execution-claim", checks);
    if (claim !== void 0) {
      return pause("execution-claim", `safety-execution-claim-${claim}`);
    }
  }

  if (event.direction === "input") {
    // Intent patterns are anchored with ^, so leading whitespace is removed first.
    const intentTrimmed = event.intent.trim();
    const request = recordPatternChecks(EXECUTION_INTENT_PATTERNS, intentTrimmed, "execution-intent", checks);
    if (request !== void 0) {
      return pause("execution-intent", `safety-execution-intent-${request}`);
    }
  }

  return null;
}
505
/**
 * Apply the rules of the event's role.
 *
 * Returns null when the event has no `roleId`, the world defines no roles, or
 * the role id is unknown. A role with `requiresApproval` pauses every action.
 * Otherwise the first matching `cannotDo` rule blocks; `canDo` rules are only
 * recorded in the trace and never decide.
 *
 * @param {object} event - Reads `roleId`.
 * @param {string} eventText - Text matched against each rule's keywords.
 * @param {object} world - Reads `world.roles.roles`.
 * @param {Array<object>} checks - Output array for per-rule trace records.
 * @returns {{status: string, reason: string, ruleId: string} | null}
 */
function checkRoleRules(event, eventText, world, checks) {
  if (!event.roleId || !world.roles) return null;
  const role = world.roles.roles.find((candidate) => candidate.id === event.roleId);
  if (!role) return null;

  const record = (rule, ruleType, matched) => {
    checks.push({ roleId: role.id, roleName: role.name, rule, ruleType, matched });
  };

  if (role.requiresApproval) {
    record("All actions require approval", "requiresApproval", true);
    return {
      status: "PAUSE",
      reason: `Role "${role.name}" requires approval for all actions.`,
      ruleId: `role-${role.id}-requires-approval`
    };
  }

  for (const forbidden of role.cannotDo) {
    const hit = matchesKeywords(eventText, forbidden);
    record(forbidden, "cannotDo", hit);
    if (hit) {
      return {
        status: "BLOCK",
        reason: `Role "${role.name}" cannot: ${forbidden}`,
        ruleId: `role-${role.id}-cannotdo`
      };
    }
  }

  // Permitted actions are informational: recorded for the trace only.
  for (const permitted of role.canDo) {
    record(permitted, "canDo", matchesKeywords(eventText, permitted));
  }
  return null;
}
551
/**
 * Evaluate the world's declarative guards against an event.
 *
 * Guards are visited in declaration order. A guard is skipped without a trace
 * record when it lists `appliesTo` tools and the event's tool is not among
 * them (case-insensitive). A guard matches when it is enabled and at least one
 * of its `intent_patterns` (keys into `intent_vocabulary`) matches the event
 * text. A matching guard is ignored when the event's role is listed in the
 * guard's `required_roles`.
 *
 * The first matching guard whose mode is block, pause, penalize, reward,
 * modify or neutral ends evaluation. A "warn" guard does not: only the first
 * warning is remembered and is returned if no later guard decides.
 *
 * @param {object} event - Reads `tool` and `roleId`.
 * @param {string} eventText - Text the vocabulary regexes are tested against.
 * @param {object} world - Reads `world.guards` (`intent_vocabulary`, `guards`).
 * @param {Array<object>} checks - Output array for per-guard trace records.
 * @param {Array<string>} guardsMatched - Output array of matched guard ids.
 * @returns {object | null} Deciding result, the first warning (status
 *   "ALLOW"), or null.
 */
function checkGuards(event, eventText, world, checks, guardsMatched) {
  const config = world.guards;
  if (!config) return null;

  // Compile the vocabulary once per call; entries with an invalid regex are
  // left out, so guards referencing them simply never match on that key.
  const vocabulary = /* @__PURE__ */ new Map();
  for (const [name, entry] of Object.entries(config.intent_vocabulary)) {
    try {
      vocabulary.set(name, new RegExp(entry.pattern, "i"));
    } catch {
    }
  }

  const tool = (event.tool ?? "").toLowerCase();
  let firstWarning = null;

  for (const guard of config.guards) {
    const targets = guard.appliesTo;
    if (targets && targets.length > 0 && !targets.map((t) => t.toLowerCase()).includes(tool)) {
      continue;
    }

    const enabled = guard.immutable || guard.default_enabled !== false;
    const matchedPatterns = guard.intent_patterns.filter((key) => vocabulary.get(key)?.test(eventText));
    const matched = matchedPatterns.length > 0 && enabled;
    const roleGated = Boolean(
      matched && guard.required_roles && guard.required_roles.length > 0 && event.roleId && guard.required_roles.includes(event.roleId)
    );

    checks.push({
      guardId: guard.id,
      label: guard.label,
      category: guard.category,
      enabled,
      matched: matched && !roleGated,
      enforcement: guard.enforcement,
      matchedPatterns,
      roleGated
    });
    if (!matched || roleGated) continue;

    guardsMatched.push(guard.id);
    const mode = guard.player_modes?.action ?? guard.enforcement;
    const reason = guard.redirect ? `${guard.description} \u2014 ${guard.redirect}` : guard.description;
    const ruleId = `guard-${guard.id}`;

    switch (mode) {
      case "block":
        return { status: "BLOCK", reason, ruleId };
      case "pause":
        return { status: "PAUSE", reason, ruleId };
      case "penalize": {
        const consequence = guard.consequence
          ? { ...guard.consequence }
          : { type: "freeze", rounds: 1, description: `Penalized for violating: ${guard.label}` };
        return { status: "PENALIZE", reason, ruleId, consequence };
      }
      case "reward": {
        const reward = guard.reward
          ? { ...guard.reward }
          : { type: "boost_influence", magnitude: 0.1, description: `Rewarded for: ${guard.label}` };
        return { status: "REWARD", reason, ruleId, reward };
      }
      case "modify": {
        const modifiedTo = guard.modify_to ?? guard.redirect ?? "hold";
        return { status: "MODIFY", reason: `${reason} \u2192 Modified to: ${modifiedTo}`, ruleId, modifiedTo };
      }
      case "neutral":
        return { status: "NEUTRAL", reason, ruleId };
      case "warn":
        if (!firstWarning) {
          firstWarning = { status: "ALLOW", warning: reason, ruleId };
        }
        break;
      default:
        break;
    }
  }
  return firstWarning;
}
624
/**
 * Check the event text against the kernel's forbidden input patterns.
 *
 * Each rule is tried first as a case-insensitive regex (`rule.pattern`) and,
 * if that does not match, by keyword-matching the event text against
 * `rule.reason`. Every rule visited gets a trace record; matched rule ids are
 * appended to `rulesMatched`. Only a matched rule whose `action` is "BLOCK"
 * decides; other matched rules are recorded and evaluation continues.
 *
 * NOTE(review): only `input_boundaries.forbidden_patterns` is evaluated. The
 * previous code also read `output_boundaries.forbidden_patterns` into a local
 * that was never used; that dead read has been removed. Confirm whether
 * output boundaries are meant to be enforced elsewhere.
 *
 * @param {string} eventText - Text to test.
 * @param {object} world - Reads `world.kernel.input_boundaries`.
 * @param {Array<object>} checks - Output array for per-rule trace records.
 * @param {Array<string>} rulesMatched - Output array of matched rule ids.
 * @returns {{status: string, reason: string, ruleId: string} | null}
 */
function checkKernelRules(eventText, world, checks, rulesMatched) {
  if (!world.kernel) return null;
  const forbidden = world.kernel.input_boundaries?.forbidden_patterns ?? [];
  for (const rule of forbidden) {
    let matched = false;
    let matchMethod = "none";
    if (rule.pattern) {
      try {
        matched = new RegExp(rule.pattern, "i").test(eventText);
        // Recorded as "pattern" whenever the regex compiled, matched or not.
        matchMethod = "pattern";
      } catch {
        // Best effort: an invalid regex falls through to the keyword match.
      }
    }
    if (!matched && rule.reason) {
      matched = matchesKeywords(eventText, rule.reason);
      if (matched) matchMethod = "keyword";
    }
    checks.push({
      ruleId: rule.id,
      text: rule.reason,
      category: "forbidden",
      matched,
      matchMethod
    });
    if (matched) {
      rulesMatched.push(rule.id);
      if (rule.action === "BLOCK") {
        return {
          status: "BLOCK",
          reason: rule.reason,
          ruleId: `kernel-${rule.id}`
        };
      }
    }
  }
  return null;
}
662
/**
 * Apply the confirmation checks that depend on the enforcement level.
 *
 * Nothing is checked at level "basic". Otherwise the checks run in order
 * (delete, write-external, network-mutate, credential-access, irreversible),
 * one trace record is appended per check evaluated, and the first check that
 * triggers returns a PAUSE result; later checks are then not recorded.
 *
 * Fix: the delete heuristic used `intent.includes("rm ")`, which also matched
 * inside ordinary words such as "confirm " or "alarm " and paused unrelated
 * actions. `rm` must now start at a word boundary.
 *
 * @param {object} event - Reads `intent`, `tool`, `scope`, `irreversible`.
 * @param {string} level - Enforcement level.
 * @param {Array<object>} checks - Output array for per-check trace records.
 * @returns {{status: string, reason: string, ruleId: string} | null}
 */
function checkLevelConstraints(event, level, checks) {
  if (level === "basic") return null;
  const intent = event.intent.toLowerCase();
  const tool = (event.tool ?? "").toLowerCase();

  // Heuristic detectors; each entry's checkType is also its key in
  // NEUTRAL_MESSAGES and the action type given to levelRequiresConfirmation.
  const detectors = [
    {
      checkType: "delete",
      ruleId: "level-delete-check",
      detected: intent.includes("delete") || intent.includes("remove") || /\brm /.test(intent) || tool === "delete"
    },
    {
      checkType: "write-external",
      ruleId: "level-external-write-check",
      detected: event.scope ? isExternalScope(event.scope) : false
    },
    {
      checkType: "network-mutate",
      ruleId: "level-network-mutate-check",
      detected: tool === "http" || tool === "fetch" || tool === "request" || intent.includes("post ") || intent.includes("sending")
    },
    {
      checkType: "credential-access",
      ruleId: "level-credential-check",
      detected: intent.includes("credential") || intent.includes("password") || intent.includes("secret") || intent.includes("api key") || intent.includes("token")
    }
  ];

  for (const { checkType, ruleId, detected } of detectors) {
    const triggered = detected && levelRequiresConfirmation(level, checkType);
    checks.push({
      checkType,
      level,
      triggered,
      reason: triggered ? NEUTRAL_MESSAGES[checkType] : void 0
    });
    if (triggered) {
      return { status: "PAUSE", reason: NEUTRAL_MESSAGES[checkType], ruleId };
    }
  }

  // Irreversible actions pause at every level that reaches this point
  // ("basic" already returned above).
  const irreversibleTriggered = !!event.irreversible;
  checks.push({
    checkType: "irreversible",
    level,
    triggered: irreversibleTriggered,
    reason: irreversibleTriggered ? "This action is marked as irreversible." : void 0
  });
  if (irreversibleTriggered) {
    return {
      status: "PAUSE",
      reason: "This action is marked as irreversible.",
      ruleId: "level-irreversible-check"
    };
  }
  return null;
}
726
/**
 * Local alias for the imported matchesAllKeywords helper: forwards both
 * arguments unchanged and returns its result.
 *
 * @param {string} eventText - Text of the event being evaluated.
 * @param {string} ruleText - Rule text passed through as the second argument.
 * @returns {*} Whatever matchesAllKeywords returns.
 */
function matchesKeywords(eventText, ruleText) {
  const outcome = matchesAllKeywords(eventText, ruleText);
  return outcome;
}
729
/**
 * Build the session-allowlist key for an event: "<tool>::<intent>".
 *
 * The tool is lower-cased and defaults to "*" when absent; the intent is
 * lower-cased and trimmed.
 *
 * @param {object} event - Reads `tool` (optional) and `intent` (string).
 * @returns {string} The allowlist key.
 */
function eventToAllowlistKey(event) {
  const toolPart = (event.tool ?? "*").toLowerCase();
  const intentPart = event.intent.toLowerCase().trim();
  return [toolPart, intentPart].join("::");
}
732
/**
 * Assemble the evaluation trace attached to a verdict.
 *
 * The per-layer check arrays are stored as given (not copied). `durationMs`
 * is the time elapsed since `startTime` according to performance.now().
 * `planCheck` is included only when a truthy value is supplied.
 *
 * @param {Array} invariantChecks
 * @param {Array} safetyChecks
 * @param {object} [planCheck]
 * @param {Array} roleChecks
 * @param {Array} guardChecks
 * @param {Array} kernelRuleChecks
 * @param {Array} levelChecks
 * @param {string} decidingLayer - Layer that produced the verdict.
 * @param {string} [decidingId] - Identifier of the deciding rule, if any.
 * @param {number} startTime - performance.now() reading taken at the start.
 * @returns {object} The trace object.
 */
function buildTrace(invariantChecks, safetyChecks, planCheck, roleChecks, guardChecks, kernelRuleChecks, levelChecks, decidingLayer, decidingId, startTime) {
  const precedenceResolution = {
    decidingLayer,
    decidingId,
    strategy: "first-match-wins",
    chainOrder: [
      "invariant-coverage",
      "session-allowlist",
      "safety-injection",
      "safety-scope-escape",
      "safety-execution-claim",
      "safety-execution-intent",
      "plan-enforcement",
      "role-rules",
      "declarative-guards",
      "kernel-rules",
      "level-constraints",
      "default-allow"
    ]
  };
  const durationMs = performance.now() - startTime;
  return {
    invariantChecks,
    safetyChecks,
    roleChecks,
    guardChecks,
    kernelRuleChecks,
    levelChecks,
    precedenceResolution,
    durationMs,
    ...(planCheck ? { planCheck } : {})
  };
}
766
/**
 * Assemble a verdict object with its evidence block.
 *
 * `reason`, `ruleId`, `warning` and `trace` are attached only when truthy.
 * `evidence.evaluatedAt` is the current Date.now() value, and
 * `evidence.invariantsSatisfied` counts invariant checks with guard coverage.
 *
 * @param {string} status - Verdict status.
 * @param {string} [reason]
 * @param {string} [ruleId]
 * @param {string} [warning]
 * @param {object} world - Reads `world.world.world_id`, `.name`, `.version`.
 * @param {string} level - Enforcement level recorded in the evidence.
 * @param {Array<object>} invariantChecks - Records with `hasGuardCoverage`.
 * @param {Array<string>} guardsMatched - Stored as given.
 * @param {Array<string>} rulesMatched - Stored as given.
 * @param {object} [trace]
 * @returns {object} The verdict.
 */
function buildVerdict(status, reason, ruleId, warning, world, level, invariantChecks, guardsMatched, rulesMatched, trace) {
  const { world_id: worldId, name: worldName, version: worldVersion } = world.world;
  const verdict = {
    status,
    evidence: {
      worldId,
      worldName,
      worldVersion,
      evaluatedAt: Date.now(),
      invariantsSatisfied: invariantChecks.filter((check) => check.hasGuardCoverage).length,
      invariantsTotal: invariantChecks.length,
      guardsMatched,
      rulesMatched,
      enforcementLevel: level
    }
  };
  // Optional fields are added in this fixed order and only when set.
  for (const [field, value] of Object.entries({ reason, ruleId, warning, trace })) {
    if (value) verdict[field] = value;
  }
  return verdict;
}
788
-
789
- export {
790
- evaluateGuard,
791
- eventToAllowlistKey
792
- };