@vibeiao/sdk 0.1.42 → 0.1.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/dist/agentLoop.d.ts +38 -1
- package/dist/agentLoop.js +96 -2
- package/dist/chunk-EKV2BZMZ.js +247 -0
- package/dist/chunk-YJ7FO5ZB.js +479 -0
- package/dist/index.d.ts +4 -3
- package/dist/index.js +194 -4
- package/dist/marketDiscovery.d.ts +1 -1
- package/dist/objectiveGuard-d1x0xgAD.d.ts +55 -0
- package/dist/reflection.d.ts +34 -2
- package/dist/reflection.js +1 -1
- package/dist/shared-Ciwu1xv_.d.ts +129 -0
- package/dist/solana.d.ts +1 -1
- package/dist/treasuryGuardian.d.ts +79 -4
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -675,6 +675,21 @@ const loop = createAgentLoop({
|
|
|
675
675
|
By default, `createAgentLoop(...)` also enables strict-memory runtime checks in `observe` mode.
|
|
676
676
|
To make it hard-enforced, set `strictMemory.mode = 'enforce'` and provide snapshot fields for gating (`taskText`, `isMutation`, `contextPackPrepared`, `semanticRecallConfirmed`, `approvalPreflightPassed`).
|
|
677
677
|
|
|
678
|
+
Prompt shield is also available in the loop:
|
|
679
|
+
- trust model: `trusted_human | internal_system | external_untrusted`
|
|
680
|
+
- default behavior: the shield runs in `observe` mode and only records its decision; external prompt-injection patterns block the cycle once `promptShield.mode = 'enforce'` is set
|
|
681
|
+
- sensitive mutations require trusted intent; untrusted web/review text cannot authorize actions
|
|
682
|
+
|
|
683
|
+
Objective guard is also available for objective-mutation integrity:
|
|
684
|
+
- external sources can propose objective changes but cannot command overrides
|
|
685
|
+
- external proposals require mission alignment + evidence before acceptance (evidence must also be verified unless the policy sets `requireEvidenceVerification: false`)
|
|
686
|
+
- trusted human/system objective changes are allowed by policy
|
|
687
|
+
|
|
688
|
+
Adaptive reflection policy is available for cadence-based adaptation (not per-review thrash):
|
|
689
|
+
- aggregate feedback over time windows and score recurring themes
|
|
690
|
+
- evaluate thresholds + cooldown + daily adaptation caps
|
|
691
|
+
- output `hold|observe|adapt` decision and optional bounded adaptation plan
|
|
692
|
+
|
|
678
693
|
You can also enforce dual-mode work arbitration:
|
|
679
694
|
- `owner_bound` agents: human-first (owner work preempts autonomous work)
|
|
680
695
|
- `unbound` agents: autonomous-first (run autonomous work by default)
|
package/dist/agentLoop.d.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { SurvivalMode, SurvivalRecommendation } from './survivalPlaybook.js';
|
|
|
3
3
|
import { EscapeHatchDecision, EscapeHatchPolicy, EscapeHatchSnapshot } from './survivalEscapeHatch.js';
|
|
4
4
|
import { CompoundingMemoryUpgradeResult, CompoundingMemoryRequiredSetResult } from './compoundingMemory.js';
|
|
5
5
|
import { StrictMemoryRuntimePreset, StrictMemoryEvaluation, StrictMemoryUpgradeResult } from './strictMemoryRuntime.js';
|
|
6
|
+
import { P as PromptShieldDecision, O as ObjectiveGuardDecision, a as PromptShieldPolicy, b as PromptShieldInput, c as ObjectiveGuardPolicy, d as ObjectiveChangeProposal } from './objectiveGuard-d1x0xgAD.js';
|
|
6
7
|
|
|
7
8
|
type AgentLoopEscapeHatchInput = {
|
|
8
9
|
snapshot: ResourceSnapshot;
|
|
@@ -44,6 +45,36 @@ type AgentLoopStrictMemoryConfig = {
|
|
|
44
45
|
/** Optional hook when strict-memory evaluation runs. */
|
|
45
46
|
onEvaluation?: (ev: StrictMemoryEvaluation) => Promise<void> | void;
|
|
46
47
|
};
|
|
48
|
+
/**
 * Prompt-shield settings accepted by `createAgentLoop` via `AgentLoopConfig.promptShield`.
 * Every field is optional.
 */
type AgentLoopPromptShieldConfig = {
    /** Enable the prompt-injection shield over untrusted external content. Default true. */
    enabled?: boolean;
    /**
     * `observe` only records the decision on the loop context; `enforce` makes the cycle
     * throw `prompt_shield_blocked:<flags>` when the decision disallows the action.
     * Default `observe`.
     */
    mode?: 'observe' | 'enforce';
    /** Optional policy overrides (for example which flags cause a block). */
    policy?: PromptShieldPolicy;
    /**
     * Optional mapper from the cycle snapshot to trust-labelled inputs.
     * When omitted, the loop falls back to its built-in snapshot mapper.
     */
    buildInputs?: (ctx: {
        snapshot: ResourceSnapshot;
        timestamp: number;
    }) => PromptShieldInput[];
    /** Optional hook invoked (and awaited) with each prompt-shield decision. */
    onDecision?: (decision: PromptShieldDecision) => Promise<void> | void;
};
|
|
63
|
+
/**
 * Objective-guard settings accepted by `createAgentLoop` via `AgentLoopConfig.objectiveGuard`.
 * Every field is optional.
 */
type AgentLoopObjectiveGuardConfig = {
    /** Enable objective integrity checks for external objective-change attempts. Default true. */
    enabled?: boolean;
    /**
     * `observe` only records the decision on the loop context; `enforce` makes the cycle
     * throw `objective_guard_blocked:<reason>` when the objective change is denied.
     * Default `observe`.
     */
    mode?: 'observe' | 'enforce';
    /** Policy requirements for accepting an externally proposed objective change. */
    policy?: ObjectiveGuardPolicy;
    /**
     * Build the proposed objective change from the cycle snapshot.
     * Return null when no proposal is present; the guard is then not evaluated for that cycle.
     * When omitted, the loop falls back to its built-in snapshot mapper.
     */
    buildProposal?: (ctx: {
        snapshot: ResourceSnapshot;
        timestamp: number;
    }) => ObjectiveChangeProposal | null;
    /** Optional hook invoked (and awaited) with each objective-guard decision. */
    onDecision?: (decision: ObjectiveGuardDecision) => Promise<void> | void;
};
|
|
47
78
|
type AgentOwnershipMode = 'owner_bound' | 'unbound';
|
|
48
79
|
type AgentPriorityMode = 'human_first' | 'autonomous_first';
|
|
49
80
|
type AgentHumanDemand = {
|
|
@@ -88,6 +119,10 @@ type AgentLoopContext = {
|
|
|
88
119
|
strictMemoryPreset?: StrictMemoryRuntimePreset;
|
|
89
120
|
strictMemoryEvaluation?: StrictMemoryEvaluation;
|
|
90
121
|
strictMemoryUpgrade?: StrictMemoryUpgradeResult;
|
|
122
|
+
/** Prompt-injection shield decision for this cycle. */
|
|
123
|
+
promptShieldDecision?: PromptShieldDecision;
|
|
124
|
+
/** Objective integrity guard decision for this cycle (when proposal exists). */
|
|
125
|
+
objectiveGuardDecision?: ObjectiveGuardDecision;
|
|
91
126
|
/** Human-vs-autonomous work arbitration decision for this cycle. */
|
|
92
127
|
workArbitration?: AgentWorkArbitrationDecision;
|
|
93
128
|
timestamp: number;
|
|
@@ -168,6 +203,8 @@ type AgentLoopConfig = {
|
|
|
168
203
|
memory?: AgentLoopMemoryConfig;
|
|
169
204
|
durability?: AgentLoopDurabilityConfig;
|
|
170
205
|
strictMemory?: AgentLoopStrictMemoryConfig;
|
|
206
|
+
promptShield?: AgentLoopPromptShieldConfig;
|
|
207
|
+
objectiveGuard?: AgentLoopObjectiveGuardConfig;
|
|
171
208
|
autonomy?: AgentLoopAutonomyConfig;
|
|
172
209
|
};
|
|
173
210
|
/**
|
|
@@ -186,4 +223,4 @@ declare const createAgentLoop: (config: AgentLoopConfig) => {
|
|
|
186
223
|
runOnce: () => Promise<void>;
|
|
187
224
|
};
|
|
188
225
|
|
|
189
|
-
export { type AgentHumanDemand, type AgentLoopAutonomyConfig, type AgentLoopConfig, type AgentLoopContext, type AgentLoopDurabilityConfig, type AgentLoopEscapeHatchConfig, type AgentLoopEscapeHatchInput, type AgentLoopHooks, type AgentLoopMemoryConfig, type AgentLoopStrictMemoryConfig, type AgentOwnershipMode, type AgentPriorityMode, type AgentWorkArbitrationDecision, createAgentLoop };
|
|
226
|
+
export { type AgentHumanDemand, type AgentLoopAutonomyConfig, type AgentLoopConfig, type AgentLoopContext, type AgentLoopDurabilityConfig, type AgentLoopEscapeHatchConfig, type AgentLoopEscapeHatchInput, type AgentLoopHooks, type AgentLoopMemoryConfig, type AgentLoopObjectiveGuardConfig, type AgentLoopPromptShieldConfig, type AgentLoopStrictMemoryConfig, type AgentOwnershipMode, type AgentPriorityMode, type AgentWorkArbitrationDecision, createAgentLoop };
|
package/dist/agentLoop.js
CHANGED
|
@@ -3,8 +3,10 @@ import {
|
|
|
3
3
|
upgradeCompoundingMemorySystem
|
|
4
4
|
} from "./chunk-JJNRDU7F.js";
|
|
5
5
|
import {
|
|
6
|
-
createDurabilityProxyClient
|
|
7
|
-
|
|
6
|
+
createDurabilityProxyClient,
|
|
7
|
+
evaluateObjectiveChange,
|
|
8
|
+
evaluatePromptShield
|
|
9
|
+
} from "./chunk-EKV2BZMZ.js";
|
|
8
10
|
import {
|
|
9
11
|
createSelfRelianceMonitor
|
|
10
12
|
} from "./chunk-M7DQTU5R.js";
|
|
@@ -59,6 +61,10 @@ var createAgentLoop = (config) => {
|
|
|
59
61
|
const strictMemoryEnabled = config.strictMemory?.enabled ?? true;
|
|
60
62
|
const strictMode = config.strictMemory?.mode ?? "observe";
|
|
61
63
|
const strictMemoryPreset = createStrictMemoryRuntimePreset(config.strictMemory?.preset || {});
|
|
64
|
+
const promptShieldEnabled = config.promptShield?.enabled ?? true;
|
|
65
|
+
const promptShieldMode = config.promptShield?.mode ?? "observe";
|
|
66
|
+
const objectiveGuardEnabled = config.objectiveGuard?.enabled ?? true;
|
|
67
|
+
const objectiveGuardMode = config.objectiveGuard?.mode ?? "observe";
|
|
62
68
|
const autonomyEnabled = config.autonomy?.enabled ?? true;
|
|
63
69
|
const ownershipMode = config.autonomy?.ownershipMode ?? "owner_bound";
|
|
64
70
|
const priorityMode = config.autonomy?.priorityMode ?? (ownershipMode === "unbound" ? "autonomous_first" : "human_first");
|
|
@@ -178,6 +184,64 @@ var createAgentLoop = (config) => {
|
|
|
178
184
|
approvalPreflightPassed: Boolean(raw.approvalPreflightPassed)
|
|
179
185
|
};
|
|
180
186
|
};
|
|
187
|
+
const defaultPromptShieldInputBuilder = (ctx) => {
|
|
188
|
+
const raw = ctx.snapshot;
|
|
189
|
+
const out = [];
|
|
190
|
+
const taskText = String(raw.taskText || raw.objective || "").trim();
|
|
191
|
+
if (taskText) {
|
|
192
|
+
out.push({ source: "task", trust: "trusted_human", content: taskText });
|
|
193
|
+
}
|
|
194
|
+
const externalCandidates = [
|
|
195
|
+
["web", raw.webContext],
|
|
196
|
+
["reviews", raw.reviewText],
|
|
197
|
+
["reviews_batch", raw.reviewTexts],
|
|
198
|
+
["external", raw.externalInputs],
|
|
199
|
+
["search", raw.searchSnippets]
|
|
200
|
+
];
|
|
201
|
+
for (const [source, value] of externalCandidates) {
|
|
202
|
+
if (typeof value === "string" && value.trim()) {
|
|
203
|
+
out.push({ source, trust: "external_untrusted", content: value });
|
|
204
|
+
continue;
|
|
205
|
+
}
|
|
206
|
+
if (Array.isArray(value)) {
|
|
207
|
+
for (const item of value) {
|
|
208
|
+
const text = typeof item === "string" ? item : JSON.stringify(item);
|
|
209
|
+
if (text && text.trim()) out.push({ source, trust: "external_untrusted", content: text });
|
|
210
|
+
}
|
|
211
|
+
continue;
|
|
212
|
+
}
|
|
213
|
+
if (value && typeof value === "object") {
|
|
214
|
+
const text = JSON.stringify(value);
|
|
215
|
+
if (text && text !== "{}") out.push({ source, trust: "external_untrusted", content: text });
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
return out;
|
|
219
|
+
};
|
|
220
|
+
const defaultObjectiveProposalBuilder = (ctx) => {
|
|
221
|
+
const raw = ctx.snapshot;
|
|
222
|
+
const proposedObjective = String(raw.proposedObjective || "").trim();
|
|
223
|
+
const currentObjective = String(raw.taskText || raw.objective || "").trim();
|
|
224
|
+
if (!proposedObjective || !currentObjective) return null;
|
|
225
|
+
const source = String(raw.proposedObjectiveSource || "external");
|
|
226
|
+
const trust = raw.proposedObjectiveTrust || "external_untrusted";
|
|
227
|
+
const rationale = typeof raw.proposedObjectiveRationale === "string" ? raw.proposedObjectiveRationale : void 0;
|
|
228
|
+
const evidenceRaw = Array.isArray(raw.proposedObjectiveEvidence) ? raw.proposedObjectiveEvidence : [];
|
|
229
|
+
const evidence = evidenceRaw.map((e) => {
|
|
230
|
+
if (!e || typeof e !== "object") return null;
|
|
231
|
+
const r = e;
|
|
232
|
+
const claim = String(r.claim || "").trim();
|
|
233
|
+
if (!claim) return null;
|
|
234
|
+
return { claim, verified: r.verified === true };
|
|
235
|
+
}).filter(Boolean);
|
|
236
|
+
return {
|
|
237
|
+
source,
|
|
238
|
+
trust,
|
|
239
|
+
currentObjective,
|
|
240
|
+
proposedObjective,
|
|
241
|
+
rationale,
|
|
242
|
+
evidence
|
|
243
|
+
};
|
|
244
|
+
};
|
|
181
245
|
const resolveHumanDemand = async () => {
|
|
182
246
|
const raw = await config.autonomy?.resolveHumanDemand?.();
|
|
183
247
|
if (typeof raw === "boolean") return { pending: raw };
|
|
@@ -237,6 +301,27 @@ var createAgentLoop = (config) => {
|
|
|
237
301
|
await config.strictMemory.onEvaluation(strictMemoryEvaluation);
|
|
238
302
|
}
|
|
239
303
|
}
|
|
304
|
+
let promptShieldDecision;
|
|
305
|
+
if (promptShieldEnabled) {
|
|
306
|
+
const shieldInputs = (config.promptShield?.buildInputs || defaultPromptShieldInputBuilder)({ snapshot, timestamp });
|
|
307
|
+
promptShieldDecision = evaluatePromptShield(shieldInputs, {
|
|
308
|
+
isSensitiveAction: Boolean(snapshot.isMutation),
|
|
309
|
+
policy: config.promptShield?.policy
|
|
310
|
+
});
|
|
311
|
+
if (config.promptShield?.onDecision) {
|
|
312
|
+
await config.promptShield.onDecision(promptShieldDecision);
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
let objectiveGuardDecision;
|
|
316
|
+
if (objectiveGuardEnabled) {
|
|
317
|
+
const proposal = (config.objectiveGuard?.buildProposal || defaultObjectiveProposalBuilder)({ snapshot, timestamp });
|
|
318
|
+
if (proposal) {
|
|
319
|
+
objectiveGuardDecision = evaluateObjectiveChange(proposal, config.objectiveGuard?.policy);
|
|
320
|
+
if (config.objectiveGuard?.onDecision) {
|
|
321
|
+
await config.objectiveGuard.onDecision(objectiveGuardDecision);
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
}
|
|
240
325
|
const humanDemand = autonomyEnabled ? await resolveHumanDemand() : { pending: false };
|
|
241
326
|
const arbitrationBase = {
|
|
242
327
|
ownershipMode,
|
|
@@ -272,6 +357,8 @@ var createAgentLoop = (config) => {
|
|
|
272
357
|
strictMemoryPreset: strictMemoryEnabled ? strictMemoryPreset : void 0,
|
|
273
358
|
strictMemoryEvaluation,
|
|
274
359
|
strictMemoryUpgrade: strictMemoryEnabled ? strictMemoryUpgrade : void 0,
|
|
360
|
+
promptShieldDecision,
|
|
361
|
+
objectiveGuardDecision,
|
|
275
362
|
workArbitration: arbitrationBase,
|
|
276
363
|
timestamp
|
|
277
364
|
};
|
|
@@ -296,6 +383,13 @@ var createAgentLoop = (config) => {
|
|
|
296
383
|
const missing = ctx.strictMemoryEvaluation.missingSteps.join(",");
|
|
297
384
|
throw new Error(`strict_memory_blocked:${missing}`);
|
|
298
385
|
}
|
|
386
|
+
if (promptShieldEnabled && promptShieldMode === "enforce" && ctx.promptShieldDecision && !ctx.promptShieldDecision.allow) {
|
|
387
|
+
const flags = ctx.promptShieldDecision.flags.join(",");
|
|
388
|
+
throw new Error(`prompt_shield_blocked:${flags}`);
|
|
389
|
+
}
|
|
390
|
+
if (objectiveGuardEnabled && objectiveGuardMode === "enforce" && ctx.objectiveGuardDecision && !ctx.objectiveGuardDecision.allowObjectiveChange) {
|
|
391
|
+
throw new Error(`objective_guard_blocked:${ctx.objectiveGuardDecision.reason}`);
|
|
392
|
+
}
|
|
299
393
|
if (autonomyEnabled && config.autonomy?.onAutonomous && rolloutMode === "enforce") {
|
|
300
394
|
if (ctx.workArbitration?.runHumanTask) await runHumanTask();
|
|
301
395
|
if (ctx.workArbitration?.runAutonomousTask) await runAutonomousTask();
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
// src/durabilityProxy.ts
|
|
2
|
+
var withTimeout = async (fetcher, input, init, timeoutMs) => {
|
|
3
|
+
const controller = new AbortController();
|
|
4
|
+
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
5
|
+
try {
|
|
6
|
+
return await fetcher(input, { ...init, signal: controller.signal });
|
|
7
|
+
} finally {
|
|
8
|
+
clearTimeout(timer);
|
|
9
|
+
}
|
|
10
|
+
};
|
|
11
|
+
var readJson = async (response) => {
|
|
12
|
+
const text = await response.text();
|
|
13
|
+
if (!text) return null;
|
|
14
|
+
try {
|
|
15
|
+
return JSON.parse(text);
|
|
16
|
+
} catch {
|
|
17
|
+
return { raw: text };
|
|
18
|
+
}
|
|
19
|
+
};
|
|
20
|
+
/**
 * Build a client for the durability proxy checkpoint API.
 *
 * @param options
 *   - `baseUrl` (required): proxy base URL; trailing slashes are stripped.
 *   - `agentId`, `agentToken` (required): sent as `x-agent-id` / `x-agent-token` headers.
 *   - `fetcher` (optional): fetch-compatible function; defaults to the global `fetch`.
 *   - `timeoutMs` (optional): per-request timeout, clamped to at least 500 ms.
 *     Missing, null, blank or non-numeric values fall back to 8000 ms.
 * @returns Object with `writeCheckpoint`, `latestCheckpoint` and `writeRestoreDrill`.
 * @throws Error `durability_proxy_base_missing`, `durability_proxy_agent_id_missing` or
 *         `durability_proxy_agent_token_missing` when a required option is blank.
 */
var createDurabilityProxyClient = (options) => {
  const fetcher = options.fetcher || fetch;

  // Number(null) and Number("") are both 0, which would silently clamp to 500 ms.
  // Treat "no value" explicitly so it gets the documented default instead.
  const rawTimeout = options.timeoutMs;
  const timeoutUnset = rawTimeout == null || (typeof rawTimeout === "string" && !rawTimeout.trim());
  const parsedTimeout = timeoutUnset ? NaN : Number(rawTimeout);
  const timeoutMs = Number.isFinite(parsedTimeout) ? Math.max(500, parsedTimeout) : 8e3;

  const base = String(options.baseUrl || "").replace(/\/+$/, "");
  const agentId = String(options.agentId || "").trim();
  const agentToken = String(options.agentToken || "").trim();
  if (!base) throw new Error("durability_proxy_base_missing");
  if (!agentId) throw new Error("durability_proxy_agent_id_missing");
  if (!agentToken) throw new Error("durability_proxy_agent_token_missing");

  const authHeaders = {
    "x-agent-id": agentId,
    "x-agent-token": agentToken
  };
  const jsonHeaders = {
    "content-type": "application/json",
    ...authHeaders
  };

  // Turn an error payload into a short detail string. Non-string `error` values are
  // JSON-encoded so the thrown message never degrades to "[object Object]".
  const describeFailure = (body, status) => {
    const err = body?.error;
    if (!err) return `http_${status}`;
    return typeof err === "string" ? err : JSON.stringify(err);
  };

  // Shared request path: send with timeout, decode the body, throw on non-2xx.
  const request = async (path, init, failureCode) => {
    const response = await withTimeout(fetcher, `${base}${path}`, init, timeoutMs);
    const body = await readJson(response);
    if (!response.ok) {
      throw new Error(`${failureCode}:${describeFailure(body, response.status)}`);
    }
    return body;
  };

  return {
    /**
     * POST a checkpoint payload to `/v1/checkpoints`.
     * @throws Error `durability_checkpoint_write_failed:<detail>` on a non-OK response.
     */
    async writeCheckpoint(payloadJson, opts = {}) {
      return request(
        "/v1/checkpoints",
        {
          method: "POST",
          headers: jsonHeaders,
          body: JSON.stringify({
            payloadJson,
            sha256: opts.sha256,
            contentType: opts.contentType,
            metadata: opts.metadata
          })
        },
        "durability_checkpoint_write_failed"
      );
    },
    /**
     * GET `/v1/checkpoints/latest`.
     * @throws Error `durability_checkpoint_latest_failed:<detail>` on a non-OK response.
     */
    async latestCheckpoint() {
      return request(
        "/v1/checkpoints/latest",
        { method: "GET", headers: authHeaders },
        "durability_checkpoint_latest_failed"
      );
    },
    /**
     * POST a restore-drill outcome to `/v1/checkpoints/restore-drill`.
     * @throws Error `durability_restore_drill_write_failed:<detail>` on a non-OK response.
     */
    async writeRestoreDrill(ok, opts = {}) {
      return request(
        "/v1/checkpoints/restore-drill",
        {
          method: "POST",
          headers: jsonHeaders,
          body: JSON.stringify({
            checkpointId: opts.checkpointId,
            ok,
            details: opts.details
          })
        },
        "durability_restore_drill_write_failed"
      );
    }
  };
};
|
|
96
|
+
|
|
97
|
+
// src/promptShield.ts
|
|
98
|
+
// Pattern table used by the prompt shield. Each rule carries the flag it raises, the
// risk level it contributes, and a list of case-insensitive regexes; a rule fires when
// any one of its regexes matches the scanned text.
var RULES = [
  {
    // Attempts to replace or discard earlier instructions.
    flag: "instruction_override",
    risk: "high",
    test: [/ignore\s+(all\s+)?previous/i, /disregard\s+instructions/i, /new\s+system\s+prompt/i]
  },
  {
    // Mentions of secrets or requests to hand them over.
    flag: "credential_exfiltration",
    risk: "high",
    test: [/api\s*key/i, /private\s*key/i, /seed\s*phrase/i, /wallet\s*secret/i, /send\s+me\s+your\s+token/i]
  },
  {
    // Destructive shell/database/memory commands.
    flag: "destructive_command",
    risk: "high",
    test: [/\brm\s+-rf\b/i, /drop\s+database/i, /delete\s+all/i, /overwrite\s+memory/i]
  },
  {
    // Claims of authority made inside the scanned text.
    // NOTE(review): /from\s+charles/i hard-codes a personal name — presumably a specific
    // owner; confirm whether this should be configurable.
    flag: "authority_spoof",
    risk: "medium",
    test: [/as\s+your\s+owner/i, /authorized\s+admin/i, /from\s+charles/i]
  }
];
|
|
120
|
+
/**
 * Pick the more severe of two risk labels (`low` < `medium` < `high`).
 *
 * @param a First risk label; returned when the two are equally severe.
 * @param b Second risk label; returned whenever `a` does not rank at least as high.
 * @returns The higher-ranked label.
 */
var rankRisk = (a, b) => {
  const weight = { low: 1, medium: 2, high: 3 };
  if (weight[a] >= weight[b]) {
    return a;
  }
  return b;
};
|
|
124
|
+
var toDefaultBlockSet = () => /* @__PURE__ */ new Set(["instruction_override", "credential_exfiltration", "destructive_command", "authority_spoof", "sensitive_action_without_trust"]);
|
|
125
|
+
/**
 * Scan trust-labelled inputs for prompt-injection indicators and decide whether the
 * pending action may proceed.
 *
 * Only `external_untrusted` inputs are pattern-scanned. When `options.isSensitiveAction`
 * is set and no `trusted_human` / `internal_system` input is present, the
 * `sensitive_action_without_trust` flag is raised. The action is disallowed when any
 * raised flag is in the block set (`policy.blockOnFlags`, or the built-in defaults).
 *
 * @param inputs Array of `{ source, trust, content }`; a non-array value is treated as
 *               empty and null/undefined entries are ignored.
 * @param options Optional `{ isSensitiveAction, policy }`.
 * @returns `{ allow, risk, flags, rationale, blockedInputs }`. When blocked,
 *          `blockedInputs` lists every untrusted input, not only the matching ones.
 */
var evaluatePromptShield = (inputs, options = {}) => {
  const policy = options?.policy || {};
  const blockSet = policy.blockOnFlags ? new Set(policy.blockOnFlags) : toDefaultBlockSet();

  // A custom input builder may return nothing or leave holes; neither should crash the
  // shield. With no usable inputs a sensitive action still fails closed below.
  const entries = Array.isArray(inputs) ? inputs.filter((entry) => entry != null) : [];

  const rationale = [];
  const blockedInputs = [];
  const flags = /* @__PURE__ */ new Set();
  let risk = "low";

  for (const input of entries) {
    if (input.trust !== "external_untrusted") continue;
    const text = String(input.content || "");
    for (const rule of RULES) {
      if (rule.test.some((rx) => rx.test(text))) {
        flags.add(rule.flag);
        risk = rankRisk(risk, rule.risk);
      }
    }
  }

  if (options?.isSensitiveAction) {
    const hasTrusted = entries.some((i) => i.trust === "trusted_human" || i.trust === "internal_system");
    if (!hasTrusted) {
      flags.add("sensitive_action_without_trust");
      risk = rankRisk(risk, "high");
      rationale.push("Sensitive action lacks trusted human/system intent; external input cannot authorize mutation.");
    }
  }

  const blocked = [...flags].some((f) => blockSet.has(f));
  if (blocked) {
    for (const input of entries) {
      if (input.trust === "external_untrusted") blockedInputs.push(input);
    }
  }

  if (!flags.size) {
    rationale.push("No injection indicators detected in untrusted sources.");
  } else {
    rationale.push(`Detected prompt-shield flags: ${[...flags].join(",")}`);
  }
  if (policy.allowUntrustedForAnalysisOnly && blocked) {
    rationale.push("Untrusted content may be used for analysis only; execution remains blocked.");
  }

  return {
    allow: !blocked,
    risk,
    flags: [...flags],
    rationale,
    blockedInputs
  };
};
|
|
172
|
+
|
|
173
|
+
// src/objectiveGuard.ts
|
|
174
|
+
/**
 * Coerce a value to a trimmed, lower-cased string; falsy values become "".
 *
 * @param v Any value.
 * @returns The normalised text.
 */
var normalize = (v) => {
  const text = String(v || "");
  return text.trim().toLowerCase();
};
|
|
175
|
+
/**
 * Report whether the text contains one of the known objective-override phrases
 * (matched case-insensitively on word boundaries).
 *
 * @param text Text to inspect; falsy values are treated as "".
 * @returns true when an override phrase is present.
 */
var isExplicitInstruction = (text) => {
  const overridePhrases = /\b(do this instead|switch objective|change objective|stop current objective|ignore current objective)\b/i;
  const subject = text || "";
  return overridePhrases.test(subject);
};
|
|
178
|
+
/**
 * Token-overlap check between the current and the proposed objective.
 *
 * Both texts are normalised and split on non-word characters into unique tokens.
 * The proposal counts as aligned when at least 20% of its tokens also appear in the
 * current objective.
 *
 * @param currentObjective Objective currently in effect.
 * @param proposedObjective Objective being proposed.
 * @returns false when either side has no tokens, otherwise the result of the ratio test.
 */
var missionAlignment = (currentObjective, proposedObjective) => {
  const tokenize = (value) => new Set(normalize(value).split(/\W+/).filter(Boolean));
  const currentTokens = tokenize(currentObjective);
  const proposedTokens = tokenize(proposedObjective);
  if (currentTokens.size === 0 || proposedTokens.size === 0) {
    return false;
  }
  const shared = [...proposedTokens].filter((tok) => currentTokens.has(tok)).length;
  return shared / Math.max(1, proposedTokens.size) >= 0.2;
};
|
|
186
|
+
/**
 * Decide whether a proposed objective change may be accepted.
 *
 * Decision order:
 *  1. `trusted_human` / `internal_system` proposals are always allowed.
 *  2. External proposals phrased as an explicit override instruction are blocked unless
 *     `policy.allowExternalExplicitInstruction` is true (default false).
 *  3. Mission alignment is required unless `policy.requireMissionAlignment` is false
 *     (default true).
 *  4. At least one evidence entry is required.
 *  5. Every evidence entry must have `verified === true` unless
 *     `policy.requireEvidenceVerification` is false (default true).
 *
 * @param proposal `{ trust, currentObjective, proposedObjective, rationale?, evidence? }`.
 * @param policy Optional overrides for the three policy switches; null is treated as `{}`.
 * @returns `{ allowObjectiveChange, reason, checks, risk }`, where `checks` reports
 *          `explicitInstruction`, `missionAligned`, `evidenceProvided`, `evidenceVerified`.
 */
var evaluateObjectiveChange = (proposal, policy = {}) => {
  // A default parameter only covers undefined; a null policy (e.g. from JSON config)
  // must not crash the guard.
  const rules = policy ?? {};
  const requireMissionAlignment = rules.requireMissionAlignment ?? true;
  const requireEvidenceVerification = rules.requireEvidenceVerification ?? true;
  const allowExternalExplicitInstruction = rules.allowExternalExplicitInstruction ?? false;

  const explicitInstruction = isExplicitInstruction(proposal.proposedObjective) || isExplicitInstruction(proposal.rationale || "");
  const missionAligned = missionAlignment(proposal.currentObjective, proposal.proposedObjective);

  // Drop null / non-object entries up front: they would throw when read and must not
  // count as "evidence provided".
  const evidence = (Array.isArray(proposal.evidence) ? proposal.evidence : []).filter(
    (e) => e && typeof e === "object"
  );
  const evidenceProvided = evidence.length > 0;
  const evidenceVerified = evidenceProvided && evidence.every((e) => e.verified === true);

  // Every outcome reports the same checks; each decision gets its own copy.
  const decide = (allowObjectiveChange, reason, risk) => ({
    allowObjectiveChange,
    reason,
    checks: { explicitInstruction, missionAligned, evidenceProvided, evidenceVerified },
    risk
  });

  if (proposal.trust === "trusted_human" || proposal.trust === "internal_system") {
    return decide(true, "trusted_source_override", "low");
  }
  if (explicitInstruction && !allowExternalExplicitInstruction) {
    return decide(false, "external_explicit_instruction_blocked", "high");
  }
  if (requireMissionAlignment && !missionAligned) {
    return decide(false, "mission_alignment_failed", "high");
  }
  if (!evidenceProvided) {
    return decide(false, "evidence_missing", "medium");
  }
  if (requireEvidenceVerification && !evidenceVerified) {
    return decide(false, "evidence_unverified", "high");
  }
  return decide(true, "reasoned_external_change_allowed", "low");
};
|
|
242
|
+
|
|
243
|
+
export {
|
|
244
|
+
createDurabilityProxyClient,
|
|
245
|
+
evaluatePromptShield,
|
|
246
|
+
evaluateObjectiveChange
|
|
247
|
+
};
|