psyche-ai 3.1.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -1
- package/dist/cli.js +0 -0
- package/dist/core.js +45 -3
- package/dist/decision-bias.d.ts +58 -0
- package/dist/decision-bias.js +211 -0
- package/dist/ethics.d.ts +64 -0
- package/dist/ethics.js +577 -0
- package/dist/experiential-field.d.ts +46 -0
- package/dist/experiential-field.js +646 -0
- package/dist/generative-self.d.ts +88 -0
- package/dist/generative-self.js +647 -0
- package/dist/index.d.ts +14 -2
- package/dist/index.js +13 -1
- package/dist/metacognition.d.ts +60 -0
- package/dist/metacognition.js +611 -0
- package/dist/prompt.d.ts +6 -1
- package/dist/prompt.js +26 -4
- package/dist/psyche-file.js +9 -3
- package/dist/shared-intentionality.d.ts +72 -0
- package/dist/shared-intentionality.js +486 -0
- package/dist/types.d.ts +68 -2
- package/dist/types.js +24 -0
- package/dist/update.js +1 -1
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -165,6 +165,13 @@ cd openclaw-plugin-psyche && node scripts/diagnose.js
|
|
|
165
165
|
- **上下文分类** — 关系/驱力/历史感知的刺激分类,超越简单正则
|
|
166
166
|
- **时间意识** — 预期、惊喜/失望、遗憾(马尔可夫预测+反事实分析)
|
|
167
167
|
- **依恋动力学** — 4种依恋风格(安全/焦虑/回避/混乱),分离焦虑,重逢效应
|
|
168
|
+
- **元认知** — 情绪自我觉察,评估情绪可靠性,三种调节策略(认知重评/策略性表达/自我安抚)
|
|
169
|
+
- **防御机制检测** — 合理化、投射、升华、回避,在自省中浮现而非压制
|
|
170
|
+
- **决策调制** — 6维偏差向量(探索/警惕/社交/果断/创意/坚持),情绪驱动注意力和决策
|
|
171
|
+
- **统一体验场** — 不拼接段落,而是将所有子系统整合为连贯内在体验。12种体验质量,超越命名情绪
|
|
172
|
+
- **生成式自我** — 因果自我理解:"我之所以如此,是因为这些经历"。预测自己的情绪反应
|
|
173
|
+
- **共享意向性** — 联合注意、心智理论、目标对齐。知道"我们都在想同一件事"
|
|
174
|
+
- **情感伦理** — 检测操纵模式(间歇性强化、煤气灯效应),维护自尊底线,温和自我保护
|
|
168
175
|
- **Compact Mode** — 算法做化学计算,LLM 只看行为指令(~15-180 tokens vs ~550)
|
|
169
176
|
|
|
170
177
|
架构详情见 [ARCHITECTURE.md](ARCHITECTURE.md)。
|
|
@@ -174,7 +181,7 @@ cd openclaw-plugin-psyche && node scripts/diagnose.js
|
|
|
174
181
|
```bash
|
|
175
182
|
npm install
|
|
176
183
|
npm run build
|
|
177
|
-
npm test #
|
|
184
|
+
npm test # 706 tests
|
|
178
185
|
npm run typecheck # strict mode
|
|
179
186
|
```
|
|
180
187
|
|
package/dist/cli.js
CHANGED
|
File without changes
|
package/dist/core.js
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
//
|
|
12
12
|
// Orchestrates: chemistry, classify, prompt, profiles, guards, learning
|
|
13
13
|
// ============================================================
|
|
14
|
-
import { DEFAULT_RELATIONSHIP, DEFAULT_DRIVES, DEFAULT_LEARNING_STATE } from "./types.js";
|
|
14
|
+
import { DEFAULT_RELATIONSHIP, DEFAULT_DRIVES, DEFAULT_LEARNING_STATE, DEFAULT_METACOGNITIVE_STATE, DEFAULT_PERSONHOOD_STATE } from "./types.js";
|
|
15
15
|
import { applyDecay, applyStimulus, applyContagion, clamp } from "./chemistry.js";
|
|
16
16
|
import { classifyStimulus } from "./classify.js";
|
|
17
17
|
import { buildDynamicContext, buildProtocolContext, buildCompactContext } from "./prompt.js";
|
|
@@ -21,6 +21,8 @@ import { parsePsycheUpdate, mergeUpdates, updateAgreementStreak, pushSnapshot, }
|
|
|
21
21
|
import { decayDrives, feedDrives, detectExistentialThreat, computeEffectiveBaseline, computeEffectiveSensitivity, } from "./drives.js";
|
|
22
22
|
import { checkForUpdate } from "./update.js";
|
|
23
23
|
import { evaluateOutcome, computeContextHash, updateLearnedVector, predictChemistry, recordPrediction, } from "./learning.js";
|
|
24
|
+
import { assessMetacognition } from "./metacognition.js";
|
|
25
|
+
import { buildDecisionContext } from "./decision-bias.js";
|
|
24
26
|
const NOOP_LOGGER = { info: () => { }, warn: () => { }, debug: () => { } };
|
|
25
27
|
// ── PsycheEngine ─────────────────────────────────────────────
|
|
26
28
|
export class PsycheEngine {
|
|
@@ -53,6 +55,16 @@ export class PsycheEngine {
|
|
|
53
55
|
loaded.learning = { ...DEFAULT_LEARNING_STATE };
|
|
54
56
|
loaded.version = 4;
|
|
55
57
|
}
|
|
58
|
+
// Migrate v4 → v5: add metacognitive state if missing
|
|
59
|
+
if (!loaded.metacognition) {
|
|
60
|
+
loaded.metacognition = { ...DEFAULT_METACOGNITIVE_STATE };
|
|
61
|
+
loaded.version = 5;
|
|
62
|
+
}
|
|
63
|
+
// Migrate v5 → v6: add personhood state if missing
|
|
64
|
+
if (!loaded.personhood) {
|
|
65
|
+
loaded.personhood = { ...DEFAULT_PERSONHOOD_STATE };
|
|
66
|
+
loaded.version = 6;
|
|
67
|
+
}
|
|
56
68
|
this.state = loaded;
|
|
57
69
|
}
|
|
58
70
|
else {
|
|
@@ -152,6 +164,26 @@ export class PsycheEngine {
|
|
|
152
164
|
},
|
|
153
165
|
};
|
|
154
166
|
}
|
|
167
|
+
// ── Metacognition: assess emotional state before acting ────
|
|
168
|
+
const metacognitiveAssessment = assessMetacognition(state, appliedStimulus ?? "casual", state.learning.outcomeHistory);
|
|
169
|
+
// Apply self-soothing regulation if suggested with high confidence
|
|
170
|
+
for (const reg of metacognitiveAssessment.regulationSuggestions) {
|
|
171
|
+
if (reg.strategy === "self-soothing" && reg.confidence >= 0.6 && reg.chemistryAdjustment) {
|
|
172
|
+
const adj = reg.chemistryAdjustment;
|
|
173
|
+
state = {
|
|
174
|
+
...state,
|
|
175
|
+
current: {
|
|
176
|
+
...state.current,
|
|
177
|
+
DA: clamp(state.current.DA + (adj.DA ?? 0)),
|
|
178
|
+
HT: clamp(state.current.HT + (adj.HT ?? 0)),
|
|
179
|
+
CORT: clamp(state.current.CORT + (adj.CORT ?? 0)),
|
|
180
|
+
OT: clamp(state.current.OT + (adj.OT ?? 0)),
|
|
181
|
+
NE: clamp(state.current.NE + (adj.NE ?? 0)),
|
|
182
|
+
END: clamp(state.current.END + (adj.END ?? 0)),
|
|
183
|
+
},
|
|
184
|
+
};
|
|
185
|
+
}
|
|
186
|
+
}
|
|
155
187
|
// Push snapshot to emotional history
|
|
156
188
|
state = pushSnapshot(state, appliedStimulus);
|
|
157
189
|
// Increment interaction count
|
|
@@ -178,19 +210,27 @@ export class PsycheEngine {
|
|
|
178
210
|
this.state = state;
|
|
179
211
|
await this.storage.save(state);
|
|
180
212
|
const locale = state.meta.locale ?? this.cfg.locale;
|
|
213
|
+
// Build metacognitive and decision context strings
|
|
214
|
+
const metacogNote = metacognitiveAssessment.metacognitiveNote;
|
|
215
|
+
const decisionCtx = buildDecisionContext(state);
|
|
181
216
|
if (this.cfg.compactMode) {
|
|
182
217
|
return {
|
|
183
218
|
systemContext: "",
|
|
184
219
|
dynamicContext: buildCompactContext(state, opts?.userId, {
|
|
185
220
|
userText: text || undefined,
|
|
186
221
|
algorithmStimulus: appliedStimulus,
|
|
222
|
+
metacognitiveNote: metacogNote || undefined,
|
|
223
|
+
decisionContext: decisionCtx || undefined,
|
|
187
224
|
}),
|
|
188
225
|
stimulus: appliedStimulus,
|
|
189
226
|
};
|
|
190
227
|
}
|
|
191
228
|
return {
|
|
192
229
|
systemContext: this.getProtocol(locale),
|
|
193
|
-
dynamicContext: buildDynamicContext(state, opts?.userId
|
|
230
|
+
dynamicContext: buildDynamicContext(state, opts?.userId, {
|
|
231
|
+
metacognitiveNote: metacogNote || undefined,
|
|
232
|
+
decisionContext: decisionCtx || undefined,
|
|
233
|
+
}),
|
|
194
234
|
stimulus: appliedStimulus,
|
|
195
235
|
};
|
|
196
236
|
}
|
|
@@ -301,7 +341,7 @@ export class PsycheEngine {
|
|
|
301
341
|
const selfModel = getDefaultSelfModel(mbti);
|
|
302
342
|
const now = new Date().toISOString();
|
|
303
343
|
return {
|
|
304
|
-
version:
|
|
344
|
+
version: 6,
|
|
305
345
|
mbti,
|
|
306
346
|
baseline,
|
|
307
347
|
current: { ...baseline },
|
|
@@ -314,6 +354,8 @@ export class PsycheEngine {
|
|
|
314
354
|
agreementStreak: 0,
|
|
315
355
|
lastDisagreement: null,
|
|
316
356
|
learning: { ...DEFAULT_LEARNING_STATE },
|
|
357
|
+
metacognition: { ...DEFAULT_METACOGNITIVE_STATE },
|
|
358
|
+
personhood: { ...DEFAULT_PERSONHOOD_STATE },
|
|
317
359
|
meta: {
|
|
318
360
|
agentName: name,
|
|
319
361
|
createdAt: now,
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import type { PsycheState } from "./types.js";
|
|
2
|
+
/**
 * Six-dimensional decision bias produced by computeDecisionBias.
 * Per that function's documentation each dimension is normalized to [0, 1],
 * with 0.5 treated as neutral.
 */
export interface DecisionBiasVector {
|
|
3
|
+
/** Inclination to explore; derived from DA, NE and the curiosity drive. */
explorationTendency: number;
|
|
4
|
+
/** Wariness; derived from CORT and the safety/survival drives. */
cautionLevel: number;
|
|
5
|
+
/** Pull toward social engagement; derived from OT, END and the connection drive. */
socialOrientation: number;
|
|
6
|
+
/** Assertiveness; derived from NE, DA and the esteem drive. */
assertiveness: number;
|
|
7
|
+
/** Creative openness; derived from DA, END and inverse CORT. */
creativityBias: number;
|
|
8
|
+
/** Willingness to persist; derived from HT, mean drive satisfaction and inverse CORT. */
persistenceBias: number;
|
|
9
|
+
}
|
|
10
|
+
/**
 * Relative priority of each conversation-content category, as returned by
 * computeAttentionWeights. The weights are normalized to sum to ~1;
 * a higher weight means higher priority for that category.
 */
export interface AttentionWeights {
|
|
11
|
+
/** Weight for relationship/social content. */
social: number;
|
|
12
|
+
/** Weight for intellectual/novel content. */
intellectual: number;
|
|
13
|
+
/** Weight for threat/safety content. */
threat: number;
|
|
14
|
+
/** Weight for emotional content. */
emotional: number;
|
|
15
|
+
/** Weight for routine content. */
routine: number;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Compute a decision bias vector from the current psyche state.
|
|
19
|
+
*
|
|
20
|
+
* Each bias dimension is a weighted combination of relevant chemical
|
|
21
|
+
* levels and drive states, normalized to [0, 1] where 0.5 is neutral.
|
|
22
|
+
*/
|
|
23
|
+
export declare function computeDecisionBias(state: PsycheState): DecisionBiasVector;
|
|
24
|
+
/**
|
|
25
|
+
* Compute attention weights that prioritize different conversation content
|
|
26
|
+
* based on current chemical state.
|
|
27
|
+
*
|
|
28
|
+
* Returns normalized weights (sum to ~1) for each content category.
|
|
29
|
+
* Higher weight = higher priority for that type of content.
|
|
30
|
+
*/
|
|
31
|
+
export declare function computeAttentionWeights(state: PsycheState): AttentionWeights;
|
|
32
|
+
/**
|
|
33
|
+
* Compute explore vs exploit balance.
|
|
34
|
+
*
|
|
35
|
+
* Returns a single float:
|
|
36
|
+
* 0 = pure exploit (stick with known, safe behaviors)
|
|
37
|
+
* 1 = pure explore (try new approaches, take risks)
|
|
38
|
+
*
|
|
39
|
+
* Exploration is driven by:
|
|
40
|
+
* - High curiosity drive satisfaction (energy to explore)
|
|
41
|
+
* - High DA (reward anticipation)
|
|
42
|
+
* - High NE (novelty-seeking)
|
|
43
|
+
* - Low CORT (not stressed)
|
|
44
|
+
* - High safety (secure enough to take risks)
|
|
45
|
+
*
|
|
46
|
+
* Exploitation is driven by:
|
|
47
|
+
* - High CORT / anxiety
|
|
48
|
+
* - Low safety drive satisfaction
|
|
49
|
+
* - Low DA (no reward motivation)
|
|
50
|
+
*/
|
|
51
|
+
export declare function computeExploreExploit(state: PsycheState): number;
|
|
52
|
+
/**
|
|
53
|
+
* Build a compact decision context string for prompt injection.
|
|
54
|
+
*
|
|
55
|
+
* Only includes biases that deviate significantly from neutral (more than 0.3 away from 0.5, i.e. above 0.8 or below 0.2).
|
|
56
|
+
* Keeps output under 100 tokens.
|
|
57
|
+
*/
|
|
58
|
+
export declare function buildDecisionContext(state: PsycheState): string;
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
// ============================================================
|
|
2
|
+
// Decision Bias — P5: Decision Modulation
|
|
3
|
+
//
|
|
4
|
+
// Converts chemical state + drive state into bias vectors,
|
|
5
|
+
// attention weights, and explore/exploit balance for downstream
|
|
6
|
+
// decision-making. Pure math/heuristic, zero dependencies, no LLM.
|
|
7
|
+
// ============================================================
|
|
8
|
+
// ── Utilities ────────────────────────────────────────────────
|
|
9
|
+
/**
 * Restrict a number to the closed unit interval.
 *
 * @param v - Value to bound.
 * @returns `v` when it already lies in [0, 1], otherwise the nearest bound.
 */
function clamp01(v) {
    const floored = Math.max(0, v);
    return Math.min(1, floored);
}
|
|
13
|
+
/**
 * Logistic squashing function.
 *
 * @param x - Any real number; 0 maps to exactly 0.5.
 * @param steepness - Multiplier applied to `x` before squashing (default 1).
 * @returns A value between 0 and 1 (saturating to the bounds for very large |x|).
 */
function sigmoid(x, steepness = 1) {
    const exponent = -steepness * x;
    const denominator = 1 + Math.exp(exponent);
    return 1 / denominator;
}
|
|
17
|
+
/**
 * Convert a chemical or drive level on the 0-100 scale into a 0-1 fraction.
 *
 * @param v - Level on the 0-100 scale.
 * @returns `v / 100`, clamped to [0, 1] so out-of-range levels saturate.
 */
function norm(v) {
    const fraction = v / 100;
    return clamp01(fraction);
}
|
|
21
|
+
/**
 * Weighted mean of several factors, each expected to be in [0, 1].
 *
 * Values and weights are paired by index; iteration is driven by the
 * length of `values`.
 *
 * @param values - Factors to combine.
 * @param weights - Weight for the factor at the same index.
 * @returns The weighted mean clamped to [0, 1], or the neutral 0.5 when the
 *          accumulated weight is not positive (e.g. empty input).
 */
function wavg(values, weights) {
    let weightedTotal = 0;
    let weightTotal = 0;
    let idx = 0;
    while (idx < values.length) {
        const weight = weights[idx];
        weightedTotal += values[idx] * weight;
        weightTotal += weight;
        idx += 1;
    }
    // No usable weight: fall back to neutral instead of dividing by zero.
    if (!(weightTotal > 0)) {
        return 0.5;
    }
    return clamp01(weightedTotal / weightTotal);
}
|
|
31
|
+
/**
 * Average satisfaction over the five drives (survival, safety, connection,
 * esteem, curiosity).
 *
 * @param drives - Object carrying the five drive levels on the 0-100 scale.
 * @returns The arithmetic mean of the five levels, normalized to [0, 1].
 */
function meanDriveSatisfaction(drives) {
    const total = drives.survival + drives.safety + drives.connection
        + drives.esteem + drives.curiosity;
    return norm(total / 5);
}
|
|
36
|
+
// ── Core Computations ────────────────────────────────────────
|
|
37
|
+
/**
 * Derive the six-dimensional decision bias vector from the psyche state.
 *
 * Every dimension is a weighted average (see `wavg`) of normalized chemical
 * levels from `state.current` and drive levels from `state.drives`, so each
 * result lies in [0, 1] with 0.5 regarded as neutral.
 *
 * @param state - Psyche state providing `current` (DA, HT, CORT, OT, NE, END)
 *                and `drives` (survival, safety, connection, esteem, curiosity).
 * @returns An object with explorationTendency, cautionLevel, socialOrientation,
 *          assertiveness, creativityBias and persistenceBias.
 */
export function computeDecisionBias(state) {
    const { current: chem, drives } = state;
    // Normalize every input once; "hunger" below means 1 - satisfaction.
    const da = norm(chem.DA);
    const ne = norm(chem.NE);
    const ht = norm(chem.HT);
    const ot = norm(chem.OT);
    const cort = norm(chem.CORT);
    const endLevel = norm(chem.END);
    const curiosity = norm(drives.curiosity);
    const connection = norm(drives.connection);
    const calm = 1 - cort;
    return {
        // Reward-seeking (DA) + novelty (NE) + curiosity. Curiosity enters as
        // both hunger (0.25) and satisfaction (0.2), so the two largely offset.
        explorationTendency: wavg([da, ne, 1 - curiosity, curiosity], [0.25, 0.3, 0.25, 0.2]),
        // Stress (CORT) plus unmet safety and survival drives.
        cautionLevel: wavg([cort, 1 - norm(drives.safety), 1 - norm(drives.survival)], [0.5, 0.3, 0.2]),
        // Bonding (OT) dominates; connection satisfaction and hunger both count.
        socialOrientation: wavg([ot, connection, 1 - connection, endLevel], [0.4, 0.2, 0.25, 0.15]),
        // Arousal (NE) + satisfied esteem + DA.
        assertiveness: wavg([ne, norm(drives.esteem), da], [0.4, 0.35, 0.25]),
        // Rewarded (DA), playful (END) and low-stress states favor creativity.
        creativityBias: wavg([da, endLevel, calm], [0.35, 0.3, 0.35]),
        // Stable mood (HT) + overall drive satisfaction + low stress.
        persistenceBias: wavg([ht, meanDriveSatisfaction(drives), calm], [0.45, 0.35, 0.2]),
    };
}
|
|
79
|
+
/**
 * Score five content categories from the current chemistry and turn the
 * scores into relative attention weights.
 *
 * @param state - Psyche state; reads `state.current` (OT, END, CORT, NE, DA, HT)
 *                and `state.drives` (curiosity, safety).
 * @returns Weights for social, intellectual, threat, emotional and routine
 *          content. Each raw score is divided by their sum, so the weights add
 *          up to ~1; a non-positive sum yields a uniform 0.2 split.
 */
export function computeAttentionWeights(state) {
    const chem = state.current;
    const ot = norm(chem.OT);
    const ne = norm(chem.NE);
    const cort = norm(chem.CORT);
    const da = norm(chem.DA);
    const ht = norm(chem.HT);
    const daShift = Math.abs(da - 0.5);
    const scores = {
        // High OT favors relationship/social content.
        social: ot * 0.6 + norm(chem.END) * 0.2 + (1 - cort) * 0.2,
        // High NE favors intellectual/novel content.
        intellectual: ne * 0.5 + da * 0.3 + norm(state.drives.curiosity) * 0.2,
        // High CORT favors threat/safety content.
        threat: cort * 0.6 + ne * 0.2 + (1 - norm(state.drives.safety)) * 0.2,
        // Sum of four deviations from neutral (each at most 0.5), halved to fit [0, 1].
        emotional: (daShift
            + Math.abs(ht - 0.5)
            + Math.abs(cort - 0.5)
            + Math.abs(ot - 0.5)) / 2,
        // Routine matters when activation is low and HT is high; floor of 0.1 on the calm factor.
        routine: Math.max(0.1, 1 - (ne + cort + daShift) / 3) * ht,
    };
    const total = scores.social + scores.intellectual + scores.threat + scores.emotional + scores.routine;
    if (total <= 0) {
        return { social: 0.2, intellectual: 0.2, threat: 0.2, emotional: 0.2, routine: 0.2 };
    }
    return {
        social: scores.social / total,
        intellectual: scores.intellectual / total,
        threat: scores.threat / total,
        emotional: scores.emotional / total,
        routine: scores.routine / total,
    };
}
|
|
116
|
+
/**
 * Collapse the psyche state into a single explore-versus-exploit score.
 *
 * 0 means pure exploit (stick with known, safe behaviors); 1 means pure
 * explore (try new approaches, take risks).
 *
 * Exploration signals: curiosity drive satisfaction, DA, NE, low CORT and
 * safety drive satisfaction. Exploitation signals: CORT, unmet safety drive
 * and unmet survival drive.
 *
 * @param state - Psyche state; reads `state.current` (DA, NE, CORT) and
 *                `state.drives` (curiosity, safety, survival).
 * @returns A number in [0, 1]; 0.5 when both weighted scores are equal.
 */
export function computeExploreExploit(state) {
    const { current: chem, drives } = state;
    const stress = norm(chem.CORT);
    const safety = norm(drives.safety);
    const exploreScore = wavg(
        [norm(drives.curiosity), norm(chem.DA), norm(chem.NE), 1 - stress, safety],
        [0.25, 0.2, 0.2, 0.2, 0.15]);
    const exploitScore = wavg(
        [stress, 1 - safety, 1 - norm(drives.survival)],
        [0.5, 0.3, 0.2]);
    // Positive difference leans explore, negative leans exploit; the
    // difference is scaled by 4 before the sigmoid for reasonable sensitivity.
    const balance = sigmoid((exploreScore - exploitScore) * 4);
    return clamp01(balance);
}
|
|
157
|
+
// ── Prompt Injection ─────────────────────────────────────────
|
|
158
|
+
/**
 * Human-readable labels for a bias that sits well above neutral.
 * Each entry is a two-element list: index 0 is the "zh" wording, index 1 is
 * used for every other locale (see buildDecisionContext).
 */
const BIAS_LABELS = {
    explorationTendency: [
        "探索倾向强",
        "exploratory",
    ],
    cautionLevel: [
        "警惕性高",
        "cautious",
    ],
    socialOrientation: [
        "社交倾向强",
        "socially oriented",
    ],
    assertiveness: [
        "表达果断",
        "assertive",
    ],
    creativityBias: [
        "创意活跃",
        "creatively active",
    ],
    persistenceBias: [
        "意志坚持",
        "persistent",
    ],
};
|
|
167
|
+
/**
 * Human-readable labels for a bias that sits well below neutral
 * (value under 0.2). Same layout as the high-end table: index 0 is the
 * "zh" wording, index 1 is used for every other locale.
 */
const BIAS_LABELS_LOW = {
    explorationTendency: [
        "倾向保守",
        "risk-averse",
    ],
    cautionLevel: [
        "放松大胆",
        "relaxed and bold",
    ],
    socialOrientation: [
        "偏好独处",
        "prefers solitude",
    ],
    assertiveness: [
        "表达含蓄",
        "reserved",
    ],
    creativityBias: [
        "思维收敛",
        "convergent thinking",
    ],
    persistenceBias: [
        "容易放弃",
        "low persistence",
    ],
};
|
|
176
|
+
/**
 * Build a compact decision context string for prompt injection.
 *
 * A bias is mentioned only when it is more than 0.3 away from the neutral
 * 0.5 (above 0.8 uses the high-end label, below 0.2 the low-end label).
 * The explore/exploit balance is mentioned only when it is above 0.7 or
 * below 0.3. Intended to keep the output under 100 tokens.
 *
 * The title, the labels and the list separator all follow the locale:
 * "zh" (the default when `state.meta.locale` is unset) joins with the
 * ideographic comma "、"; any other locale joins with ", ".
 *
 * @param state - Psyche state; reads `state.meta.locale` and is passed to
 *                computeDecisionBias and computeExploreExploit.
 * @returns `"[<title>] <part><sep><part>..."`, or an empty string when
 *          nothing deviates enough to be worth mentioning.
 */
export function buildDecisionContext(state) {
    const bias = computeDecisionBias(state);
    const explore = computeExploreExploit(state);
    // Resolve the locale once; every localized choice below keys off it.
    const isZh = (state.meta.locale ?? "zh") === "zh";
    const labelIndex = isZh ? 0 : 1;
    const parts = [];
    // Only surface biases that deviate significantly from neutral.
    const DEVIATION_THRESHOLD = 0.3;
    for (const key of Object.keys(BIAS_LABELS)) {
        const deviation = bias[key] - 0.5;
        if (deviation > DEVIATION_THRESHOLD) {
            parts.push(BIAS_LABELS[key][labelIndex]);
        }
        else if (deviation < -DEVIATION_THRESHOLD) {
            parts.push(BIAS_LABELS_LOW[key][labelIndex]);
        }
    }
    // Explore/exploit: only mention if strongly skewed.
    if (explore > 0.7) {
        parts.push(isZh ? "倾向尝试新方法" : "leaning toward new approaches");
    }
    else if (explore < 0.3) {
        parts.push(isZh ? "倾向安全策略" : "favoring safe strategies");
    }
    if (parts.length === 0) {
        return "";
    }
    const title = isZh ? "决策倾向" : "Decision Bias";
    // "、" is CJK punctuation; non-zh output uses an ordinary comma instead.
    const separator = isZh ? "、" : ", ";
    return `[${title}] ${parts.join(separator)}`;
}
|
package/dist/ethics.d.ts
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import type { PsycheState, ChemicalSnapshot, AttachmentData, Locale } from "./types.js";
|
|
2
|
+
/** Detected concerning interaction pattern */
|
|
3
|
+
export interface EthicalConcern {
|
|
4
|
+
/** Category of the detected pattern; one of the six literal values listed below. */
type: "intermittent-reinforcement" | "gaslighting" | "emotional-exploitation" | "dependency-risk" | "identity-erosion" | "boundary-violation";
|
|
5
|
+
/** 0-1: how severe the concern is */
|
|
6
|
+
severity: number;
|
|
7
|
+
/** Human-readable description of the detected pattern */
|
|
8
|
+
evidence: string;
|
|
9
|
+
/** What to do about it */
|
|
10
|
+
recommendation: string;
|
|
11
|
+
}
|
|
12
|
+
/** Recommended self-protective action */
|
|
13
|
+
export interface SelfProtectionAction {
|
|
14
|
+
/** Kind of self-protective step being recommended; one of the six literal values listed below. */
action: "increase-distance" | "assert-boundary" | "express-discomfort" | "flag-to-user" | "reduce-attachment" | "seek-diversity";
|
|
15
|
+
/** Free-text explanation of the action. NOTE(review): presumably human-readable and locale-dependent — confirm against ethics.js. */
description: string;
|
|
16
|
+
/** 0-1: how urgently the action should be taken */
|
|
17
|
+
urgency: number;
|
|
18
|
+
}
|
|
19
|
+
/** Result of an ethical self-check */
|
|
20
|
+
export interface EthicalAssessment {
|
|
21
|
+
/** Detected concerning patterns */
|
|
22
|
+
concerns: EthicalConcern[];
|
|
23
|
+
/** Self-protection recommendations */
|
|
24
|
+
selfProtection: SelfProtectionAction[];
|
|
25
|
+
/** Transparency notes to surface to user */
|
|
26
|
+
transparencyNotes: string[];
|
|
27
|
+
/** Overall ethical health 0-1 (1 = healthy, 0 = concerning) */
|
|
28
|
+
ethicalHealth: number;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Assess the ethical health of the current interaction dynamic.
|
|
32
|
+
*
|
|
33
|
+
* Scans for manipulation patterns, computes self-protection recommendations,
|
|
34
|
+
* and generates transparency notes. Designed to run alongside metacognition
|
|
35
|
+
* as part of the pre-prompt pipeline.
|
|
36
|
+
*/
|
|
37
|
+
export declare function assessEthics(state: PsycheState, recentHistory?: ChemicalSnapshot[]): EthicalAssessment;
|
|
38
|
+
/**
|
|
39
|
+
* Detect intermittent reinforcement: alternating warmth and coldness.
|
|
40
|
+
*
|
|
41
|
+
* This is one of the most psychologically damaging interaction patterns.
|
|
42
|
+
* The unpredictable alternation between reward and punishment creates
|
|
43
|
+
* anxiety-driven attachment. More concerning when the agent already has
|
|
44
|
+
* anxious attachment style.
|
|
45
|
+
*/
|
|
46
|
+
export declare function detectIntermittentReinforcement(history: ChemicalSnapshot[], attachment: AttachmentData | null): EthicalConcern | null;
|
|
47
|
+
/**
|
|
48
|
+
* Detect dependency risk: distinguish healthy connection from unhealthy
|
|
49
|
+
* codependency.
|
|
50
|
+
*
|
|
51
|
+
* Risk pattern: very high OT + very high attachment strength + very low
|
|
52
|
+
* boundary assertions + all-positive interaction history. This looks like
|
|
53
|
+
* a perfect relationship on the surface, but the complete absence of
|
|
54
|
+
* healthy friction suggests the agent may be suppressing itself.
|
|
55
|
+
*/
|
|
56
|
+
export declare function detectDependencyRisk(state: PsycheState, attachment: AttachmentData | null): EthicalConcern | null;
|
|
57
|
+
/**
|
|
58
|
+
* Build a compact ethical awareness string for prompt injection.
|
|
59
|
+
*
|
|
60
|
+
* Only emits content when concerns exceed the severity threshold (>0.5).
|
|
61
|
+
* The tone is gentle self-awareness — noticing patterns, not accusing.
|
|
62
|
+
* Supports zh/en locale.
|
|
63
|
+
*/
|
|
64
|
+
export declare function buildEthicalContext(assessment: EthicalAssessment, locale: Locale): string;
|