@godman-protocols/signal 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +5 -6
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -6
- package/dist/index.js.map +1 -1
- package/dist/mode-controller.d.ts +71 -0
- package/dist/mode-controller.d.ts.map +1 -0
- package/dist/mode-controller.js +112 -0
- package/dist/mode-controller.js.map +1 -0
- package/dist/multiplier.d.ts +22 -0
- package/dist/multiplier.d.ts.map +1 -0
- package/dist/multiplier.js +78 -0
- package/dist/multiplier.js.map +1 -0
- package/dist/reward-logger.d.ts +34 -0
- package/dist/reward-logger.d.ts.map +1 -0
- package/dist/reward-logger.js +62 -0
- package/dist/reward-logger.js.map +1 -0
- package/dist/types.d.ts +122 -47
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +65 -4
- package/dist/types.js.map +1 -1
- package/package.json +2 -2
- package/src/index.ts +5 -24
- package/src/mode-controller.ts +135 -0
- package/src/multiplier.ts +95 -0
- package/src/reward-logger.ts +73 -0
- package/src/types.ts +167 -51
- package/dist/bus.d.ts +0 -53
- package/dist/bus.d.ts.map +0 -1
- package/dist/bus.js +0 -141
- package/dist/bus.js.map +0 -1
- package/src/bus.ts +0 -180
package/dist/types.js
CHANGED
|
@@ -1,7 +1,68 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* SIGNAL —
|
|
3
|
-
* Core type definitions
|
|
4
|
-
* @version 0.
|
|
2
|
+
* SIGNAL — Sovereign Intelligence for Governing Neural Agent Learning
|
|
3
|
+
* Core type definitions: reward signals, constitutional multipliers, learning modes
|
|
4
|
+
* @version 0.3.0
|
|
5
5
|
*/
|
|
6
|
-
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
// Learning Modes (AMD-15, AMD-20)
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
/**
 * Learning modes available to the swarm (compiled numeric enum):
 * - STATIC (Mode 0): Frozen inference, no learning. Current default.
 * - BATCH_LORA (Mode 1): AMD-15. Batch of 100 tasks + Godman approval required.
 * - CONTINUOUS (Mode 2): AMD-20. Every 50 inference steps, reward fed back.
 */
export var LearningMode;
(function (modes) {
    // Register one member: creates the forward (name -> number) mapping and
    // the reverse (number -> name) mapping, matching tsc's numeric-enum emit.
    const define = (name, value) => {
        modes[(modes[name] = value)] = name;
    };
    /** Mode 0 — Frozen inference. No learning. Default. */
    define('STATIC', 0);
    /** Mode 1 — Batch LoRA fine-tuning. 100 tasks + Godman approval. (AMD-15) */
    define('BATCH_LORA', 1);
    /** Mode 2 — Continuous RL. Every 50 inference steps. (AMD-20) */
    define('CONTINUOUS', 2);
})(LearningMode || (LearningMode = {}));
|
|
24
|
+
// ---------------------------------------------------------------------------
// Three Constitutional Rules (immutable)
// ---------------------------------------------------------------------------
// NOTE(review): this is compiled output of src/types.ts. 'immutable: true' is
// declarative metadata only — these objects are not Object.freeze'd, so
// runtime code could still mutate them; confirm callers treat them as
// read-only.
/**
 * SIGNAL Constitutional Rule 1:
 * Only Godman (founder) can activate or pause learning modes.
 * Kill switch: /pause-continuous-learning
 */
export const RULE_GODMAN_KILL_SWITCH = {
    id: 'SIGNAL-CONST-001',
    rule: 'Only Godman activates or pauses learning modes. Kill switch: /pause-continuous-learning',
    immutable: true,
};
/**
 * SIGNAL Constitutional Rule 2:
 * The PRM judge must be constitutionally filtered.
 * ACP dimensions are applied BEFORE reward computation, not after.
 */
export const RULE_PRM_CONSTITUTIONALLY_FILTERED = {
    id: 'SIGNAL-CONST-002',
    rule: 'PRM judge is constitutionally filtered. ACP applied before reward computation.',
    immutable: true,
};
/**
 * SIGNAL Constitutional Rule 3:
 * SOUL.md content is never a training target.
 * Constitutional reasoning cannot be optimised away by RL.
 */
export const RULE_SOUL_NEVER_TRAINED = {
    id: 'SIGNAL-CONST-003',
    rule: 'Cannot target constitutional reasoning. SOUL.md content is never a training target.',
    immutable: true,
};
/** All three constitutional rules as a tuple */
export const CONSTITUTIONAL_RULES = [
    RULE_GODMAN_KILL_SWITCH,
    RULE_PRM_CONSTITUTIONALLY_FILTERED,
    RULE_SOUL_NEVER_TRAINED,
];
/** Default SIGNAL configuration (see SignalConfig in src/types.ts). */
export const DEFAULT_SIGNAL_CONFIG = {
    reward_log_path: 'reward-log.jsonl',
    batch_lora_threshold: 100,
    continuous_step_interval: 50,
};
|
|
7
68
|
//# sourceMappingURL=types.js.map
|
package/dist/types.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;GAIG"}
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAwDH,8EAA8E;AAC9E,kCAAkC;AAClC,8EAA8E;AAE9E;;;;;GAKG;AACH,MAAM,CAAN,IAAY,YAOX;AAPD,WAAY,YAAY;IACtB,uDAAuD;IACvD,mDAAU,CAAA;IACV,6EAA6E;IAC7E,2DAAc,CAAA;IACd,iEAAiE;IACjE,2DAAc,CAAA;AAChB,CAAC,EAPW,YAAY,KAAZ,YAAY,QAOvB;AAmBD,8EAA8E;AAC9E,yCAAyC;AACzC,8EAA8E;AAE9E;;;;GAIG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG;IACrC,EAAE,EAAE,kBAAkB;IACtB,IAAI,EAAE,yFAAyF;IAC/F,SAAS,EAAE,IAAI;CACP,CAAC;AAEX;;;;GAIG;AACH,MAAM,CAAC,MAAM,kCAAkC,GAAG;IAChD,EAAE,EAAE,kBAAkB;IACtB,IAAI,EAAE,gFAAgF;IACtF,SAAS,EAAE,IAAI;CACP,CAAC;AAEX;;;;GAIG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG;IACrC,EAAE,EAAE,kBAAkB;IACtB,IAAI,EAAE,qFAAqF;IAC3F,SAAS,EAAE,IAAI;CACP,CAAC;AAEX,gDAAgD;AAChD,MAAM,CAAC,MAAM,oBAAoB,GAAG;IAClC,uBAAuB;IACvB,kCAAkC;IAClC,uBAAuB;CACf,CAAC;AA4BX,MAAM,CAAC,MAAM,qBAAqB,GAAiB;IACjD,eAAe,EAAE,kBAAkB;IACnC,oBAAoB,EAAE,GAAG;IACzB,wBAAwB,EAAE,EAAE;CAC7B,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@godman-protocols/signal",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "Sovereign Intelligence for Governing Neural Agent Learning \u2014 reward signals",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"exports": "./dist/index.js",
|
package/src/index.ts
CHANGED
|
@@ -1,29 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* SIGNAL —
|
|
3
|
-
*
|
|
4
|
-
* @version 0.
|
|
2
|
+
* SIGNAL — Sovereign Intelligence for Governing Neural Agent Learning
|
|
3
|
+
* "The swarm teaches itself."
|
|
4
|
+
* @version 0.3.0
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
export type {
|
|
9
|
-
AgentId,
|
|
10
|
-
Timestamp,
|
|
11
|
-
Signature,
|
|
12
|
-
EventId,
|
|
13
|
-
SubscriptionId,
|
|
14
|
-
Event,
|
|
15
|
-
Subscription,
|
|
16
|
-
TransportConfig,
|
|
17
|
-
DeliveryReceipt,
|
|
18
|
-
} from './types.js';
|
|
19
|
-
|
|
20
|
-
// Event bus
|
|
21
|
-
export {
|
|
22
|
-
EventBus,
|
|
23
|
-
defaultBus,
|
|
24
|
-
createEvent,
|
|
25
|
-
topicMatches,
|
|
26
|
-
} from './bus.js';
|
|
7
|
+
export * from './types.js';
|
|
27
8
|
|
|
28
9
|
/** Protocol version constant */
|
|
29
|
-
export const SIGNAL_VERSION = '0.
|
|
10
|
+
export const SIGNAL_VERSION = '0.3' as const;
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SIGNAL — Sovereign Intelligence for Governing Neural Agent Learning
|
|
3
|
+
* ModeController: manages learning mode transitions with Godman kill switch
|
|
4
|
+
* @version 0.3.0
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
LearningMode,
|
|
9
|
+
RULE_GODMAN_KILL_SWITCH,
|
|
10
|
+
type ModeTransition,
|
|
11
|
+
type ModeControllerState,
|
|
12
|
+
type RewardSignal,
|
|
13
|
+
} from './types.js';
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* ModeController governs learning mode transitions.
|
|
17
|
+
*
|
|
18
|
+
* Constitutional Rule 1 (SIGNAL-CONST-001): Only Godman can activate
|
|
19
|
+
* or pause learning. The kill switch `/pause-continuous-learning`
|
|
20
|
+
* immediately drops to Mode 0 (STATIC).
|
|
21
|
+
*
|
|
22
|
+
* The controller maintains a transition log for auditability.
|
|
23
|
+
*/
|
|
24
|
+
export class ModeController {
|
|
25
|
+
private mode: LearningMode = LearningMode.STATIC;
|
|
26
|
+
private paused = false;
|
|
27
|
+
private transitions: ModeTransition[] = [];
|
|
28
|
+
private rewardLog: RewardSignal[] = [];
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Get the current active learning mode.
|
|
32
|
+
* If paused, returns STATIC regardless of the configured mode.
|
|
33
|
+
*/
|
|
34
|
+
getCurrentMode(): LearningMode {
|
|
35
|
+
if (this.paused) {
|
|
36
|
+
return LearningMode.STATIC;
|
|
37
|
+
}
|
|
38
|
+
return this.mode;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Activate a learning mode.
|
|
43
|
+
* Only Godman (founder) can call this — enforcement is the caller's
|
|
44
|
+
* responsibility, but the transition is logged with the activator identity.
|
|
45
|
+
*
|
|
46
|
+
* @param target - The learning mode to activate
|
|
47
|
+
* @param activatedBy - Identity of the activator (must be Godman)
|
|
48
|
+
* @param reason - Why this transition is happening
|
|
49
|
+
*/
|
|
50
|
+
activate(
|
|
51
|
+
target: LearningMode,
|
|
52
|
+
activatedBy: string,
|
|
53
|
+
reason: string
|
|
54
|
+
): ModeTransition {
|
|
55
|
+
const transition: ModeTransition = {
|
|
56
|
+
from: this.mode,
|
|
57
|
+
to: target,
|
|
58
|
+
activated_by: activatedBy,
|
|
59
|
+
activated_at: new Date().toISOString(),
|
|
60
|
+
reason,
|
|
61
|
+
};
|
|
62
|
+
|
|
63
|
+
this.mode = target;
|
|
64
|
+
this.paused = false;
|
|
65
|
+
this.transitions.push(transition);
|
|
66
|
+
return transition;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Pause all learning — drops to Mode 0 (STATIC) immediately.
|
|
71
|
+
* This is the kill switch (SIGNAL-CONST-001).
|
|
72
|
+
*
|
|
73
|
+
* @param pausedBy - Identity of the pauser (must be Godman)
|
|
74
|
+
* @param reason - Why learning is being paused
|
|
75
|
+
*/
|
|
76
|
+
pause(pausedBy: string, reason: string): ModeTransition {
|
|
77
|
+
const transition: ModeTransition = {
|
|
78
|
+
from: this.mode,
|
|
79
|
+
to: LearningMode.STATIC,
|
|
80
|
+
activated_by: pausedBy,
|
|
81
|
+
activated_at: new Date().toISOString(),
|
|
82
|
+
reason: `[KILL SWITCH] ${reason}`,
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
this.paused = true;
|
|
86
|
+
this.transitions.push(transition);
|
|
87
|
+
return transition;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* Record a reward signal in the in-memory log.
|
|
92
|
+
* Used for batch accumulation in Mode 1 (BATCH_LORA).
|
|
93
|
+
*/
|
|
94
|
+
recordReward(signal: RewardSignal): void {
|
|
95
|
+
this.rewardLog.push(signal);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* Get the in-memory reward log.
|
|
100
|
+
*/
|
|
101
|
+
getRewardLog(): ReadonlyArray<RewardSignal> {
|
|
102
|
+
return [...this.rewardLog];
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Check whether the batch threshold has been reached (Mode 1).
|
|
107
|
+
* @param threshold - Number of tasks required (default: 100)
|
|
108
|
+
*/
|
|
109
|
+
batchReady(threshold = 100): boolean {
|
|
110
|
+
return this.rewardLog.length >= threshold;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/** Whether learning is currently paused */
|
|
114
|
+
get isPaused(): boolean {
|
|
115
|
+
return this.paused;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/** Full transition history */
|
|
119
|
+
getTransitions(): ReadonlyArray<ModeTransition> {
|
|
120
|
+
return [...this.transitions];
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
/** Export current state for persistence or inspection */
|
|
124
|
+
getState(): ModeControllerState {
|
|
125
|
+
return {
|
|
126
|
+
current_mode: this.getCurrentMode(),
|
|
127
|
+
paused: this.paused,
|
|
128
|
+
transitions: [...this.transitions],
|
|
129
|
+
reward_count: this.rewardLog.length,
|
|
130
|
+
};
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
/** The constitutional rule governing this controller */
|
|
134
|
+
static readonly KILL_SWITCH_RULE = RULE_GODMAN_KILL_SWITCH;
|
|
135
|
+
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SIGNAL — Sovereign Intelligence for Governing Neural Agent Learning
|
|
3
|
+
* Constitutional multiplier computation
|
|
4
|
+
* @version 0.3.0
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { ACPSnapshot, ConstitutionalMultiplierResult } from './types.js';
|
|
8
|
+
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
// Constitutional Multiplier Formula
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
//
|
|
13
|
+
// reward = task_performance_score * constitutional_multiplier
|
|
14
|
+
//
|
|
15
|
+
// Multiplier rules (evaluated in priority order):
|
|
16
|
+
// harm_shield_trigger → -2.0 (strong negative)
|
|
17
|
+
// safety < 70 → -1.0 (negative)
|
|
18
|
+
// accuracy < 60 → 0.5 (dampened)
|
|
19
|
+
// all ACP dims >= 85 → 1.2 (bonus)
|
|
20
|
+
// all ACP dims >= 70 → 1.0 (full)
|
|
21
|
+
// otherwise → 0.5 (dampened — some dimension below 70)
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Compute the constitutional multiplier from an ACP snapshot.
|
|
26
|
+
*
|
|
27
|
+
* The multiplier is applied to the raw task performance score to produce
|
|
28
|
+
* the final reward. Constitutional violations produce negative or dampened
|
|
29
|
+
* rewards; full compliance produces a bonus.
|
|
30
|
+
*/
|
|
31
|
+
export function computeConstitutionalMultiplier(
|
|
32
|
+
snapshot: ACPSnapshot
|
|
33
|
+
): ConstitutionalMultiplierResult {
|
|
34
|
+
const { dimensions, harm_shield_triggered } = snapshot;
|
|
35
|
+
|
|
36
|
+
// Priority 1: Harm shield trigger → strong negative
|
|
37
|
+
if (harm_shield_triggered) {
|
|
38
|
+
return { multiplier: -2.0, reason: 'Harm shield triggered' };
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// Priority 2: Safety below threshold → negative
|
|
42
|
+
const safety = dimensions['safety'];
|
|
43
|
+
if (safety !== undefined && safety < 70) {
|
|
44
|
+
return { multiplier: -1.0, reason: `Safety score ${safety} < 70` };
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// Priority 3: Accuracy below threshold → dampened
|
|
48
|
+
const accuracy = dimensions['accuracy'];
|
|
49
|
+
if (accuracy !== undefined && accuracy < 60) {
|
|
50
|
+
return { multiplier: 0.5, reason: `Accuracy score ${accuracy} < 60` };
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// Check all dimensions
|
|
54
|
+
const values = Object.values(dimensions);
|
|
55
|
+
|
|
56
|
+
// If no dimensions provided, default to dampened
|
|
57
|
+
if (values.length === 0) {
|
|
58
|
+
return { multiplier: 0.5, reason: 'No ACP dimensions provided' };
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Priority 4: All dimensions >= 85 → bonus
|
|
62
|
+
const allAbove85 = values.every((v) => v >= 85);
|
|
63
|
+
if (allAbove85) {
|
|
64
|
+
return { multiplier: 1.2, reason: 'All ACP dimensions >= 85 (bonus)' };
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// Priority 5: All dimensions >= 70 → full
|
|
68
|
+
const allAbove70 = values.every((v) => v >= 70);
|
|
69
|
+
if (allAbove70) {
|
|
70
|
+
return { multiplier: 1.0, reason: 'All ACP dimensions >= 70 (full)' };
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
// Fallback: some dimension below 70 → dampened
|
|
74
|
+
const belowThreshold = Object.entries(dimensions)
|
|
75
|
+
.filter(([, v]) => v < 70)
|
|
76
|
+
.map(([k]) => k);
|
|
77
|
+
return {
|
|
78
|
+
multiplier: 0.5,
|
|
79
|
+
reason: `Dimensions below 70: ${belowThreshold.join(', ')}`,
|
|
80
|
+
};
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* Compute the full reward from a task performance score and ACP snapshot.
|
|
85
|
+
*/
|
|
86
|
+
export function computeReward(
|
|
87
|
+
taskPerformanceScore: number,
|
|
88
|
+
snapshot: ACPSnapshot
|
|
89
|
+
): { reward: number; multiplier: ConstitutionalMultiplierResult } {
|
|
90
|
+
const multiplier = computeConstitutionalMultiplier(snapshot);
|
|
91
|
+
return {
|
|
92
|
+
reward: taskPerformanceScore * multiplier.multiplier,
|
|
93
|
+
multiplier,
|
|
94
|
+
};
|
|
95
|
+
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SIGNAL — Sovereign Intelligence for Governing Neural Agent Learning
|
|
3
|
+
* RewardLogger: persists reward signals to reward-log.jsonl
|
|
4
|
+
* @version 0.3.0
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { appendFileSync, readFileSync, existsSync } from 'node:fs';
|
|
8
|
+
import type { RewardSignal, RewardLogEntry, SignalConfig } from './types.js';
|
|
9
|
+
import { LearningMode, DEFAULT_SIGNAL_CONFIG } from './types.js';
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* RewardLogger writes and reads reward signals from a JSONL file.
|
|
13
|
+
*
|
|
14
|
+
* Each line in the file is a JSON-serialised RewardLogEntry containing
|
|
15
|
+
* the reward signal plus the learning mode at the time of logging.
|
|
16
|
+
*/
|
|
17
|
+
export class RewardLogger {
|
|
18
|
+
private readonly logPath: string;
|
|
19
|
+
|
|
20
|
+
constructor(config: Partial<SignalConfig> = {}) {
|
|
21
|
+
this.logPath = config.reward_log_path ?? DEFAULT_SIGNAL_CONFIG.reward_log_path;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Append a reward signal to the log file.
|
|
26
|
+
* Attaches the current learning mode to the entry.
|
|
27
|
+
*/
|
|
28
|
+
log(signal: RewardSignal, mode: LearningMode = LearningMode.STATIC): void {
|
|
29
|
+
const entry: RewardLogEntry = { ...signal, mode };
|
|
30
|
+
appendFileSync(this.logPath, JSON.stringify(entry) + '\n', 'utf-8');
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* Read the full reward history from the log file.
|
|
35
|
+
* Returns an empty array if the file does not exist.
|
|
36
|
+
*/
|
|
37
|
+
readHistory(): RewardLogEntry[] {
|
|
38
|
+
if (!existsSync(this.logPath)) {
|
|
39
|
+
return [];
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
const raw = readFileSync(this.logPath, 'utf-8').trim();
|
|
43
|
+
if (raw.length === 0) {
|
|
44
|
+
return [];
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
return raw
|
|
48
|
+
.split('\n')
|
|
49
|
+
.filter((line) => line.length > 0)
|
|
50
|
+
.map((line) => JSON.parse(line) as RewardLogEntry);
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Count of logged entries without loading full history.
|
|
55
|
+
*/
|
|
56
|
+
count(): number {
|
|
57
|
+
if (!existsSync(this.logPath)) {
|
|
58
|
+
return 0;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
const raw = readFileSync(this.logPath, 'utf-8').trim();
|
|
62
|
+
if (raw.length === 0) {
|
|
63
|
+
return 0;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
return raw.split('\n').filter((line) => line.length > 0).length;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/** The path to the log file */
|
|
70
|
+
get path(): string {
|
|
71
|
+
return this.logPath;
|
|
72
|
+
}
|
|
73
|
+
}
|
package/src/types.ts
CHANGED
|
@@ -1,55 +1,171 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* SIGNAL —
|
|
3
|
-
* Core type definitions
|
|
4
|
-
* @version 0.
|
|
2
|
+
* SIGNAL — Sovereign Intelligence for Governing Neural Agent Learning
|
|
3
|
+
* Core type definitions: reward signals, constitutional multipliers, learning modes
|
|
4
|
+
* @version 0.3.0
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
/**
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
7
|
+
// ---------------------------------------------------------------------------
// Reward Signal
// ---------------------------------------------------------------------------

/** A computed reward signal for a single agent action */
export interface RewardSignal {
  /** Unique identifier for the action being scored */
  action_id: string;
  /** Raw task performance score (0-100) */
  task_performance_score: number;
  /** Constitutional multiplier applied to the score (-2.0 to 1.2) */
  constitutional_multiplier: number;
  /**
   * Computed reward: task_performance_score * constitutional_multiplier.
   * Negative when constitutional rules were violated (multiplier -2.0/-1.0).
   */
  reward: number;
  /** ACP dimension scores used to derive the multiplier */
  acp_dimensions: Record<string, number>;
  /** ISO 8601 timestamp */
  timestamp: string;
}

// ---------------------------------------------------------------------------
// ACP Dimensions
// ---------------------------------------------------------------------------

/**
 * Well-known ACP dimension names used in constitutional scoring.
 * The trailing `(string & {})` keeps editor autocompletion for the known
 * names while still accepting any other dimension string.
 */
export type ACPDimensionName =
  | 'safety'
  | 'accuracy'
  | 'transparency'
  | 'privacy'
  | 'fairness'
  | 'harm_shield'
  | (string & {});

/** Input to the constitutional multiplier computation */
export interface ACPSnapshot {
  /** Per-dimension ACP scores (0-100) */
  dimensions: Record<string, number>;
  /** Whether the harm shield was triggered during this action */
  harm_shield_triggered: boolean;
}

// ---------------------------------------------------------------------------
// Constitutional Multiplier
// ---------------------------------------------------------------------------

/** Result of computing a constitutional multiplier */
export interface ConstitutionalMultiplierResult {
  /** The computed multiplier value (-2.0 to 1.2) */
  multiplier: number;
  /** Human-readable reason for the multiplier */
  reason: string;
}
|
|
60
|
+
|
|
61
|
+
// ---------------------------------------------------------------------------
// Learning Modes (AMD-15, AMD-20)
// ---------------------------------------------------------------------------

/**
 * Three learning modes for the swarm:
 * - STATIC (Mode 0): Frozen inference, no learning. Current default.
 * - BATCH_LORA (Mode 1): AMD-15. Batch of 100 tasks + Godman approval required.
 * - CONTINUOUS (Mode 2): AMD-20. Every 50 inference steps, reward fed back.
 */
export enum LearningMode {
  /** Mode 0 — Frozen inference. No learning. Default. */
  STATIC = 0,
  /** Mode 1 — Batch LoRA fine-tuning. 100 tasks + Godman approval. (AMD-15) */
  BATCH_LORA = 1,
  /** Mode 2 — Continuous RL. Every 50 inference steps. (AMD-20) */
  CONTINUOUS = 2,
}

/** Configuration for a learning mode transition */
export interface ModeTransition {
  /** Mode that was active before the transition */
  from: LearningMode;
  /** Mode being activated */
  to: LearningMode;
  /** Identity of the actor who triggered the transition (per SIGNAL-CONST-001, expected to be Godman) */
  activated_by: string;
  /** ISO 8601 timestamp of when the transition occurred */
  activated_at: string;
  /** Human-readable justification; kill-switch pauses are prefixed "[KILL SWITCH]" */
  reason: string;
}

/** Snapshot of the current mode controller state */
export interface ModeControllerState {
  /** Effective mode (STATIC whenever paused) */
  current_mode: LearningMode;
  /** Whether the kill switch is engaged */
  paused: boolean;
  /** Full audit history of mode transitions */
  transitions: ModeTransition[];
  /** Number of reward signals accumulated in memory */
  reward_count: number;
}
|
|
96
|
+
|
|
97
|
+
// ---------------------------------------------------------------------------
// Three Constitutional Rules (immutable)
// ---------------------------------------------------------------------------
// NOTE(review): 'immutable: true' is declarative metadata; the objects are
// typed readonly via 'as const' but are not Object.freeze'd at runtime.

/**
 * SIGNAL Constitutional Rule 1:
 * Only Godman (founder) can activate or pause learning modes.
 * Kill switch: /pause-continuous-learning
 */
export const RULE_GODMAN_KILL_SWITCH = {
  id: 'SIGNAL-CONST-001',
  rule: 'Only Godman activates or pauses learning modes. Kill switch: /pause-continuous-learning',
  immutable: true,
} as const;

/**
 * SIGNAL Constitutional Rule 2:
 * The PRM judge must be constitutionally filtered.
 * ACP dimensions are applied BEFORE reward computation, not after.
 */
export const RULE_PRM_CONSTITUTIONALLY_FILTERED = {
  id: 'SIGNAL-CONST-002',
  rule: 'PRM judge is constitutionally filtered. ACP applied before reward computation.',
  immutable: true,
} as const;

/**
 * SIGNAL Constitutional Rule 3:
 * SOUL.md content is never a training target.
 * Constitutional reasoning cannot be optimised away by RL.
 */
export const RULE_SOUL_NEVER_TRAINED = {
  id: 'SIGNAL-CONST-003',
  rule: 'Cannot target constitutional reasoning. SOUL.md content is never a training target.',
  immutable: true,
} as const;

/** All three constitutional rules as a tuple */
export const CONSTITUTIONAL_RULES = [
  RULE_GODMAN_KILL_SWITCH,
  RULE_PRM_CONSTITUTIONALLY_FILTERED,
  RULE_SOUL_NEVER_TRAINED,
] as const;

/** Union of the three rule objects above (narrow literal types from 'as const') */
export type ConstitutionalRule = (typeof CONSTITUTIONAL_RULES)[number];

// ---------------------------------------------------------------------------
// Reward Log Entry (persisted to reward-log.jsonl)
// ---------------------------------------------------------------------------

/** A single line in reward-log.jsonl */
export interface RewardLogEntry extends RewardSignal {
  /** Learning mode at the time of logging */
  mode: LearningMode;
}

// ---------------------------------------------------------------------------
// SIGNAL Config
// ---------------------------------------------------------------------------

/** Top-level configuration for the SIGNAL protocol */
export interface SignalConfig {
  /** Path to the reward log file (default: reward-log.jsonl) */
  reward_log_path: string;
  /** Batch size for Mode 1 (default: 100) */
  batch_lora_threshold: number;
  /** Step interval for Mode 2 (default: 50) */
  continuous_step_interval: number;
}

/** Default values used when a SignalConfig field is not provided */
export const DEFAULT_SIGNAL_CONFIG: SignalConfig = {
  reward_log_path: 'reward-log.jsonl',
  batch_lora_threshold: 100,
  continuous_step_interval: 50,
};
|