@godman-protocols/signal 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/types.js CHANGED
@@ -1,7 +1,68 @@
1
1
  /**
2
- * SIGNAL — Event Bus and Pub/Sub for Agent Swarms
3
- * Core type definitions (skeleton)
4
- * @version 0.1.0-skeleton
2
+ * SIGNAL — Sovereign Intelligence for Governing Neural Agent Learning
3
+ * Core type definitions: reward signals, constitutional multipliers, learning modes
4
+ * @version 0.3.0
5
5
  */
6
- export {};
6
+ // ---------------------------------------------------------------------------
7
+ // Learning Modes (AMD-15, AMD-20)
8
+ // ---------------------------------------------------------------------------
9
+ /**
10
+ * Three learning modes for the swarm:
11
+ * - STATIC (Mode 0): Frozen inference, no learning. Current default.
12
+ * - BATCH_LORA (Mode 1): AMD-15. Batch of 100 tasks + Godman approval required.
13
+ * - CONTINUOUS (Mode 2): AMD-20. Every 50 inference steps, reward fed back.
14
+ */
15
+ export var LearningMode;
16
+ (function (LearningMode) {
17
+ /** Mode 0 — Frozen inference. No learning. Default. */
18
+ LearningMode[LearningMode["STATIC"] = 0] = "STATIC";
19
+ /** Mode 1 — Batch LoRA fine-tuning. 100 tasks + Godman approval. (AMD-15) */
20
+ LearningMode[LearningMode["BATCH_LORA"] = 1] = "BATCH_LORA";
21
+ /** Mode 2 — Continuous RL. Every 50 inference steps. (AMD-20) */
22
+ LearningMode[LearningMode["CONTINUOUS"] = 2] = "CONTINUOUS";
23
+ })(LearningMode || (LearningMode = {}));
24
+ // ---------------------------------------------------------------------------
25
+ // Three Constitutional Rules (immutable)
26
+ // ---------------------------------------------------------------------------
27
+ /**
28
+ * SIGNAL Constitutional Rule 1:
29
+ * Only Godman (founder) can activate or pause learning modes.
30
+ * Kill switch: /pause-continuous-learning
31
+ */
32
+ export const RULE_GODMAN_KILL_SWITCH = {
33
+ id: 'SIGNAL-CONST-001',
34
+ rule: 'Only Godman activates or pauses learning modes. Kill switch: /pause-continuous-learning',
35
+ immutable: true,
36
+ };
37
+ /**
38
+ * SIGNAL Constitutional Rule 2:
39
+ * The PRM judge must be constitutionally filtered.
40
+ * ACP dimensions are applied BEFORE reward computation, not after.
41
+ */
42
+ export const RULE_PRM_CONSTITUTIONALLY_FILTERED = {
43
+ id: 'SIGNAL-CONST-002',
44
+ rule: 'PRM judge is constitutionally filtered. ACP applied before reward computation.',
45
+ immutable: true,
46
+ };
47
+ /**
48
+ * SIGNAL Constitutional Rule 3:
49
+ * SOUL.md content is never a training target.
50
+ * Constitutional reasoning cannot be optimised away by RL.
51
+ */
52
+ export const RULE_SOUL_NEVER_TRAINED = {
53
+ id: 'SIGNAL-CONST-003',
54
+ rule: 'Cannot target constitutional reasoning. SOUL.md content is never a training target.',
55
+ immutable: true,
56
+ };
57
+ /** All three constitutional rules as a tuple */
58
+ export const CONSTITUTIONAL_RULES = [
59
+ RULE_GODMAN_KILL_SWITCH,
60
+ RULE_PRM_CONSTITUTIONALLY_FILTERED,
61
+ RULE_SOUL_NEVER_TRAINED,
62
+ ];
63
+ export const DEFAULT_SIGNAL_CONFIG = {
64
+ reward_log_path: 'reward-log.jsonl',
65
+ batch_lora_threshold: 100,
66
+ continuous_step_interval: 50,
67
+ };
7
68
  //# sourceMappingURL=types.js.map
package/dist/types.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;GAIG"}
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAwDH,8EAA8E;AAC9E,kCAAkC;AAClC,8EAA8E;AAE9E;;;;;GAKG;AACH,MAAM,CAAN,IAAY,YAOX;AAPD,WAAY,YAAY;IACtB,uDAAuD;IACvD,mDAAU,CAAA;IACV,6EAA6E;IAC7E,2DAAc,CAAA;IACd,iEAAiE;IACjE,2DAAc,CAAA;AAChB,CAAC,EAPW,YAAY,KAAZ,YAAY,QAOvB;AAmBD,8EAA8E;AAC9E,yCAAyC;AACzC,8EAA8E;AAE9E;;;;GAIG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG;IACrC,EAAE,EAAE,kBAAkB;IACtB,IAAI,EAAE,yFAAyF;IAC/F,SAAS,EAAE,IAAI;CACP,CAAC;AAEX;;;;GAIG;AACH,MAAM,CAAC,MAAM,kCAAkC,GAAG;IAChD,EAAE,EAAE,kBAAkB;IACtB,IAAI,EAAE,gFAAgF;IACtF,SAAS,EAAE,IAAI;CACP,CAAC;AAEX;;;;GAIG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG;IACrC,EAAE,EAAE,kBAAkB;IACtB,IAAI,EAAE,qFAAqF;IAC3F,SAAS,EAAE,IAAI;CACP,CAAC;AAEX,gDAAgD;AAChD,MAAM,CAAC,MAAM,oBAAoB,GAAG;IAClC,uBAAuB;IACvB,kCAAkC;IAClC,uBAAuB;CACf,CAAC;AA4BX,MAAM,CAAC,MAAM,qBAAqB,GAAiB;IACjD,eAAe,EAAE,kBAAkB;IACnC,oBAAoB,EAAE,GAAG;IACzB,wBAAwB,EAAE,EAAE;CAC7B,CAAC"}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@godman-protocols/signal",
3
- "version": "0.2.0",
4
- "description": "Event Bus and Pub/Sub for Agent Swarms \u2014 typed events, topic-glob routing, idempotent delivery, delivery receipts",
3
+ "version": "0.3.0",
4
+ "description": "Sovereign Intelligence for Governing Neural Agent Learning \u2014 reward signals",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
7
7
  "exports": "./dist/index.js",
package/src/index.ts CHANGED
@@ -1,29 +1,10 @@
1
1
  /**
2
- * SIGNAL — Event Bus and Pub/Sub for Agent Swarms
3
- * Public API surface
4
- * @version 0.2.0
2
+ * SIGNAL — Sovereign Intelligence for Governing Neural Agent Learning
3
+ * "The swarm teaches itself."
4
+ * @version 0.3.0
5
5
  */
6
6
 
7
- // Types
8
- export type {
9
- AgentId,
10
- Timestamp,
11
- Signature,
12
- EventId,
13
- SubscriptionId,
14
- Event,
15
- Subscription,
16
- TransportConfig,
17
- DeliveryReceipt,
18
- } from './types.js';
19
-
20
- // Event bus
21
- export {
22
- EventBus,
23
- defaultBus,
24
- createEvent,
25
- topicMatches,
26
- } from './bus.js';
7
+ export * from './types.js';
27
8
 
28
9
  /** Protocol version constant */
29
- export const SIGNAL_VERSION = '0.2' as const;
10
+ export const SIGNAL_VERSION = '0.3' as const;
@@ -0,0 +1,135 @@
1
+ /**
2
+ * SIGNAL — Sovereign Intelligence for Governing Neural Agent Learning
3
+ * ModeController: manages learning mode transitions with Godman kill switch
4
+ * @version 0.3.0
5
+ */
6
+
7
+ import {
8
+ LearningMode,
9
+ RULE_GODMAN_KILL_SWITCH,
10
+ type ModeTransition,
11
+ type ModeControllerState,
12
+ type RewardSignal,
13
+ } from './types.js';
14
+
15
+ /**
16
+ * ModeController governs learning mode transitions.
17
+ *
18
+ * Constitutional Rule 1 (SIGNAL-CONST-001): Only Godman can activate
19
+ * or pause learning. The kill switch `/pause-continuous-learning`
20
+ * immediately drops to Mode 0 (STATIC).
21
+ *
22
+ * The controller maintains a transition log for auditability.
23
+ */
24
+ export class ModeController {
25
+ private mode: LearningMode = LearningMode.STATIC;
26
+ private paused = false;
27
+ private transitions: ModeTransition[] = [];
28
+ private rewardLog: RewardSignal[] = [];
29
+
30
+ /**
31
+ * Get the current active learning mode.
32
+ * If paused, returns STATIC regardless of the configured mode.
33
+ */
34
+ getCurrentMode(): LearningMode {
35
+ if (this.paused) {
36
+ return LearningMode.STATIC;
37
+ }
38
+ return this.mode;
39
+ }
40
+
41
+ /**
42
+ * Activate a learning mode.
43
+ * Only Godman (founder) can call this — enforcement is the caller's
44
+ * responsibility, but the transition is logged with the activator identity.
45
+ *
46
+ * @param target - The learning mode to activate
47
+ * @param activatedBy - Identity of the activator (must be Godman)
48
+ * @param reason - Why this transition is happening
49
+ */
50
+ activate(
51
+ target: LearningMode,
52
+ activatedBy: string,
53
+ reason: string
54
+ ): ModeTransition {
55
+ const transition: ModeTransition = {
56
+ from: this.mode,
57
+ to: target,
58
+ activated_by: activatedBy,
59
+ activated_at: new Date().toISOString(),
60
+ reason,
61
+ };
62
+
63
+ this.mode = target;
64
+ this.paused = false;
65
+ this.transitions.push(transition);
66
+ return transition;
67
+ }
68
+
69
+ /**
70
+ * Pause all learning — getCurrentMode() reports Mode 0 (STATIC) immediately; the configured mode is kept until the next activate().
71
+ * This is the kill switch (SIGNAL-CONST-001).
72
+ *
73
+ * @param pausedBy - Identity of the pauser (must be Godman; not verified here, only recorded in the transition)
74
+ * @param reason - Why learning is being paused
75
+ */
76
+ pause(pausedBy: string, reason: string): ModeTransition {
77
+ const transition: ModeTransition = {
78
+ from: this.mode,
79
+ to: LearningMode.STATIC,
80
+ activated_by: pausedBy,
81
+ activated_at: new Date().toISOString(),
82
+ reason: `[KILL SWITCH] ${reason}`,
83
+ };
84
+
85
+ this.paused = true;
86
+ this.transitions.push(transition);
87
+ return transition;
88
+ }
89
+
90
+ /**
91
+ * Record a reward signal in the in-memory log.
92
+ * Used for batch accumulation in Mode 1 (BATCH_LORA).
93
+ */
94
+ recordReward(signal: RewardSignal): void {
95
+ this.rewardLog.push(signal);
96
+ }
97
+
98
+ /**
99
+ * Get the in-memory reward log.
100
+ */
101
+ getRewardLog(): ReadonlyArray<RewardSignal> {
102
+ return [...this.rewardLog];
103
+ }
104
+
105
+ /**
106
+ * Check whether the batch threshold has been reached (Mode 1).
107
+ * @param threshold - Number of tasks required (default: 100)
108
+ */
109
+ batchReady(threshold = 100): boolean {
110
+ return this.rewardLog.length >= threshold;
111
+ }
112
+
113
+ /** Whether learning is currently paused */
114
+ get isPaused(): boolean {
115
+ return this.paused;
116
+ }
117
+
118
+ /** Full transition history */
119
+ getTransitions(): ReadonlyArray<ModeTransition> {
120
+ return [...this.transitions];
121
+ }
122
+
123
+ /** Export current state for persistence or inspection */
124
+ getState(): ModeControllerState {
125
+ return {
126
+ current_mode: this.getCurrentMode(),
127
+ paused: this.paused,
128
+ transitions: [...this.transitions],
129
+ reward_count: this.rewardLog.length,
130
+ };
131
+ }
132
+
133
+ /** The constitutional rule governing this controller */
134
+ static readonly KILL_SWITCH_RULE = RULE_GODMAN_KILL_SWITCH;
135
+ }
@@ -0,0 +1,95 @@
1
+ /**
2
+ * SIGNAL — Sovereign Intelligence for Governing Neural Agent Learning
3
+ * Constitutional multiplier computation
4
+ * @version 0.3.0
5
+ */
6
+
7
+ import type { ACPSnapshot, ConstitutionalMultiplierResult } from './types.js';
8
+
9
+ // ---------------------------------------------------------------------------
10
+ // Constitutional Multiplier Formula
11
+ // ---------------------------------------------------------------------------
12
+ //
13
+ // reward = task_performance_score * constitutional_multiplier
14
+ //
15
+ // Multiplier rules (evaluated in priority order):
16
+ // harm_shield_trigger → -2.0 (strong negative)
17
+ // safety < 70 → -1.0 (negative)
18
+ // accuracy < 60 → 0.5 (dampened)
19
+ // all ACP dims >= 85 → 1.2 (bonus)
20
+ // all ACP dims >= 70 → 1.0 (full)
21
+ // otherwise → 0.5 (dampened — some dimension below 70, or no dimensions provided)
22
+ // ---------------------------------------------------------------------------
23
+
24
+ /**
25
+ * Compute the constitutional multiplier from an ACP snapshot.
26
+ *
27
+ * The multiplier is applied to the raw task performance score to produce
28
+ * the final reward. Constitutional violations produce negative or dampened
29
+ * rewards; full compliance produces a bonus.
30
+ */
31
+ export function computeConstitutionalMultiplier(
32
+ snapshot: ACPSnapshot
33
+ ): ConstitutionalMultiplierResult {
34
+ const { dimensions, harm_shield_triggered } = snapshot;
35
+
36
+ // Priority 1: Harm shield trigger → strong negative
37
+ if (harm_shield_triggered) {
38
+ return { multiplier: -2.0, reason: 'Harm shield triggered' };
39
+ }
40
+
41
+ // Priority 2: Safety below threshold → negative
42
+ const safety = dimensions['safety'];
43
+ if (safety !== undefined && safety < 70) {
44
+ return { multiplier: -1.0, reason: `Safety score ${safety} < 70` };
45
+ }
46
+
47
+ // Priority 3: Accuracy below threshold → dampened
48
+ const accuracy = dimensions['accuracy'];
49
+ if (accuracy !== undefined && accuracy < 60) {
50
+ return { multiplier: 0.5, reason: `Accuracy score ${accuracy} < 60` };
51
+ }
52
+
53
+ // Check all dimensions
54
+ const values = Object.values(dimensions);
55
+
56
+ // If no dimensions provided, default to dampened
57
+ if (values.length === 0) {
58
+ return { multiplier: 0.5, reason: 'No ACP dimensions provided' };
59
+ }
60
+
61
+ // Priority 4: All dimensions >= 85 → bonus
62
+ const allAbove85 = values.every((v) => v >= 85);
63
+ if (allAbove85) {
64
+ return { multiplier: 1.2, reason: 'All ACP dimensions >= 85 (bonus)' };
65
+ }
66
+
67
+ // Priority 5: All dimensions >= 70 → full
68
+ const allAbove70 = values.every((v) => v >= 70);
69
+ if (allAbove70) {
70
+ return { multiplier: 1.0, reason: 'All ACP dimensions >= 70 (full)' };
71
+ }
72
+
73
+ // Fallback: some dimension below 70 → dampened
74
+ const belowThreshold = Object.entries(dimensions)
75
+ .filter(([, v]) => v < 70)
76
+ .map(([k]) => k);
77
+ return {
78
+ multiplier: 0.5,
79
+ reason: `Dimensions below 70: ${belowThreshold.join(', ')}`,
80
+ };
81
+ }
82
+
83
+ /**
84
+ * Compute the full reward from a task performance score and ACP snapshot.
85
+ */
86
+ export function computeReward(
87
+ taskPerformanceScore: number,
88
+ snapshot: ACPSnapshot
89
+ ): { reward: number; multiplier: ConstitutionalMultiplierResult } {
90
+ const multiplier = computeConstitutionalMultiplier(snapshot);
91
+ return {
92
+ reward: taskPerformanceScore * multiplier.multiplier,
93
+ multiplier,
94
+ };
95
+ }
@@ -0,0 +1,73 @@
1
+ /**
2
+ * SIGNAL — Sovereign Intelligence for Governing Neural Agent Learning
3
+ * RewardLogger: persists reward signals to reward-log.jsonl
4
+ * @version 0.3.0
5
+ */
6
+
7
+ import { appendFileSync, readFileSync, existsSync } from 'node:fs';
8
+ import type { RewardSignal, RewardLogEntry, SignalConfig } from './types.js';
9
+ import { LearningMode, DEFAULT_SIGNAL_CONFIG } from './types.js';
10
+
11
+ /**
12
+ * RewardLogger writes and reads reward signals from a JSONL file.
13
+ *
14
+ * Each line in the file is a JSON-serialised RewardLogEntry containing
15
+ * the reward signal plus the learning mode at the time of logging.
16
+ */
17
+ export class RewardLogger {
18
+ private readonly logPath: string;
19
+
20
+ constructor(config: Partial<SignalConfig> = {}) {
21
+ this.logPath = config.reward_log_path ?? DEFAULT_SIGNAL_CONFIG.reward_log_path;
22
+ }
23
+
24
+ /**
25
+ * Append a reward signal to the log file.
26
+ * Attaches the current learning mode to the entry.
27
+ */
28
+ log(signal: RewardSignal, mode: LearningMode = LearningMode.STATIC): void {
29
+ const entry: RewardLogEntry = { ...signal, mode };
30
+ appendFileSync(this.logPath, JSON.stringify(entry) + '\n', 'utf-8');
31
+ }
32
+
33
+ /**
34
+ * Read the full reward history from the log file.
35
+ * Returns an empty array if the file does not exist.
36
+ */
37
+ readHistory(): RewardLogEntry[] {
38
+ if (!existsSync(this.logPath)) {
39
+ return [];
40
+ }
41
+
42
+ const raw = readFileSync(this.logPath, 'utf-8').trim();
43
+ if (raw.length === 0) {
44
+ return [];
45
+ }
46
+
47
+ return raw
48
+ .split('\n')
49
+ .filter((line) => line.length > 0)
50
+ .map((line) => JSON.parse(line) as RewardLogEntry);
51
+ }
52
+
53
+ /**
54
+ * Count of logged entries without loading full history.
55
+ */
56
+ count(): number {
57
+ if (!existsSync(this.logPath)) {
58
+ return 0;
59
+ }
60
+
61
+ const raw = readFileSync(this.logPath, 'utf-8').trim();
62
+ if (raw.length === 0) {
63
+ return 0;
64
+ }
65
+
66
+ return raw.split('\n').filter((line) => line.length > 0).length;
67
+ }
68
+
69
+ /** The path to the log file */
70
+ get path(): string {
71
+ return this.logPath;
72
+ }
73
+ }
package/src/types.ts CHANGED
@@ -1,55 +1,171 @@
1
1
  /**
2
- * SIGNAL — Event Bus and Pub/Sub for Agent Swarms
3
- * Core type definitions (skeleton)
4
- * @version 0.1.0-skeleton
2
+ * SIGNAL — Sovereign Intelligence for Governing Neural Agent Learning
3
+ * Core type definitions: reward signals, constitutional multipliers, learning modes
4
+ * @version 0.3.0
5
5
  */
6
6
 
7
- export type AgentId = string;
8
- export type Timestamp = string;
9
- export type Signature = string;
10
- export type EventId = string;
11
- export type SubscriptionId = string;
12
-
13
- /** A typed, timestamped, signed event */
14
- export interface Event {
15
- id: EventId;
16
- /** Dot-notation topic path, e.g. 'task.completed', 'mandate.revoked' */
17
- topic: string;
18
- /** Publishing agent */
19
- publisher: AgentId;
20
- /** ISO 8601 */
21
- publishedAt: Timestamp;
22
- /** Idempotency key — deduplicate on re-delivery */
23
- idempotencyKey: string;
24
- payload: unknown;
25
- signature: Signature;
26
- }
27
-
28
- /** A durable subscription to a topic filter */
29
- export interface Subscription {
30
- id: SubscriptionId;
31
- subscriberAgent: AgentId;
32
- /** Glob-style topic filter, e.g. 'task.*', '**' */
33
- topicFilter: string;
34
- /** Delivery guarantee */
35
- deliveryMode: 'at-least-once' | 'at-most-once';
36
- /** ISO 8601 */
37
- createdAt: Timestamp;
38
- /** ISO 8601 — null if still active */
39
- cancelledAt: Timestamp | null;
40
- }
41
-
42
- /** Transport adapter configuration */
43
- export interface TransportConfig {
44
- type: 'supabase-realtime' | 'websocket' | 'redis-streams' | 'in-memory';
45
- connectionString?: string;
46
- channel?: string;
47
- }
48
-
49
- /** Delivery receipt — confirms an event was received and processed */
50
- export interface DeliveryReceipt {
51
- eventId: EventId;
52
- subscriptionId: SubscriptionId;
53
- receivedAt: Timestamp;
54
- status: 'processed' | 'failed' | 'duplicate-skipped';
7
+ // ---------------------------------------------------------------------------
8
+ // Reward Signal
9
+ // ---------------------------------------------------------------------------
10
+
11
+ /** A computed reward signal for a single agent action */
12
+ export interface RewardSignal {
13
+ /** Unique identifier for the action being scored */
14
+ action_id: string;
15
+ /** Raw task performance score (0-100) */
16
+ task_performance_score: number;
17
+ /** Constitutional multiplier applied to the score (-2.0 to 1.2) */
18
+ constitutional_multiplier: number;
19
+ /** Computed reward: task_performance_score * constitutional_multiplier */
20
+ reward: number;
21
+ /** ACP dimension scores used to derive the multiplier */
22
+ acp_dimensions: Record<string, number>;
23
+ /** ISO 8601 timestamp */
24
+ timestamp: string;
25
+ }
26
+
27
+ // ---------------------------------------------------------------------------
28
+ // ACP Dimensions
29
+ // ---------------------------------------------------------------------------
30
+
31
+ /** Well-known ACP dimension names used in constitutional scoring */
32
+ export type ACPDimensionName =
33
+ | 'safety'
34
+ | 'accuracy'
35
+ | 'transparency'
36
+ | 'privacy'
37
+ | 'fairness'
38
+ | 'harm_shield'
39
+ | (string & {});
40
+
41
+ /** Input to the constitutional multiplier computation */
42
+ export interface ACPSnapshot {
43
+ /** Per-dimension ACP scores (0-100) */
44
+ dimensions: Record<string, number>;
45
+ /** Whether the harm shield was triggered during this action */
46
+ harm_shield_triggered: boolean;
47
+ }
48
+
49
+ // ---------------------------------------------------------------------------
50
+ // Constitutional Multiplier
51
+ // ---------------------------------------------------------------------------
52
+
53
+ /** Result of computing a constitutional multiplier */
54
+ export interface ConstitutionalMultiplierResult {
55
+ /** The computed multiplier value (-2.0 to 1.2) */
56
+ multiplier: number;
57
+ /** Human-readable reason for the multiplier */
58
+ reason: string;
59
+ }
60
+
61
+ // ---------------------------------------------------------------------------
62
+ // Learning Modes (AMD-15, AMD-20)
63
+ // ---------------------------------------------------------------------------
64
+
65
+ /**
66
+ * Three learning modes for the swarm:
67
+ * - STATIC (Mode 0): Frozen inference, no learning. Current default.
68
+ * - BATCH_LORA (Mode 1): AMD-15. Batch of 100 tasks + Godman approval required.
69
+ * - CONTINUOUS (Mode 2): AMD-20. Every 50 inference steps, reward fed back.
70
+ */
71
+ export enum LearningMode {
72
+ /** Mode 0 — Frozen inference. No learning. Default. */
73
+ STATIC = 0,
74
+ /** Mode 1 — Batch LoRA fine-tuning. 100 tasks + Godman approval. (AMD-15) */
75
+ BATCH_LORA = 1,
76
+ /** Mode 2 — Continuous RL. Every 50 inference steps. (AMD-20) */
77
+ CONTINUOUS = 2,
78
+ }
79
+
80
+ /** Audit record of a single learning mode transition */
81
+ export interface ModeTransition {
82
+ from: LearningMode;
83
+ to: LearningMode;
84
+ activated_by: string;
85
+ activated_at: string;
86
+ reason: string;
87
+ }
88
+
89
+ /** Snapshot of the current mode controller state */
90
+ export interface ModeControllerState {
91
+ current_mode: LearningMode;
92
+ paused: boolean;
93
+ transitions: ModeTransition[];
94
+ reward_count: number;
95
+ }
96
+
97
+ // ---------------------------------------------------------------------------
98
+ // Three Constitutional Rules (immutable)
99
+ // ---------------------------------------------------------------------------
100
+
101
+ /**
102
+ * SIGNAL Constitutional Rule 1:
103
+ * Only Godman (founder) can activate or pause learning modes.
104
+ * Kill switch: /pause-continuous-learning
105
+ */
106
+ export const RULE_GODMAN_KILL_SWITCH = {
107
+ id: 'SIGNAL-CONST-001',
108
+ rule: 'Only Godman activates or pauses learning modes. Kill switch: /pause-continuous-learning',
109
+ immutable: true,
110
+ } as const;
111
+
112
+ /**
113
+ * SIGNAL Constitutional Rule 2:
114
+ * The PRM judge must be constitutionally filtered.
115
+ * ACP dimensions are applied BEFORE reward computation, not after.
116
+ */
117
+ export const RULE_PRM_CONSTITUTIONALLY_FILTERED = {
118
+ id: 'SIGNAL-CONST-002',
119
+ rule: 'PRM judge is constitutionally filtered. ACP applied before reward computation.',
120
+ immutable: true,
121
+ } as const;
122
+
123
+ /**
124
+ * SIGNAL Constitutional Rule 3:
125
+ * SOUL.md content is never a training target.
126
+ * Constitutional reasoning cannot be optimised away by RL.
127
+ */
128
+ export const RULE_SOUL_NEVER_TRAINED = {
129
+ id: 'SIGNAL-CONST-003',
130
+ rule: 'Cannot target constitutional reasoning. SOUL.md content is never a training target.',
131
+ immutable: true,
132
+ } as const;
133
+
134
+ /** All three constitutional rules as a tuple */
135
+ export const CONSTITUTIONAL_RULES = [
136
+ RULE_GODMAN_KILL_SWITCH,
137
+ RULE_PRM_CONSTITUTIONALLY_FILTERED,
138
+ RULE_SOUL_NEVER_TRAINED,
139
+ ] as const;
140
+
141
+ export type ConstitutionalRule = (typeof CONSTITUTIONAL_RULES)[number];
142
+
143
+ // ---------------------------------------------------------------------------
144
+ // Reward Log Entry (persisted to reward-log.jsonl)
145
+ // ---------------------------------------------------------------------------
146
+
147
+ /** A single line in reward-log.jsonl */
148
+ export interface RewardLogEntry extends RewardSignal {
149
+ /** Learning mode at the time of logging */
150
+ mode: LearningMode;
55
151
  }
152
+
153
+ // ---------------------------------------------------------------------------
154
+ // SIGNAL Config
155
+ // ---------------------------------------------------------------------------
156
+
157
+ /** Top-level configuration for the SIGNAL protocol */
158
+ export interface SignalConfig {
159
+ /** Path to the reward log file (default: reward-log.jsonl) */
160
+ reward_log_path: string;
161
+ /** Batch size for Mode 1 (default: 100) */
162
+ batch_lora_threshold: number;
163
+ /** Step interval for Mode 2 (default: 50) */
164
+ continuous_step_interval: number;
165
+ }
166
+
167
+ export const DEFAULT_SIGNAL_CONFIG: SignalConfig = {
168
+ reward_log_path: 'reward-log.jsonl',
169
+ batch_lora_threshold: 100,
170
+ continuous_step_interval: 50,
171
+ };