principles-disciple 1.7.5 → 1.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/dist/commands/context.js +5 -15
  2. package/dist/commands/evolution-status.js +29 -48
  3. package/dist/commands/export.js +61 -8
  4. package/dist/commands/nocturnal-review.d.ts +24 -0
  5. package/dist/commands/nocturnal-review.js +265 -0
  6. package/dist/commands/nocturnal-rollout.d.ts +27 -0
  7. package/dist/commands/nocturnal-rollout.js +671 -0
  8. package/dist/commands/nocturnal-train.d.ts +25 -0
  9. package/dist/commands/nocturnal-train.js +919 -0
  10. package/dist/commands/pain.js +8 -21
  11. package/dist/config/defaults/runtime.d.ts +40 -0
  12. package/dist/config/defaults/runtime.js +44 -0
  13. package/dist/config/errors.d.ts +84 -0
  14. package/dist/config/errors.js +94 -0
  15. package/dist/config/index.d.ts +7 -0
  16. package/dist/config/index.js +7 -0
  17. package/dist/constants/diagnostician.d.ts +0 -4
  18. package/dist/constants/diagnostician.js +0 -4
  19. package/dist/constants/tools.d.ts +2 -2
  20. package/dist/constants/tools.js +1 -1
  21. package/dist/core/adaptive-thresholds.d.ts +186 -0
  22. package/dist/core/adaptive-thresholds.js +300 -0
  23. package/dist/core/config.d.ts +2 -38
  24. package/dist/core/config.js +6 -61
  25. package/dist/core/control-ui-db.d.ts +27 -0
  26. package/dist/core/control-ui-db.js +18 -0
  27. package/dist/core/event-log.d.ts +1 -2
  28. package/dist/core/event-log.js +0 -3
  29. package/dist/core/evolution-engine.js +1 -21
  30. package/dist/core/evolution-reducer.d.ts +7 -1
  31. package/dist/core/evolution-reducer.js +56 -4
  32. package/dist/core/evolution-types.d.ts +61 -9
  33. package/dist/core/evolution-types.js +31 -9
  34. package/dist/core/external-training-contract.d.ts +276 -0
  35. package/dist/core/external-training-contract.js +269 -0
  36. package/dist/core/local-worker-routing.d.ts +175 -0
  37. package/dist/core/local-worker-routing.js +525 -0
  38. package/dist/core/model-deployment-registry.d.ts +218 -0
  39. package/dist/core/model-deployment-registry.js +503 -0
  40. package/dist/core/model-training-registry.d.ts +295 -0
  41. package/dist/core/model-training-registry.js +475 -0
  42. package/dist/core/nocturnal-arbiter.d.ts +159 -0
  43. package/dist/core/nocturnal-arbiter.js +534 -0
  44. package/dist/core/nocturnal-candidate-scoring.d.ts +137 -0
  45. package/dist/core/nocturnal-candidate-scoring.js +266 -0
  46. package/dist/core/nocturnal-compliance.d.ts +175 -0
  47. package/dist/core/nocturnal-compliance.js +824 -0
  48. package/dist/core/nocturnal-dataset.d.ts +224 -0
  49. package/dist/core/nocturnal-dataset.js +443 -0
  50. package/dist/core/nocturnal-executability.d.ts +85 -0
  51. package/dist/core/nocturnal-executability.js +331 -0
  52. package/dist/core/nocturnal-export.d.ts +124 -0
  53. package/dist/core/nocturnal-export.js +275 -0
  54. package/dist/core/nocturnal-paths.d.ts +124 -0
  55. package/dist/core/nocturnal-paths.js +214 -0
  56. package/dist/core/nocturnal-trajectory-extractor.d.ts +242 -0
  57. package/dist/core/nocturnal-trajectory-extractor.js +307 -0
  58. package/dist/core/nocturnal-trinity.d.ts +311 -0
  59. package/dist/core/nocturnal-trinity.js +880 -0
  60. package/dist/core/path-resolver.js +2 -1
  61. package/dist/core/paths.d.ts +6 -0
  62. package/dist/core/paths.js +6 -0
  63. package/dist/core/principle-training-state.d.ts +121 -0
  64. package/dist/core/principle-training-state.js +321 -0
  65. package/dist/core/promotion-gate.d.ts +238 -0
  66. package/dist/core/promotion-gate.js +529 -0
  67. package/dist/core/session-tracker.d.ts +10 -0
  68. package/dist/core/session-tracker.js +14 -0
  69. package/dist/core/shadow-observation-registry.d.ts +217 -0
  70. package/dist/core/shadow-observation-registry.js +308 -0
  71. package/dist/core/training-program.d.ts +233 -0
  72. package/dist/core/training-program.js +433 -0
  73. package/dist/core/trajectory.d.ts +155 -1
  74. package/dist/core/trajectory.js +292 -8
  75. package/dist/core/workspace-context.d.ts +0 -6
  76. package/dist/core/workspace-context.js +0 -12
  77. package/dist/hooks/bash-risk.d.ts +57 -0
  78. package/dist/hooks/bash-risk.js +137 -0
  79. package/dist/hooks/edit-verification.d.ts +62 -0
  80. package/dist/hooks/edit-verification.js +256 -0
  81. package/dist/hooks/gate-block-helper.d.ts +44 -0
  82. package/dist/hooks/gate-block-helper.js +119 -0
  83. package/dist/hooks/gate.d.ts +18 -0
  84. package/dist/hooks/gate.js +62 -751
  85. package/dist/hooks/gfi-gate.d.ts +40 -0
  86. package/dist/hooks/gfi-gate.js +113 -0
  87. package/dist/hooks/pain.js +6 -9
  88. package/dist/hooks/progressive-trust-gate.d.ts +51 -0
  89. package/dist/hooks/progressive-trust-gate.js +89 -0
  90. package/dist/hooks/prompt.d.ts +11 -11
  91. package/dist/hooks/prompt.js +167 -77
  92. package/dist/hooks/subagent.js +43 -6
  93. package/dist/hooks/thinking-checkpoint.d.ts +37 -0
  94. package/dist/hooks/thinking-checkpoint.js +51 -0
  95. package/dist/http/principles-console-route.js +13 -3
  96. package/dist/i18n/commands.js +8 -8
  97. package/dist/index.js +129 -28
  98. package/dist/service/central-database.js +2 -1
  99. package/dist/service/control-ui-query-service.d.ts +1 -1
  100. package/dist/service/control-ui-query-service.js +3 -3
  101. package/dist/service/evolution-query-service.d.ts +1 -1
  102. package/dist/service/evolution-query-service.js +5 -5
  103. package/dist/service/evolution-worker.d.ts +52 -4
  104. package/dist/service/evolution-worker.js +328 -16
  105. package/dist/service/nocturnal-runtime.d.ts +183 -0
  106. package/dist/service/nocturnal-runtime.js +352 -0
  107. package/dist/service/nocturnal-service.d.ts +163 -0
  108. package/dist/service/nocturnal-service.js +787 -0
  109. package/dist/service/nocturnal-target-selector.d.ts +145 -0
  110. package/dist/service/nocturnal-target-selector.js +315 -0
  111. package/dist/service/phase3-input-filter.d.ts +48 -12
  112. package/dist/service/phase3-input-filter.js +84 -18
  113. package/dist/service/runtime-summary-service.d.ts +34 -10
  114. package/dist/service/runtime-summary-service.js +87 -48
  115. package/dist/tools/deep-reflect.js +2 -1
  116. package/dist/types/event-types.d.ts +4 -10
  117. package/dist/types/runtime-summary.d.ts +47 -0
  118. package/dist/types/runtime-summary.js +1 -0
  119. package/dist/types.d.ts +0 -3
  120. package/dist/types.js +0 -2
  121. package/openclaw.plugin.json +1 -1
  122. package/package.json +1 -1
  123. package/templates/langs/en/skills/pd-mentor/SKILL.md +5 -5
  124. package/templates/langs/zh/skills/pd-mentor/SKILL.md +5 -5
  125. package/templates/pain_settings.json +0 -6
  126. package/dist/commands/trust.d.ts +0 -4
  127. package/dist/commands/trust.js +0 -78
  128. package/dist/core/trust-engine.d.ts +0 -96
  129. package/dist/core/trust-engine.js +0 -286
@@ -6,6 +6,7 @@ import { PathResolver } from './path-resolver.js';
6
6
  import { SystemLogger } from './system-logger.js';
7
7
  import { shouldIgnorePainProtocolText } from './dictionary.js';
8
8
  import { TrajectoryRegistry } from './trajectory.js';
9
+ import { isCompleteDetectorMetadata } from './evolution-types.js';
9
10
  const PROBATION_SUCCESS_THRESHOLD = 3;
10
11
  const CIRCUIT_BREAKER_THRESHOLD = 3;
11
12
  const PROBATION_MAX_AGE_DAYS = 30;
@@ -147,15 +148,45 @@ export class EvolutionReducerImpl {
147
148
  SystemLogger.log(this.workspaceDir, 'PRINCIPLE_BLACKLISTED', `Principle creation blocked by blacklist for trigger: "${params.triggerPattern.slice(0, 50)}..."`);
148
149
  return null;
149
150
  }
151
+ // Evaluability defaults to 'manual_only' — the only way to get auto-trainable
152
+ // is to explicitly provide valid detectorMetadata.
153
+ // Enforce: deterministic/weak_heuristic requires complete detectorMetadata to be present.
154
+ let evaluability = params.evaluability ?? 'manual_only';
155
+ if (evaluability !== 'manual_only' && !isCompleteDetectorMetadata(params.detectorMetadata)) {
156
+ SystemLogger.log(this.workspaceDir, 'EVALUABILITY_DOWNGRADED', `Principle for painId "${params.painId}" requested evaluability="${evaluability}" without detectorMetadata — downgrading to "manual_only". Provide valid detectorMetadata to enable auto-training.`);
157
+ evaluability = 'manual_only';
158
+ }
150
159
  // Check if a principle already exists for this painId
151
160
  const existingPrinciple = [...this.principles.values()].find(p => p.source.painId === params.painId);
152
161
  if (existingPrinciple) {
153
- // Update existing principle instead of creating new one
162
+ // Update existing principle instead of creating new one.
163
+ // Apply the same evaluability normalization as new creation:
164
+ // deterministic/weak_heuristic without complete detectorMetadata → downgraded to manual_only.
154
165
  existingPrinciple.trigger = params.triggerPattern;
155
166
  existingPrinciple.action = params.action;
156
167
  existingPrinciple.text = `When ${params.triggerPattern}, then ${params.action}.`;
157
168
  existingPrinciple.version += 1;
158
- SystemLogger.log(this.workspaceDir, 'PRINCIPLE_UPDATED', `Principle ${existingPrinciple.id} updated from diagnostician: "${params.triggerPattern.slice(0, 50)}..."`);
169
+ if (params.evaluability !== undefined) {
170
+ // Apply normalization (params.evaluability may be invalid without complete metadata)
171
+ const normalizedEvaluability = (() => {
172
+ if (params.evaluability === 'manual_only' || isCompleteDetectorMetadata(params.detectorMetadata)) {
173
+ return params.evaluability;
174
+ }
175
+ SystemLogger.log(this.workspaceDir, 'EVALUABILITY_DOWNGRADED', `Principle update for painId "${params.painId}" requested evaluability="${params.evaluability}" without detectorMetadata — downgrading to "manual_only".`);
176
+ return 'manual_only';
177
+ })();
178
+ existingPrinciple.evaluability = normalizedEvaluability;
179
+ }
180
+ // Preserve detectorMetadata unless explicitly provided in this call.
181
+ // Accept only if complete (defense in depth — subagent should already filter).
182
+ if (isCompleteDetectorMetadata(params.detectorMetadata)) {
183
+ existingPrinciple.detectorMetadata = structuredClone(params.detectorMetadata);
184
+ }
185
+ else if (params.detectorMetadata !== undefined) {
186
+ // Malformed metadata provided — clear any existing metadata
187
+ existingPrinciple.detectorMetadata = undefined;
188
+ }
189
+ SystemLogger.log(this.workspaceDir, 'PRINCIPLE_UPDATED', `Principle ${existingPrinciple.id} updated from diagnostician: "${params.triggerPattern.slice(0, 50)}..." [evaluability: ${existingPrinciple.evaluability}]`);
159
190
  return existingPrinciple.id;
160
191
  }
161
192
  // Create new principle with generalized content
@@ -178,6 +209,10 @@ export class EvolutionReducerImpl {
178
209
  feedbackScore: 0,
179
210
  usageCount: 0,
180
211
  createdAt: now,
212
+ evaluability,
213
+ detectorMetadata: isCompleteDetectorMetadata(params.detectorMetadata)
214
+ ? structuredClone(params.detectorMetadata)
215
+ : undefined,
181
216
  };
182
217
  this.principles.set(principleId, principle);
183
218
  this.emitSync({
@@ -185,15 +220,20 @@ export class EvolutionReducerImpl {
185
220
  type: 'candidate_created',
186
221
  data: {
187
222
  painId: principle.source.painId,
223
+ painType: params.painType,
188
224
  principleId,
189
225
  trigger: params.triggerPattern,
190
226
  action: params.action,
191
227
  status: 'candidate',
228
+ evaluability,
229
+ detectorMetadata: isCompleteDetectorMetadata(params.detectorMetadata)
230
+ ? structuredClone(params.detectorMetadata)
231
+ : undefined,
192
232
  },
193
233
  });
194
234
  // Auto-promote since it's already generalized
195
235
  this.promote(principleId, 'diagnostician_generalized');
196
- SystemLogger.log(this.workspaceDir, 'PRINCIPLE_CREATED', `Principle ${principleId} created from diagnostician: "${params.triggerPattern.slice(0, 50)}..."`);
236
+ SystemLogger.log(this.workspaceDir, 'PRINCIPLE_CREATED', `Principle ${principleId} created from diagnostician: "${params.triggerPattern.slice(0, 50)}..." [evaluability: ${evaluability}]`);
197
237
  return principleId;
198
238
  }
199
239
  getStats() {
@@ -260,6 +300,13 @@ export class EvolutionReducerImpl {
260
300
  const existing = this.principles.get(data.principleId);
261
301
  if (existing) {
262
302
  existing.status = 'candidate';
303
+ // Apply evaluability from event if present (supports event replay)
304
+ if (data.evaluability) {
305
+ existing.evaluability = data.evaluability;
306
+ }
307
+ if (data.detectorMetadata) {
308
+ existing.detectorMetadata = structuredClone(data.detectorMetadata);
309
+ }
263
310
  return;
264
311
  }
265
312
  const principle = {
@@ -268,7 +315,7 @@ export class EvolutionReducerImpl {
268
315
  text: `When ${data.trigger}, then ${data.action}.`,
269
316
  source: {
270
317
  painId: data.painId,
271
- painType: 'tool_failure',
318
+ painType: data.painType ?? 'tool_failure',
272
319
  timestamp: ts,
273
320
  },
274
321
  trigger: data.trigger,
@@ -279,6 +326,11 @@ export class EvolutionReducerImpl {
279
326
  feedbackScore: 0,
280
327
  usageCount: 0,
281
328
  createdAt: ts,
329
+ // Evaluability defaults to 'manual_only' for replayed events without the field
330
+ evaluability: data.evaluability ?? 'manual_only',
331
+ detectorMetadata: data.detectorMetadata
332
+ ? structuredClone(data.detectorMetadata)
333
+ : undefined,
282
334
  };
283
335
  this.principles.set(principle.id, principle);
284
336
  }
@@ -8,10 +8,10 @@
8
8
  * - 5级成长路径:Seed → Forest
9
9
  */
10
10
  export declare enum EvolutionTier {
11
- Seed = 1,// 萌芽:只读 + 基础文档
12
- Sprout = 2,// 新芽:单文件编辑 (<50行)
13
- Sapling = 3,// 幼苗:多文件 + 测试 + 子智能体
14
- Tree = 4,// 大树:重构 + 风险路径
11
+ Seed = 1,// 起步:150行 + 3文件 + 子智能体(现代 AI 能力已足够强)
12
+ Sprout = 2,// 成长:300行 + 5文件
13
+ Sapling = 3,// 独当:500行 + 10文件 + 风险路径
14
+ Tree = 4,// 专家:1000行 + 20文件
15
15
  Forest = 5
16
16
  }
17
17
  export interface TierPermissions {
@@ -91,11 +91,6 @@ export interface EvolutionConfig {
91
91
  tier5Trivial: number;
92
92
  tier5Normal: number;
93
93
  };
94
- /** 信任分系统双轨运行时的配置 */
95
- dualTrack: {
96
- enabled: boolean;
97
- primarySystem: 'trust' | 'evolution';
98
- };
99
94
  }
100
95
  export declare const DEFAULT_EVOLUTION_CONFIG: EvolutionConfig;
101
96
  export interface ArchivedEventStats {
@@ -125,6 +120,45 @@ export interface TierPromotionEvent {
125
120
  newPermissions: TierPermissions;
126
121
  }
127
122
  export type PrincipleStatus = 'candidate' | 'probation' | 'active' | 'deprecated';
123
+ /**
124
+ * Evaluability classification — determines whether a P_xxx principle can enter
125
+ * automatic nocturnal targeting.
126
+ *
127
+ * - deterministic: Machine-checkable via deterministic rules/tool detectors
128
+ * - weak_heuristic: Checkable via heuristic signals, may have false positives
129
+ * - manual_only: No machine-checkable detector — stays in prompts only
130
+ */
131
+ export type PrincipleEvaluatorLevel = 'deterministic' | 'weak_heuristic' | 'manual_only';
132
+ /**
133
+ * Shared alias for PrincipleEvaluatorLevel — used by modules that reference
134
+ * the same evaluability classification without direct coupling to evolution-types.
135
+ * @deprecated Use PrincipleEvaluatorLevel directly. This alias exists for
136
+ * backwards compatibility with principle-training-state.ts.
137
+ */
138
+ export type Evaluability = PrincipleEvaluatorLevel;
139
+ /**
140
+ * Structured detector metadata for P_xxx principles.
141
+ * Allows the principle to enter automatic nocturnal targeting.
142
+ *
143
+ * If any required field is missing, the principle defaults to 'manual_only'.
144
+ */
145
+ export interface PrincipleDetectorSpec {
146
+ /** Topic/scenario tags where this detector applies */
147
+ applicabilityTags: string[];
148
+ /** Evidence that the principle was followed */
149
+ positiveSignals: string[];
150
+ /** Evidence that the principle was violated */
151
+ negativeSignals: string[];
152
+ /** Tool call sequences that indicate the principle is relevant */
153
+ toolSequenceHints: string[][];
154
+ /** Confidence in the detector's signal quality */
155
+ confidence: 'high' | 'medium' | 'low';
156
+ }
157
+ /**
158
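+ * Validates that a detector metadata object has a valid `confidence` level and non-empty string arrays for `applicabilityTags`, `positiveSignals`, and `negativeSignals`; `toolSequenceHints` is not checked.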
159
+ * Used as defense-in-depth before accepting detectorMetadata for auto-trainable principles.
160
+ */
161
+ export declare function isCompleteDetectorMetadata(meta: unknown): meta is PrincipleDetectorSpec;
128
162
  export interface Principle {
129
163
  id: string;
130
164
  version: number;
@@ -148,6 +182,18 @@ export interface Principle {
148
182
  createdAt: string;
149
183
  activatedAt?: string;
150
184
  deprecatedAt?: string;
185
+ /**
186
+ * Evaluability classification. Defaults to 'manual_only' if not set.
187
+ * Principles with 'manual_only' evaluability cannot enter automatic
188
+ * nocturnal targeting.
189
+ */
190
+ evaluability: PrincipleEvaluatorLevel;
191
+ /**
192
+ * Structured detector metadata. If present and valid, the principle
193
+ * may be auto-trainable (deterministic / weak_heuristic).
194
+ * Absent or malformed = 'manual_only' evaluability.
195
+ */
196
+ detectorMetadata?: PrincipleDetectorSpec;
151
197
  }
152
198
  export type EvolutionLoopEventType = 'pain_detected' | 'candidate_created' | 'principle_promoted' | 'principle_deprecated' | 'principle_rolled_back' | 'circuit_breaker_opened' | 'legacy_import';
153
199
  export interface PainDetectedData {
@@ -167,6 +213,12 @@ export interface CandidateCreatedData {
167
213
  trigger: string;
168
214
  action: string;
169
215
  status: 'candidate';
216
+ /** Pain type that generated this candidate — preserved on replay */
217
+ painType?: 'tool_failure' | 'subagent_error' | 'user_frustration';
218
+ /** Optional evaluability — defaults to 'manual_only' if omitted */
219
+ evaluability?: PrincipleEvaluatorLevel;
220
+ /** Optional detector metadata — absent = manual_only */
221
+ detectorMetadata?: PrincipleDetectorSpec;
170
222
  }
171
223
  export interface PrinciplePromotedData {
172
224
  principleId: string;
@@ -14,13 +14,19 @@ export var EvolutionTier;
14
14
  EvolutionTier[EvolutionTier["Sprout"] = 2] = "Sprout";
15
15
  EvolutionTier[EvolutionTier["Sapling"] = 3] = "Sapling";
16
16
  EvolutionTier[EvolutionTier["Tree"] = 4] = "Tree";
17
- EvolutionTier[EvolutionTier["Forest"] = 5] = "Forest"; // 森林:完全自主
17
+ EvolutionTier[EvolutionTier["Forest"] = 5] = "Forest"; // 大师:完全自主
18
18
  })(EvolutionTier || (EvolutionTier = {}));
19
19
  export const TIER_DEFINITIONS = [
20
- { tier: EvolutionTier.Seed, name: 'Seed', requiredPoints: 0, permissions: { maxLinesPerWrite: 20, maxFilesPerTask: 1, allowRiskPath: false, allowSubagentSpawn: false } },
21
- { tier: EvolutionTier.Sprout, name: 'Sprout', requiredPoints: 50, permissions: { maxLinesPerWrite: 50, maxFilesPerTask: 2, allowRiskPath: false, allowSubagentSpawn: false } },
22
- { tier: EvolutionTier.Sapling, name: 'Sapling', requiredPoints: 200, permissions: { maxLinesPerWrite: 200, maxFilesPerTask: 5, allowRiskPath: false, allowSubagentSpawn: true } },
23
- { tier: EvolutionTier.Tree, name: 'Tree', requiredPoints: 500, permissions: { maxLinesPerWrite: 500, maxFilesPerTask: 10, allowRiskPath: true, allowSubagentSpawn: true } },
20
+ // 2026-03-28: 大幅放宽限制,现代 AI 能力已很强
21
+ // Seed: 20 行提升到 150 行,允许 3 文件,允许子智能体
22
+ { tier: EvolutionTier.Seed, name: 'Seed', requiredPoints: 0, permissions: { maxLinesPerWrite: 150, maxFilesPerTask: 3, allowRiskPath: false, allowSubagentSpawn: true } },
23
+ // Sprout: 中等规模开发
24
+ { tier: EvolutionTier.Sprout, name: 'Sprout', requiredPoints: 50, permissions: { maxLinesPerWrite: 300, maxFilesPerTask: 5, allowRiskPath: false, allowSubagentSpawn: true } },
25
+ // Sapling: 较大规模开发,可访问风险路径(需 PLAN)
26
+ { tier: EvolutionTier.Sapling, name: 'Sapling', requiredPoints: 200, permissions: { maxLinesPerWrite: 500, maxFilesPerTask: 10, allowRiskPath: true, allowSubagentSpawn: true } },
27
+ // Tree: 大型重构
28
+ { tier: EvolutionTier.Tree, name: 'Tree', requiredPoints: 500, permissions: { maxLinesPerWrite: 1000, maxFilesPerTask: 20, allowRiskPath: true, allowSubagentSpawn: true } },
29
+ // Forest: 完全自主
24
30
  { tier: EvolutionTier.Forest, name: 'Forest', requiredPoints: 1000, permissions: { maxLinesPerWrite: Infinity, maxFilesPerTask: Infinity, allowRiskPath: true, allowSubagentSpawn: true } },
25
31
  ];
26
32
  export function getTierDefinition(tier) {
@@ -49,8 +55,24 @@ export const DEFAULT_EVOLUTION_CONFIG = {
49
55
  tier5Trivial: 0.1,
50
56
  tier5Normal: 0.5,
51
57
  },
52
- dualTrack: {
53
- enabled: true,
54
- primarySystem: 'evolution',
55
- },
56
58
  };
59
+ /**
60
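+ * Validates that a detector metadata object has a valid `confidence` level and non-empty string arrays for `applicabilityTags`, `positiveSignals`, and `negativeSignals`; `toolSequenceHints` is not checked.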
61
+ * Used as defense-in-depth before accepting detectorMetadata for auto-trainable principles.
62
+ */
63
+ export function isCompleteDetectorMetadata(meta) {
64
+ if (!meta || typeof meta !== 'object')
65
+ return false;
66
+ const m = meta;
67
+ const VALID_CONFIDENCE = ['high', 'medium', 'low'];
68
+ if (typeof m.confidence !== 'string' ||
69
+ !VALID_CONFIDENCE.includes(m.confidence)) {
70
+ return false;
71
+ }
72
+ const nonEmptyStringArray = (arr) => Array.isArray(arr) &&
73
+ arr.length > 0 &&
74
+ arr.every((s) => typeof s === 'string' && s.length > 0);
75
+ return (nonEmptyStringArray(m.applicabilityTags) &&
76
+ nonEmptyStringArray(m.positiveSignals) &&
77
+ nonEmptyStringArray(m.negativeSignals));
78
+ }
@@ -0,0 +1,276 @@
1
+ /**
2
+ * External Training Contract — Normalized Experiment Spec and Result Schema
3
+ * ========================================================================
4
+ *
5
+ * PURPOSE: Define the stable contract between the plugin and external trainer
6
+ * backends. The plugin produces a constrained experiment specification that an
7
+ * external trainer consumes. The trainer returns a normalized result that the
8
+ * plugin can register, evaluate, and gate for rollout.
9
+ *
10
+ * ARCHITECTURE:
11
+ * - Plugin is responsible for creating the experiment spec
12
+ * - Plugin is responsible for validating the trainer result
13
+ * - Plugin is responsible for registering lineage (train run → checkpoint → eval)
14
+ * - Plugin is responsible for invoking benchmark evaluation
15
+ * - Plugin is responsible for invoking promotion gate logic
16
+ * - Plugin is responsible for binding deployment only after gate approval
17
+ *
18
+ * DESIGN CONSTRAINTS:
19
+ * - ORPO-first: trainingMode must be 'orpo' for production runs
20
+ * - No real training inside the plugin
21
+ * - No direct deployment promotion from trainer output
22
+ * - No direct trainer writes to review/eval/deployment state
23
+ * - Backend-pluggable: same contract works for all backends
24
+ *
25
+ * CONTRACT GOALS:
26
+ * - support ORPO training for approved nocturnal exports
27
+ * - support multiple backend implementations behind one schema
28
+ * - preserve dataset / config / checkpoint lineage
29
+ * - remain valid on consumer hardware
30
+ * - fail closed when inputs are incomplete or inconsistent
31
+ */
32
+ /**
33
+ * Allowed backend identifiers.
34
+ *
35
+ * - `peft-trl-orpo`: primary reference implementation using PEFT + TRL ORPO
36
+ * - `unsloth-orpo`: compatible accelerated implementation using Unsloth
37
+ * - `dry-run`: validates paths/spec/environment only, no real training
38
+ */
39
+ export type TrainerBackendKind = 'peft-trl-orpo' | 'unsloth-orpo' | 'dry-run';
40
+ /**
41
+ * Hardware tier for training.
42
+ *
43
+ * - `consumer-gpu`: RTX 4090 24GB or equivalent (production target)
44
+ * - `small-gpu`: 8GB-16GB VRAM (compatibility target)
45
+ * - `cpu-experimental`: CPU-only experimental runs (dry-run or tiny models only)
46
+ */
47
+ export type HardwareTier = 'consumer-gpu' | 'small-gpu' | 'cpu-experimental';
48
+ /**
49
+ * Worker profiles supported for training.
50
+ *
51
+ * Phase 7 first rollout: `local-reader` only.
52
+ * `local-editor` requires explicit human approval to enable.
53
+ */
54
+ export type TrainableWorkerProfile = 'local-reader' | 'local-editor';
55
+ /**
56
+ * Training mode — Phase 7 production is ORPO-only.
57
+ */
58
+ export type TrainingMode = 'orpo';
59
+ /**
60
+ * Hyperparameters for ORPO training.
61
+ */
62
+ export interface TrainingHyperparameters {
63
+ learningRate: number;
64
+ batchSize: number;
65
+ gradientAccumulation: number;
66
+ loraRank: number;
67
+ loraAlpha: number;
68
+ loraDropout: number;
69
+ warmupRatio: number;
70
+ maxSteps: number;
71
+ maxSeqLength: number;
72
+ }
73
+ /**
74
+ * Budget constraints for a training experiment.
75
+ */
76
+ export interface TrainingBudget {
77
+ maxWallClockMinutes: number;
78
+ maxTrainTokens?: number;
79
+ }
80
+ /**
81
+ * Expected artifact from a successful training run.
82
+ */
83
+ export interface ExpectedArtifact {
84
+ checkpointName: string;
85
+ adapterFormat: 'peft-adapter';
86
+ }
87
+ /**
88
+ * The experiment specification sent to an external trainer.
89
+ * This defines WHAT to train, not HOW to train (backend-specific).
90
+ */
91
+ export interface TrainingExperimentSpec {
92
+ /** Unique identifier for this experiment */
93
+ experimentId: string;
94
+ /** Which backend to use */
95
+ backend: TrainerBackendKind;
96
+ /** Training mode — only 'orpo' is supported in Phase 7 */
97
+ trainingMode: TrainingMode;
98
+ /** Target worker profile for this experiment */
99
+ targetWorkerProfile: TrainableWorkerProfile;
100
+ /** Target model family to train */
101
+ targetModelFamily: string;
102
+ /** Hardware tier for this experiment */
103
+ hardwareTier: HardwareTier;
104
+ /** Reference to the ORPO export providing training data */
105
+ datasetExportId: string;
106
+ datasetExportPath: string;
107
+ /** Fingerprint of the dataset for lineage verification */
108
+ datasetFingerprint: string;
109
+ /** Reference to the benchmark export for eval */
110
+ benchmarkExportId: string;
111
+ /** Output directory for checkpoint artifacts */
112
+ outputDir: string;
113
+ /** Fingerprint of the training configuration */
114
+ configFingerprint: string;
115
+ /** Hash of the training code/contract version */
116
+ codeHash: string;
117
+ /** Training hyperparameters */
118
+ hyperparameters: TrainingHyperparameters;
119
+ /** Budget constraints */
120
+ budget: TrainingBudget;
121
+ /** Expected artifact from training */
122
+ expectedArtifact: ExpectedArtifact;
123
+ }
124
+ /**
125
+ * Training metrics recorded by the backend.
126
+ */
127
+ export interface TrainingMetrics {
128
+ wallClockMinutes: number;
129
+ finalLoss?: number;
130
+ tokensSeen?: number;
131
+ }
132
+ /**
133
+ * Artifact produced by a successful training run.
134
+ */
135
+ export interface TrainingArtifact {
136
+ adapterFormat: 'peft-adapter';
137
+ artifactPath: string;
138
+ }
139
+ /**
140
+ * Status of a training experiment.
141
+ */
142
+ export type ExperimentStatus = 'completed' | 'failed' | 'dry_run';
143
+ /**
144
+ * The result returned by an external trainer after execution.
145
+ * This defines the output contract — all backends must return the same shape.
146
+ */
147
+ export interface TrainingExperimentResult {
148
+ /** Experiment ID (must match the spec's experimentId) */
149
+ experimentId: string;
150
+ /** Which backend was used */
151
+ backend: TrainerBackendKind;
152
+ /** Final status of the experiment */
153
+ status: ExperimentStatus;
154
+ /** Registered training run ID (plugin-side) */
155
+ trainRunId?: string;
156
+ /** Registered checkpoint ID (plugin-side) */
157
+ checkpointId?: string;
158
+ /** Checkpoint reference string (for lineage) */
159
+ checkpointRef?: string;
160
+ /** Target worker profile */
161
+ targetWorkerProfile: TrainableWorkerProfile;
162
+ /** Target model family */
163
+ targetModelFamily: string;
164
+ /** Dataset fingerprint (for lineage verification) */
165
+ datasetFingerprint: string;
166
+ /** Config fingerprint (for lineage verification) */
167
+ configFingerprint: string;
168
+ /** Code hash (for lineage verification) */
169
+ codeHash: string;
170
+ /** Training metrics */
171
+ metrics?: TrainingMetrics;
172
+ /** Produced artifact (only if status === 'completed') */
173
+ artifact?: TrainingArtifact;
174
+ /** Failure reason (only if status === 'failed') */
175
+ failureReason?: string;
176
+ /** ISO-8601 creation timestamp */
177
+ createdAt: string;
178
+ }
179
+ /**
180
+ * Validation error for trainer result verification.
181
+ */
182
+ export interface ValidationError {
183
+ field: string;
184
+ expected: string;
185
+ actual: string;
186
+ reason: string;
187
+ }
188
+ /**
189
+ * Result of validating a trainer result against the experiment spec.
190
+ */
191
+ export interface ValidationResult {
192
+ valid: boolean;
193
+ errors: ValidationError[];
194
+ }
195
+ /**
196
+ * Validate that a trainer result matches the experiment spec.
197
+ *
198
+ * FAILS CLOSED on any mismatch — a checkpoint with invalid lineage must not
199
+ * be registered or promoted.
200
+ *
201
+ * Validation rules:
202
+ * 1. experimentId must match
203
+ * 2. backend must match
204
+ * 3. targetWorkerProfile must match
205
+ * 4. targetModelFamily must match
206
+ * 5. datasetFingerprint must match
207
+ * 6. configFingerprint must match
208
+ * 7. codeHash must match
209
+ * 8. dry-run must not produce a deployable checkpoint
210
+ *
211
+ * @param spec - The original experiment spec
212
+ * @param result - The trainer result to validate
213
+ * @returns ValidationResult indicating pass/fail and any errors
214
+ */
215
+ export declare function validateTrainerResult(spec: TrainingExperimentSpec, result: TrainingExperimentResult): ValidationResult;
216
+ /**
217
+ * Generate a fingerprint for a configuration object.
218
+ * Used for configFingerprint in the experiment spec.
219
+ */
220
+ export declare function computeConfigFingerprint(config: Partial<TrainingHyperparameters>): string;
221
+ /**
222
+ * Generate a fingerprint for a dataset export.
223
+ * Used for datasetFingerprint in the experiment spec.
224
+ *
225
+ * Combines file content hash with sampleCount to detect:
226
+ * - Content changes (file modified/replaced)
227
+ * - Sample count changes (different export)
228
+ *
229
+ * If the file cannot be read, falls back to path+count hash (legacy behavior).
230
+ */
231
+ export declare function computeDatasetFingerprint(exportPath: string, sampleCount: number): string;
232
+ /**
233
+ * Generate a code hash for the training contract version.
234
+ * Used for codeHash in the experiment spec.
235
+ *
236
+ * Hashes the actual contract source file content so any change to the
237
+ * contract produces a different hash, ensuring lineage integrity.
238
+ *
239
+ * Falls back to version string + timestamp if source cannot be read.
240
+ */
241
+ export declare function computeCodeHash(): string;
242
+ /**
243
+ * Generate a new experiment ID.
244
+ */
245
+ export declare function generateExperimentId(): string;
246
+ /**
247
+ * Validate that a hardware tier is appropriate for the backend.
248
+ *
249
+ * @param backend - The backend being used
250
+ * @param tier - The hardware tier
251
+ * @throws Error if the combination is not supported
252
+ */
253
+ export declare function validateHardwareTier(backend: TrainerBackendKind, tier: HardwareTier): void;
254
+ /**
255
+ * Get the default hardware tier for a backend.
256
+ */
257
+ export declare function getDefaultHardwareTier(backend: TrainerBackendKind): HardwareTier;
258
+ /**
259
+ * Valid model family patterns for local-reader profile.
260
+ * Used for family validation in the training contract.
261
+ */
262
+ export declare const READER_FAMILY_PATTERNS: string[];
263
+ /**
264
+ * Valid model family patterns for local-editor profile.
265
+ * Used for family validation in the training contract.
266
+ */
267
+ export declare const EDITOR_FAMILY_PATTERNS: string[];
268
+ /**
269
+ * Check if a model family is valid for a worker profile.
270
+ */
271
+ export declare function isValidModelFamilyForProfile(family: string, profile: TrainableWorkerProfile): boolean;
272
+ /**
273
+ * Phase 7 first rollout is limited to local-reader.
274
+ * This flag controls whether local-editor is allowed.
275
+ */
276
+ export declare const LOCAL_EDITOR_ENABLED = false;