@holoscript/framework 6.0.3 → 6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/CHANGELOG.md +1 -2
  2. package/ROADMAP.md +68 -66
  3. package/dist/{InvisibleWallet-BB6tFvRA.d.cts → InvisibleWallet-EFiuaLn3.d.cts} +1 -1
  4. package/dist/{OrchestratorAgent-BvWgf9uw.d.cts → OrchestratorAgent-CrLDGNL6.d.cts} +1 -1
  5. package/dist/agents/index.cjs +11 -10
  6. package/dist/agents/index.d.cts +4 -16
  7. package/dist/ai/index.cjs +2 -2
  8. package/dist/behavior.cjs +10 -0
  9. package/dist/economy/index.cjs +4 -4
  10. package/dist/economy/index.d.cts +2 -2
  11. package/dist/index.cjs +33 -11
  12. package/dist/index.d.cts +3 -3
  13. package/dist/swarm/index.cjs +3 -0
  14. package/package.json +14 -9
  15. package/src/__tests__/bounty-marketplace.test.ts +53 -21
  16. package/src/__tests__/delegation.test.ts +1 -4
  17. package/src/__tests__/done-log-audit.test.ts +38 -46
  18. package/src/__tests__/framework.test.ts +172 -53
  19. package/src/__tests__/goal-synthesizer.test.ts +9 -6
  20. package/src/__tests__/presence.test.ts +1 -1
  21. package/src/__tests__/protocol-agent.test.ts +12 -11
  22. package/src/__tests__/revenue-splitter.test.ts +22 -15
  23. package/src/__tests__/scenario-driven-todo.test.ts +55 -35
  24. package/src/__tests__/self-improve.test.ts +28 -9
  25. package/src/__tests__/service-lifecycle.test.ts +9 -3
  26. package/src/__tests__/skill-router.test.ts +3 -3
  27. package/src/agents/CulturalMemory.ts +6 -6
  28. package/src/agents/DelegationTraceHooks.ts +560 -0
  29. package/src/agents/FederatedRegistryAdapter.ts +1 -1
  30. package/src/agents/NormEngine.ts +3 -8
  31. package/src/agents/OrchestratorAgent.ts +1 -1
  32. package/src/agents/TaskDelegationService.ts +5 -9
  33. package/src/agents/__tests__/AgentWalletRegistry.test.ts +5 -4
  34. package/src/agents/__tests__/CrossRealityHandoff.test.ts +9 -3
  35. package/src/agents/__tests__/DelegationTraceHooks.test.ts +390 -0
  36. package/src/agents/__tests__/TaskDelegationService.test.ts +4 -2
  37. package/src/agents/spatial-comms/Layer1RealTime.ts +36 -19
  38. package/src/agents/spatial-comms/Layer2A2A.ts +1 -3
  39. package/src/agents/spatial-comms/Layer3MCP.ts +13 -4
  40. package/src/agents/spatial-comms/ProtocolTypes.ts +5 -2
  41. package/src/agents/spatial-comms/examples/multi-agent-world-creation.ts +2 -2
  42. package/src/ai/HoloScriptGenerator.ts +2 -2
  43. package/src/ai/__tests__/PerceptionSystem.prod.test.ts +1 -1
  44. package/src/ai/__tests__/PerceptionSystem.test.ts +14 -14
  45. package/src/ai/__tests__/SteeringBehaviors.prod.test.ts +1 -1
  46. package/src/ai/index.ts +5 -1
  47. package/src/board/audit.ts +17 -6
  48. package/src/board/board-ops.ts +45 -15
  49. package/src/board/board-types.ts +94 -20
  50. package/src/delegation.ts +5 -3
  51. package/src/distributed-claimer.ts +13 -2
  52. package/src/economy/BountyManager.ts +40 -18
  53. package/src/economy/KnowledgeMarketplace.ts +27 -8
  54. package/src/economy/PaymentWebhookService.ts +0 -1
  55. package/src/economy/RevenueSplitter.ts +2 -4
  56. package/src/economy/UnifiedBudgetOptimizer.ts +8 -9
  57. package/src/economy/_core-stubs.ts +1 -1
  58. package/src/economy/x402-facilitator.ts +17 -8
  59. package/src/index.ts +16 -12
  60. package/src/knowledge/__tests__/knowledge-consolidator.test.ts +138 -89
  61. package/src/knowledge/__tests__/knowledge-store-vector.test.ts +59 -16
  62. package/src/knowledge/brain.ts +7 -7
  63. package/src/knowledge/consolidation.ts +16 -16
  64. package/src/knowledge/knowledge-consolidator.ts +60 -30
  65. package/src/knowledge/knowledge-store.ts +83 -45
  66. package/src/learning/ProceduralCompiler.ts +6 -1
  67. package/src/learning/learning/MemoryConsolidator.ts +102 -0
  68. package/src/learning/learning/MemoryScorer.ts +69 -0
  69. package/src/learning/learning/ProceduralCompiler.ts +45 -0
  70. package/src/learning/learning/SemanticClusterer.ts +66 -0
  71. package/src/llm/llm-adapter.ts +24 -10
  72. package/src/mesh/index.ts +37 -17
  73. package/src/protocol/goal-synthesizer.ts +24 -34
  74. package/src/protocol/implementations.ts +91 -22
  75. package/src/protocol/micro-phase-decomposer.ts +25 -17
  76. package/src/protocol/micro-step-decomposer.test.ts +104 -39
  77. package/src/protocol-agent.test.ts +17 -7
  78. package/src/protocol-agent.ts +45 -42
  79. package/src/self-improve/absorb-scanner.ts +9 -6
  80. package/src/self-improve/evolution-engine.ts +36 -18
  81. package/src/self-improve/framework-absorber.ts +21 -16
  82. package/src/self-improve/index.ts +2 -10
  83. package/src/self-improve/prompt-optimizer.ts +31 -19
  84. package/src/self-improve/test-generator.ts +16 -12
  85. package/src/skill-router.ts +7 -6
  86. package/src/swarm/messaging/GossipProtocol.ts +1 -1
  87. package/src/swarm/messaging/__tests__/BroadcastChannel.prod.test.ts +31 -9
  88. package/src/swarm/messaging/__tests__/GossipProtocol.prod.test.ts +21 -7
  89. package/src/swarm/messaging/__tests__/SwarmEventBus.prod.test.ts +24 -8
  90. package/src/swarm/messaging/__tests__/SwarmEventBus.test.ts +6 -2
  91. package/src/team.ts +277 -122
  92. package/src/training/scripts/generate-spatial-dataset.ts +1 -1
  93. package/src/training/training/LRScheduler.ts +377 -0
  94. package/src/training/training/QualityScoringPipeline.ts +139 -0
  95. package/src/training/training/SoftDedup.ts +461 -0
  96. package/src/training/training/SparsityMonitor.ts +685 -0
  97. package/src/training/training/SparsityMonitorTypes.ts +209 -0
  98. package/src/training/training/SpatialTrainingDataGenerator.ts +1526 -0
  99. package/src/training/training/SpatialTrainingDataTypes.ts +216 -0
  100. package/src/training/training/TrainingPipelineConfig.ts +215 -0
  101. package/src/training/training/__tests__/CorpusValidation.test.ts +87 -0
  102. package/src/training/training/__tests__/LRScheduler.test.ts +592 -0
  103. package/src/training/training/__tests__/SoftDedup.test.ts +415 -0
  104. package/src/training/training/__tests__/SparsityMonitor.test.ts +1623 -0
  105. package/src/training/training/__tests__/SpatialCorpusValidation.test.ts +72 -0
  106. package/src/training/training/__tests__/SpatialTrainingDataGenerator.test.ts +1244 -0
  107. package/src/training/training/__tests__/TrainingMonkeyIntegration.test.ts +897 -0
  108. package/src/training/training/__tests__/TrainingPipelineConfig.test.ts +202 -0
  109. package/src/training/training/__tests__/schema.test.ts +72 -0
  110. package/src/training/training/__tests__/training-constants.test.ts +106 -0
  111. package/src/training/training/__tests__/trait-mappings.test.ts +81 -0
  112. package/src/training/training/constants.ts +94 -0
  113. package/src/training/training/index.ts +17 -0
  114. package/src/training/training/schema.ts +147 -0
  115. package/src/training/training/scripts/generate-novel-use-cases-dataset.ts +272 -0
  116. package/src/training/training/scripts/generate-spatial-dataset.ts +521 -0
  117. package/src/training/training/trainingmonkey/TrainingMonkeyIntegration.ts +477 -0
  118. package/src/training/training/trainingmonkey/TrainingMonkeyTypes.ts +230 -0
  119. package/src/training/training/trainingmonkey/index.ts +26 -0
  120. package/src/training/training/trait-mappings.ts +157 -0
  121. package/src/types.ts +2 -7
  122. package/ALL-test-results.json +0 -1
  123. package/LICENSE +0 -21
  124. package/dist/AgentManifest-CB4xM-Ma.d.ts +0 -704
  125. package/dist/BehaviorTree-BrBFECv5.d.ts +0 -103
  126. package/dist/InvisibleWallet-rtRrBOA8.d.ts +0 -1732
  127. package/dist/OrchestratorAgent-Q_CbVTmO.d.ts +0 -798
  128. package/dist/agents/index.d.ts +0 -1788
  129. package/dist/agents/index.js +0 -4695
  130. package/dist/ai/index.d.ts +0 -1753
  131. package/dist/ai/index.js +0 -5244
  132. package/dist/behavior.d.ts +0 -130
  133. package/dist/behavior.js +0 -407
  134. package/dist/economy/index.d.ts +0 -747
  135. package/dist/economy/index.js +0 -3617
  136. package/dist/implementations-D9T3un9D.d.ts +0 -236
  137. package/dist/index.d.ts +0 -1729
  138. package/dist/index.js +0 -24277
  139. package/dist/learning/index.d.ts +0 -104
  140. package/dist/learning/index.js +0 -189
  141. package/dist/negotiation/index.d.ts +0 -610
  142. package/dist/negotiation/index.js +0 -931
  143. package/dist/skills/index.d.ts +0 -289
  144. package/dist/skills/index.js +0 -1079
  145. package/dist/swarm/index.d.ts +0 -2433
  146. package/dist/swarm/index.js +0 -5221
  147. package/dist/training/index.d.ts +0 -1734
  148. package/dist/training/index.js +0 -2687
  149. package/extract-failures.js +0 -10
  150. package/src/training/training/data/novel-use-cases.jsonl +0 -153
  151. package/src/training/training/data/spatial-reasoning-10k.jsonl +0 -9354
  152. package/src/types/core-stubs.d.ts +0 -113
  153. package/test-output.txt +0 -0
  154. package/test-result.json +0 -1
  155. package/tsc-errors.txt +0 -4
  156. package/tsc_output.txt +0 -0
  157. package/typescript-errors-2.txt +0 -0
  158. package/typescript-errors.txt +0 -22
  159. package/vitest-log-utf8.txt +0 -268
  160. package/vitest-log.txt +0 -0
@@ -1,1734 +0,0 @@
1
- /**
2
- * Spatial Training Data Types
3
- *
4
- * Type definitions for the spatial reasoning training data pipeline.
5
- * Used to generate instruction-response pairs from HoloScript compositions
6
- * with spatial constraints (spatial_adjacent, spatial_contains, spatial_reachable)
7
- * for fine-tuning LLMs on spatial reasoning tasks.
8
- *
9
- * @module training/SpatialTrainingDataTypes
10
- */
11
- /**
12
- * Difficulty levels for generated spatial reasoning examples.
13
- *
14
- * - basic: 2 objects, single spatial relationship
15
- * - intermediate: 3-5 objects, multiple relationships, mixed constraint types
16
- * - advanced: 6+ objects, occlusion, nested containment, chained reachability
17
- */
18
- type SpatialDifficulty = 'basic' | 'intermediate' | 'advanced';
19
- /**
20
- * The three core spatial relationship types from HoloScript's constraint system.
21
- */
22
- type SpatialRelationshipType = 'spatial_adjacent' | 'spatial_contains' | 'spatial_reachable';
23
- /**
24
- * A scene object with spatial properties for training data generation.
25
- */
26
- interface SceneObject {
27
- /** Unique identifier for the object */
28
- id: string;
29
- /** Object type (e.g., 'cube', 'sphere', 'zone', 'npc') */
30
- type: string;
31
- /** 3D position */
32
- position: {
33
- x: number;
34
- y: number;
35
- z: number;
36
- };
37
- /** 3D scale */
38
- scale: {
39
- x: number;
40
- y: number;
41
- z: number;
42
- };
43
- /** Bounding box (for containment checks) */
44
- bounds?: {
45
- min: {
46
- x: number;
47
- y: number;
48
- z: number;
49
- };
50
- max: {
51
- x: number;
52
- y: number;
53
- z: number;
54
- };
55
- };
56
- /** Whether this object acts as an obstacle */
57
- isObstacle?: boolean;
58
- /** Optional color for visual description */
59
- color?: string;
60
- /** Optional geometry type */
61
- geometry?: string;
62
- }
63
- /**
64
- * A spatial relationship between two scene objects.
65
- */
66
- interface SpatialRelationship {
67
- /** The relationship type */
68
- type: SpatialRelationshipType;
69
- /** Source object ID */
70
- sourceId: string;
71
- /** Target object ID */
72
- targetId: string;
73
- /** Whether the relationship is satisfied (true=positive, false=negative) */
74
- satisfied: boolean;
75
- /** Constraint parameters */
76
- params: SpatialRelationshipParams;
77
- }
78
- /**
79
- * Parameters for spatial relationship constraints.
80
- */
81
- interface SpatialRelationshipParams {
82
- /** For adjacent: maximum distance */
83
- maxDistance?: number;
84
- /** For adjacent: minimum distance */
85
- minDistance?: number;
86
- /** For adjacent: axis restriction */
87
- axis?: string;
88
- /** For contains: margin */
89
- margin?: number;
90
- /** For contains: strict mode */
91
- strict?: boolean;
92
- /** For reachable: max path length */
93
- maxPathLength?: number;
94
- /** For reachable: obstacle types */
95
- obstacleTypes?: string[];
96
- /** For reachable: algorithm */
97
- algorithm?: string;
98
- }
99
- /**
100
- * A complete scene with objects and spatial relationships.
101
- */
102
- interface SpatialScene {
103
- /** Scene name/identifier */
104
- name: string;
105
- /** Objects in the scene */
106
- objects: SceneObject[];
107
- /** Spatial relationships between objects */
108
- relationships: SpatialRelationship[];
109
- /** Difficulty level */
110
- difficulty: SpatialDifficulty;
111
- /** The HoloScript composition source */
112
- holoScriptSource: string;
113
- }
114
- /**
115
- * A single training example with instruction and response.
116
- * Suitable for fine-tuning LLMs on spatial reasoning tasks.
117
- */
118
- interface SpatialTrainingExample {
119
- /** Unique example ID */
120
- id: string;
121
- /** The instruction/question for the LLM */
122
- instruction: string;
123
- /** The expected response/answer */
124
- response: string;
125
- /** The HoloScript source that defines the scene */
126
- context: string;
127
- /** Spatial relationship type being tested */
128
- relationshipType: SpatialRelationshipType;
129
- /** Whether this is a positive or negative example */
130
- isPositive: boolean;
131
- /** Difficulty level */
132
- difficulty: SpatialDifficulty;
133
- /** Tags for categorization */
134
- tags: string[];
135
- }
136
- /**
137
- * Configuration options for the SpatialTrainingDataGenerator.
138
- */
139
- interface SpatialGeneratorConfig {
140
- /** Number of examples to generate per relationship type per difficulty level */
141
- examplesPerCategory?: number;
142
- /** Which relationship types to include */
143
- relationshipTypes?: SpatialRelationshipType[];
144
- /** Which difficulty levels to include */
145
- difficultyLevels?: SpatialDifficulty[];
146
- /** Ratio of positive to negative examples (default 0.5 = equal) */
147
- positiveRatio?: number;
148
- /** Random seed for reproducibility (if provided) */
149
- seed?: number;
150
- /** Whether to include HoloScript context in output */
151
- includeContext?: boolean;
152
- }
153
- /**
154
- * Statistics about generated training data.
155
- */
156
- interface SpatialGeneratorStats {
157
- /** Total number of examples generated */
158
- totalExamples: number;
159
- /** Breakdown by relationship type */
160
- byRelationship: Record<SpatialRelationshipType, number>;
161
- /** Breakdown by difficulty */
162
- byDifficulty: Record<SpatialDifficulty, number>;
163
- /** Breakdown by positive/negative */
164
- positiveCount: number;
165
- negativeCount: number;
166
- /** Number of unique templates used */
167
- uniqueTemplatesUsed: number;
168
- }
169
- /**
170
- * JSONL line format for fine-tuning output.
171
- * Each line in the JSONL file is one of these objects.
172
- */
173
- interface SpatialTrainingJSONLEntry {
174
- /** The instruction/question */
175
- instruction: string;
176
- /** The expected response */
177
- response: string;
178
- /** Metadata for filtering/analysis */
179
- metadata: {
180
- id: string;
181
- relationship_type: SpatialRelationshipType;
182
- is_positive: boolean;
183
- difficulty: SpatialDifficulty;
184
- tags: string[];
185
- };
186
- }
187
-
188
- /**
189
- * Spatial Training Data Generator
190
- *
191
- * Generates labeled spatial reasoning examples from HoloScript compositions
192
- * with spatial constraints (spatial_adjacent, spatial_contains, spatial_reachable).
193
- *
194
- * Outputs instruction-response pairs in JSONL format suitable for fine-tuning
195
- * LLMs on spatial reasoning tasks.
196
- *
197
- * Features:
198
- * - 12+ instruction templates per spatial relationship type (per G.002 mandate)
199
- * - Randomized scene parameters (object counts, positions, scales, relationships)
200
- * - Both positive and negative examples for each spatial relationship type
201
- * - Configurable difficulty levels (basic, intermediate, advanced)
202
- *
203
- * @module training/SpatialTrainingDataGenerator
204
- */
205
-
206
- /**
207
- * Generates labeled spatial reasoning examples from HoloScript compositions.
208
- *
209
- * @example
210
- * ```typescript
211
- * const generator = new SpatialTrainingDataGenerator({ seed: 42 });
212
- * const examples = generator.generate();
213
- * const jsonl = generator.exportJSONL(examples);
214
- * ```
215
- */
216
- declare class SpatialTrainingDataGenerator {
217
- private readonly config;
218
- private rng;
219
- private exampleCounter;
220
- constructor(config?: SpatialGeneratorConfig);
221
- /**
222
- * Generate all spatial training examples based on configuration.
223
- */
224
- generate(): SpatialTrainingExample[];
225
- /**
226
- * Generate examples for a specific relationship type.
227
- */
228
- generateForRelationship(relType: SpatialRelationshipType): SpatialTrainingExample[];
229
- /**
230
- * Generate examples for a specific difficulty level.
231
- */
232
- generateForDifficulty(difficulty: SpatialDifficulty): SpatialTrainingExample[];
233
- /**
234
- * Export examples as JSONL string (one JSON object per line).
235
- */
236
- exportJSONL(examples: SpatialTrainingExample[]): string;
237
- /**
238
- * Export examples as JSON array string.
239
- */
240
- exportJSON(examples: SpatialTrainingExample[]): string;
241
- /**
242
- * Get statistics about generated examples.
243
- */
244
- getStats(examples: SpatialTrainingExample[]): SpatialGeneratorStats;
245
- /**
246
- * Reset the generator with a new seed.
247
- */
248
- reseed(seed: number): void;
249
- /**
250
- * Generate a spatial scene for the given relationship type and difficulty.
251
- */
252
- generateScene(relType: SpatialRelationshipType, difficulty: SpatialDifficulty, isPositive: boolean): SpatialScene;
253
- private generateAdjacentScene;
254
- private buildAdjacentHoloScript;
255
- private generateContainsScene;
256
- private buildContainsHoloScript;
257
- private generateReachableScene;
258
- private buildReachableHoloScript;
259
- private generateExample;
260
- private getObjectCount;
261
- private randomPosition;
262
- private randomScale;
263
- private pickUniqueName;
264
- }
265
- /**
266
- * Create a new SpatialTrainingDataGenerator with the given configuration.
267
- */
268
- declare function createSpatialTrainingDataGenerator(config?: SpatialGeneratorConfig): SpatialTrainingDataGenerator;
269
-
270
- /**
271
- * SparsityMonitorTypes.ts
272
- *
273
- * Type definitions for SNN (Spiking Neural Network) sparsity monitoring
274
- * in the self-improvement pipeline. Tracks spike rates, activation sparsity,
275
- * energy efficiency metrics, and detects sparsity regime violations.
276
- *
277
- * Key threshold (W.041): SNN layers must maintain >= 93% activation sparsity
278
- * to preserve the energy efficiency advantages of spike-based computation.
279
- *
280
- * @module training/SparsityMonitorTypes
281
- */
282
- /**
283
- * Metrics for a single SNN layer during a simulation timestep or batch.
284
- */
285
- interface SNNLayerMetrics {
286
- /** Unique layer identifier (e.g., "lif_hidden_1", "snn_output") */
287
- layerId: string;
288
- /** Total number of neurons in this layer */
289
- neuronCount: number;
290
- /** Number of neurons that spiked (fired) in this timestep/batch */
291
- spikeCount: number;
292
- /** Spike rate = spikeCount / neuronCount (0-1) */
293
- spikeRate: number;
294
- /** Activation sparsity = 1 - spikeRate (0-1); higher = sparser = better */
295
- activationSparsity: number;
296
- /** Average membrane potential across neurons (for LIF models) */
297
- avgMembranePotential?: number;
298
- /** Simulation timestep index */
299
- timestep: number;
300
- }
301
- /**
302
- * A point-in-time snapshot of sparsity metrics across all SNN layers.
303
- */
304
- interface SparsitySnapshot {
305
- /** ISO 8601 timestamp of measurement */
306
- timestamp: string;
307
- /** Per-layer metrics at this point in time */
308
- layers: SNNLayerMetrics[];
309
- /** Aggregate sparsity across all layers (weighted by neuron count) */
310
- aggregateSparsity: number;
311
- /** Aggregate spike rate across all layers */
312
- aggregateSpikeRate: number;
313
- /** Total neurons across all layers */
314
- totalNeurons: number;
315
- /** Total spikes across all layers */
316
- totalSpikes: number;
317
- /** Energy efficiency metrics at this snapshot */
318
- energyEfficiency: EnergyEfficiencyMetrics;
319
- /** Any violations detected at this snapshot */
320
- violations: SparsityViolation[];
321
- }
322
- /**
323
- * Theoretical energy efficiency metrics comparing SNN spike-based
324
- * computation vs. equivalent dense (ANN) computation.
325
- *
326
- * The key insight: in an SNN, only spiking neurons perform multiply-accumulate
327
- * (MAC) operations on their synaptic connections. Silent neurons contribute
328
- * zero computation. This is the source of SNN energy efficiency.
329
- */
330
- interface EnergyEfficiencyMetrics {
331
- /** Total theoretical operations if all neurons were active (dense ANN baseline) */
332
- denseOps: number;
333
- /** Actual operations performed (only spiking neurons contribute) */
334
- sparseOps: number;
335
- /** Operations saved = denseOps - sparseOps */
336
- opsSaved: number;
337
- /** Efficiency ratio = opsSaved / denseOps (0-1); higher = more efficient */
338
- efficiencyRatio: number;
339
- /** Estimated energy savings factor (relative to dense baseline = 1.0) */
340
- energySavingsFactor: number;
341
- }
342
- /**
343
- * A detected violation of the sparsity threshold.
344
- *
345
- * Per W.041 from SNN research: SNN layers must maintain >= 93% activation
346
- * sparsity. Below this threshold, the energy efficiency advantage of
347
- * spike-based computation degrades significantly.
348
- */
349
- interface SparsityViolation {
350
- /** The layer that violated the threshold */
351
- layerId: string;
352
- /** The measured activation sparsity (0-1) */
353
- measuredSparsity: number;
354
- /** The required minimum sparsity threshold (default: 0.93) */
355
- requiredThreshold: number;
356
- /** How far below the threshold: threshold - measured */
357
- deficit: number;
358
- /** Severity classification */
359
- severity: 'warning' | 'critical';
360
- /** ISO 8601 timestamp of violation detection */
361
- detectedAt: string;
362
- /** Timestep at which the violation was detected */
363
- timestep: number;
364
- }
365
- /**
366
- * Configuration for the SparsityMonitor.
367
- */
368
- interface SparsityMonitorConfig {
369
- /** Minimum activation sparsity threshold (default: 0.93 per W.041) */
370
- sparsityThreshold: number;
371
- /** Window size for rolling average calculations (default: 50 timesteps) */
372
- windowSize: number;
373
- /** Whether to track per-layer detailed metrics (default: true) */
374
- perLayerTracking: boolean;
375
- /** Whether energy efficiency calculation is enabled (default: true) */
376
- energyMetricsEnabled: boolean;
377
- /** Average synaptic connections per neuron (for ops calculation, default: 100) */
378
- avgSynapsesPerNeuron: number;
379
- /** MAC operations per synaptic event (default: 2 - multiply + accumulate) */
380
- opsPerSynapse: number;
381
- /** Critical severity threshold: sparsity below this is critical (default: 0.85) */
382
- criticalThreshold: number;
383
- /** Maximum violations to retain in history (default: 1000) */
384
- maxViolationHistory: number;
385
- }
386
- /**
387
- * Aggregate statistics from the SparsityMonitor.
388
- */
389
- interface SparsityMonitorStats {
390
- /** Total number of timesteps recorded */
391
- totalTimesteps: number;
392
- /** Total number of snapshots taken */
393
- totalSnapshots: number;
394
- /** Number of layers being tracked */
395
- trackedLayers: number;
396
- /** Overall mean sparsity across all timesteps and layers */
397
- meanSparsity: number;
398
- /** Minimum sparsity observed */
399
- minSparsity: number;
400
- /** Maximum sparsity observed */
401
- maxSparsity: number;
402
- /** Standard deviation of sparsity measurements */
403
- stdDevSparsity: number;
404
- /** Total violations detected */
405
- totalViolations: number;
406
- /** Violations by severity */
407
- violationsBySeverity: {
408
- warning: number;
409
- critical: number;
410
- };
411
- /** Per-layer mean sparsity */
412
- perLayerMeanSparsity: Record<string, number>;
413
- /** Mean energy efficiency ratio */
414
- meanEnergyEfficiency: number;
415
- /** Whether the system is currently in compliance (no active violations) */
416
- inCompliance: boolean;
417
- }
418
- /**
419
- * An entry compatible with the quality-history.json format used by the
420
- * self-improvement pipeline. Allows sparsity metrics to be tracked
421
- * alongside existing quality metrics.
422
- */
423
- interface SparsityQualityHistoryEntry {
424
- /** ISO 8601 timestamp */
425
- timestamp: string;
426
- /** Monitoring cycle number */
427
- cycle: number;
428
- /** Aggregate sparsity as composite score (0-1) */
429
- composite: number;
430
- /** Grade based on sparsity compliance */
431
- grade: 'A' | 'B' | 'C' | 'D' | 'F';
432
- /** Focus area identifier */
433
- focus: 'snn-sparsity';
434
- /** Human-readable summary */
435
- summary: string;
436
- /** Detailed sparsity metrics */
437
- sparsityMetrics: {
438
- aggregateSparsity: number;
439
- aggregateSpikeRate: number;
440
- energyEfficiencyRatio: number;
441
- violationCount: number;
442
- layerCount: number;
443
- totalNeurons: number;
444
- inCompliance: boolean;
445
- };
446
- }
447
-
448
- /**
449
- * SparsityMonitor.ts
450
- *
451
- * Monitors SNN (Spiking Neural Network) sparsity during simulation,
452
- * tracking spike rates and activation sparsity across layers, calculating
453
- * energy efficiency metrics, and detecting sparsity regime violations.
454
- *
455
- * Integrates with the self-improvement pipeline via:
456
- * - SelfImproveHarvester: provides metrics for continuous quality monitoring
457
- * - quality-history.json: outputs compatible entries for historical tracking
458
- *
459
- * Key threshold (W.041): SNN layers must maintain >= 93% activation sparsity.
460
- *
461
- * Usage:
462
- * ```ts
463
- * const monitor = new SparsityMonitor({ sparsityThreshold: 0.93 });
464
- *
465
- * // Record layer activity during simulation
466
- * monitor.recordLayerActivity('lif_hidden_1', {
467
- * neuronCount: 1000,
468
- * spikeCount: 50,
469
- * timestep: 0,
470
- * });
471
- *
472
- * // Take a snapshot
473
- * const snapshot = monitor.takeSnapshot();
474
- *
475
- * // Check for violations
476
- * const violations = monitor.getActiveViolations();
477
- *
478
- * // Get quality-history.json compatible entry
479
- * const entry = monitor.toQualityHistoryEntry(1);
480
- * ```
481
- *
482
- * @module training/SparsityMonitor
483
- */
484
-
485
- /**
486
- * Input data for recording layer activity. The monitor computes
487
- * derived fields (spikeRate, activationSparsity) from these inputs.
488
- */
489
- interface LayerActivityInput {
490
- /** Total number of neurons in the layer */
491
- neuronCount: number;
492
- /** Number of neurons that spiked */
493
- spikeCount: number;
494
- /** Simulation timestep index */
495
- timestep: number;
496
- /** Optional: average membrane potential */
497
- avgMembranePotential?: number;
498
- }
499
- /**
500
- * Monitors SNN activation sparsity across layers during simulation.
501
- *
502
- * Provides:
503
- * 1. Per-layer spike rate and activation sparsity tracking
504
- * 2. Energy efficiency calculation (theoretical ops saved via sparsity)
505
- * 3. Sparsity regime violation detection (W.041: >= 93% threshold)
506
- * 4. Integration with SelfImproveHarvester for continuous quality monitoring
507
- * 5. Output compatible with quality-history.json format
508
- */
509
- declare class SparsityMonitor {
510
- private config;
511
- /** Current layer metrics indexed by layerId, latest values per layer */
512
- private currentLayerMetrics;
513
- /** Historical layer metrics: layerId -> array of metrics over time */
514
- private layerHistory;
515
- /** Snapshots taken over time */
516
- private snapshots;
517
- /** Detected violations */
518
- private violations;
519
- /** Rolling window of aggregate sparsity values for stats */
520
- private sparsityWindow;
521
- /** Total timesteps recorded across all layers */
522
- private totalTimestepsRecorded;
523
- constructor(config?: Partial<SparsityMonitorConfig>);
524
- /**
525
- * Record activity for a single SNN layer at a given timestep.
526
- *
527
- * Computes spike rate and activation sparsity from the raw input,
528
- * checks for threshold violations, and stores the metrics.
529
- *
530
- * @param layerId - Unique identifier for the layer
531
- * @param input - Raw activity data (neuronCount, spikeCount, timestep)
532
- * @returns The computed SNNLayerMetrics for this recording
533
- */
534
- recordLayerActivity(layerId: string, input: LayerActivityInput): SNNLayerMetrics;
535
- /**
536
- * Record activity for multiple layers at the same timestep (batch recording).
537
- *
538
- * @param layerInputs - Map of layerId -> activity input
539
- * @returns Array of computed metrics for each layer
540
- */
541
- recordBatchActivity(layerInputs: Map<string, LayerActivityInput> | Record<string, LayerActivityInput>): SNNLayerMetrics[];
542
- /**
543
- * Take a point-in-time snapshot of all current layer metrics.
544
- *
545
- * The snapshot captures aggregate statistics across all tracked layers,
546
- * computes energy efficiency, and checks for violations.
547
- *
548
- * @returns The snapshot, or null if no layer metrics have been recorded
549
- */
550
- takeSnapshot(): SparsitySnapshot | null;
551
- /**
552
- * Calculate theoretical energy efficiency metrics for the given layers.
553
- *
554
- * Dense ops = sum of (neuronCount * avgSynapsesPerNeuron * opsPerSynapse) per layer
555
- * Sparse ops = sum of (spikeCount * avgSynapsesPerNeuron * opsPerSynapse) per layer
556
- *
557
- * The ratio of ops saved reflects the theoretical computational advantage
558
- * of SNN sparsity over equivalent dense ANN computation.
559
- */
560
- calculateEnergyEfficiency(layers: SNNLayerMetrics[]): EnergyEfficiencyMetrics;
561
- /**
562
- * Check a single layer's metrics against the sparsity threshold.
563
- * If a violation is detected, it is recorded in the violations history.
564
- */
565
- private checkViolation;
566
- /**
567
- * Detect violations for an array of layer metrics (used in snapshots).
568
- */
569
- private detectViolationsForLayers;
570
- /**
571
- * Get all violations currently affecting active layers
572
- * (the most recent metric per layer that is below threshold).
573
- */
574
- getActiveViolations(): SparsityViolation[];
575
- /**
576
- * Get all historical violations.
577
- */
578
- getViolationHistory(): SparsityViolation[];
579
- /**
580
- * Compute aggregate statistics across all recorded data.
581
- */
582
- getStats(): SparsityMonitorStats;
583
- /**
584
- * Generate an entry compatible with the quality-history.json format.
585
- *
586
- * The composite score is based on the aggregate sparsity relative to
587
- * the threshold: score = min(1, aggregateSparsity / threshold).
588
- *
589
- * @param cycle - The monitoring cycle number
590
- * @returns A quality history entry with sparsity-specific metrics
591
- */
592
- toQualityHistoryEntry(cycle: number): SparsityQualityHistoryEntry;
593
- /**
594
- * Generate a human-readable summary string for the quality history entry.
595
- */
596
- private generateSummary;
597
- /**
598
- * Get metrics formatted for integration with SelfImproveHarvester.
599
- *
600
- * Returns a record of key metrics that can be attached to harvest records
601
- * as additional metadata for training data quality assessment.
602
- */
603
- getHarvesterMetrics(): Record<string, number | boolean>;
604
- /**
605
- * Get all recorded snapshots.
606
- */
607
- getSnapshots(): SparsitySnapshot[];
608
- /**
609
- * Get the most recent snapshot, or null if none have been taken.
610
- */
611
- getLatestSnapshot(): SparsitySnapshot | null;
612
- /**
613
- * Get current layer metrics.
614
- */
615
- getCurrentLayerMetrics(): Map<string, SNNLayerMetrics>;
616
- /**
617
- * Get history for a specific layer.
618
- */
619
- getLayerHistory(layerId: string): SNNLayerMetrics[];
620
- /**
621
- * Get the current configuration.
622
- */
623
- getConfig(): SparsityMonitorConfig;
624
- /**
625
- * Reset all recorded data and start fresh.
626
- */
627
- reset(): void;
628
- }
629
- /**
630
- * Create a new SparsityMonitor with optional configuration overrides.
631
- */
632
- declare function createSparsityMonitor(config?: Partial<SparsityMonitorConfig>): SparsityMonitor;
633
-
634
- /**
635
- * SoftDedup - Soft Deduplication via N-gram Commonness Scoring
636
- *
637
- * Instead of hard-deleting duplicate training examples, SoftDedup computes
638
- * n-gram commonness scores and assigns sampling weights. Examples with
639
- * high-frequency n-grams (template-generated / near-duplicate content)
640
- * receive lower sampling weights, reducing their influence during training
641
- * without discarding them entirely.
642
- *
643
- * Based on training rule W.008:
644
- * "Reweight duplicates instead of deleting them. SoftDedup uses n-gram
645
- * commonness scores to reduce sampling weight of high-frequency data.
646
- * 26% faster training, +1.77% accuracy vs hard dedup alone."
647
- *
648
- * Pipeline position: Quality Filter -> Hard Dedup (W.004) -> SoftDedup (W.008)
649
- *
650
- * @module training/SoftDedup
651
- */
652
- /**
653
- * Configuration for the SoftDedup algorithm.
654
- */
655
- interface SoftDedupConfig {
656
- /**
657
- * N-gram sizes to compute commonness scores for.
658
- * Using multiple sizes captures both local (small n) and structural (large n)
659
- * patterns. Default: [3, 5, 7] (character-level trigrams, 5-grams, 7-grams).
660
- */
661
- ngramSizes: number[];
662
- /**
663
- * Whether to use word-level n-grams instead of character-level.
664
- * Word-level captures semantic similarity; character-level captures
665
- * template-level patterns. Default: false (character-level).
666
- */
667
- wordLevel: boolean;
668
- /**
669
- * Minimum sampling weight. Even the most common examples keep at least
670
- * this weight to prevent complete exclusion. Default: 0.1 (10% weight).
671
- * Must be in range (0, 1].
672
- */
673
- minWeight: number;
674
- /**
675
- * Maximum sampling weight. Rare/unique examples get at most this weight.
676
- * Default: 1.0 (100% weight). Must be in range [minWeight, 1].
677
- */
678
- maxWeight: number;
679
- /**
680
- * Temperature parameter controlling how aggressively to downweight
681
- * common examples. Higher temperature = more uniform weights.
682
- * Lower temperature = more aggressive downweighting.
683
- * Default: 1.0.
684
- */
685
- temperature: number;
686
- /**
687
- * Percentile threshold for "common" n-grams.
688
- * N-grams appearing more frequently than this percentile of all n-gram
689
- * frequencies are considered "common". Default: 0.7 (top 30% are common).
690
- * Must be in range [0, 1].
691
- */
692
- commonThresholdPercentile: number;
693
- }
694
- /**
695
- * Result for a single training example after SoftDedup scoring.
696
- */
697
- interface SoftDedupResult {
698
- /** Index of the example in the input array */
699
- index: number;
700
- /** Computed commonness score (0 = unique, 1 = fully common) */
701
- commonnessScore: number;
702
- /** Assigned sampling weight (minWeight to maxWeight) */
703
- samplingWeight: number;
704
- /** N-gram statistics for this example */
705
- ngramStats: NgramStats;
706
- }
707
- /**
708
- * N-gram statistics for a single example.
709
- */
710
- interface NgramStats {
711
- /** Total number of n-grams extracted */
712
- totalNgrams: number;
713
- /** Number of n-grams classified as "common" */
714
- commonNgrams: number;
715
- /** Ratio of common n-grams to total (0 to 1) */
716
- commonRatio: number;
717
- }
718
- /**
719
- * Aggregate statistics for the entire SoftDedup run.
720
- */
721
- interface SoftDedupStats {
722
- /** Total examples processed */
723
- totalExamples: number;
724
- /** Mean sampling weight across all examples */
725
- meanWeight: number;
726
- /** Median sampling weight */
727
- medianWeight: number;
728
- /** Standard deviation of sampling weights */
729
- stdWeight: number;
730
- /** Number of examples at minimum weight (heavily downweighted) */
731
- atMinWeight: number;
732
- /** Number of examples at maximum weight (unique/rare) */
733
- atMaxWeight: number;
734
- /** Effective dataset size (sum of all weights) */
735
- effectiveDatasetSize: number;
736
- /** Reduction ratio: 1 - (effectiveSize / totalExamples) */
737
- reductionRatio: number;
738
- /** Number of unique n-grams in the corpus */
739
- uniqueNgramsInCorpus: number;
740
- /** Commonness threshold frequency (absolute count) */
741
- commonThresholdFrequency: number;
742
- }
743
- /**
744
- * Default SoftDedup configuration.
745
- * Tuned for HoloScript/Brittney training datasets (920K-1.5M examples).
746
- */
747
- declare const DEFAULT_SOFTDEDUP_CONFIG: SoftDedupConfig;
748
- /**
749
- * SoftDedup processor for training data.
750
- *
751
- * Computes n-gram commonness scores and assigns sampling weights
752
- * to training examples. Works AFTER hard dedup (W.004).
753
- *
754
- * @example
755
- * ```ts
756
- * const dedup = new SoftDedup();
757
- * const results = dedup.process([
758
- * 'composition MyScene { orb Player { Grabbable {} } }',
759
- * 'composition MyScene { orb Player { Grabbable {} } }', // near-duplicate
760
- * 'world Arena { orb Enemy { Physics { mass: 10 } } }', // unique
761
- * ]);
762
- *
763
- * // results[0].samplingWeight ~= 0.3 (common template)
764
- * // results[1].samplingWeight ~= 0.3 (common template)
765
- * // results[2].samplingWeight ~= 1.0 (unique content)
766
- * ```
767
- */
768
- declare class SoftDedup {
769
- private config;
770
- constructor(config?: Partial<SoftDedupConfig>);
771
- /**
772
- * Process a dataset of text examples and compute sampling weights.
773
- *
774
- * @param examples - Array of text strings (training examples)
775
- * @returns Array of SoftDedupResult with sampling weights
776
- */
777
- process(examples: string[]): SoftDedupResult[];
778
- /**
779
- * Compute aggregate statistics for a set of SoftDedup results.
780
- */
781
- computeStats(results: SoftDedupResult[]): SoftDedupStats;
782
- /**
783
- * Get the current configuration.
784
- */
785
- getConfig(): Readonly<SoftDedupConfig>;
786
- /**
787
- * Extract n-grams from a text string.
788
- * Supports both character-level and word-level n-grams.
789
- */
790
- private extractNgrams;
791
- /**
792
- * Build a frequency map of all n-grams across the entire corpus.
793
- */
794
- private buildCorpusFrequencies;
795
- /**
796
- * Compute the frequency threshold above which an n-gram is considered "common".
797
- * Uses the configured percentile of the frequency distribution.
798
- */
799
- private computeThreshold;
800
- /**
801
- * Convert a commonness score (0-1) to a sampling weight.
802
- *
803
- * Uses exponential decay with temperature scaling:
804
- * weight = maxWeight * exp(-commonnessScore / temperature)
805
- *
806
- * Then clamps to [minWeight, maxWeight].
807
- */
808
- private commonnessToWeight;
809
- /**
810
- * Validate configuration parameters.
811
- * @throws Error if configuration is invalid
812
- */
813
- private validateConfig;
814
- }
815
- /**
816
- * Create a SoftDedup processor with optional configuration overrides.
817
- *
818
- * @example
819
- * ```ts
820
- * const dedup = createSoftDedup({ wordLevel: true, temperature: 0.5 });
821
- * const results = dedup.process(myDataset);
822
- * const stats = dedup.computeStats(results);
823
- * console.log(`Effective dataset size: ${stats.effectiveDatasetSize}`);
824
- * ```
825
- */
826
- declare function createSoftDedup(config?: Partial<SoftDedupConfig>): SoftDedup;
827
-
828
- /**
829
- * LRScheduler - Learning Rate Scheduler with Warmup + Cosine Decay
830
- *
831
- * Implements the learning rate schedule described in training rule W.009:
832
- * "Always use warmup (10% steps) + cosine decay."
833
- *
834
- * The schedule has two phases:
835
- * 1. **Linear Warmup**: LR ramps linearly from 0 to baseLR over the
836
- * first warmupSteps (typically 10% of total steps).
837
- * 2. **Cosine Decay**: LR decays from baseLR to minLR following a
838
- * cosine curve over the remaining steps.
839
- *
840
- * This prevents early divergence in deep networks and enables smooth
841
- * convergence. Pairs with W.006 base LR (2e-4 for SFT, 1e-6 for GRPO).
842
- *
843
- * @module training/LRScheduler
844
- */
845
- /**
846
- * Configuration for the LR scheduler.
847
- */
848
- interface LRSchedulerConfig {
849
- /**
850
- * Base (peak) learning rate.
851
- * Per W.006: 2e-4 for SFT, 1e-6 for GRPO.
852
- */
853
- baseLR: number;
854
- /**
855
- * Total number of training steps.
856
- * Computed as: (dataset_size / effective_batch_size) * num_epochs.
857
- */
858
- totalSteps: number;
859
- /**
860
- * Warmup ratio: fraction of totalSteps used for linear warmup.
861
- * Per W.009: 10% warmup steps (warmupRatio = 0.1).
862
- * Must be in range [0, 1).
863
- */
864
- warmupRatio: number;
865
- /**
866
- * Minimum learning rate at the end of cosine decay.
867
- * Typically 0 or a very small value (e.g., 1e-7).
868
- * Must be in range [0, baseLR).
869
- */
870
- minLR: number;
871
- /**
872
- * Number of cosine annealing cycles.
873
- * Default: 1 (single cosine decay from peak to min).
874
- * Values > 1 create "cosine annealing with warm restarts" (SGDR).
875
- */
876
- numCycles: number;
877
- }
878
- /**
879
- * Snapshot of the LR scheduler state at a given step.
880
- */
881
- interface LRSchedulerSnapshot {
882
- /** Current training step */
883
- step: number;
884
- /** Current learning rate */
885
- learningRate: number;
886
- /** Current phase: 'warmup' or 'decay' */
887
- phase: 'warmup' | 'decay';
888
- /** Progress through current phase (0 to 1) */
889
- phaseProgress: number;
890
- /** Overall training progress (0 to 1) */
891
- overallProgress: number;
892
- }
893
- /**
894
- * Summary statistics for the full LR schedule.
895
- */
896
- interface LRScheduleStats {
897
- /** Peak learning rate (baseLR) */
898
- peakLR: number;
899
- /** Minimum learning rate (minLR or end-of-decay value) */
900
- minLR: number;
901
- /** Number of warmup steps */
902
- warmupSteps: number;
903
- /** Number of decay steps */
904
- decaySteps: number;
905
- /** Total training steps */
906
- totalSteps: number;
907
- /** Average learning rate across all steps */
908
- avgLR: number;
909
- }
910
- /**
911
- * Default LR scheduler configuration for HoloScript/Brittney SFT training.
912
- * Based on W.006 (baseLR=2e-4) and W.009 (warmupRatio=0.1, cosine decay).
913
- */
914
- declare const DEFAULT_LR_SCHEDULER_CONFIG: LRSchedulerConfig;
915
- /**
916
- * LR scheduler configuration for GRPO training.
917
- * Uses lower baseLR (1e-6) per GRPOConfig.ts.
918
- */
919
- declare const GRPO_LR_SCHEDULER_CONFIG: LRSchedulerConfig;
920
- /**
921
- * Learning Rate Scheduler with warmup + cosine decay.
922
- *
923
- * Computes the learning rate at any given training step.
924
- * Stateless: does not track the current step internally. This makes it
925
- * safe to use in distributed training where multiple workers may query
926
- * different steps simultaneously.
927
- *
928
- * @example
929
- * ```ts
930
- * const scheduler = new LRScheduler({
931
- * baseLR: 2e-4,
932
- * totalSteps: 10000,
933
- * warmupRatio: 0.1,
934
- * minLR: 1e-7,
935
- * numCycles: 1,
936
- * });
937
- *
938
- * // Step 0: LR = 0 (start of warmup)
939
- * scheduler.getLR(0); // 0
940
- *
941
- * // Step 500: LR = 1e-4 (midway through warmup)
942
- * scheduler.getLR(500); // ~0.0001
943
- *
944
- * // Step 1000: LR = 2e-4 (end of warmup, peak LR)
945
- * scheduler.getLR(1000); // 0.0002
946
- *
947
- * // Step 5500: LR ~= 1e-4 (midway through cosine decay)
948
- * scheduler.getLR(5500);
949
- *
950
- * // Step 10000: LR ~= 1e-7 (end of training)
951
- * scheduler.getLR(10000); // ~0.0000001
952
- * ```
953
- */
954
- declare class LRScheduler {
955
- private config;
956
- private warmupSteps;
957
- constructor(config?: Partial<LRSchedulerConfig>);
958
- /**
959
- * Get the learning rate at a given training step.
960
- *
961
- * @param step - Current training step (0-indexed)
962
- * @returns The learning rate at this step
963
- */
964
- getLR(step: number): number;
965
- /**
966
- * Get a detailed snapshot of the scheduler state at a given step.
967
- *
968
- * @param step - Current training step (0-indexed)
969
- * @returns LRSchedulerSnapshot with full state information
970
- */
971
- getSnapshot(step: number): LRSchedulerSnapshot;
972
- /**
973
- * Compute summary statistics for the full LR schedule.
974
- * Samples every step to compute the average LR.
975
- *
976
- * For large totalSteps, this samples at most 10000 evenly-spaced points
977
- * for efficiency.
978
- */
979
- getStats(): LRScheduleStats;
980
- /**
981
- * Generate the full LR schedule as an array of [step, lr] pairs.
982
- * Useful for plotting or debugging.
983
- *
984
- * @param numPoints - Number of points to sample (default: 100)
985
- * @returns Array of [step, learningRate] tuples
986
- */
987
- getSchedule(numPoints?: number): Array<[number, number]>;
988
- /**
989
- * Get the number of warmup steps.
990
- */
991
- getWarmupSteps(): number;
992
- /**
993
- * Get the current configuration.
994
- */
995
- getConfig(): Readonly<LRSchedulerConfig>;
996
- /**
997
- * Validate configuration parameters.
998
- * @throws Error if configuration is invalid
999
- */
1000
- private validateConfig;
1001
- }
1002
- /**
1003
- * Create an LR scheduler for SFT training with optional overrides.
1004
- *
1005
- * @example
1006
- * ```ts
1007
- * const scheduler = createSFTScheduler({ totalSteps: 5000 });
1008
- * const lr = scheduler.getLR(100);
1009
- * ```
1010
- */
1011
- declare function createSFTScheduler(config?: Partial<LRSchedulerConfig>): LRScheduler;
1012
- /**
1013
- * Create an LR scheduler for GRPO training with optional overrides.
1014
- *
1015
- * @example
1016
- * ```ts
1017
- * const scheduler = createGRPOScheduler({ totalSteps: 2000 });
1018
- * const lr = scheduler.getLR(100);
1019
- * ```
1020
- */
1021
- declare function createGRPOScheduler(config?: Partial<LRSchedulerConfig>): LRScheduler;
1022
-
1023
- /**
1024
- * Compiler-Based Quality Scoring Pipeline for Training Data
1025
- * Metrics: syntax_validity, schema_compliance, semantic_correctness
1026
- * @version 1.0.0
1027
- */
1028
- interface QualityScore {
1029
- syntaxValidity: number;
1030
- schemaCompliance: number;
1031
- semanticCorrectness: number;
1032
- compositeScore: number;
1033
- details: QualityDetail[];
1034
- }
1035
- interface QualityDetail {
1036
- metric: string;
1037
- score: number;
1038
- message: string;
1039
- }
1040
- interface QualityScoringConfig {
1041
- syntaxWeight: number;
1042
- schemaWeight: number;
1043
- semanticWeight: number;
1044
- minPassScore: number;
1045
- knownTraits: Set<string>;
1046
- knownTypes: Set<string>;
1047
- }
1048
- declare const DEFAULT_SCORING_CONFIG: QualityScoringConfig;
1049
- declare class QualityScoringPipeline {
1050
- private config;
1051
- constructor(config?: Partial<QualityScoringConfig>);
1052
- score(source: string): QualityScore;
1053
- passes(score: QualityScore): boolean;
1054
- private scoreSyntax;
1055
- private scoreSchema;
1056
- private scoreSemantic;
1057
- }
1058
-
1059
- /**
1060
- * TrainingPipelineConfig - Unified Training Pipeline Configuration
1061
- *
1062
- * Integrates all training pipeline components:
1063
- * - Quality Scoring (W.010): Multi-dimensional quality filtering
1064
- * - Hard Dedup (W.004): Exact/near-duplicate removal (external)
1065
- * - SoftDedup (W.008): N-gram commonness-based reweighting
1066
- * - LR Schedule (W.009): Warmup + cosine decay
1067
- * - Hyperparameters (W.006, W.007): Learning rate, batch size, epochs
1068
- *
1069
- * Pipeline order: Quality Filter -> Hard Dedup -> SoftDedup -> Training
1070
- *
1071
- * @module training/TrainingPipelineConfig
1072
- */
1073
-
1074
- /**
1075
- * Complete training pipeline configuration.
1076
- *
1077
- * Encompasses all stages from data preparation through training execution.
1078
- */
1079
- interface TrainingPipelineConfig {
1080
- /** Data quality filtering configuration (W.010) */
1081
- qualityScoring: QualityScoringConfig;
1082
- /** Soft deduplication configuration (W.008) */
1083
- softDedup: SoftDedupConfig;
1084
- /** Learning rate schedule configuration (W.009) */
1085
- lrSchedule: LRSchedulerConfig;
1086
- /** Core training hyperparameters (W.006, W.007) */
1087
- hyperparameters: TrainingHyperparameters;
1088
- /** Pipeline-level settings */
1089
- pipeline: PipelineSettings;
1090
- }
1091
- /**
1092
- * Core training hyperparameters per W.006 and W.007.
1093
- */
1094
- interface TrainingHyperparameters {
1095
- /**
1096
- * Base learning rate.
1097
- * Per W.006: 2e-4 for SFT (NOT 2e-5).
1098
- */
1099
- learningRate: number;
1100
- /**
1101
- * Number of training epochs.
1102
- * Per W.006: 2 epochs (NOT 3). "Loss converges in 1-2 epochs."
1103
- */
1104
- epochs: number;
1105
- /**
1106
- * Optimizer.
1107
- * Per W.006: paged_adamw_8bit (NOT adamw_torch).
1108
- */
1109
- optimizer: 'paged_adamw_8bit' | 'adamw_torch' | 'adafactor';
1110
- /**
1111
- * Micro-batch size per device.
1112
- * Per W.007: 8-16 for 7B models.
1113
- */
1114
- microBatchSize: number;
1115
- /**
1116
- * Gradient accumulation steps.
1117
- * Per W.007: 2-4 steps for effective batch 32-512.
1118
- */
1119
- gradientAccumulationSteps: number;
1120
- /**
1121
- * Maximum gradient norm for clipping.
1122
- */
1123
- maxGradNorm: number;
1124
- /**
1125
- * Weight decay coefficient.
1126
- */
1127
- weightDecay: number;
1128
- }
1129
- /**
1130
- * Pipeline-level settings controlling the data preparation flow.
1131
- */
1132
- interface PipelineSettings {
1133
- /**
1134
- * Whether to apply quality scoring filter before training.
1135
- * Per W.010: Apply BEFORE deduplication to avoid wasting compute on junk.
1136
- */
1137
- enableQualityFilter: boolean;
1138
- /**
1139
- * Whether to apply SoftDedup after hard dedup.
1140
- * Per W.008: Apply AFTER hard dedup (W.004), not instead of it.
1141
- */
1142
- enableSoftDedup: boolean;
1143
- /**
1144
- * Whether to use the LR scheduler (warmup + cosine decay).
1145
- * Per W.009: Always use.
1146
- */
1147
- enableLRSchedule: boolean;
1148
- /**
1149
- * Seed for reproducibility.
1150
- */
1151
- seed?: number;
1152
- }
1153
- /**
1154
- * Default training pipeline configuration.
1155
- *
1156
- * Implements the full optimization pipeline:
1157
- * - Quality Filter (W.010) -> Hard Dedup (W.004) -> SoftDedup (W.008)
1158
- * - LR Schedule: warmup 10% + cosine decay (W.009)
1159
- * - Hyperparameters: LR=2e-4, epochs=2, paged_adamw_8bit (W.006)
1160
- * - Batch: micro=8, accumulation=4, effective=32 (W.007)
1161
- */
1162
- declare const DEFAULT_TRAINING_PIPELINE_CONFIG: TrainingPipelineConfig;
1163
- /** Deep partial utility type */
1164
- type DeepPartial<T> = {
1165
- [P in keyof T]?: T[P] extends object ? DeepPartial<T[P]> : T[P];
1166
- };
1167
- /**
1168
- * Build a TrainingPipelineConfig with custom overrides.
1169
- *
1170
- * @example
1171
- * ```ts
1172
- * const config = buildTrainingPipelineConfig({
1173
- * hyperparameters: { learningRate: 1e-4, epochs: 3 },
1174
- * softDedup: { temperature: 0.5 },
1175
- * lrSchedule: { totalSteps: 5000 },
1176
- * });
1177
- * ```
1178
- */
1179
- declare function buildTrainingPipelineConfig(overrides?: DeepPartial<TrainingPipelineConfig>): TrainingPipelineConfig;
1180
- /**
1181
- * Compute the total training steps from dataset size and hyperparameters.
1182
- *
1183
- * @param datasetSize - Number of training examples (after dedup)
1184
- * @param config - Training pipeline configuration
1185
- * @returns Total number of training steps
1186
- */
1187
- declare function computeTotalSteps(datasetSize: number, config: TrainingPipelineConfig): number;
1188
-
1189
- /**
1190
- * TrainingMonkey Integration Types
1191
- *
1192
- * Type definitions for integrating HoloScript spatial reasoning training data
1193
- * with the TrainingMonkey fine-tuning pipeline. Converts from the internal
1194
- * SpatialTrainingJSONLEntry format to TrainingMonkey's Alpaca-style format
1195
- * with SoftDedup (W.008) n-gram reweighting and train/validation splits.
1196
- *
1197
- * @module training/trainingmonkey/TrainingMonkeyTypes
1198
- */
1199
- /**
1200
- * Alpaca instruction-following format used by TrainingMonkey.
1201
- *
1202
- * TrainingMonkey's train_v43.py reads:
1203
- * - example.get("instruction", "")
1204
- * - example.get("output", "")
1205
- *
1206
- * The optional `input` field provides additional context (e.g., HoloScript scene).
1207
- */
1208
- interface AlpacaEntry {
1209
- /** The instruction/question for the model */
1210
- instruction: string;
1211
- /** Optional additional input context (HoloScript scene source) */
1212
- input: string;
1213
- /** The expected output/response from the model */
1214
- output: string;
1215
- }
1216
- /**
1217
- * Extended Alpaca entry with SoftDedup sampling weight and metadata.
1218
- * Used for weighted sampling during training.
1219
- */
1220
- interface WeightedAlpacaEntry extends AlpacaEntry {
1221
- /** SoftDedup sampling weight (0.1 to 1.0). Higher = more likely to be sampled */
1222
- sampling_weight: number;
1223
- /** Original metadata preserved from the spatial reasoning dataset */
1224
- metadata?: {
1225
- id: string;
1226
- relationship_type: string;
1227
- is_positive: boolean;
1228
- difficulty: string;
1229
- tags: string[];
1230
- };
1231
- }
1232
- /**
1233
- * Result of splitting a dataset into train/validation sets.
1234
- */
1235
- interface DatasetSplit {
1236
- /** Training set entries */
1237
- train: WeightedAlpacaEntry[];
1238
- /** Validation set entries */
1239
- validation: WeightedAlpacaEntry[];
1240
- /** Split statistics */
1241
- stats: SplitStats;
1242
- }
1243
- /**
1244
- * Statistics about a train/validation split.
1245
- */
1246
- interface SplitStats {
1247
- /** Total examples before split */
1248
- totalExamples: number;
1249
- /** Number of training examples */
1250
- trainCount: number;
1251
- /** Number of validation examples */
1252
- validationCount: number;
1253
- /** Actual train ratio (trainCount / totalExamples) */
1254
- trainRatio: number;
1255
- /** Actual validation ratio (validationCount / totalExamples) */
1256
- validationRatio: number;
1257
- /** Whether the split is stratified by difficulty/relationship type */
1258
- stratified: boolean;
1259
- }
1260
- /**
1261
- * TrainingMonkey-compatible training configuration.
1262
- * Generated alongside the dataset for direct use with train_v43.py.
1263
- */
1264
- interface TrainingMonkeyConfig {
1265
- /** Model configuration */
1266
- model: {
1267
- /** Model identifier (e.g., "qwen7b", "phi35") */
1268
- name: string;
1269
- /** Maximum sequence length */
1270
- maxSeqLength: number;
1271
- };
1272
- /** Training hyperparameters (per W.006) */
1273
- hyperparameters: {
1274
- /** Learning rate. Per W.006: 2e-4 */
1275
- learningRate: number;
1276
- /** Number of epochs. Per W.006: 2 */
1277
- epochs: number;
1278
- /** Optimizer. Per W.006: paged_adamw_8bit */
1279
- optimizer: string;
1280
- /** Micro-batch size per device. Per W.007: 8-16 */
1281
- microBatchSize: number;
1282
- /** Gradient accumulation steps. Per W.007: 2-4 */
1283
- gradientAccumulationSteps: number;
1284
- /** Maximum gradient norm for clipping */
1285
- maxGradNorm: number;
1286
- /** Weight decay coefficient */
1287
- weightDecay: number;
1288
- };
1289
- /** LR schedule (per W.009) */
1290
- lrSchedule: {
1291
- /** Warmup ratio (10% of total steps) */
1292
- warmupRatio: number;
1293
- /** Schedule type */
1294
- type: 'cosine';
1295
- };
1296
- /** Dataset paths */
1297
- dataset: {
1298
- /** Path to training JSONL */
1299
- trainPath: string;
1300
- /** Path to validation JSONL */
1301
- validationPath: string;
1302
- /** Number of training examples */
1303
- trainCount: number;
1304
- /** Number of validation examples */
1305
- validationCount: number;
1306
- /** Total computed training steps */
1307
- totalSteps: number;
1308
- };
1309
- /** SoftDedup statistics */
1310
- softDedup: {
1311
- /** Whether SoftDedup was applied */
1312
- applied: boolean;
1313
- /** Mean sampling weight */
1314
- meanWeight: number;
1315
- /** Effective dataset size after reweighting */
1316
- effectiveSize: number;
1317
- /** Reduction ratio */
1318
- reductionRatio: number;
1319
- };
1320
- }
1321
- /**
1322
- * Configuration for the TrainingMonkey integration pipeline.
1323
- */
1324
- interface TrainingMonkeyIntegrationConfig {
1325
- /** Path to the input JSONL file */
1326
- inputPath: string;
1327
- /** Directory for output files */
1328
- outputDir: string;
1329
- /** Train/validation split ratio (default: 0.9 = 90% train) */
1330
- trainRatio: number;
1331
- /** Random seed for reproducible splits */
1332
- seed: number;
1333
- /** Whether to apply SoftDedup reweighting (default: true) */
1334
- enableSoftDedup: boolean;
1335
- /** Target model name for config generation (default: "qwen7b") */
1336
- modelName: string;
1337
- /** Whether to stratify the split by metadata fields (default: true) */
1338
- stratify: boolean;
1339
- }
1340
- /**
1341
- * Default configuration for TrainingMonkey integration.
1342
- */
1343
- declare const DEFAULT_INTEGRATION_CONFIG: TrainingMonkeyIntegrationConfig;
1344
- /**
1345
- * Complete result of running the TrainingMonkey integration pipeline.
1346
- */
1347
- interface IntegrationResult {
1348
- /** The dataset split (train/validation) */
1349
- split: DatasetSplit;
1350
- /** Generated training configuration */
1351
- config: TrainingMonkeyConfig;
1352
- /** Serialized train JSONL content */
1353
- trainJsonl: string;
1354
- /** Serialized validation JSONL content */
1355
- validationJsonl: string;
1356
- /** Serialized config JSON content */
1357
- configJson: string;
1358
- }
1359
-
1360
- /**
1361
- * TrainingMonkey Integration Module
1362
- *
1363
- * Converts HoloScript spatial reasoning JSONL training data into
1364
- * TrainingMonkey's Alpaca format with:
1365
- * 1. JSONL reading and parsing
1366
- * 2. Alpaca format conversion (instruction/input/output)
1367
- * 3. SoftDedup (W.008) n-gram reweighting for sampling
1368
- * 4. Stratified train/validation splits (90/10)
1369
- * 5. TrainingMonkey-compatible training config (W.006 hyperparameters)
1370
- * 6. Ready-to-upload output file generation
1371
- *
1372
- * @module training/trainingmonkey/TrainingMonkeyIntegration
1373
- */
1374
-
1375
- /**
1376
- * Integrates HoloScript spatial reasoning data with TrainingMonkey.
1377
- *
1378
- * Full pipeline:
1379
- * readJsonl() -> convertToAlpaca() -> applySoftDedup() -> splitDataset() -> generateConfig()
1380
- *
1381
- * @example
1382
- * ```ts
1383
- * const integration = new TrainingMonkeyIntegration({
1384
- * inputPath: 'spatial-reasoning-10k.jsonl',
1385
- * outputDir: './output',
1386
- * });
1387
- *
1388
- * const jsonlContent = fs.readFileSync('spatial-reasoning-10k.jsonl', 'utf-8');
1389
- * const result = integration.process(jsonlContent);
1390
- *
1391
- * fs.writeFileSync('alpaca-train.jsonl', result.trainJsonl);
1392
- * fs.writeFileSync('alpaca-val.jsonl', result.validationJsonl);
1393
- * fs.writeFileSync('training-config.json', result.configJson);
1394
- * ```
1395
- */
1396
- declare class TrainingMonkeyIntegration {
1397
- private config;
1398
- private softDedup;
1399
- constructor(config?: Partial<TrainingMonkeyIntegrationConfig>);
1400
- /**
1401
- * Run the full integration pipeline on raw JSONL content.
1402
- *
1403
- * @param jsonlContent - Raw JSONL string content from the dataset file
1404
- * @returns Complete IntegrationResult with split data, config, and serialized output
1405
- */
1406
- process(jsonlContent: string): IntegrationResult;
1407
- /**
1408
- * Parse raw JSONL content into SpatialTrainingJSONLEntry objects.
1409
- *
1410
- * @param jsonlContent - Raw JSONL string (one JSON object per line)
1411
- * @returns Array of parsed entries
1412
- * @throws Error if a line contains invalid JSON
1413
- */
1414
- readJsonl(jsonlContent: string): SpatialTrainingJSONLEntry[];
1415
- /**
1416
- * Convert spatial training entries to Alpaca format.
1417
- *
1418
- * Mapping:
1419
- * instruction -> instruction (question/prompt)
1420
- * input -> extracted HoloScript scene from instruction (if present)
1421
- * output -> response (answer)
1422
- *
1423
- * @param entries - Parsed spatial training entries
1424
- * @returns Alpaca-formatted entries
1425
- */
1426
- convertToAlpaca(entries: SpatialTrainingJSONLEntry[]): AlpacaEntry[];
1427
- /**
1428
- * Apply SoftDedup (W.008) n-gram reweighting to Alpaca entries.
1429
- *
1430
- * Uses the instruction + output text to compute n-gram commonness scores
1431
- * and assigns sampling weights. Template-generated near-duplicates receive
1432
- * lower weights (min 0.1), while unique examples keep weight 1.0.
1433
- *
1434
- * @param alpacaEntries - Converted Alpaca entries
1435
- * @param originalEntries - Original JSONL entries (for metadata preservation)
1436
- * @returns Weighted Alpaca entries with sampling_weight and metadata
1437
- */
1438
- applySoftDedup(alpacaEntries: AlpacaEntry[], originalEntries: SpatialTrainingJSONLEntry[]): WeightedAlpacaEntry[];
1439
- /**
1440
- * Split weighted entries into train/validation sets.
1441
- *
1442
- * When stratified=true, the split preserves the distribution of
1443
- * relationship_type and difficulty across both sets.
1444
- *
1445
- * @param entries - Weighted Alpaca entries
1446
- * @returns DatasetSplit with train, validation, and stats
1447
- */
1448
- splitDataset(entries: WeightedAlpacaEntry[]): DatasetSplit;
1449
- /**
1450
- * Generate a TrainingMonkey-compatible training configuration.
1451
- *
1452
- * Uses W.006 hyperparameters:
1453
- * - Learning rate: 2e-4
1454
- * - Epochs: 2
1455
- * - Optimizer: paged_adamw_8bit
1456
- *
1457
- * Uses W.007 batch sizing:
1458
- * - Micro-batch: 8
1459
- * - Gradient accumulation: 4
1460
- * - Effective batch: 32
1461
- *
1462
- * Uses W.009 LR schedule:
1463
- * - Warmup: 10% of total steps
1464
- * - Schedule: cosine decay
1465
- *
1466
- * @param split - The dataset split result
1467
- * @returns TrainingMonkey-compatible config
1468
- */
1469
- generateConfig(split: DatasetSplit): TrainingMonkeyConfig;
1470
- /**
1471
- * Serialize weighted Alpaca entries to JSONL string.
1472
- *
1473
- * @param entries - Entries to serialize
1474
- * @returns JSONL string (one JSON object per line)
1475
- */
1476
- serializeJsonl(entries: WeightedAlpacaEntry[]): string;
1477
- /**
1478
- * Get the current integration configuration.
1479
- */
1480
- getConfig(): Readonly<TrainingMonkeyIntegrationConfig>;
1481
- /**
1482
- * Extract the question part and HoloScript scene from an instruction string.
1483
- *
1484
- * The spatial reasoning dataset embeds HoloScript scenes in instructions:
1485
- * ```
1486
- * Does the spatial_adjacent constraint pass?
1487
- *
1488
- * HoloScript Scene:
1489
- * ```holoscript
1490
- * composition "SpatialScene" { ... }
1491
- * ```
1492
- * ```
1493
- *
1494
- * This method splits the instruction into the question (instruction field)
1495
- * and the scene source (input field) for the Alpaca format.
1496
- */
1497
- private extractSceneFromInstruction;
1498
- /**
1499
- * Perform a stratified split preserving distribution of metadata fields.
1500
- * Groups by relationship_type + difficulty and splits each group proportionally.
1501
- */
1502
- private stratifiedSplit;
1503
- /**
1504
- * Perform a simple random split.
1505
- */
1506
- private randomSplit;
1507
- /**
1508
- * Fisher-Yates shuffle with seeded PRNG for deterministic ordering.
1509
- */
1510
- private fisherYatesShuffle;
1511
- }
1512
- /**
1513
- * Create a TrainingMonkeyIntegration instance with optional config overrides.
1514
- *
1515
- * @example
1516
- * ```ts
1517
- * const integration = createTrainingMonkeyIntegration({
1518
- * inputPath: 'spatial-reasoning-10k.jsonl',
1519
- * outputDir: './output',
1520
- * trainRatio: 0.9,
1521
- * });
1522
- *
1523
- * const result = integration.process(jsonlContent);
1524
- * ```
1525
- */
1526
- declare function createTrainingMonkeyIntegration(config?: Partial<TrainingMonkeyIntegrationConfig>): TrainingMonkeyIntegration;
1527
-
1528
- /**
1529
- * Training Constants
1530
- *
1531
- * Canonical constants shared between @holoscript/core and TrainingMonkey.
1532
- * This module has ZERO imports from other workspace packages (G.GAP.01 prevention).
1533
- *
1534
- * @version 1.0.0
1535
- */
1536
- /**
1537
- * 9 training categories covering all HoloScript domains
1538
- */
1539
- declare const TRAINING_CATEGORIES: readonly ["vr-interaction", "multiplayer", "physics", "ui-spatial", "ai-agents", "procedural", "audio-spatial", "visual-effects", "game-mechanics"];
1540
- type TrainingCategory = (typeof TRAINING_CATEGORIES)[number];
1541
- /**
1542
- * 4 difficulty levels for training data
1543
- */
1544
- declare const DIFFICULTY_LEVELS: readonly ["beginner", "intermediate", "advanced", "production"];
1545
- type DifficultyLevel = (typeof DIFFICULTY_LEVELS)[number];
1546
- /**
1547
- * Quality score thresholds for training data evaluation
1548
- */
1549
- declare const QUALITY_THRESHOLDS: {
1550
- readonly Excellent: {
1551
- readonly min: 90;
1552
- readonly max: 100;
1553
- };
1554
- readonly VeryGood: {
1555
- readonly min: 80;
1556
- readonly max: 89;
1557
- };
1558
- readonly Acceptable: {
1559
- readonly min: 70;
1560
- readonly max: 79;
1561
- };
1562
- };
1563
- type QualityTier = keyof typeof QUALITY_THRESHOLDS;
1564
- /**
1565
- * Default quality constraints for training data generators
1566
- */
1567
- declare const DEFAULT_GENERATOR_THRESHOLDS: {
1568
- readonly min_compression_ratio: 5;
1569
- readonly max_compression_ratio: 15;
1570
- readonly max_duplication_rate: 0.05;
1571
- readonly min_templates_per_difficulty: 10;
1572
- readonly min_quality_score: 0.7;
1573
- };
1574
- /**
1575
- * RuleForge domain categories used by the rule generator
1576
- */
1577
- declare const RULEFORGE_DOMAINS: readonly ["ai_agents", "physics", "robotics", "audio", "rendering", "interaction", "multiplayer", "vr_ar"];
1578
- type RuleForgeDomain = (typeof RULEFORGE_DOMAINS)[number];
1579
- /**
1580
- * Get quality tier for a given score
1581
- */
1582
- declare function getQualityTier(score: number): QualityTier | 'BelowAcceptable';
1583
- /**
1584
- * Validate that a category string is a valid TrainingCategory
1585
- */
1586
- declare function isValidCategory(category: string): category is TrainingCategory;
1587
- /**
1588
- * Validate that a difficulty string is a valid DifficultyLevel
1589
- */
1590
- declare function isValidDifficulty(difficulty: string): difficulty is DifficultyLevel;
1591
-
1592
- /**
1593
- * Training Data Schema
1594
- *
1595
- * Canonical schema for training examples used across TM and HS.
1596
- * Imports only from local constants.ts (G.GAP.01 prevention).
1597
- *
1598
- * @version 1.0.0
1599
- */
1600
-
1601
- /**
1602
- * A single training example in instruction/input/output format (JSONL-compatible)
1603
- */
1604
- interface TrainingExample {
1605
- instruction: string;
1606
- input: string;
1607
- output: string;
1608
- metadata: TrainingExampleMetadata;
1609
- }
1610
- /**
1611
- * Metadata for a training example
1612
- */
1613
- interface TrainingExampleMetadata {
1614
- category: TrainingCategory;
1615
- difficulty: DifficultyLevel;
1616
- traits: string[];
1617
- keywords: string[];
1618
- version: string;
1619
- behavior_template?: string;
1620
- quality_score?: number;
1621
- }
1622
- /**
1623
- * Quality scoring rubric for training examples (TM-compatible format)
1624
- */
1625
- interface TrainingQualityScore {
1626
- helpfulness: number;
1627
- correctness: number;
1628
- coherence: number;
1629
- complexity: number;
1630
- verbosity: number;
1631
- overall: number;
1632
- }
1633
- /**
1634
- * Validation result for a training example
1635
- */
1636
- interface TrainingValidationResult {
1637
- valid: boolean;
1638
- errors: TrainingValidationError[];
1639
- warnings: string[];
1640
- }
1641
- interface TrainingValidationError {
1642
- field: string;
1643
- message: string;
1644
- severity: 'error' | 'warning' | 'info';
1645
- }
1646
- /**
1647
- * Compression metrics for a training dataset
1648
- */
1649
- interface CompressionResult {
1650
- passed: boolean;
1651
- ratio: number;
1652
- total_examples: number;
1653
- unique_patterns: number;
1654
- quality_score?: number;
1655
- issue?: string;
1656
- recommendation?: string;
1657
- }
1658
- /**
1659
- * Generator metrics for auditing training data quality
1660
- */
1661
- interface GeneratorMetrics {
1662
- file_size_bytes: number;
1663
- total_examples: number;
1664
- unique_patterns: number;
1665
- compression_ratio: number;
1666
- duplication_rate: number;
1667
- avg_quality_score: number;
1668
- generation_time_ms: number;
1669
- }
1670
- /**
1671
- * Validate a training example against the schema
1672
- */
1673
- declare function validateTrainingExample(example: unknown): TrainingValidationResult;
1674
-
1675
- /**
1676
- * Trait Mappings
1677
- *
1678
- * Maps TrainingMonkey trait names to canonical @holoscript/core trait IDs.
1679
- * Provides validation utilities to check trait coverage.
1680
- *
1681
- * @version 1.0.0
1682
- */
1683
-
1684
- /**
1685
- * Training metadata that extends a trait definition
1686
- */
1687
- interface TrainingMetadata {
1688
- difficulty: 'beginner' | 'intermediate' | 'advanced' | 'production';
1689
- categories: TrainingCategory[];
1690
- exampleCount: number;
1691
- qualityScore: number;
1692
- }
1693
- /**
1694
- * Mapping entry from TM trait to HS canonical trait
1695
- */
1696
- interface TraitMapping {
1697
- /** TrainingMonkey trait name */
1698
- tmName: string;
1699
- /** Canonical HoloScript trait name (null if unmapped) */
1700
- hsName: string | null;
1701
- /** Status of the mapping */
1702
- status: 'matched' | 'unmatched' | 'deprecated' | 'promoted';
1703
- /** Training metadata (populated from TM datasets) */
1704
- training?: TrainingMetadata;
1705
- }
1706
- /**
1707
- * Result of a trait validation run
1708
- */
1709
- interface TraitValidationReport {
1710
- matched: number;
1711
- unmatched: number;
1712
- deprecated: number;
1713
- total: number;
1714
- details: TraitMapping[];
1715
- }
1716
- /**
1717
- * Known TM traits from holoscript-trait-registry.ts (46 = 41 traits + 5 physics patterns)
1718
- * These are the traits explicitly registered in TrainingMonkey.
1719
- */
1720
- declare const TM_REGISTERED_TRAITS: readonly ["llm_agent", "behavior_tree", "goal_oriented", "neural_link", "neural_forge", "spatial_awareness", "shared_world", "eye_tracked", "hand_tracking", "vision", "spatial_persona", "shareplay", "object_tracking", "scene_reconstruction", "realitykit_mesh", "room_mesh", "volumetric_window", "spatial_navigation", "stable_diffusion", "controlnet", "ai_texture_gen", "diffusion_realtime", "ai_inpainting", "ai_upscaling", "networked", "render_network", "openxr_hal", "hitl", "zora_coins", "neural_upscaling", "grabbable", "throwable", "pointable", "drawable", "attachable", "socket", "billboard", "ui_panel", "hud", "glowing", "physics", "persistent", "tool", "conversion_tracking", "impact_physics", "fragment_conversion", "damage_falloff", "cross_system_integration"];
1721
- /**
1722
- * Validate a trait name against a set of valid traits.
1723
- * Returns the canonical name if found, null otherwise.
1724
- */
1725
- declare function validateTraitName(traitName: string, validTraits: ReadonlySet<string> | readonly string[]): string | null;
1726
- /**
1727
- * Generate a validation report comparing TM traits against HS registry.
1728
- * @param tmTraits - Trait names from TrainingMonkey
1729
- * @param hsTraits - Valid trait names from HoloScript core
1730
- * @param deprecatedTraits - Set of deprecated trait names
1731
- */
1732
- declare function generateValidationReport(tmTraits: readonly string[], hsTraits: ReadonlySet<string> | readonly string[], deprecatedTraits?: ReadonlySet<string>): TraitValidationReport;
1733
-
1734
- export { type AlpacaEntry, DEFAULT_GENERATOR_THRESHOLDS, DEFAULT_INTEGRATION_CONFIG, DEFAULT_LR_SCHEDULER_CONFIG, DEFAULT_SCORING_CONFIG, DEFAULT_SOFTDEDUP_CONFIG, DEFAULT_TRAINING_PIPELINE_CONFIG, DIFFICULTY_LEVELS, type DatasetSplit, type DifficultyLevel, type EnergyEfficiencyMetrics, GRPO_LR_SCHEDULER_CONFIG, type GeneratorMetrics, type IntegrationResult, type LRScheduleStats, LRScheduler, type LRSchedulerConfig, type LRSchedulerSnapshot, type LayerActivityInput, type NgramStats, type PipelineSettings, QUALITY_THRESHOLDS, type QualityDetail, type QualityScore, type QualityScoringConfig, QualityScoringPipeline, type QualityTier, RULEFORGE_DOMAINS, type RuleForgeDomain, type SNNLayerMetrics, type SceneObject, SoftDedup, type SoftDedupConfig, type SoftDedupResult, type SoftDedupStats, SparsityMonitor, type SparsityMonitorConfig, type SparsityMonitorStats, type SparsityQualityHistoryEntry, type SparsitySnapshot, type SparsityViolation, type SpatialDifficulty, type SpatialGeneratorConfig, type SpatialGeneratorStats, type SpatialRelationship, type SpatialRelationshipParams, type SpatialRelationshipType, type SpatialScene, SpatialTrainingDataGenerator, type SpatialTrainingExample, type SpatialTrainingJSONLEntry, type SplitStats, TM_REGISTERED_TRAITS, TRAINING_CATEGORIES, type TrainingCategory, type CompressionResult as TrainingCompressionResult, type TrainingExample, type TrainingExampleMetadata, type TrainingHyperparameters, type TrainingMetadata, type TrainingMonkeyConfig, TrainingMonkeyIntegration, type TrainingMonkeyIntegrationConfig, type TrainingPipelineConfig, type TrainingQualityScore, type TrainingValidationError, type TrainingValidationResult, type TraitMapping, type TraitValidationReport, type WeightedAlpacaEntry, buildTrainingPipelineConfig, computeTotalSteps, createGRPOScheduler, createSFTScheduler, createSoftDedup, createSparsityMonitor, createSpatialTrainingDataGenerator, createTrainingMonkeyIntegration, generateValidationReport, getQualityTier, isValidCategory, isValidDifficulty, validateTrainingExample, validateTraitName };