@holoscript/framework 6.0.3 → 6.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -2
- package/ROADMAP.md +68 -66
- package/dist/{InvisibleWallet-BB6tFvRA.d.cts → InvisibleWallet-EFiuaLn3.d.cts} +1 -1
- package/dist/{OrchestratorAgent-BvWgf9uw.d.cts → OrchestratorAgent-CrLDGNL6.d.cts} +1 -1
- package/dist/agents/index.cjs +11 -10
- package/dist/agents/index.d.cts +4 -16
- package/dist/ai/index.cjs +2 -2
- package/dist/behavior.cjs +10 -0
- package/dist/economy/index.cjs +4 -4
- package/dist/economy/index.d.cts +2 -2
- package/dist/index.cjs +33 -11
- package/dist/index.d.cts +3 -3
- package/dist/swarm/index.cjs +3 -0
- package/package.json +14 -9
- package/src/__tests__/bounty-marketplace.test.ts +53 -21
- package/src/__tests__/delegation.test.ts +1 -4
- package/src/__tests__/done-log-audit.test.ts +38 -46
- package/src/__tests__/framework.test.ts +172 -53
- package/src/__tests__/goal-synthesizer.test.ts +9 -6
- package/src/__tests__/presence.test.ts +1 -1
- package/src/__tests__/protocol-agent.test.ts +12 -11
- package/src/__tests__/revenue-splitter.test.ts +22 -15
- package/src/__tests__/scenario-driven-todo.test.ts +55 -35
- package/src/__tests__/self-improve.test.ts +28 -9
- package/src/__tests__/service-lifecycle.test.ts +9 -3
- package/src/__tests__/skill-router.test.ts +3 -3
- package/src/agents/CulturalMemory.ts +6 -6
- package/src/agents/DelegationTraceHooks.ts +560 -0
- package/src/agents/FederatedRegistryAdapter.ts +1 -1
- package/src/agents/NormEngine.ts +3 -8
- package/src/agents/OrchestratorAgent.ts +1 -1
- package/src/agents/TaskDelegationService.ts +5 -9
- package/src/agents/__tests__/AgentWalletRegistry.test.ts +5 -4
- package/src/agents/__tests__/CrossRealityHandoff.test.ts +9 -3
- package/src/agents/__tests__/DelegationTraceHooks.test.ts +390 -0
- package/src/agents/__tests__/TaskDelegationService.test.ts +4 -2
- package/src/agents/spatial-comms/Layer1RealTime.ts +36 -19
- package/src/agents/spatial-comms/Layer2A2A.ts +1 -3
- package/src/agents/spatial-comms/Layer3MCP.ts +13 -4
- package/src/agents/spatial-comms/ProtocolTypes.ts +5 -2
- package/src/agents/spatial-comms/examples/multi-agent-world-creation.ts +2 -2
- package/src/ai/HoloScriptGenerator.ts +2 -2
- package/src/ai/__tests__/PerceptionSystem.prod.test.ts +1 -1
- package/src/ai/__tests__/PerceptionSystem.test.ts +14 -14
- package/src/ai/__tests__/SteeringBehaviors.prod.test.ts +1 -1
- package/src/ai/index.ts +5 -1
- package/src/board/audit.ts +17 -6
- package/src/board/board-ops.ts +45 -15
- package/src/board/board-types.ts +94 -20
- package/src/delegation.ts +5 -3
- package/src/distributed-claimer.ts +13 -2
- package/src/economy/BountyManager.ts +40 -18
- package/src/economy/KnowledgeMarketplace.ts +27 -8
- package/src/economy/PaymentWebhookService.ts +0 -1
- package/src/economy/RevenueSplitter.ts +2 -4
- package/src/economy/UnifiedBudgetOptimizer.ts +8 -9
- package/src/economy/_core-stubs.ts +1 -1
- package/src/economy/x402-facilitator.ts +17 -8
- package/src/index.ts +16 -12
- package/src/knowledge/__tests__/knowledge-consolidator.test.ts +138 -89
- package/src/knowledge/__tests__/knowledge-store-vector.test.ts +59 -16
- package/src/knowledge/brain.ts +7 -7
- package/src/knowledge/consolidation.ts +16 -16
- package/src/knowledge/knowledge-consolidator.ts +60 -30
- package/src/knowledge/knowledge-store.ts +83 -45
- package/src/learning/ProceduralCompiler.ts +6 -1
- package/src/learning/learning/MemoryConsolidator.ts +102 -0
- package/src/learning/learning/MemoryScorer.ts +69 -0
- package/src/learning/learning/ProceduralCompiler.ts +45 -0
- package/src/learning/learning/SemanticClusterer.ts +66 -0
- package/src/llm/llm-adapter.ts +24 -10
- package/src/mesh/index.ts +37 -17
- package/src/protocol/goal-synthesizer.ts +24 -34
- package/src/protocol/implementations.ts +91 -22
- package/src/protocol/micro-phase-decomposer.ts +25 -17
- package/src/protocol/micro-step-decomposer.test.ts +104 -39
- package/src/protocol-agent.test.ts +17 -7
- package/src/protocol-agent.ts +45 -42
- package/src/self-improve/absorb-scanner.ts +9 -6
- package/src/self-improve/evolution-engine.ts +36 -18
- package/src/self-improve/framework-absorber.ts +21 -16
- package/src/self-improve/index.ts +2 -10
- package/src/self-improve/prompt-optimizer.ts +31 -19
- package/src/self-improve/test-generator.ts +16 -12
- package/src/skill-router.ts +7 -6
- package/src/swarm/messaging/GossipProtocol.ts +1 -1
- package/src/swarm/messaging/__tests__/BroadcastChannel.prod.test.ts +31 -9
- package/src/swarm/messaging/__tests__/GossipProtocol.prod.test.ts +21 -7
- package/src/swarm/messaging/__tests__/SwarmEventBus.prod.test.ts +24 -8
- package/src/swarm/messaging/__tests__/SwarmEventBus.test.ts +6 -2
- package/src/team.ts +277 -122
- package/src/training/scripts/generate-spatial-dataset.ts +1 -1
- package/src/training/training/LRScheduler.ts +377 -0
- package/src/training/training/QualityScoringPipeline.ts +139 -0
- package/src/training/training/SoftDedup.ts +461 -0
- package/src/training/training/SparsityMonitor.ts +685 -0
- package/src/training/training/SparsityMonitorTypes.ts +209 -0
- package/src/training/training/SpatialTrainingDataGenerator.ts +1526 -0
- package/src/training/training/SpatialTrainingDataTypes.ts +216 -0
- package/src/training/training/TrainingPipelineConfig.ts +215 -0
- package/src/training/training/__tests__/CorpusValidation.test.ts +87 -0
- package/src/training/training/__tests__/LRScheduler.test.ts +592 -0
- package/src/training/training/__tests__/SoftDedup.test.ts +415 -0
- package/src/training/training/__tests__/SparsityMonitor.test.ts +1623 -0
- package/src/training/training/__tests__/SpatialCorpusValidation.test.ts +72 -0
- package/src/training/training/__tests__/SpatialTrainingDataGenerator.test.ts +1244 -0
- package/src/training/training/__tests__/TrainingMonkeyIntegration.test.ts +897 -0
- package/src/training/training/__tests__/TrainingPipelineConfig.test.ts +202 -0
- package/src/training/training/__tests__/schema.test.ts +72 -0
- package/src/training/training/__tests__/training-constants.test.ts +106 -0
- package/src/training/training/__tests__/trait-mappings.test.ts +81 -0
- package/src/training/training/constants.ts +94 -0
- package/src/training/training/index.ts +17 -0
- package/src/training/training/schema.ts +147 -0
- package/src/training/training/scripts/generate-novel-use-cases-dataset.ts +272 -0
- package/src/training/training/scripts/generate-spatial-dataset.ts +521 -0
- package/src/training/training/trainingmonkey/TrainingMonkeyIntegration.ts +477 -0
- package/src/training/training/trainingmonkey/TrainingMonkeyTypes.ts +230 -0
- package/src/training/training/trainingmonkey/index.ts +26 -0
- package/src/training/training/trait-mappings.ts +157 -0
- package/src/types.ts +2 -7
- package/ALL-test-results.json +0 -1
- package/LICENSE +0 -21
- package/dist/AgentManifest-CB4xM-Ma.d.ts +0 -704
- package/dist/BehaviorTree-BrBFECv5.d.ts +0 -103
- package/dist/InvisibleWallet-rtRrBOA8.d.ts +0 -1732
- package/dist/OrchestratorAgent-Q_CbVTmO.d.ts +0 -798
- package/dist/agents/index.d.ts +0 -1788
- package/dist/agents/index.js +0 -4695
- package/dist/ai/index.d.ts +0 -1753
- package/dist/ai/index.js +0 -5244
- package/dist/behavior.d.ts +0 -130
- package/dist/behavior.js +0 -407
- package/dist/economy/index.d.ts +0 -747
- package/dist/economy/index.js +0 -3617
- package/dist/implementations-D9T3un9D.d.ts +0 -236
- package/dist/index.d.ts +0 -1729
- package/dist/index.js +0 -24277
- package/dist/learning/index.d.ts +0 -104
- package/dist/learning/index.js +0 -189
- package/dist/negotiation/index.d.ts +0 -610
- package/dist/negotiation/index.js +0 -931
- package/dist/skills/index.d.ts +0 -289
- package/dist/skills/index.js +0 -1079
- package/dist/swarm/index.d.ts +0 -2433
- package/dist/swarm/index.js +0 -5221
- package/dist/training/index.d.ts +0 -1734
- package/dist/training/index.js +0 -2687
- package/extract-failures.js +0 -10
- package/src/training/training/data/novel-use-cases.jsonl +0 -153
- package/src/training/training/data/spatial-reasoning-10k.jsonl +0 -9354
- package/src/types/core-stubs.d.ts +0 -113
- package/test-output.txt +0 -0
- package/test-result.json +0 -1
- package/tsc-errors.txt +0 -4
- package/tsc_output.txt +0 -0
- package/typescript-errors-2.txt +0 -0
- package/typescript-errors.txt +0 -22
- package/vitest-log-utf8.txt +0 -268
- package/vitest-log.txt +0 -0
|
@@ -0,0 +1,897 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests: TrainingMonkey Integration Module
|
|
3
|
+
*
|
|
4
|
+
* Full test coverage for the spatial reasoning -> TrainingMonkey pipeline:
|
|
5
|
+
* 1. JSONL reading and parsing
|
|
6
|
+
* 2. Alpaca format conversion (instruction/input/output)
|
|
7
|
+
* 3. SoftDedup (W.008) n-gram reweighting
|
|
8
|
+
* 4. Train/validation splits (90/10, stratified)
|
|
9
|
+
* 5. TrainingMonkey-compatible config generation (W.006 hyperparameters)
|
|
10
|
+
* 6. End-to-end pipeline (process)
|
|
11
|
+
* 7. Serialization to JSONL output
|
|
12
|
+
* 8. Edge cases and error handling
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { describe, it, expect } from 'vitest';
|
|
16
|
+
import {
|
|
17
|
+
TrainingMonkeyIntegration,
|
|
18
|
+
createTrainingMonkeyIntegration,
|
|
19
|
+
DEFAULT_INTEGRATION_CONFIG,
|
|
20
|
+
} from '../trainingmonkey';
|
|
21
|
+
import type {
|
|
22
|
+
AlpacaEntry,
|
|
23
|
+
WeightedAlpacaEntry,
|
|
24
|
+
DatasetSplit,
|
|
25
|
+
TrainingMonkeyConfig,
|
|
26
|
+
IntegrationResult,
|
|
27
|
+
} from '../trainingmonkey';
|
|
28
|
+
|
|
29
|
+
// =============================================================================
|
|
30
|
+
// TEST FIXTURES
|
|
31
|
+
// =============================================================================
|
|
32
|
+
|
|
33
|
+
/**
 * Serialize one minimal training record (SpatialTrainingJSONLEntry shape) as a
 * single JSONL line.
 *
 * The record has `instruction`, `response`, and `metadata` keys. The
 * instruction is a yes/no question about `id`; when `includeScene` is set, a
 * fenced HoloScript scene mentioning `id` is appended after a blank line.
 *
 * @param id - Identifier interpolated into the question, answer, scene, and metadata.
 * @param relType - Relationship type named in the question and stored in metadata/tags.
 * @param difficulty - Difficulty label stored in metadata/tags.
 * @param isPositive - Picks the "Yes … satisfies" or "No … violates" answer and the polarity tag.
 * @param includeScene - When falsy, the instruction holds only the question.
 * @returns The JSON-encoded record, without a trailing newline.
 */
function makeEntry(
  id: string,
  relType: string = 'spatial_adjacent',
  difficulty: string = 'basic',
  isPositive: boolean = true,
  includeScene: boolean = true
): string {
  // Start from the bare question and append the scene only on request.
  let instruction = `Does the ${relType} constraint pass for "${id}"?`;
  if (includeScene) {
    instruction += `\n\nHoloScript Scene:\n\`\`\`holoscript\ncomposition "SpatialScene" {\n object "${id}" {\n geometry: "cube"\n position: [1.0, 2.0, 3.0]\n }\n}\n\`\`\``;
  }

  let response: string;
  let polarityTag: string;
  if (isPositive) {
    response = `Yes, "${id}" satisfies the constraint.`;
    polarityTag = 'positive';
  } else {
    response = `No, "${id}" violates the constraint.`;
    polarityTag = 'negative';
  }

  // Key order is kept stable so the serialized line is byte-for-byte reproducible.
  const record = {
    instruction,
    response,
    metadata: {
      id,
      relationship_type: relType,
      is_positive: isPositive,
      difficulty,
      tags: [relType, difficulty, polarityTag],
    },
  };
  return JSON.stringify(record);
}
|
|
64
|
+
|
|
65
|
+
/**
 * Combine pre-serialized JSON records into one JSONL document.
 *
 * @param entries - One JSON string per record.
 * @returns The records separated by single newlines, with no trailing newline
 *   (an empty list yields an empty string).
 */
function buildJsonl(entries: string[]): string {
  const lineBreak = '\n';
  return entries.join(lineBreak);
}
|
|
71
|
+
|
|
72
|
+
/**
 * Generate a small JSONL fixture covering every relationship type and
 * difficulty combination (3 x 3 groups).
 *
 * Entries are emitted relationship-major, then difficulty, then slot, and are
 * named `obj-1`, `obj-2`, ... in emission order. Within each group, even slots
 * are positive examples and odd slots are negative.
 *
 * @param perGroup - Number of entries to emit per (relationship, difficulty) group.
 * @returns The entries joined into a JSONL string via `buildJsonl`.
 */
function createBalancedDataset(perGroup: number = 3): string {
  const lines: string[] = [];

  for (const relationship of ['spatial_adjacent', 'spatial_contains', 'spatial_reachable']) {
    for (const level of ['basic', 'intermediate', 'advanced']) {
      for (let slot = 0; slot < perGroup; slot += 1) {
        // The running line count doubles as the 1-based entry id.
        const entryId = `obj-${lines.length + 1}`;
        const isPositive = slot % 2 === 0; // alternate positive/negative
        lines.push(makeEntry(entryId, relationship, level, isPositive));
      }
    }
  }

  return buildJsonl(lines);
}
|
|
99
|
+
|
|
100
|
+
// =============================================================================
|
|
101
|
+
// TESTS
|
|
102
|
+
// =============================================================================
|
|
103
|
+
|
|
104
|
+
describe('TrainingMonkeyIntegration', () => {
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
// Construction & Configuration
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
describe('constructor and configuration', () => {
|
|
110
|
+
it('uses default config when no overrides provided', () => {
|
|
111
|
+
const integration = new TrainingMonkeyIntegration();
|
|
112
|
+
const config = integration.getConfig();
|
|
113
|
+
expect(config.trainRatio).toBe(0.9);
|
|
114
|
+
expect(config.seed).toBe(42);
|
|
115
|
+
expect(config.enableSoftDedup).toBe(true);
|
|
116
|
+
expect(config.modelName).toBe('qwen7b');
|
|
117
|
+
expect(config.stratify).toBe(true);
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
it('merges partial config with defaults', () => {
|
|
121
|
+
const integration = new TrainingMonkeyIntegration({
|
|
122
|
+
trainRatio: 0.8,
|
|
123
|
+
seed: 123,
|
|
124
|
+
});
|
|
125
|
+
const config = integration.getConfig();
|
|
126
|
+
expect(config.trainRatio).toBe(0.8);
|
|
127
|
+
expect(config.seed).toBe(123);
|
|
128
|
+
expect(config.enableSoftDedup).toBe(true); // default preserved
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
it('factory function creates instance with config', () => {
|
|
132
|
+
const integration = createTrainingMonkeyIntegration({
|
|
133
|
+
modelName: 'phi35',
|
|
134
|
+
});
|
|
135
|
+
expect(integration.getConfig().modelName).toBe('phi35');
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
it('exports DEFAULT_INTEGRATION_CONFIG', () => {
|
|
139
|
+
expect(DEFAULT_INTEGRATION_CONFIG).toBeDefined();
|
|
140
|
+
expect(DEFAULT_INTEGRATION_CONFIG.trainRatio).toBe(0.9);
|
|
141
|
+
expect(DEFAULT_INTEGRATION_CONFIG.seed).toBe(42);
|
|
142
|
+
});
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
// ---------------------------------------------------------------------------
|
|
146
|
+
// JSONL Reading
|
|
147
|
+
// ---------------------------------------------------------------------------
|
|
148
|
+
|
|
149
|
+
describe('readJsonl', () => {
|
|
150
|
+
it('parses valid JSONL with multiple entries', () => {
|
|
151
|
+
const integration = new TrainingMonkeyIntegration();
|
|
152
|
+
const jsonl = buildJsonl([makeEntry('obj-1'), makeEntry('obj-2'), makeEntry('obj-3')]);
|
|
153
|
+
|
|
154
|
+
const entries = integration.readJsonl(jsonl);
|
|
155
|
+
expect(entries).toHaveLength(3);
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
it('preserves instruction and response fields', () => {
|
|
159
|
+
const integration = new TrainingMonkeyIntegration();
|
|
160
|
+
const jsonl = makeEntry('test-obj', 'spatial_adjacent', 'basic', true);
|
|
161
|
+
|
|
162
|
+
const entries = integration.readJsonl(jsonl);
|
|
163
|
+
expect(entries[0].instruction).toContain('test-obj');
|
|
164
|
+
expect(entries[0].response).toContain('test-obj');
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
it('preserves metadata fields', () => {
|
|
168
|
+
const integration = new TrainingMonkeyIntegration();
|
|
169
|
+
const jsonl = makeEntry('meta-test', 'spatial_contains', 'advanced', false);
|
|
170
|
+
|
|
171
|
+
const entries = integration.readJsonl(jsonl);
|
|
172
|
+
expect(entries[0].metadata.id).toBe('meta-test');
|
|
173
|
+
expect(entries[0].metadata.relationship_type).toBe('spatial_contains');
|
|
174
|
+
expect(entries[0].metadata.difficulty).toBe('advanced');
|
|
175
|
+
expect(entries[0].metadata.is_positive).toBe(false);
|
|
176
|
+
expect(entries[0].metadata.tags).toContain('spatial_contains');
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
it('skips empty lines in JSONL', () => {
|
|
180
|
+
const integration = new TrainingMonkeyIntegration();
|
|
181
|
+
const jsonl = `${makeEntry('obj-1')}\n\n${makeEntry('obj-2')}\n\n`;
|
|
182
|
+
|
|
183
|
+
const entries = integration.readJsonl(jsonl);
|
|
184
|
+
expect(entries).toHaveLength(2);
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
it('throws on invalid JSON lines', () => {
|
|
188
|
+
const integration = new TrainingMonkeyIntegration();
|
|
189
|
+
const jsonl = `${makeEntry('obj-1')}\n{invalid json}\n${makeEntry('obj-3')}`;
|
|
190
|
+
|
|
191
|
+
expect(() => integration.readJsonl(jsonl)).toThrow('Failed to parse JSONL line 2');
|
|
192
|
+
});
|
|
193
|
+
|
|
194
|
+
it('throws on missing instruction field', () => {
|
|
195
|
+
const integration = new TrainingMonkeyIntegration();
|
|
196
|
+
const jsonl = JSON.stringify({ response: 'hello', metadata: {} });
|
|
197
|
+
|
|
198
|
+
expect(() => integration.readJsonl(jsonl)).toThrow('Missing required fields');
|
|
199
|
+
});
|
|
200
|
+
|
|
201
|
+
it('throws on missing response field', () => {
|
|
202
|
+
const integration = new TrainingMonkeyIntegration();
|
|
203
|
+
const jsonl = JSON.stringify({ instruction: 'hello', metadata: {} });
|
|
204
|
+
|
|
205
|
+
expect(() => integration.readJsonl(jsonl)).toThrow('Missing required fields');
|
|
206
|
+
});
|
|
207
|
+
|
|
208
|
+
it('handles single-line JSONL', () => {
|
|
209
|
+
const integration = new TrainingMonkeyIntegration();
|
|
210
|
+
const jsonl = makeEntry('single');
|
|
211
|
+
|
|
212
|
+
const entries = integration.readJsonl(jsonl);
|
|
213
|
+
expect(entries).toHaveLength(1);
|
|
214
|
+
});
|
|
215
|
+
});
|
|
216
|
+
|
|
217
|
+
// ---------------------------------------------------------------------------
|
|
218
|
+
// Alpaca Format Conversion
|
|
219
|
+
// ---------------------------------------------------------------------------
|
|
220
|
+
|
|
221
|
+
describe('convertToAlpaca', () => {
|
|
222
|
+
it('produces entries with instruction, input, and output fields', () => {
|
|
223
|
+
const integration = new TrainingMonkeyIntegration();
|
|
224
|
+
const jsonl = makeEntry('conv-test');
|
|
225
|
+
const entries = integration.readJsonl(jsonl);
|
|
226
|
+
|
|
227
|
+
const alpaca = integration.convertToAlpaca(entries);
|
|
228
|
+
expect(alpaca).toHaveLength(1);
|
|
229
|
+
expect(alpaca[0]).toHaveProperty('instruction');
|
|
230
|
+
expect(alpaca[0]).toHaveProperty('input');
|
|
231
|
+
expect(alpaca[0]).toHaveProperty('output');
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
it('maps response to output field', () => {
|
|
235
|
+
const integration = new TrainingMonkeyIntegration();
|
|
236
|
+
const jsonl = makeEntry('output-test', 'spatial_adjacent', 'basic', true);
|
|
237
|
+
const entries = integration.readJsonl(jsonl);
|
|
238
|
+
|
|
239
|
+
const alpaca = integration.convertToAlpaca(entries);
|
|
240
|
+
expect(alpaca[0].output).toContain('output-test');
|
|
241
|
+
expect(alpaca[0].output).toContain('satisfies the constraint');
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
it('extracts HoloScript scene into input field', () => {
|
|
245
|
+
const integration = new TrainingMonkeyIntegration();
|
|
246
|
+
const jsonl = makeEntry('scene-test', 'spatial_adjacent', 'basic', true, true);
|
|
247
|
+
const entries = integration.readJsonl(jsonl);
|
|
248
|
+
|
|
249
|
+
const alpaca = integration.convertToAlpaca(entries);
|
|
250
|
+
expect(alpaca[0].input).toContain('HoloScript Scene:');
|
|
251
|
+
expect(alpaca[0].input).toContain('composition "SpatialScene"');
|
|
252
|
+
});
|
|
253
|
+
|
|
254
|
+
it('separates question from scene in instruction', () => {
|
|
255
|
+
const integration = new TrainingMonkeyIntegration();
|
|
256
|
+
const jsonl = makeEntry('sep-test');
|
|
257
|
+
const entries = integration.readJsonl(jsonl);
|
|
258
|
+
|
|
259
|
+
const alpaca = integration.convertToAlpaca(entries);
|
|
260
|
+
// The instruction should contain the question part WITHOUT the scene
|
|
261
|
+
expect(alpaca[0].instruction).toContain('sep-test');
|
|
262
|
+
expect(alpaca[0].instruction).not.toContain('composition "SpatialScene"');
|
|
263
|
+
});
|
|
264
|
+
|
|
265
|
+
it('uses empty input when no scene is present', () => {
|
|
266
|
+
const integration = new TrainingMonkeyIntegration();
|
|
267
|
+
const jsonl = makeEntry('no-scene', 'spatial_adjacent', 'basic', true, false);
|
|
268
|
+
const entries = integration.readJsonl(jsonl);
|
|
269
|
+
|
|
270
|
+
const alpaca = integration.convertToAlpaca(entries);
|
|
271
|
+
expect(alpaca[0].input).toBe('');
|
|
272
|
+
});
|
|
273
|
+
|
|
274
|
+
it('converts all entries in batch', () => {
|
|
275
|
+
const integration = new TrainingMonkeyIntegration();
|
|
276
|
+
const jsonl = buildJsonl([
|
|
277
|
+
makeEntry('batch-1'),
|
|
278
|
+
makeEntry('batch-2'),
|
|
279
|
+
makeEntry('batch-3'),
|
|
280
|
+
makeEntry('batch-4'),
|
|
281
|
+
makeEntry('batch-5'),
|
|
282
|
+
]);
|
|
283
|
+
const entries = integration.readJsonl(jsonl);
|
|
284
|
+
|
|
285
|
+
const alpaca = integration.convertToAlpaca(entries);
|
|
286
|
+
expect(alpaca).toHaveLength(5);
|
|
287
|
+
for (const entry of alpaca) {
|
|
288
|
+
expect(entry.instruction).toBeTruthy();
|
|
289
|
+
expect(entry.output).toBeTruthy();
|
|
290
|
+
}
|
|
291
|
+
});
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
// ---------------------------------------------------------------------------
|
|
295
|
+
// SoftDedup (W.008) N-gram Reweighting
|
|
296
|
+
// ---------------------------------------------------------------------------
|
|
297
|
+
|
|
298
|
+
describe('applySoftDedup (W.008)', () => {
|
|
299
|
+
it('assigns sampling weights to all entries', () => {
|
|
300
|
+
const integration = new TrainingMonkeyIntegration();
|
|
301
|
+
const jsonl = createBalancedDataset(3);
|
|
302
|
+
const entries = integration.readJsonl(jsonl);
|
|
303
|
+
const alpaca = integration.convertToAlpaca(entries);
|
|
304
|
+
|
|
305
|
+
const weighted = integration.applySoftDedup(alpaca, entries);
|
|
306
|
+
expect(weighted).toHaveLength(entries.length);
|
|
307
|
+
|
|
308
|
+
for (const entry of weighted) {
|
|
309
|
+
expect(entry.sampling_weight).toBeGreaterThanOrEqual(0.1);
|
|
310
|
+
expect(entry.sampling_weight).toBeLessThanOrEqual(1.0);
|
|
311
|
+
}
|
|
312
|
+
});
|
|
313
|
+
|
|
314
|
+
it('preserves original metadata in weighted entries', () => {
|
|
315
|
+
const integration = new TrainingMonkeyIntegration();
|
|
316
|
+
const jsonl = makeEntry('meta-keep', 'spatial_reachable', 'advanced', false);
|
|
317
|
+
const entries = integration.readJsonl(jsonl);
|
|
318
|
+
const alpaca = integration.convertToAlpaca(entries);
|
|
319
|
+
|
|
320
|
+
const weighted = integration.applySoftDedup(alpaca, entries);
|
|
321
|
+
expect(weighted[0].metadata).toBeDefined();
|
|
322
|
+
expect(weighted[0].metadata!.id).toBe('meta-keep');
|
|
323
|
+
expect(weighted[0].metadata!.relationship_type).toBe('spatial_reachable');
|
|
324
|
+
});
|
|
325
|
+
|
|
326
|
+
it('preserves Alpaca fields in weighted entries', () => {
|
|
327
|
+
const integration = new TrainingMonkeyIntegration();
|
|
328
|
+
const jsonl = makeEntry('fields-test');
|
|
329
|
+
const entries = integration.readJsonl(jsonl);
|
|
330
|
+
const alpaca = integration.convertToAlpaca(entries);
|
|
331
|
+
|
|
332
|
+
const weighted = integration.applySoftDedup(alpaca, entries);
|
|
333
|
+
expect(weighted[0].instruction).toBe(alpaca[0].instruction);
|
|
334
|
+
expect(weighted[0].input).toBe(alpaca[0].input);
|
|
335
|
+
expect(weighted[0].output).toBe(alpaca[0].output);
|
|
336
|
+
});
|
|
337
|
+
|
|
338
|
+
it('downweights near-duplicate template content', () => {
|
|
339
|
+
const integration = new TrainingMonkeyIntegration();
|
|
340
|
+
|
|
341
|
+
// Create entries with very similar template text
|
|
342
|
+
const similarEntries: string[] = [];
|
|
343
|
+
for (let i = 0; i < 20; i++) {
|
|
344
|
+
similarEntries.push(makeEntry(`similar-${i}`, 'spatial_adjacent', 'basic', true));
|
|
345
|
+
}
|
|
346
|
+
// Add a unique entry
|
|
347
|
+
similarEntries.push(
|
|
348
|
+
JSON.stringify({
|
|
349
|
+
instruction:
|
|
350
|
+
'This is a completely unique and different instruction that has nothing in common with the template above. It discusses quantum mechanics and the behavior of subatomic particles.',
|
|
351
|
+
response:
|
|
352
|
+
'The answer involves Heisenberg uncertainty principle and wave-particle duality in quantum field theory.',
|
|
353
|
+
metadata: {
|
|
354
|
+
id: 'unique-1',
|
|
355
|
+
relationship_type: 'spatial_adjacent',
|
|
356
|
+
is_positive: true,
|
|
357
|
+
difficulty: 'advanced',
|
|
358
|
+
tags: ['unique'],
|
|
359
|
+
},
|
|
360
|
+
})
|
|
361
|
+
);
|
|
362
|
+
|
|
363
|
+
const jsonl = buildJsonl(similarEntries);
|
|
364
|
+
const entries = integration.readJsonl(jsonl);
|
|
365
|
+
const alpaca = integration.convertToAlpaca(entries);
|
|
366
|
+
const weighted = integration.applySoftDedup(alpaca, entries);
|
|
367
|
+
|
|
368
|
+
// The unique entry should have a higher weight than the template entries
|
|
369
|
+
const uniqueWeight = weighted[weighted.length - 1].sampling_weight;
|
|
370
|
+
const templateWeights = weighted.slice(0, -1).map((w) => w.sampling_weight);
|
|
371
|
+
const avgTemplateWeight = templateWeights.reduce((a, b) => a + b, 0) / templateWeights.length;
|
|
372
|
+
|
|
373
|
+
expect(uniqueWeight).toBeGreaterThan(avgTemplateWeight);
|
|
374
|
+
});
|
|
375
|
+
|
|
376
|
+
it('assigns weight 1.0 when SoftDedup is disabled', () => {
|
|
377
|
+
const integration = new TrainingMonkeyIntegration({
|
|
378
|
+
enableSoftDedup: false,
|
|
379
|
+
});
|
|
380
|
+
const jsonl = createBalancedDataset(2);
|
|
381
|
+
const result = integration.process(jsonl);
|
|
382
|
+
|
|
383
|
+
for (const entry of result.split.train) {
|
|
384
|
+
expect(entry.sampling_weight).toBe(1.0);
|
|
385
|
+
}
|
|
386
|
+
for (const entry of result.split.validation) {
|
|
387
|
+
expect(entry.sampling_weight).toBe(1.0);
|
|
388
|
+
}
|
|
389
|
+
});
|
|
390
|
+
});
|
|
391
|
+
|
|
392
|
+
// ---------------------------------------------------------------------------
|
|
393
|
+
// Train/Validation Split
|
|
394
|
+
// ---------------------------------------------------------------------------
|
|
395
|
+
|
|
396
|
+
describe('splitDataset', () => {
|
|
397
|
+
it('produces 90/10 split by default', () => {
|
|
398
|
+
const integration = new TrainingMonkeyIntegration();
|
|
399
|
+
const jsonl = createBalancedDataset(10); // 3 relTypes * 3 difficulties * 10 = 90 entries
|
|
400
|
+
const result = integration.process(jsonl);
|
|
401
|
+
|
|
402
|
+
const { stats } = result.split;
|
|
403
|
+
expect(stats.totalExamples).toBe(90);
|
|
404
|
+
// Allow +/- 2% tolerance for rounding
|
|
405
|
+
expect(stats.trainRatio).toBeGreaterThan(0.88);
|
|
406
|
+
expect(stats.trainRatio).toBeLessThan(0.92);
|
|
407
|
+
expect(stats.validationRatio).toBeGreaterThan(0.08);
|
|
408
|
+
expect(stats.validationRatio).toBeLessThan(0.12);
|
|
409
|
+
});
|
|
410
|
+
|
|
411
|
+
it('train + validation = total examples', () => {
|
|
412
|
+
const integration = new TrainingMonkeyIntegration();
|
|
413
|
+
const jsonl = createBalancedDataset(5);
|
|
414
|
+
const result = integration.process(jsonl);
|
|
415
|
+
|
|
416
|
+
const { stats } = result.split;
|
|
417
|
+
expect(stats.trainCount + stats.validationCount).toBe(stats.totalExamples);
|
|
418
|
+
});
|
|
419
|
+
|
|
420
|
+
it('respects custom trainRatio', () => {
|
|
421
|
+
const integration = new TrainingMonkeyIntegration({ trainRatio: 0.8 });
|
|
422
|
+
const jsonl = createBalancedDataset(10);
|
|
423
|
+
const result = integration.process(jsonl);
|
|
424
|
+
|
|
425
|
+
const { stats } = result.split;
|
|
426
|
+
expect(stats.trainRatio).toBeGreaterThan(0.78);
|
|
427
|
+
expect(stats.trainRatio).toBeLessThan(0.82);
|
|
428
|
+
});
|
|
429
|
+
|
|
430
|
+
it('preserves stratification across relationship types', () => {
|
|
431
|
+
const integration = new TrainingMonkeyIntegration({ stratify: true });
|
|
432
|
+
const jsonl = createBalancedDataset(20);
|
|
433
|
+
const result = integration.process(jsonl);
|
|
434
|
+
|
|
435
|
+
// Check that all relationship types appear in both train and validation
|
|
436
|
+
const trainTypes = new Set(
|
|
437
|
+
result.split.train.filter((e) => e.metadata).map((e) => e.metadata!.relationship_type)
|
|
438
|
+
);
|
|
439
|
+
const valTypes = new Set(
|
|
440
|
+
result.split.validation.filter((e) => e.metadata).map((e) => e.metadata!.relationship_type)
|
|
441
|
+
);
|
|
442
|
+
|
|
443
|
+
expect(trainTypes.has('spatial_adjacent')).toBe(true);
|
|
444
|
+
expect(trainTypes.has('spatial_contains')).toBe(true);
|
|
445
|
+
expect(trainTypes.has('spatial_reachable')).toBe(true);
|
|
446
|
+
expect(valTypes.has('spatial_adjacent')).toBe(true);
|
|
447
|
+
expect(valTypes.has('spatial_contains')).toBe(true);
|
|
448
|
+
expect(valTypes.has('spatial_reachable')).toBe(true);
|
|
449
|
+
});
|
|
450
|
+
|
|
451
|
+
it('preserves stratification across difficulty levels', () => {
|
|
452
|
+
const integration = new TrainingMonkeyIntegration({ stratify: true });
|
|
453
|
+
const jsonl = createBalancedDataset(20);
|
|
454
|
+
const result = integration.process(jsonl);
|
|
455
|
+
|
|
456
|
+
const trainDiffs = new Set(
|
|
457
|
+
result.split.train.filter((e) => e.metadata).map((e) => e.metadata!.difficulty)
|
|
458
|
+
);
|
|
459
|
+
const valDiffs = new Set(
|
|
460
|
+
result.split.validation.filter((e) => e.metadata).map((e) => e.metadata!.difficulty)
|
|
461
|
+
);
|
|
462
|
+
|
|
463
|
+
expect(trainDiffs.has('basic')).toBe(true);
|
|
464
|
+
expect(trainDiffs.has('intermediate')).toBe(true);
|
|
465
|
+
expect(trainDiffs.has('advanced')).toBe(true);
|
|
466
|
+
expect(valDiffs.has('basic')).toBe(true);
|
|
467
|
+
expect(valDiffs.has('intermediate')).toBe(true);
|
|
468
|
+
expect(valDiffs.has('advanced')).toBe(true);
|
|
469
|
+
});
|
|
470
|
+
|
|
471
|
+
it('is deterministic with the same seed', () => {
|
|
472
|
+
const jsonl = createBalancedDataset(10);
|
|
473
|
+
|
|
474
|
+
const result1 = new TrainingMonkeyIntegration({ seed: 42 }).process(jsonl);
|
|
475
|
+
const result2 = new TrainingMonkeyIntegration({ seed: 42 }).process(jsonl);
|
|
476
|
+
|
|
477
|
+
expect(result1.trainJsonl).toBe(result2.trainJsonl);
|
|
478
|
+
expect(result1.validationJsonl).toBe(result2.validationJsonl);
|
|
479
|
+
});
|
|
480
|
+
|
|
481
|
+
it('produces different splits with different seeds', () => {
|
|
482
|
+
const jsonl = createBalancedDataset(10);
|
|
483
|
+
|
|
484
|
+
const result1 = new TrainingMonkeyIntegration({ seed: 42 }).process(jsonl);
|
|
485
|
+
const result2 = new TrainingMonkeyIntegration({ seed: 999 }).process(jsonl);
|
|
486
|
+
|
|
487
|
+
// With different seeds, the train/val split ordering should differ
|
|
488
|
+
expect(result1.trainJsonl).not.toBe(result2.trainJsonl);
|
|
489
|
+
});
|
|
490
|
+
|
|
491
|
+
it('handles empty dataset', () => {
|
|
492
|
+
const integration = new TrainingMonkeyIntegration();
|
|
493
|
+
const entries: WeightedAlpacaEntry[] = [];
|
|
494
|
+
const split = integration.splitDataset(entries);
|
|
495
|
+
|
|
496
|
+
expect(split.train).toHaveLength(0);
|
|
497
|
+
expect(split.validation).toHaveLength(0);
|
|
498
|
+
expect(split.stats.totalExamples).toBe(0);
|
|
499
|
+
});
|
|
500
|
+
|
|
501
|
+
it('handles single-entry dataset', () => {
  const pipeline = new TrainingMonkeyIntegration();
  const { stats } = pipeline.process(makeEntry('single')).split;

  // The lone example must be counted once and land in exactly one partition;
  // which partition receives it is not asserted here.
  expect(stats.totalExamples).toBe(1);
  expect(stats.trainCount + stats.validationCount).toBe(1);
});
|
|
510
|
+
|
|
511
|
+
it('handles non-stratified split', () => {
  const pipeline = new TrainingMonkeyIntegration({ stratify: false });
  const { stats } = pipeline.process(createBalancedDataset(10)).split;

  // The stats must echo the disabled flag, and the two partitions together
  // must still account for every example.
  expect(stats.stratified).toBe(false);
  const partitionedCount = stats.trainCount + stats.validationCount;
  expect(partitionedCount).toBe(stats.totalExamples);
});
|
|
521
|
+
});
|
|
522
|
+
|
|
523
|
+
// ---------------------------------------------------------------------------
|
|
524
|
+
// Training Config Generation
|
|
525
|
+
// ---------------------------------------------------------------------------
|
|
526
|
+
|
|
527
|
+
describe('generateConfig', () => {
  // Every test runs the full process() pipeline and inspects the `config`
  // object found on its result.

  it('produces W.006-compliant hyperparameters', () => {
    const { config } = new TrainingMonkeyIntegration().process(createBalancedDataset(10));
    const { hyperparameters } = config;

    expect(hyperparameters.learningRate).toBe(2e-4);
    expect(hyperparameters.epochs).toBe(2);
    expect(hyperparameters.optimizer).toBe('paged_adamw_8bit');
  });

  it('produces W.007-compliant batch sizing', () => {
    const pipeline = new TrainingMonkeyIntegration();
    const { config } = pipeline.process(createBalancedDataset(10));

    expect(config.hyperparameters.microBatchSize).toBe(8);
    expect(config.hyperparameters.gradientAccumulationSteps).toBe(4);

    // The effective batch is the product of the two settings: 8 * 4 = 32.
    const { microBatchSize, gradientAccumulationSteps } = config.hyperparameters;
    expect(microBatchSize * gradientAccumulationSteps).toBe(32);
  });

  it('produces W.009-compliant LR schedule', () => {
    const { config } = new TrainingMonkeyIntegration().process(createBalancedDataset(10));
    const { lrSchedule } = config;

    expect(lrSchedule.warmupRatio).toBe(0.1);
    expect(lrSchedule.type).toBe('cosine');
  });

  it('computes correct total training steps', () => {
    const { config } = new TrainingMonkeyIntegration().process(createBalancedDataset(10));

    // Recompute the step count independently from the reported train count:
    // ceil(trainCount / 32) steps per epoch, over 2 epochs.
    const samplesPerStep = 8 * 4;
    const epochCount = 2;
    const expectedTotalSteps = Math.ceil(config.dataset.trainCount / samplesPerStep) * epochCount;

    expect(config.dataset.totalSteps).toBe(expectedTotalSteps);
  });

  it('includes correct dataset counts', () => {
    const result = new TrainingMonkeyIntegration().process(createBalancedDataset(10));
    const { dataset } = result.config;
    const { stats } = result.split;

    // Config counts must mirror the split statistics and add up to the 90
    // examples this fixture is expected to contain.
    expect(dataset.trainCount).toBe(stats.trainCount);
    expect(dataset.validationCount).toBe(stats.validationCount);
    expect(dataset.trainCount + dataset.validationCount).toBe(90);
  });

  it('includes SoftDedup statistics', () => {
    const { config } = new TrainingMonkeyIntegration().process(createBalancedDataset(10));
    const { softDedup } = config;

    expect(softDedup.applied).toBe(true);
    // The mean weight must fall in the interval (0, 1].
    expect(softDedup.meanWeight).toBeGreaterThan(0);
    expect(softDedup.meanWeight).toBeLessThanOrEqual(1.0);
    expect(softDedup.effectiveSize).toBeGreaterThan(0);
  });

  it('reports SoftDedup as not applied when disabled', () => {
    const pipeline = new TrainingMonkeyIntegration({ enableSoftDedup: false });
    const { softDedup } = pipeline.process(createBalancedDataset(5)).config;

    expect(softDedup.applied).toBe(false);
    expect(softDedup.meanWeight).toBe(1.0);
  });

  it('uses configured model name', () => {
    const pipeline = new TrainingMonkeyIntegration({ modelName: 'phi35' });
    const { config } = pipeline.process(createBalancedDataset(3));

    expect(config.model.name).toBe('phi35');
  });

  it('includes output directory in dataset paths', () => {
    const pipeline = new TrainingMonkeyIntegration({ outputDir: '/root/training-v44' });
    const { trainPath, validationPath } = pipeline.process(createBalancedDataset(3)).config.dataset;

    expect(trainPath).toBe('/root/training-v44/alpaca-train.jsonl');
    expect(validationPath).toBe('/root/training-v44/alpaca-val.jsonl');
  });
});
|
|
632
|
+
|
|
633
|
+
// ---------------------------------------------------------------------------
|
|
634
|
+
// JSONL Serialization
|
|
635
|
+
// ---------------------------------------------------------------------------
|
|
636
|
+
|
|
637
|
+
describe('serializeJsonl', () => {
  // Splits serialized JSONL on newlines and drops lines that are empty or
  // whitespace-only.
  const nonBlankLines = (jsonl: string): string[] =>
    jsonl.split('\n').filter((line) => line.trim().length > 0);

  it('produces one JSON object per line', () => {
    const result = new TrainingMonkeyIntegration().process(createBalancedDataset(3));
    const serialized = nonBlankLines(result.trainJsonl);

    expect(serialized).toHaveLength(result.split.stats.trainCount);

    // Every remaining line has to parse as JSON on its own.
    serialized.forEach((line) => {
      expect(() => JSON.parse(line)).not.toThrow();
    });
  });

  it('serialized entries contain all required Alpaca fields', () => {
    const result = new TrainingMonkeyIntegration().process(createBalancedDataset(3));

    for (const line of nonBlankLines(result.trainJsonl)) {
      const record = JSON.parse(line);
      expect(record).toHaveProperty('instruction');
      expect(record).toHaveProperty('input');
      expect(record).toHaveProperty('output');
      expect(record).toHaveProperty('sampling_weight');
    }
  });

  it('serialized entries are readable by TrainingMonkey format', () => {
    const result = new TrainingMonkeyIntegration().process(createBalancedDataset(3));

    for (const line of nonBlankLines(result.trainJsonl)) {
      const { instruction, output } = JSON.parse(line);
      // `instruction` and `output` must both be non-empty strings.
      expect(typeof instruction).toBe('string');
      expect(instruction.length).toBeGreaterThan(0);
      expect(typeof output).toBe('string');
      expect(output.length).toBeGreaterThan(0);
    }
  });

  it('config JSON is valid and parseable', () => {
    const { configJson } = new TrainingMonkeyIntegration().process(createBalancedDataset(3));

    // JSON.parse throws on malformed input, which would fail the test before
    // any section check runs.
    const parsedConfig = JSON.parse(configJson);
    expect(parsedConfig).toHaveProperty('model');
    expect(parsedConfig).toHaveProperty('hyperparameters');
    expect(parsedConfig).toHaveProperty('lrSchedule');
    expect(parsedConfig).toHaveProperty('dataset');
    expect(parsedConfig).toHaveProperty('softDedup');
  });
});
|
|
696
|
+
|
|
697
|
+
// ---------------------------------------------------------------------------
|
|
698
|
+
// End-to-End Pipeline
|
|
699
|
+
// ---------------------------------------------------------------------------
|
|
700
|
+
|
|
701
|
+
describe('process (end-to-end pipeline)', () => {
  it('produces complete IntegrationResult', () => {
    const pipeline = new TrainingMonkeyIntegration({ outputDir: '/output', seed: 42 });
    const result = pipeline.process(createBalancedDataset(5));

    // Only the presence of each top-level field is checked here.
    expect(result).toHaveProperty('split');
    expect(result).toHaveProperty('config');
    expect(result).toHaveProperty('trainJsonl');
    expect(result).toHaveProperty('validationJsonl');
    expect(result).toHaveProperty('configJson');
  });

  it('pipeline is idempotent with same seed', () => {
    const dataset = createBalancedDataset(5);
    const runWithSeed42 = () => new TrainingMonkeyIntegration({ seed: 42 }).process(dataset);

    const firstRun = runWithSeed42();
    const secondRun = runWithSeed42();

    // All three serialized artifacts must match between the two runs.
    expect(firstRun.trainJsonl).toBe(secondRun.trainJsonl);
    expect(firstRun.validationJsonl).toBe(secondRun.validationJsonl);
    expect(firstRun.configJson).toBe(secondRun.configJson);
  });

  it('handles dataset with only one relationship type', () => {
    // Twenty entries share one relationship type and difficulty; only the
    // fourth makeEntry argument alternates (true for even indices).
    const entries: string[] = Array.from({ length: 20 }, (_, i) =>
      makeEntry(`adj-${i}`, 'spatial_adjacent', 'basic', i % 2 === 0)
    );

    const { stats } = new TrainingMonkeyIntegration().process(buildJsonl(entries)).split;

    expect(stats.totalExamples).toBe(20);
    expect(stats.trainCount + stats.validationCount).toBe(20);
  });

  it('handles dataset with mixed scene/no-scene entries', () => {
    // The two entries differ only in the fifth makeEntry argument.
    const withScene = makeEntry('with-scene', 'spatial_adjacent', 'basic', true, true);
    const withoutScene = makeEntry('no-scene', 'spatial_adjacent', 'basic', true, false);
    const entries: string[] = [withScene, withoutScene];

    const result = new TrainingMonkeyIntegration().process(buildJsonl(entries));

    expect(result.split.stats.totalExamples).toBe(2);
  });

  it('handles larger dataset (500+ entries)', () => {
    const pipeline = new TrainingMonkeyIntegration();
    const { stats } = pipeline.process(createBalancedDataset(60)).split;

    // 540 examples are expected in total; the partition sizes are only
    // bounded from below, not pinned to exact values.
    expect(stats.totalExamples).toBe(540);
    expect(stats.trainCount).toBeGreaterThan(400);
    expect(stats.validationCount).toBeGreaterThan(40);
  });
});
|
|
763
|
+
|
|
764
|
+
// ---------------------------------------------------------------------------
|
|
765
|
+
// Edge Cases
|
|
766
|
+
// ---------------------------------------------------------------------------
|
|
767
|
+
|
|
768
|
+
describe('edge cases', () => {
  it('handles entries without metadata gracefully in non-stratified mode', () => {
    // Records carry only instruction/response, with no `metadata` key at all.
    const bareRecords = [
      { instruction: 'Question 1?', response: 'Answer 1.' },
      { instruction: 'Question 2?', response: 'Answer 2.' },
    ];
    const entries = bareRecords.map((record) => JSON.stringify(record));

    const pipeline = new TrainingMonkeyIntegration({ stratify: false });
    const { stats } = pipeline.process(buildJsonl(entries)).split;

    expect(stats.totalExamples).toBe(2);
    expect(stats.stratified).toBe(false);
  });

  it('handles entries with metadata.tags as array', () => {
    const taggedEntry = JSON.stringify({
      instruction: 'Test question?',
      response: 'Test answer.',
      metadata: {
        id: 'tag-test',
        relationship_type: 'spatial_adjacent',
        is_positive: true,
        difficulty: 'basic',
        tags: ['tag1', 'tag2', 'tag3'],
      },
    });

    // A single JSON line is passed straight to process().
    const { split } = new TrainingMonkeyIntegration().process(taggedEntry);
    expect(split.stats.totalExamples).toBe(1);
  });

  it('handles very long instruction text', () => {
    // A 10,000-character instruction paired with a short response.
    const oversizedEntry = JSON.stringify({
      instruction: 'A'.repeat(10000),
      response: 'Short answer.',
      metadata: {
        id: 'long-test',
        relationship_type: 'spatial_adjacent',
        is_positive: true,
        difficulty: 'basic',
        tags: [],
      },
    });

    const { split } = new TrainingMonkeyIntegration().process(oversizedEntry);
    expect(split.stats.totalExamples).toBe(1);
  });

  it('config totalSteps is correct for small datasets', () => {
    const pipeline = new TrainingMonkeyIntegration();
    const threeEntries = ['small-1', 'small-2', 'small-3'].map((id) => makeEntry(id));
    const result = pipeline.process(buildJsonl(threeEntries));

    // Expected value is derived from the reported train count:
    // ceil(trainCount / 32) steps per epoch, times 2 epochs.
    const samplesPerStep = 8 * 4;
    const stepsPerEpoch = Math.ceil(result.split.stats.trainCount / samplesPerStep);
    expect(result.config.dataset.totalSteps).toBe(stepsPerEpoch * 2);
  });
});
|
|
837
|
+
|
|
838
|
+
// ---------------------------------------------------------------------------
|
|
839
|
+
// Data Integrity Checks
|
|
840
|
+
// ---------------------------------------------------------------------------
|
|
841
|
+
|
|
842
|
+
describe('data integrity', () => {
  // Counts the lines of a JSONL string that are not empty or whitespace-only.
  const countNonBlankLines = (jsonl: string): number =>
    jsonl.split('\n').filter((line) => line.trim().length > 0).length;

  it('no data is lost during pipeline (train + val = total)', () => {
    const dataset = createBalancedDataset(15);
    const { stats } = new TrainingMonkeyIntegration().process(dataset).split;

    // Every non-blank input line must be accounted for across both partitions.
    const inputCount = countNonBlankLines(dataset);
    const outputCount = stats.trainCount + stats.validationCount;

    expect(outputCount).toBe(inputCount);
  });

  it('all entries retain their original content through pipeline', () => {
    const dataset = buildJsonl([
      makeEntry('integrity-1', 'spatial_adjacent', 'basic', true),
      makeEntry('integrity-2', 'spatial_contains', 'intermediate', false),
      makeEntry('integrity-3', 'spatial_reachable', 'advanced', true),
    ]);

    const { train, validation } = new TrainingMonkeyIntegration().process(dataset).split;

    // Gather the metadata ids from both partitions (entries without metadata
    // are skipped) and compare them, sorted, against the ids that went in.
    const survivingIds: string[] = [];
    for (const entry of [...train, ...validation]) {
      if (entry.metadata) {
        survivingIds.push(entry.metadata.id);
      }
    }
    survivingIds.sort();

    expect(survivingIds).toEqual(['integrity-1', 'integrity-2', 'integrity-3']);
  });

  it('sampling_weight values are always in valid range', () => {
    const { train, validation } = new TrainingMonkeyIntegration().process(
      createBalancedDataset(30)
    ).split;

    // Every entry in either partition must have a weight within [0.1, 1.0].
    [...train, ...validation].forEach((entry) => {
      expect(entry.sampling_weight).toBeGreaterThanOrEqual(0.1);
      expect(entry.sampling_weight).toBeLessThanOrEqual(1.0);
    });
  });

  it('output JSONL line count matches entry count', () => {
    const result = new TrainingMonkeyIntegration().process(createBalancedDataset(10));
    const { stats } = result.split;

    const trainLineCount = countNonBlankLines(result.trainJsonl);
    const valLineCount = countNonBlankLines(result.validationJsonl);

    expect(trainLineCount).toBe(stats.trainCount);
    expect(valLineCount).toBe(stats.validationCount);
  });
});
|
|
897
|
+
});
|