@holoscript/framework 6.0.3 → 6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/CHANGELOG.md +1 -2
  2. package/ROADMAP.md +68 -66
  3. package/dist/{InvisibleWallet-BB6tFvRA.d.cts → InvisibleWallet-EFiuaLn3.d.cts} +1 -1
  4. package/dist/{OrchestratorAgent-BvWgf9uw.d.cts → OrchestratorAgent-CrLDGNL6.d.cts} +1 -1
  5. package/dist/agents/index.cjs +11 -10
  6. package/dist/agents/index.d.cts +4 -16
  7. package/dist/ai/index.cjs +2 -2
  8. package/dist/behavior.cjs +10 -0
  9. package/dist/economy/index.cjs +4 -4
  10. package/dist/economy/index.d.cts +2 -2
  11. package/dist/index.cjs +33 -11
  12. package/dist/index.d.cts +3 -3
  13. package/dist/swarm/index.cjs +3 -0
  14. package/package.json +14 -9
  15. package/src/__tests__/bounty-marketplace.test.ts +53 -21
  16. package/src/__tests__/delegation.test.ts +1 -4
  17. package/src/__tests__/done-log-audit.test.ts +38 -46
  18. package/src/__tests__/framework.test.ts +172 -53
  19. package/src/__tests__/goal-synthesizer.test.ts +9 -6
  20. package/src/__tests__/presence.test.ts +1 -1
  21. package/src/__tests__/protocol-agent.test.ts +12 -11
  22. package/src/__tests__/revenue-splitter.test.ts +22 -15
  23. package/src/__tests__/scenario-driven-todo.test.ts +55 -35
  24. package/src/__tests__/self-improve.test.ts +28 -9
  25. package/src/__tests__/service-lifecycle.test.ts +9 -3
  26. package/src/__tests__/skill-router.test.ts +3 -3
  27. package/src/agents/CulturalMemory.ts +6 -6
  28. package/src/agents/DelegationTraceHooks.ts +560 -0
  29. package/src/agents/FederatedRegistryAdapter.ts +1 -1
  30. package/src/agents/NormEngine.ts +3 -8
  31. package/src/agents/OrchestratorAgent.ts +1 -1
  32. package/src/agents/TaskDelegationService.ts +5 -9
  33. package/src/agents/__tests__/AgentWalletRegistry.test.ts +5 -4
  34. package/src/agents/__tests__/CrossRealityHandoff.test.ts +9 -3
  35. package/src/agents/__tests__/DelegationTraceHooks.test.ts +390 -0
  36. package/src/agents/__tests__/TaskDelegationService.test.ts +4 -2
  37. package/src/agents/spatial-comms/Layer1RealTime.ts +36 -19
  38. package/src/agents/spatial-comms/Layer2A2A.ts +1 -3
  39. package/src/agents/spatial-comms/Layer3MCP.ts +13 -4
  40. package/src/agents/spatial-comms/ProtocolTypes.ts +5 -2
  41. package/src/agents/spatial-comms/examples/multi-agent-world-creation.ts +2 -2
  42. package/src/ai/HoloScriptGenerator.ts +2 -2
  43. package/src/ai/__tests__/PerceptionSystem.prod.test.ts +1 -1
  44. package/src/ai/__tests__/PerceptionSystem.test.ts +14 -14
  45. package/src/ai/__tests__/SteeringBehaviors.prod.test.ts +1 -1
  46. package/src/ai/index.ts +5 -1
  47. package/src/board/audit.ts +17 -6
  48. package/src/board/board-ops.ts +45 -15
  49. package/src/board/board-types.ts +94 -20
  50. package/src/delegation.ts +5 -3
  51. package/src/distributed-claimer.ts +13 -2
  52. package/src/economy/BountyManager.ts +40 -18
  53. package/src/economy/KnowledgeMarketplace.ts +27 -8
  54. package/src/economy/PaymentWebhookService.ts +0 -1
  55. package/src/economy/RevenueSplitter.ts +2 -4
  56. package/src/economy/UnifiedBudgetOptimizer.ts +8 -9
  57. package/src/economy/_core-stubs.ts +1 -1
  58. package/src/economy/x402-facilitator.ts +17 -8
  59. package/src/index.ts +16 -12
  60. package/src/knowledge/__tests__/knowledge-consolidator.test.ts +138 -89
  61. package/src/knowledge/__tests__/knowledge-store-vector.test.ts +59 -16
  62. package/src/knowledge/brain.ts +7 -7
  63. package/src/knowledge/consolidation.ts +16 -16
  64. package/src/knowledge/knowledge-consolidator.ts +60 -30
  65. package/src/knowledge/knowledge-store.ts +83 -45
  66. package/src/learning/ProceduralCompiler.ts +6 -1
  67. package/src/learning/learning/MemoryConsolidator.ts +102 -0
  68. package/src/learning/learning/MemoryScorer.ts +69 -0
  69. package/src/learning/learning/ProceduralCompiler.ts +45 -0
  70. package/src/learning/learning/SemanticClusterer.ts +66 -0
  71. package/src/llm/llm-adapter.ts +24 -10
  72. package/src/mesh/index.ts +37 -17
  73. package/src/protocol/goal-synthesizer.ts +24 -34
  74. package/src/protocol/implementations.ts +91 -22
  75. package/src/protocol/micro-phase-decomposer.ts +25 -17
  76. package/src/protocol/micro-step-decomposer.test.ts +104 -39
  77. package/src/protocol-agent.test.ts +17 -7
  78. package/src/protocol-agent.ts +45 -42
  79. package/src/self-improve/absorb-scanner.ts +9 -6
  80. package/src/self-improve/evolution-engine.ts +36 -18
  81. package/src/self-improve/framework-absorber.ts +21 -16
  82. package/src/self-improve/index.ts +2 -10
  83. package/src/self-improve/prompt-optimizer.ts +31 -19
  84. package/src/self-improve/test-generator.ts +16 -12
  85. package/src/skill-router.ts +7 -6
  86. package/src/swarm/messaging/GossipProtocol.ts +1 -1
  87. package/src/swarm/messaging/__tests__/BroadcastChannel.prod.test.ts +31 -9
  88. package/src/swarm/messaging/__tests__/GossipProtocol.prod.test.ts +21 -7
  89. package/src/swarm/messaging/__tests__/SwarmEventBus.prod.test.ts +24 -8
  90. package/src/swarm/messaging/__tests__/SwarmEventBus.test.ts +6 -2
  91. package/src/team.ts +277 -122
  92. package/src/training/scripts/generate-spatial-dataset.ts +1 -1
  93. package/src/training/training/LRScheduler.ts +377 -0
  94. package/src/training/training/QualityScoringPipeline.ts +139 -0
  95. package/src/training/training/SoftDedup.ts +461 -0
  96. package/src/training/training/SparsityMonitor.ts +685 -0
  97. package/src/training/training/SparsityMonitorTypes.ts +209 -0
  98. package/src/training/training/SpatialTrainingDataGenerator.ts +1526 -0
  99. package/src/training/training/SpatialTrainingDataTypes.ts +216 -0
  100. package/src/training/training/TrainingPipelineConfig.ts +215 -0
  101. package/src/training/training/__tests__/CorpusValidation.test.ts +87 -0
  102. package/src/training/training/__tests__/LRScheduler.test.ts +592 -0
  103. package/src/training/training/__tests__/SoftDedup.test.ts +415 -0
  104. package/src/training/training/__tests__/SparsityMonitor.test.ts +1623 -0
  105. package/src/training/training/__tests__/SpatialCorpusValidation.test.ts +72 -0
  106. package/src/training/training/__tests__/SpatialTrainingDataGenerator.test.ts +1244 -0
  107. package/src/training/training/__tests__/TrainingMonkeyIntegration.test.ts +897 -0
  108. package/src/training/training/__tests__/TrainingPipelineConfig.test.ts +202 -0
  109. package/src/training/training/__tests__/schema.test.ts +72 -0
  110. package/src/training/training/__tests__/training-constants.test.ts +106 -0
  111. package/src/training/training/__tests__/trait-mappings.test.ts +81 -0
  112. package/src/training/training/constants.ts +94 -0
  113. package/src/training/training/index.ts +17 -0
  114. package/src/training/training/schema.ts +147 -0
  115. package/src/training/training/scripts/generate-novel-use-cases-dataset.ts +272 -0
  116. package/src/training/training/scripts/generate-spatial-dataset.ts +521 -0
  117. package/src/training/training/trainingmonkey/TrainingMonkeyIntegration.ts +477 -0
  118. package/src/training/training/trainingmonkey/TrainingMonkeyTypes.ts +230 -0
  119. package/src/training/training/trainingmonkey/index.ts +26 -0
  120. package/src/training/training/trait-mappings.ts +157 -0
  121. package/src/types.ts +2 -7
  122. package/ALL-test-results.json +0 -1
  123. package/LICENSE +0 -21
  124. package/dist/AgentManifest-CB4xM-Ma.d.ts +0 -704
  125. package/dist/BehaviorTree-BrBFECv5.d.ts +0 -103
  126. package/dist/InvisibleWallet-rtRrBOA8.d.ts +0 -1732
  127. package/dist/OrchestratorAgent-Q_CbVTmO.d.ts +0 -798
  128. package/dist/agents/index.d.ts +0 -1788
  129. package/dist/agents/index.js +0 -4695
  130. package/dist/ai/index.d.ts +0 -1753
  131. package/dist/ai/index.js +0 -5244
  132. package/dist/behavior.d.ts +0 -130
  133. package/dist/behavior.js +0 -407
  134. package/dist/economy/index.d.ts +0 -747
  135. package/dist/economy/index.js +0 -3617
  136. package/dist/implementations-D9T3un9D.d.ts +0 -236
  137. package/dist/index.d.ts +0 -1729
  138. package/dist/index.js +0 -24277
  139. package/dist/learning/index.d.ts +0 -104
  140. package/dist/learning/index.js +0 -189
  141. package/dist/negotiation/index.d.ts +0 -610
  142. package/dist/negotiation/index.js +0 -931
  143. package/dist/skills/index.d.ts +0 -289
  144. package/dist/skills/index.js +0 -1079
  145. package/dist/swarm/index.d.ts +0 -2433
  146. package/dist/swarm/index.js +0 -5221
  147. package/dist/training/index.d.ts +0 -1734
  148. package/dist/training/index.js +0 -2687
  149. package/extract-failures.js +0 -10
  150. package/src/training/training/data/novel-use-cases.jsonl +0 -153
  151. package/src/training/training/data/spatial-reasoning-10k.jsonl +0 -9354
  152. package/src/types/core-stubs.d.ts +0 -113
  153. package/test-output.txt +0 -0
  154. package/test-result.json +0 -1
  155. package/tsc-errors.txt +0 -4
  156. package/tsc_output.txt +0 -0
  157. package/typescript-errors-2.txt +0 -0
  158. package/typescript-errors.txt +0 -22
  159. package/vitest-log-utf8.txt +0 -268
  160. package/vitest-log.txt +0 -0
@@ -0,0 +1,521 @@
1
+ #!/usr/bin/env npx tsx
2
+ /**
3
+ * Spatial Reasoning Training Dataset Generator
4
+ *
5
+ * Generates 10,000 spatial reasoning training examples for Brittney fine-tuning.
6
+ * Applies deduplication per W.004 and quality validation per W.010.
7
+ *
8
+ * Usage:
9
+ * npx tsx packages/core/src/training/scripts/generate-spatial-dataset.ts
10
+ *
11
+ * Output:
12
+ * packages/core/src/training/data/spatial-reasoning-10k.jsonl
13
+ *
14
+ * @module training/scripts/generate-spatial-dataset
15
+ */
16
+
17
+ import { writeFileSync, mkdirSync, existsSync } from 'node:fs';
18
+ import { join, dirname } from 'node:path';
19
+ import { fileURLToPath } from 'node:url';
20
+
21
+ import { SpatialTrainingDataGenerator } from '../SpatialTrainingDataGenerator';
22
+ import type {
23
+ SpatialTrainingExample,
24
+ SpatialTrainingJSONLEntry,
25
+ SpatialRelationshipType,
26
+ SpatialDifficulty,
27
+ } from '../SpatialTrainingDataTypes';
28
+
29
+ // =============================================================================
30
+ // CONFIGURATION
31
+ // =============================================================================
32
+
33
const CONFIG = {
  /** Expected size of the raw dataset before deduplication (1112 per category x 9 categories). */
  targetExamples: 1112 * 9,
  /** How many examples to request for each (relationship type, difficulty) category. */
  examplesPerCategory: 1112,
  /** Requested share of positive examples; 0.6 means a 60/40 positive/negative split. */
  positiveRatio: 0.6,
  /** Fixed generator seed so that runs are reproducible. */
  seed: 20_260_306,
  /** Ask the generator to include HoloScript context in instructions. */
  includeContext: true,
  /** Where the JSONL is written: ../data/spatial-reasoning-10k.jsonl relative to this script. */
  outputPath: join(
    dirname(fileURLToPath(import.meta.url)),
    '..',
    'data',
    'spatial-reasoning-10k.jsonl'
  ),
};
52
+
53
+ // =============================================================================
54
+ // N-GRAM DEDUPLICATION (W.004)
55
+ // =============================================================================
56
+
57
/**
 * Builds the set of word-level n-grams of a text for near-duplicate detection.
 *
 * The text is lower-cased, every character other than a-z, 0-9 and whitespace is
 * removed, and the remainder is split on runs of whitespace.
 *
 * @param text - Raw text to break into n-grams.
 * @param n - Number of words per n-gram (default 3).
 * @returns The distinct n-grams, each with its words joined by single spaces. A text
 *   that has some words but fewer than `n` yields one gram holding all of them; a text
 *   with no usable words yields an empty set.
 */
function getNgrams(text: string, n: number = 3): Set<string> {
  const words = text
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, '')
    .split(/\s+/)
    .filter(Boolean);
  const ngrams = new Set<string>();

  // A text shorter than one full window would otherwise produce no grams at all, and
  // two empty gram sets compare as identical. Keeping the whole text as a single gram
  // lets short but different texts stay distinguishable.
  if (words.length > 0 && words.length < n) {
    ngrams.add(words.join(' '));
    return ngrams;
  }

  for (let start = 0; start + n <= words.length; start++) {
    ngrams.add(words.slice(start, start + n).join(' '));
  }
  return ngrams;
}
72
+
73
/**
 * Jaccard similarity between two n-gram sets: size of the intersection divided by
 * size of the union.
 *
 * @param a - First n-gram set.
 * @param b - Second n-gram set.
 * @returns A value between 0 and 1. Two empty sets are treated as identical (1.0).
 */
function jaccardSimilarity(a: Set<string>, b: Set<string>): number {
  if (a.size === 0 && b.size === 0) return 1.0;

  // Walk the smaller set and probe the larger one: same count, fewer lookups.
  const [smaller, larger] = a.size <= b.size ? [a, b] : [b, a];
  let shared = 0;
  for (const gram of smaller) {
    if (larger.has(gram)) shared++;
  }

  // At least one set is non-empty at this point, so the union size is positive.
  return shared / (a.size + b.size - shared);
}
85
+
86
/**
 * Removes exact and near-duplicate examples (W.004: ALWAYS run deduplication before
 * training). The first occurrence of each duplicate group is the one that is kept.
 *
 * Phase 1 drops examples whose instruction and response both match an earlier example
 * exactly. Phase 2 groups the survivors by `relationshipType:difficulty` and drops an
 * example when the 3-gram Jaccard similarity of its instruction+response text against
 * a previously kept example of the same category exceeds `threshold`. To bound the
 * cost, only the 100 most recently kept examples of the category are compared, so
 * near-duplicate detection is approximate.
 *
 * @param examples - Raw examples to deduplicate; the array is not modified.
 * @param threshold - Similarity above which two examples count as near-duplicates
 *   (default 0.85).
 * @returns The kept examples (grouped by category, in first-seen category order), the
 *   number of examples removed, and that number as a fraction of the input size
 *   (0 for an empty input).
 */
function deduplicateExamples(
  examples: SpatialTrainingExample[],
  threshold: number = 0.85
): { unique: SpatialTrainingExample[]; duplicateCount: number; duplicateRate: number } {
  console.log('\n--- W.004: Running Deduplication ---');

  // Phase 1: exact deduplication on the instruction/response pair.
  const seenPairs = new Set<string>();
  const afterExact: SpatialTrainingExample[] = [];
  for (const ex of examples) {
    const pairKey = `${ex.instruction}|||${ex.response}`;
    if (seenPairs.has(pairKey)) continue;
    seenPairs.add(pairKey);
    afterExact.push(ex);
  }
  const exactDupes = examples.length - afterExact.length;
  console.log(` Phase 1 (Exact): ${exactDupes} exact duplicates removed`);

  // Phase 2: near-duplicate detection, restricted to examples of the same category
  // so that the pairwise comparison stays affordable.
  const categories = new Map<string, SpatialTrainingExample[]>();
  for (const ex of afterExact) {
    const categoryKey = `${ex.relationshipType}:${ex.difficulty}`;
    const bucket = categories.get(categoryKey);
    if (bucket) {
      bucket.push(ex);
    } else {
      categories.set(categoryKey, [ex]);
    }
  }

  // Number of most recently kept examples each candidate is compared against.
  const lookback = 100;
  const unique: SpatialTrainingExample[] = [];
  let nearDupes = 0;

  for (const categoryExamples of categories.values()) {
    // N-gram sets of the examples kept so far in this category, in insertion order.
    const keptNgrams: Array<Set<string>> = [];

    for (const ex of categoryExamples) {
      const ngrams = getNgrams(`${ex.instruction} ${ex.response}`);

      let isDup = false;
      for (let i = Math.max(0, keptNgrams.length - lookback); i < keptNgrams.length; i++) {
        if (jaccardSimilarity(ngrams, keptNgrams[i]) > threshold) {
          isDup = true;
          break;
        }
      }

      if (isDup) {
        nearDupes++;
        continue;
      }
      // Appending one element at a time avoids spreading a whole category into a
      // single call, which can exceed the engine's argument limit on large inputs.
      unique.push(ex);
      keptNgrams.push(ngrams);
    }
  }

  console.log(
    ` Phase 2 (Near-dup): ${nearDupes} near-duplicates removed (threshold: ${threshold})`
  );

  const totalDupes = exactDupes + nearDupes;
  // An empty input has no duplicates; report 0 instead of NaN from 0 / 0.
  const rate = examples.length === 0 ? 0 : totalDupes / examples.length;
  console.log(
    ` Total: ${totalDupes} duplicates removed (${(rate * 100).toFixed(1)}% duplication rate)`
  );
  console.log(` Remaining: ${unique.length} unique examples`);

  return { unique, duplicateCount: totalDupes, duplicateRate: rate };
}
169
+
170
+ // =============================================================================
171
+ // QUALITY VALIDATION (W.010)
172
+ // =============================================================================
173
+
174
/**
 * Quality metrics for a single training example (or their averages over a dataset).
 * Each metric produced by `scoreQuality` lies between 0 and 1.
 */
interface QualityScores {
  /** How clear and answerable the instruction looks. */
  helpfulness: number;
  /** Whether the response carries concrete data and an answer matching its label. */
  correctness: number;
  /** Share of quoted instruction objects that the response also quotes. */
  coherence: number;
  /** Fixed score derived from the example's difficulty level. */
  complexity: number;
  /** How close the response length is to the 20-300 character range. */
  verbosity: number;
  /** Unweighted mean of the five metrics above. */
  overall: number;
}

/**
 * Multi-dimensional quality scoring per W.010.
 *
 * Scores one example on five heuristics — helpfulness, correctness, coherence,
 * complexity and verbosity — and averages them into `overall`. All checks are plain
 * string/regex heuristics on the instruction, response, tags and difficulty.
 *
 * @param ex - The example to score.
 * @returns The five metric scores and their unweighted mean.
 */
function scoreQuality(ex: SpatialTrainingExample): QualityScores {
  const { instruction, response } = ex;
  const responseMentionsAny = (markers: string[]): boolean =>
    markers.some((marker) => response.includes(marker));

  // Helpfulness: does the instruction ask a clear, answerable question?
  const instructionLower = instruction.toLowerCase();
  const asksSomething =
    instruction.includes('?') ||
    ['evaluate', 'check', 'analyze', 'verify'].some((verb) => instructionLower.includes(verb));
  let helpfulness = 0;
  if (instruction.length > 20) helpfulness += 0.3;
  if (asksSomething) helpfulness += 0.3;
  if (instruction.includes('"')) helpfulness += 0.2; // References specific objects
  if (response.length > 20) helpfulness += 0.2;

  // Correctness: does the response contain concrete spatial data and a verdict?
  let correctness = 0;
  if (/\d+\.\d+m/.test(response)) correctness += 0.4; // Contains a distance measurement
  if (responseMentionsAny(['Yes', 'No', 'constraint', 'satisf', 'violat', 'pass', 'fail'])) {
    correctness += 0.3; // Contains a definitive answer
  }
  if (ex.tags.includes('positive') && responseMentionsAny(['Yes', 'pass', 'satisf'])) {
    correctness += 0.3; // Positive label matches positive response
  } else if (
    ex.tags.includes('negative') &&
    responseMentionsAny(['No', 'fail', 'violat', 'block'])
  ) {
    correctness += 0.3; // Negative label matches negative response
  }

  // Coherence: what share of the objects quoted in the instruction does the response
  // quote as well? Distinct names are compared so that repeating one object in the
  // response cannot stand in for another object that is never mentioned.
  const instructionObjects = new Set(instruction.match(/"([^"]+)"/g) ?? []);
  const responseObjects = new Set(response.match(/"([^"]+)"/g) ?? []);
  let coherence: number;
  if (instructionObjects.size > 0 && responseObjects.size > 0) {
    let referenced = 0;
    for (const objectName of instructionObjects) {
      if (responseObjects.has(objectName)) referenced++;
    }
    coherence = referenced / instructionObjects.size;
  } else {
    coherence = 0.5; // Partial credit if no quoted objects
  }

  // Complexity: harder examples are assumed to carry more training value.
  const difficultyScore: Record<SpatialDifficulty, number> = {
    basic: 0.3,
    intermediate: 0.6,
    advanced: 1.0,
  };
  // A difficulty outside the table earns no credit rather than turning the overall
  // score into NaN.
  const complexity = difficultyScore[ex.difficulty] ?? 0;

  // Verbosity: is the response appropriately verbose (not too short, not excessive)?
  const responseLen = response.length;
  let verbosity: number;
  if (responseLen < 20) {
    verbosity = responseLen / 20; // Too short
  } else if (responseLen <= 300) {
    verbosity = 1.0; // Ideal range
  } else {
    verbosity = Math.max(0.5, 1.0 - (responseLen - 300) / 500); // Slightly penalize verbose
  }

  const overall = (helpfulness + correctness + coherence + complexity + verbosity) / 5;

  return { helpfulness, correctness, coherence, complexity, verbosity, overall };
}
267
+
268
/**
 * Scores every example per W.010, splits them by a minimum overall score, and prints
 * a summary (average scores, band histogram, pass/reject counts) to the console.
 *
 * @param examples - Examples to score; the array is not modified.
 * @param minQualityThreshold - Minimum `overall` score (inclusive) for an example to
 *   pass (default 0.4).
 * @returns `passed` and `rejected` examples in input order, the mean of each metric
 *   over all examples (all zeros for an empty input), and the number of examples in
 *   each quality band.
 */
function validateQuality(
  examples: SpatialTrainingExample[],
  minQualityThreshold: number = 0.4
): {
  passed: SpatialTrainingExample[];
  rejected: SpatialTrainingExample[];
  avgScores: QualityScores;
  distribution: Record<string, number>;
} {
  console.log('\n--- W.010: Multi-Dimensional Quality Validation ---');

  const total = examples.length;
  // Share of the input, guarded so an empty input reports 0 instead of NaN.
  const shareOf = (part: number): number => (total === 0 ? 0 : part / total);

  const bandFor = (overall: number): string => {
    if (overall >= 0.8) return 'excellent (0.8-1.0)';
    if (overall >= 0.6) return 'good (0.6-0.8)';
    if (overall >= 0.4) return 'fair (0.4-0.6)';
    if (overall >= 0.2) return 'poor (0.2-0.4)';
    return 'bad (0.0-0.2)';
  };

  const passed: SpatialTrainingExample[] = [];
  const rejected: SpatialTrainingExample[] = [];
  const distribution: Record<string, number> = {
    'excellent (0.8-1.0)': 0,
    'good (0.6-0.8)': 0,
    'fair (0.4-0.6)': 0,
    'poor (0.2-0.4)': 0,
    'bad (0.0-0.2)': 0,
  };
  // Running sums of each metric, accumulated in input order in a single pass.
  const sums: QualityScores = {
    helpfulness: 0,
    correctness: 0,
    coherence: 0,
    complexity: 0,
    verbosity: 0,
    overall: 0,
  };

  for (const ex of examples) {
    const scores = scoreQuality(ex);

    sums.helpfulness += scores.helpfulness;
    sums.correctness += scores.correctness;
    sums.coherence += scores.coherence;
    sums.complexity += scores.complexity;
    sums.verbosity += scores.verbosity;
    sums.overall += scores.overall;

    if (scores.overall >= minQualityThreshold) {
      passed.push(ex);
    } else {
      rejected.push(ex);
    }
    distribution[bandFor(scores.overall)]++;
  }

  const avgScores: QualityScores = {
    helpfulness: shareOf(sums.helpfulness),
    correctness: shareOf(sums.correctness),
    coherence: shareOf(sums.coherence),
    complexity: shareOf(sums.complexity),
    verbosity: shareOf(sums.verbosity),
    overall: shareOf(sums.overall),
  };

  const asPercent = (fraction: number): string => (fraction * 100).toFixed(1);

  console.log(` Average Quality Score: ${asPercent(avgScores.overall)}%`);
  console.log(` - Helpfulness: ${asPercent(avgScores.helpfulness)}%`);
  console.log(` - Correctness: ${asPercent(avgScores.correctness)}%`);
  console.log(` - Coherence: ${asPercent(avgScores.coherence)}%`);
  console.log(` - Complexity: ${asPercent(avgScores.complexity)}%`);
  console.log(` - Verbosity: ${asPercent(avgScores.verbosity)}%`);
  console.log(`\n Quality Distribution:`);
  for (const [band, count] of Object.entries(distribution)) {
    const pct = asPercent(shareOf(count));
    // Histogram bar scaled so that 100% of the examples is 50 characters wide.
    const bar = '#'.repeat(Math.round(shareOf(count) * 50));
    console.log(` ${band.padEnd(22)} ${String(count).padStart(6)} (${pct.padStart(5)}%) ${bar}`);
  }
  console.log(`\n Passed: ${passed.length} (${asPercent(shareOf(passed.length))}%)`);
  console.log(` Rejected: ${rejected.length} (${asPercent(shareOf(rejected.length))}%)`);

  return { passed, rejected, avgScores, distribution };
}
342
+
343
+ // =============================================================================
344
+ // STATISTICS REPORTING
345
+ // =============================================================================
346
+
347
/**
 * Prints the final dataset report to the console: generation summary, counts by
 * relationship type and difficulty, positive/negative balance, template diversity,
 * quality metrics, a relationship-by-difficulty cross-tabulation and output details.
 *
 * @param examples - The final example set; all per-example statistics in the report
 *   are computed over this array.
 * @param generator - Generator whose `getStats` supplies the aggregate counts.
 * @param dedupStats - Number and rate of examples removed by deduplication.
 * @param qualityStats - Average quality scores and per-band counts.
 * @param originalCount - Number of raw examples before any filtering.
 */
function reportStatistics(
  examples: SpatialTrainingExample[],
  generator: SpatialTrainingDataGenerator,
  dedupStats: { duplicateCount: number; duplicateRate: number },
  qualityStats: { avgScores: QualityScores; distribution: Record<string, number> },
  originalCount: number
): void {
  const stats = generator.getStats(examples);
  const rule = '='.repeat(70);

  console.log('\n' + rule);
  console.log(' SPATIAL REASONING TRAINING DATASET - FINAL REPORT');
  console.log(rule);

  console.log('\n--- Generation Summary ---');
  console.log(` Seed: ${CONFIG.seed}`);
  console.log(` Target Examples: ${CONFIG.targetExamples}`);
  console.log(` Generated: ${originalCount}`);
  // `examples` may already have been quality-filtered by the caller, so the
  // post-dedup size is derived from the dedup stats rather than from its length.
  console.log(` After Dedup: ${originalCount - dedupStats.duplicateCount}`);
  console.log(` Final Examples: ${examples.length}`);
  console.log(
    ` Dedup Removed: ${dedupStats.duplicateCount} (${(dedupStats.duplicateRate * 100).toFixed(1)}%)`
  );
  console.log(` Positive Ratio: ${CONFIG.positiveRatio} (target: 60/40)`);

  console.log('\n--- Examples by Relationship Type ---');
  console.log(` spatial_adjacent: ${stats.byRelationship.spatial_adjacent}`);
  console.log(` spatial_contains: ${stats.byRelationship.spatial_contains}`);
  console.log(` spatial_reachable: ${stats.byRelationship.spatial_reachable}`);

  console.log('\n--- Examples by Difficulty Level ---');
  console.log(` basic: ${stats.byDifficulty.basic}`);
  console.log(` intermediate: ${stats.byDifficulty.intermediate}`);
  console.log(` advanced: ${stats.byDifficulty.advanced}`);

  console.log('\n--- Positive/Negative Balance ---');
  // With no examples both shares are reported as 0% instead of NaN.
  const hasExamples = stats.totalExamples > 0;
  const actualPositiveRatio = hasExamples ? stats.positiveCount / stats.totalExamples : 0;
  const actualNegativeRatio = hasExamples ? 1 - actualPositiveRatio : 0;
  console.log(` Positive: ${stats.positiveCount} (${(actualPositiveRatio * 100).toFixed(1)}%)`);
  console.log(` Negative: ${stats.negativeCount} (${(actualNegativeRatio * 100).toFixed(1)}%)`);

  console.log('\n--- Template Diversity (G.002 Compliance) ---');
  console.log(` Unique Templates: ${stats.uniqueTemplatesUsed}`);
  console.log(
    ` G.002 Mandate: ${stats.uniqueTemplatesUsed >= 10 ? 'PASS (>=10)' : 'FAIL (<10)'}`
  );

  // Distinct `template:` tags seen per relationship type. The three known types are
  // pre-seeded so they are listed even when they have no templates.
  const templatesByType: Record<string, Set<string>> = {
    spatial_adjacent: new Set(),
    spatial_contains: new Set(),
    spatial_reachable: new Set(),
  };
  for (const ex of examples) {
    const templateTag = ex.tags.find((tag) => tag.startsWith('template:'));
    if (!templateTag) continue;
    // Create the bucket on demand so an unexpected relationship type is reported
    // instead of throwing.
    (templatesByType[ex.relationshipType] ??= new Set()).add(templateTag);
  }
  console.log(` Templates per type:`);
  for (const [type, templates] of Object.entries(templatesByType)) {
    console.log(` ${type}: ${templates.size} unique templates`);
  }

  console.log('\n--- Quality Metrics (W.010) ---');
  console.log(` Overall Score: ${(qualityStats.avgScores.overall * 100).toFixed(1)}%`);
  console.log(` Distribution:`);
  for (const [band, count] of Object.entries(qualityStats.distribution)) {
    console.log(` ${band}: ${count}`);
  }

  // Cross-tabulation: relationship x difficulty
  console.log('\n--- Cross-Tabulation (Relationship x Difficulty) ---');
  const crossTab: Record<string, Record<string, number>> = {};
  for (const ex of examples) {
    const row = (crossTab[ex.relationshipType] ??= { basic: 0, intermediate: 0, advanced: 0 });
    row[ex.difficulty]++;
  }
  console.log(
    ` ${'Type'.padEnd(25)}${'Basic'.padStart(8)}${'Intermediate'.padStart(15)}${'Advanced'.padStart(12)}`
  );
  for (const [type, diffs] of Object.entries(crossTab)) {
    console.log(
      ` ${type.padEnd(25)}${String(diffs.basic).padStart(8)}${String(diffs.intermediate).padStart(15)}${String(diffs.advanced).padStart(12)}`
    );
  }

  console.log('\n--- Output ---');
  console.log(` File: ${CONFIG.outputPath}`);
  console.log(` Format: JSONL (one JSON object per line)`);
  console.log(` Lines: ${examples.length}`);

  console.log('\n' + rule);
}
447
+
448
+ // =============================================================================
449
+ // MAIN
450
+ // =============================================================================
451
+
452
/**
 * Script entry point: generates the raw examples, deduplicates them (W.004), filters
 * them by quality (W.010), writes the survivors to `CONFIG.outputPath` as JSONL and
 * prints the final report. Progress and timings are logged to the console.
 */
async function main(): Promise<void> {
  const banner = '='.repeat(70);
  // Seconds elapsed since `since`, formatted with one decimal place.
  const secondsSince = (since: number): string => ((Date.now() - since) / 1000).toFixed(1);

  console.log(banner);
  console.log(' HoloScript Spatial Reasoning Training Data Generator');
  console.log(' Target: 10,000 examples | 3 types | 3 difficulties | 60/40 ratio');
  console.log(banner);

  const startTime = Date.now();

  // Step 1: Generate raw examples
  console.log('\n--- Step 1: Generating Raw Examples ---');
  const generator = new SpatialTrainingDataGenerator({
    seed: CONFIG.seed,
    examplesPerCategory: CONFIG.examplesPerCategory,
    positiveRatio: CONFIG.positiveRatio,
    includeContext: CONFIG.includeContext,
    relationshipTypes: ['spatial_adjacent', 'spatial_contains', 'spatial_reachable'],
    difficultyLevels: ['basic', 'intermediate', 'advanced'],
  });

  const rawExamples = generator.generate();
  const originalCount = rawExamples.length;
  console.log(` Generated ${rawExamples.length} raw examples`);
  console.log(` Time: ${secondsSince(startTime)}s`);

  // Step 2: Deduplication (W.004)
  const dedupStart = Date.now();
  const dedupResult = deduplicateExamples(rawExamples);
  console.log(` Dedup time: ${secondsSince(dedupStart)}s`);

  // Step 3: Quality Validation (W.010)
  const qualStart = Date.now();
  const qualityResult = validateQuality(dedupResult.unique);
  const qualityExamples = qualityResult.passed;
  console.log(` Quality validation time: ${secondsSince(qualStart)}s`);

  // Step 4: Export to JSONL
  console.log('\n--- Step 4: Exporting to JSONL ---');
  const outputDir = dirname(CONFIG.outputPath);
  if (!existsSync(outputDir)) {
    mkdirSync(outputDir, { recursive: true });
  }

  // The generator's own exporter is used so the file format stays consistent.
  const jsonl = generator.exportJSONL(qualityExamples);
  writeFileSync(CONFIG.outputPath, jsonl, 'utf-8');
  console.log(` Written ${qualityExamples.length} examples to:`);
  console.log(` ${CONFIG.outputPath}`);

  // File size, measured on the encoded text rather than read back from disk.
  const fileSizeBytes = Buffer.byteLength(jsonl, 'utf-8');
  const fileSizeMB = (fileSizeBytes / (1024 * 1024)).toFixed(2);
  console.log(` File size: ${fileSizeMB} MB`);

  // Step 5: Final Report
  reportStatistics(
    qualityExamples,
    generator,
    { duplicateCount: dedupResult.duplicateCount, duplicateRate: dedupResult.duplicateRate },
    { avgScores: qualityResult.avgScores, distribution: qualityResult.distribution },
    originalCount
  );

  console.log(`\nTotal generation time: ${secondsSince(startTime)}s`);
  console.log('Done.');
}

// Run the script; any failure is logged and reported through a non-zero exit code.
main().catch((err) => {
  console.error('Fatal error:', err);
  process.exit(1);
});