@holoscript/framework 6.0.3 → 6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/CHANGELOG.md +1 -2
  2. package/ROADMAP.md +68 -66
  3. package/dist/{InvisibleWallet-BB6tFvRA.d.cts → InvisibleWallet-EFiuaLn3.d.cts} +1 -1
  4. package/dist/{OrchestratorAgent-BvWgf9uw.d.cts → OrchestratorAgent-CrLDGNL6.d.cts} +1 -1
  5. package/dist/agents/index.cjs +11 -10
  6. package/dist/agents/index.d.cts +4 -16
  7. package/dist/ai/index.cjs +2 -2
  8. package/dist/behavior.cjs +10 -0
  9. package/dist/economy/index.cjs +4 -4
  10. package/dist/economy/index.d.cts +2 -2
  11. package/dist/index.cjs +33 -11
  12. package/dist/index.d.cts +3 -3
  13. package/dist/swarm/index.cjs +3 -0
  14. package/package.json +14 -9
  15. package/src/__tests__/bounty-marketplace.test.ts +53 -21
  16. package/src/__tests__/delegation.test.ts +1 -4
  17. package/src/__tests__/done-log-audit.test.ts +38 -46
  18. package/src/__tests__/framework.test.ts +172 -53
  19. package/src/__tests__/goal-synthesizer.test.ts +9 -6
  20. package/src/__tests__/presence.test.ts +1 -1
  21. package/src/__tests__/protocol-agent.test.ts +12 -11
  22. package/src/__tests__/revenue-splitter.test.ts +22 -15
  23. package/src/__tests__/scenario-driven-todo.test.ts +55 -35
  24. package/src/__tests__/self-improve.test.ts +28 -9
  25. package/src/__tests__/service-lifecycle.test.ts +9 -3
  26. package/src/__tests__/skill-router.test.ts +3 -3
  27. package/src/agents/CulturalMemory.ts +6 -6
  28. package/src/agents/DelegationTraceHooks.ts +560 -0
  29. package/src/agents/FederatedRegistryAdapter.ts +1 -1
  30. package/src/agents/NormEngine.ts +3 -8
  31. package/src/agents/OrchestratorAgent.ts +1 -1
  32. package/src/agents/TaskDelegationService.ts +5 -9
  33. package/src/agents/__tests__/AgentWalletRegistry.test.ts +5 -4
  34. package/src/agents/__tests__/CrossRealityHandoff.test.ts +9 -3
  35. package/src/agents/__tests__/DelegationTraceHooks.test.ts +390 -0
  36. package/src/agents/__tests__/TaskDelegationService.test.ts +4 -2
  37. package/src/agents/spatial-comms/Layer1RealTime.ts +36 -19
  38. package/src/agents/spatial-comms/Layer2A2A.ts +1 -3
  39. package/src/agents/spatial-comms/Layer3MCP.ts +13 -4
  40. package/src/agents/spatial-comms/ProtocolTypes.ts +5 -2
  41. package/src/agents/spatial-comms/examples/multi-agent-world-creation.ts +2 -2
  42. package/src/ai/HoloScriptGenerator.ts +2 -2
  43. package/src/ai/__tests__/PerceptionSystem.prod.test.ts +1 -1
  44. package/src/ai/__tests__/PerceptionSystem.test.ts +14 -14
  45. package/src/ai/__tests__/SteeringBehaviors.prod.test.ts +1 -1
  46. package/src/ai/index.ts +5 -1
  47. package/src/board/audit.ts +17 -6
  48. package/src/board/board-ops.ts +45 -15
  49. package/src/board/board-types.ts +94 -20
  50. package/src/delegation.ts +5 -3
  51. package/src/distributed-claimer.ts +13 -2
  52. package/src/economy/BountyManager.ts +40 -18
  53. package/src/economy/KnowledgeMarketplace.ts +27 -8
  54. package/src/economy/PaymentWebhookService.ts +0 -1
  55. package/src/economy/RevenueSplitter.ts +2 -4
  56. package/src/economy/UnifiedBudgetOptimizer.ts +8 -9
  57. package/src/economy/_core-stubs.ts +1 -1
  58. package/src/economy/x402-facilitator.ts +17 -8
  59. package/src/index.ts +16 -12
  60. package/src/knowledge/__tests__/knowledge-consolidator.test.ts +138 -89
  61. package/src/knowledge/__tests__/knowledge-store-vector.test.ts +59 -16
  62. package/src/knowledge/brain.ts +7 -7
  63. package/src/knowledge/consolidation.ts +16 -16
  64. package/src/knowledge/knowledge-consolidator.ts +60 -30
  65. package/src/knowledge/knowledge-store.ts +83 -45
  66. package/src/learning/ProceduralCompiler.ts +6 -1
  67. package/src/learning/learning/MemoryConsolidator.ts +102 -0
  68. package/src/learning/learning/MemoryScorer.ts +69 -0
  69. package/src/learning/learning/ProceduralCompiler.ts +45 -0
  70. package/src/learning/learning/SemanticClusterer.ts +66 -0
  71. package/src/llm/llm-adapter.ts +24 -10
  72. package/src/mesh/index.ts +37 -17
  73. package/src/protocol/goal-synthesizer.ts +24 -34
  74. package/src/protocol/implementations.ts +91 -22
  75. package/src/protocol/micro-phase-decomposer.ts +25 -17
  76. package/src/protocol/micro-step-decomposer.test.ts +104 -39
  77. package/src/protocol-agent.test.ts +17 -7
  78. package/src/protocol-agent.ts +45 -42
  79. package/src/self-improve/absorb-scanner.ts +9 -6
  80. package/src/self-improve/evolution-engine.ts +36 -18
  81. package/src/self-improve/framework-absorber.ts +21 -16
  82. package/src/self-improve/index.ts +2 -10
  83. package/src/self-improve/prompt-optimizer.ts +31 -19
  84. package/src/self-improve/test-generator.ts +16 -12
  85. package/src/skill-router.ts +7 -6
  86. package/src/swarm/messaging/GossipProtocol.ts +1 -1
  87. package/src/swarm/messaging/__tests__/BroadcastChannel.prod.test.ts +31 -9
  88. package/src/swarm/messaging/__tests__/GossipProtocol.prod.test.ts +21 -7
  89. package/src/swarm/messaging/__tests__/SwarmEventBus.prod.test.ts +24 -8
  90. package/src/swarm/messaging/__tests__/SwarmEventBus.test.ts +6 -2
  91. package/src/team.ts +277 -122
  92. package/src/training/scripts/generate-spatial-dataset.ts +1 -1
  93. package/src/training/training/LRScheduler.ts +377 -0
  94. package/src/training/training/QualityScoringPipeline.ts +139 -0
  95. package/src/training/training/SoftDedup.ts +461 -0
  96. package/src/training/training/SparsityMonitor.ts +685 -0
  97. package/src/training/training/SparsityMonitorTypes.ts +209 -0
  98. package/src/training/training/SpatialTrainingDataGenerator.ts +1526 -0
  99. package/src/training/training/SpatialTrainingDataTypes.ts +216 -0
  100. package/src/training/training/TrainingPipelineConfig.ts +215 -0
  101. package/src/training/training/__tests__/CorpusValidation.test.ts +87 -0
  102. package/src/training/training/__tests__/LRScheduler.test.ts +592 -0
  103. package/src/training/training/__tests__/SoftDedup.test.ts +415 -0
  104. package/src/training/training/__tests__/SparsityMonitor.test.ts +1623 -0
  105. package/src/training/training/__tests__/SpatialCorpusValidation.test.ts +72 -0
  106. package/src/training/training/__tests__/SpatialTrainingDataGenerator.test.ts +1244 -0
  107. package/src/training/training/__tests__/TrainingMonkeyIntegration.test.ts +897 -0
  108. package/src/training/training/__tests__/TrainingPipelineConfig.test.ts +202 -0
  109. package/src/training/training/__tests__/schema.test.ts +72 -0
  110. package/src/training/training/__tests__/training-constants.test.ts +106 -0
  111. package/src/training/training/__tests__/trait-mappings.test.ts +81 -0
  112. package/src/training/training/constants.ts +94 -0
  113. package/src/training/training/index.ts +17 -0
  114. package/src/training/training/schema.ts +147 -0
  115. package/src/training/training/scripts/generate-novel-use-cases-dataset.ts +272 -0
  116. package/src/training/training/scripts/generate-spatial-dataset.ts +521 -0
  117. package/src/training/training/trainingmonkey/TrainingMonkeyIntegration.ts +477 -0
  118. package/src/training/training/trainingmonkey/TrainingMonkeyTypes.ts +230 -0
  119. package/src/training/training/trainingmonkey/index.ts +26 -0
  120. package/src/training/training/trait-mappings.ts +157 -0
  121. package/src/types.ts +2 -7
  122. package/ALL-test-results.json +0 -1
  123. package/LICENSE +0 -21
  124. package/dist/AgentManifest-CB4xM-Ma.d.ts +0 -704
  125. package/dist/BehaviorTree-BrBFECv5.d.ts +0 -103
  126. package/dist/InvisibleWallet-rtRrBOA8.d.ts +0 -1732
  127. package/dist/OrchestratorAgent-Q_CbVTmO.d.ts +0 -798
  128. package/dist/agents/index.d.ts +0 -1788
  129. package/dist/agents/index.js +0 -4695
  130. package/dist/ai/index.d.ts +0 -1753
  131. package/dist/ai/index.js +0 -5244
  132. package/dist/behavior.d.ts +0 -130
  133. package/dist/behavior.js +0 -407
  134. package/dist/economy/index.d.ts +0 -747
  135. package/dist/economy/index.js +0 -3617
  136. package/dist/implementations-D9T3un9D.d.ts +0 -236
  137. package/dist/index.d.ts +0 -1729
  138. package/dist/index.js +0 -24277
  139. package/dist/learning/index.d.ts +0 -104
  140. package/dist/learning/index.js +0 -189
  141. package/dist/negotiation/index.d.ts +0 -610
  142. package/dist/negotiation/index.js +0 -931
  143. package/dist/skills/index.d.ts +0 -289
  144. package/dist/skills/index.js +0 -1079
  145. package/dist/swarm/index.d.ts +0 -2433
  146. package/dist/swarm/index.js +0 -5221
  147. package/dist/training/index.d.ts +0 -1734
  148. package/dist/training/index.js +0 -2687
  149. package/extract-failures.js +0 -10
  150. package/src/training/training/data/novel-use-cases.jsonl +0 -153
  151. package/src/training/training/data/spatial-reasoning-10k.jsonl +0 -9354
  152. package/src/types/core-stubs.d.ts +0 -113
  153. package/test-output.txt +0 -0
  154. package/test-result.json +0 -1
  155. package/tsc-errors.txt +0 -4
  156. package/tsc_output.txt +0 -0
  157. package/typescript-errors-2.txt +0 -0
  158. package/typescript-errors.txt +0 -22
  159. package/vitest-log-utf8.txt +0 -268
  160. package/vitest-log.txt +0 -0
@@ -0,0 +1,415 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { SoftDedup, createSoftDedup, DEFAULT_SOFTDEDUP_CONFIG } from '../SoftDedup';
3
+ import type { SoftDedupConfig, SoftDedupResult } from '../SoftDedup';
4
+
5
+ // =============================================================================
6
+ // TEST DATA
7
+ // =============================================================================
8
+
9
/**
 * Five HoloScript snippets that use different top-level names, orb names and
 * trait blocks. Used by the tests as a low-duplication dataset.
 */
const UNIQUE_EXAMPLES = [
  'composition MyScene { orb Player { Grabbable {} Physics { mass: 10 } } }',
  'world Arena { orb Enemy { Animation { clip: "attack" duration: 2.0 } } }',
  'composition Garden { orb Tree { GaussianSplat { resolution: 512 } } }',
  'world Ocean { orb Fish { NPC { behavior: "patrol" speed: 3.0 } } }',
  'composition Castle { orb Knight { Tradeable { value: 100 } } }',
];
16
+
17
/**
 * Eight near-identical template snippets (only the orb letter A-H differs)
 * followed by a single structurally different snippet. The last entry is the
 * one the tests treat as the "unique" example.
 */
const DUPLICATE_HEAVY_EXAMPLES = [
  'composition Scene { orb A { Grabbable {} } }',
  'composition Scene { orb B { Grabbable {} } }',
  'composition Scene { orb C { Grabbable {} } }',
  'composition Scene { orb D { Grabbable {} } }',
  'composition Scene { orb E { Grabbable {} } }',
  'composition Scene { orb F { Grabbable {} } }',
  'composition Scene { orb G { Grabbable {} } }',
  'composition Scene { orb H { Grabbable {} } }',
  'world UniqueWorld { orb Special { Physics { mass: 999 gravity: true } } }',
];
28
+
29
+ // =============================================================================
30
+ // TESTS
31
+ // =============================================================================
32
+
33
+ describe('SoftDedup', () => {
34
+ // ---------------------------------------------------------------------------
35
+ // CONSTRUCTION & CONFIGURATION
36
+ // ---------------------------------------------------------------------------
37
+
38
// Constructor behaviour: default config, partial overrides merged with the
// defaults, and one validation error per out-of-range option. Each `toThrow`
// argument is matched as a substring of the thrown error message.
describe('constructor', () => {
  it('uses default config when no overrides provided', () => {
    const dedup = new SoftDedup();
    const config = dedup.getConfig();
    expect(config).toEqual(DEFAULT_SOFTDEDUP_CONFIG);
  });

  it('merges partial config with defaults', () => {
    const dedup = new SoftDedup({ temperature: 0.5, wordLevel: true });
    const config = dedup.getConfig();

    // Overridden fields take the supplied values...
    expect(config.temperature).toBe(0.5);
    expect(config.wordLevel).toBe(true);
    // ...while untouched fields keep their defaults.
    expect(config.minWeight).toBe(DEFAULT_SOFTDEDUP_CONFIG.minWeight);
    expect(config.ngramSizes).toEqual(DEFAULT_SOFTDEDUP_CONFIG.ngramSizes);
  });

  it('throws on invalid minWeight (<= 0)', () => {
    expect(() => new SoftDedup({ minWeight: 0 })).toThrow('minWeight');
    expect(() => new SoftDedup({ minWeight: -0.5 })).toThrow('minWeight');
  });

  it('throws on invalid minWeight (> 1)', () => {
    expect(() => new SoftDedup({ minWeight: 1.5 })).toThrow('minWeight');
  });

  it('throws on invalid maxWeight (< minWeight)', () => {
    expect(() => new SoftDedup({ minWeight: 0.5, maxWeight: 0.3 })).toThrow('maxWeight');
  });

  it('throws on invalid maxWeight (> 1)', () => {
    expect(() => new SoftDedup({ maxWeight: 1.5 })).toThrow('maxWeight');
  });

  it('throws on invalid temperature (<= 0)', () => {
    expect(() => new SoftDedup({ temperature: 0 })).toThrow('temperature');
    expect(() => new SoftDedup({ temperature: -1 })).toThrow('temperature');
  });

  it('throws on invalid commonThresholdPercentile', () => {
    expect(() => new SoftDedup({ commonThresholdPercentile: -0.1 })).toThrow(
      'commonThresholdPercentile'
    );
    expect(() => new SoftDedup({ commonThresholdPercentile: 1.5 })).toThrow(
      'commonThresholdPercentile'
    );
  });

  it('throws on empty ngramSizes', () => {
    expect(() => new SoftDedup({ ngramSizes: [] })).toThrow('ngramSizes');
  });

  it('throws on non-integer ngramSizes', () => {
    expect(() => new SoftDedup({ ngramSizes: [2.5] })).toThrow('positive integer');
  });

  it('throws on zero ngramSize', () => {
    expect(() => new SoftDedup({ ngramSizes: [0] })).toThrow('positive integer');
  });
});
97
+
98
+ // ---------------------------------------------------------------------------
99
+ // EDGE CASES
100
+ // ---------------------------------------------------------------------------
101
+
102
// Degenerate inputs: empty dataset, a single example, inputs that yield no
// n-grams at all, and datasets made only of identical strings.
describe('edge cases', () => {
  it('returns empty array for empty dataset', () => {
    const dedup = new SoftDedup();
    const results = dedup.process([]);
    expect(results).toEqual([]);
  });

  it('returns max weight for single example', () => {
    const dedup = new SoftDedup();
    const results = dedup.process(['hello world']);

    expect(results).toHaveLength(1);
    expect(results[0].samplingWeight).toBe(1.0);
    expect(results[0].commonnessScore).toBe(0);
    expect(results[0].index).toBe(0);
  });

  it('handles empty string examples', () => {
    const dedup = new SoftDedup();
    const results = dedup.process(['', '']);

    expect(results).toHaveLength(2);
    // Empty strings produce no n-grams -> max weight
    for (const r of results) {
      expect(r.samplingWeight).toBe(1.0);
      expect(r.ngramStats.totalNgrams).toBe(0);
    }
  });

  it('handles very short strings (shorter than min n-gram size)', () => {
    const dedup = new SoftDedup({ ngramSizes: [5] });
    const results = dedup.process(['ab', 'cd']);

    expect(results).toHaveLength(2);
    // Strings shorter than n=5 produce no n-grams
    for (const r of results) {
      expect(r.ngramStats.totalNgrams).toBe(0);
      expect(r.samplingWeight).toBe(1.0);
    }
  });

  it('handles identical examples (maximum commonness)', () => {
    const text = 'composition Scene { orb Player { Grabbable {} } }';
    const dedup = new SoftDedup();
    const results = dedup.process([text, text, text, text, text]);

    expect(results).toHaveLength(5);

    // All identical -> every example must receive the same weight
    const weights = results.map((r) => r.samplingWeight);
    expect(new Set(weights).size).toBe(1);
  });

  it('handles whitespace-only examples', () => {
    // NOTE(review): the whitespace runs in these literals may have been
    // collapsed when this file was rendered; confirm the intended lengths
    // against the published package.
    const dedup = new SoftDedup({ ngramSizes: [3] });
    const results = dedup.process([' ', ' ']);

    expect(results).toHaveLength(2);
    // Both inputs are identical, so they must be weighted identically.
    expect(results[0].samplingWeight).toBe(results[1].samplingWeight);
  });
});
158
+
159
+ // ---------------------------------------------------------------------------
160
+ // CORE FUNCTIONALITY
161
+ // ---------------------------------------------------------------------------
162
+
163
// Core `process()` contract: relative ordering of weights, value ranges,
// index preservation, and internal consistency of the per-example n-gram stats.
describe('process', () => {
  it('assigns higher weights to unique examples', () => {
    const dedup = new SoftDedup();
    const results = dedup.process(DUPLICATE_HEAVY_EXAMPLES);

    // The unique world example (last one) should have a weight at least as
    // high as the average of the template-based ones.
    const templateWeights = results.slice(0, -1).map((r) => r.samplingWeight);
    const uniqueWeight = results[results.length - 1].samplingWeight;

    const avgTemplateWeight =
      templateWeights.reduce((a, b) => a + b, 0) / templateWeights.length;

    expect(uniqueWeight).toBeGreaterThanOrEqual(avgTemplateWeight);
  });

  it('produces weights in [minWeight, maxWeight] range', () => {
    const dedup = new SoftDedup({ minWeight: 0.2, maxWeight: 0.9 });
    const results = dedup.process(DUPLICATE_HEAVY_EXAMPLES);

    for (const r of results) {
      expect(r.samplingWeight).toBeGreaterThanOrEqual(0.2);
      expect(r.samplingWeight).toBeLessThanOrEqual(0.9);
    }
  });

  it('preserves correct indices', () => {
    const dedup = new SoftDedup();
    const results = dedup.process(UNIQUE_EXAMPLES);

    results.forEach((r, i) => {
      expect(r.index).toBe(i);
    });
  });

  it('commonness scores are in [0, 1] range', () => {
    const dedup = new SoftDedup();
    const results = dedup.process(DUPLICATE_HEAVY_EXAMPLES);

    for (const r of results) {
      expect(r.commonnessScore).toBeGreaterThanOrEqual(0);
      expect(r.commonnessScore).toBeLessThanOrEqual(1);
    }
  });

  it('n-gram stats are consistent', () => {
    const dedup = new SoftDedup();
    const results = dedup.process(UNIQUE_EXAMPLES);

    for (const { ngramStats } of results) {
      expect(ngramStats.commonNgrams).toBeLessThanOrEqual(ngramStats.totalNgrams);
      expect(ngramStats.commonRatio).toBeGreaterThanOrEqual(0);
      expect(ngramStats.commonRatio).toBeLessThanOrEqual(1);

      // The ratio is only checked against the counts when there is at least
      // one n-gram, which avoids comparing against a 0/0 division.
      if (ngramStats.totalNgrams > 0) {
        expect(ngramStats.commonRatio).toBeCloseTo(
          ngramStats.commonNgrams / ngramStats.totalNgrams,
          10
        );
      }
    }
  });
});
225
+
226
+ // ---------------------------------------------------------------------------
227
+ // WORD-LEVEL N-GRAMS
228
+ // ---------------------------------------------------------------------------
229
+
230
// `wordLevel: true` configuration, checked with multi-word and single-word
// examples.
describe('word-level n-grams', () => {
  it('supports word-level tokenization', () => {
    const dedup = new SoftDedup({
      wordLevel: true,
      ngramSizes: [2, 3],
    });

    const results = dedup.process([
      'composition Scene orb Player Grabbable',
      'composition Scene orb Player Grabbable',
      'world Arena orb Enemy Physics mass gravity',
    ]);

    expect(results).toHaveLength(3);
    // Word-level should still detect duplicates: the two identical examples
    // must end up with the same weight.
    expect(results[0].samplingWeight).toBe(results[1].samplingWeight);
  });

  it('handles single-word examples with word-level n-grams', () => {
    const dedup = new SoftDedup({
      wordLevel: true,
      ngramSizes: [2],
    });

    const results = dedup.process(['hello', 'world']);

    // Single words can't form bigrams -> no n-grams -> max weight
    for (const r of results) {
      expect(r.ngramStats.totalNgrams).toBe(0);
      expect(r.samplingWeight).toBe(1.0);
    }
  });
});
262
+
263
+ // ---------------------------------------------------------------------------
264
+ // TEMPERATURE SCALING
265
+ // ---------------------------------------------------------------------------
266
+
267
// Compares the spread of sampling weights produced at a low and a high
// temperature on the same duplicate-heavy dataset.
describe('temperature scaling', () => {
  it('lower temperature produces more extreme weights', () => {
    const lowTemp = new SoftDedup({ temperature: 0.3 });
    const highTemp = new SoftDedup({ temperature: 2.0 });

    const lowWeights = lowTemp
      .process(DUPLICATE_HEAVY_EXAMPLES)
      .map((r) => r.samplingWeight);
    const highWeights = highTemp
      .process(DUPLICATE_HEAVY_EXAMPLES)
      .map((r) => r.samplingWeight);

    const lowVariance = computeVariance(lowWeights);
    const highVariance = computeVariance(highWeights);

    // Low temperature should produce more spread-out weights (higher
    // variance, or at least not lower); 0.01 of slack is allowed.
    expect(lowVariance).toBeGreaterThanOrEqual(highVariance - 0.01);
  });
});
287
+
288
+ // ---------------------------------------------------------------------------
289
+ // STATISTICS
290
+ // ---------------------------------------------------------------------------
291
+
292
// Aggregate statistics returned by `computeStats()`: the empty case, range
// checks on real `process()` output, and the median for hand-built results.
describe('computeStats', () => {
  /**
   * Builds a minimal result fixture where only `index` and `samplingWeight`
   * vary; the remaining fields are fixed so the median tests isolate the
   * weight values.
   */
  const resultWithWeight = (index: number, samplingWeight: number): SoftDedupResult => ({
    index,
    commonnessScore: 0,
    samplingWeight,
    ngramStats: { totalNgrams: 10, commonNgrams: 0, commonRatio: 0 },
  });

  it('returns zero stats for empty results', () => {
    const dedup = new SoftDedup();
    const stats = dedup.computeStats([]);

    expect(stats.totalExamples).toBe(0);
    expect(stats.meanWeight).toBe(0);
    expect(stats.medianWeight).toBe(0);
    expect(stats.effectiveDatasetSize).toBe(0);
    expect(stats.reductionRatio).toBe(0);
  });

  it('computes correct stats for uniform weights', () => {
    const dedup = new SoftDedup();
    const results = dedup.process(UNIQUE_EXAMPLES);
    const stats = dedup.computeStats(results);

    expect(stats.totalExamples).toBe(5);
    expect(stats.meanWeight).toBeGreaterThan(0);
    expect(stats.meanWeight).toBeLessThanOrEqual(1);
    expect(stats.effectiveDatasetSize).toBeLessThanOrEqual(5);
    expect(stats.reductionRatio).toBeGreaterThanOrEqual(0);
    expect(stats.reductionRatio).toBeLessThanOrEqual(1);
  });

  it('reports positive reduction ratio for duplicate-heavy datasets', () => {
    const dedup = new SoftDedup();
    const results = dedup.process(DUPLICATE_HEAVY_EXAMPLES);
    const stats = dedup.computeStats(results);

    // With duplicates, effective size should not exceed the total
    expect(stats.effectiveDatasetSize).toBeLessThanOrEqual(stats.totalExamples);
    expect(stats.reductionRatio).toBeGreaterThanOrEqual(0);
  });

  it('computes correct median for even-length arrays', () => {
    const dedup = new SoftDedup();
    const results: SoftDedupResult[] = [resultWithWeight(0, 0.2), resultWithWeight(1, 0.8)];

    const stats = dedup.computeStats(results);
    expect(stats.medianWeight).toBe(0.5); // (0.2 + 0.8) / 2
  });

  it('computes correct median for odd-length arrays', () => {
    const dedup = new SoftDedup();
    const results: SoftDedupResult[] = [
      resultWithWeight(0, 0.2),
      resultWithWeight(1, 0.5),
      resultWithWeight(2, 0.9),
    ];

    const stats = dedup.computeStats(results);
    expect(stats.medianWeight).toBe(0.5);
  });
});
373
+
374
+ // ---------------------------------------------------------------------------
375
+ // FACTORY FUNCTION
376
+ // ---------------------------------------------------------------------------
377
+
378
// The `createSoftDedup` factory should return a `SoftDedup` instance, with
// and without config overrides.
describe('createSoftDedup', () => {
  it('creates a SoftDedup instance with defaults', () => {
    const dedup = createSoftDedup();

    expect(dedup).toBeInstanceOf(SoftDedup);
    expect(dedup.getConfig()).toEqual(DEFAULT_SOFTDEDUP_CONFIG);
  });

  it('creates a SoftDedup instance with overrides', () => {
    const dedup = createSoftDedup({ temperature: 2.0 });

    expect(dedup.getConfig().temperature).toBe(2.0);
  });
});
390
+
391
+ // ---------------------------------------------------------------------------
392
+ // DEFAULT CONFIG
393
+ // ---------------------------------------------------------------------------
394
+
395
// Pins the exported default configuration so accidental changes to the
// defaults are caught by the suite.
describe('DEFAULT_SOFTDEDUP_CONFIG', () => {
  it('has expected default values', () => {
    expect(DEFAULT_SOFTDEDUP_CONFIG.ngramSizes).toEqual([3, 5, 7]);
    expect(DEFAULT_SOFTDEDUP_CONFIG.wordLevel).toBe(false);
    expect(DEFAULT_SOFTDEDUP_CONFIG.minWeight).toBe(0.1);
    expect(DEFAULT_SOFTDEDUP_CONFIG.maxWeight).toBe(1.0);
    expect(DEFAULT_SOFTDEDUP_CONFIG.temperature).toBe(1.0);
    expect(DEFAULT_SOFTDEDUP_CONFIG.commonThresholdPercentile).toBe(0.7);
  });
});
405
+ });
406
+
407
+ // =============================================================================
408
+ // HELPERS
409
+ // =============================================================================
410
+
411
/**
 * Computes the population variance (mean of squared deviations from the mean,
 * dividing by `values.length`) of a list of numbers.
 *
 * @param values - Numbers to measure; the array is not modified.
 * @returns The population variance, or 0 when `values` is empty.
 */
function computeVariance(values: readonly number[]): number {
  const count = values.length;
  // Guard the empty case explicitly so the divisions below never produce NaN.
  if (count === 0) return 0;

  let total = 0;
  for (const v of values) {
    total += v;
  }
  const mean = total / count;

  let squaredDeviationSum = 0;
  for (const v of values) {
    squaredDeviationSum += (v - mean) ** 2;
  }
  return squaredDeviationSum / count;
}