@holoscript/framework 6.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. package/ALL-test-results.json +1 -0
  2. package/CHANGELOG.md +8 -0
  3. package/LICENSE +21 -0
  4. package/ROADMAP.md +175 -0
  5. package/dist/AgentManifest-CB4xM-Ma.d.cts +704 -0
  6. package/dist/AgentManifest-CB4xM-Ma.d.ts +704 -0
  7. package/dist/BehaviorTree-BrBFECv5.d.cts +103 -0
  8. package/dist/BehaviorTree-BrBFECv5.d.ts +103 -0
  9. package/dist/InvisibleWallet-BB6tFvRA.d.cts +1732 -0
  10. package/dist/InvisibleWallet-rtRrBOA8.d.ts +1732 -0
  11. package/dist/OrchestratorAgent-BvWgf9uw.d.cts +798 -0
  12. package/dist/OrchestratorAgent-Q_CbVTmO.d.ts +798 -0
  13. package/dist/agents/index.cjs +4790 -0
  14. package/dist/agents/index.d.cts +1788 -0
  15. package/dist/agents/index.d.ts +1788 -0
  16. package/dist/agents/index.js +4695 -0
  17. package/dist/ai/index.cjs +5347 -0
  18. package/dist/ai/index.d.cts +1753 -0
  19. package/dist/ai/index.d.ts +1753 -0
  20. package/dist/ai/index.js +5244 -0
  21. package/dist/behavior.cjs +449 -0
  22. package/dist/behavior.d.cts +130 -0
  23. package/dist/behavior.d.ts +130 -0
  24. package/dist/behavior.js +407 -0
  25. package/dist/economy/index.cjs +3659 -0
  26. package/dist/economy/index.d.cts +747 -0
  27. package/dist/economy/index.d.ts +747 -0
  28. package/dist/economy/index.js +3617 -0
  29. package/dist/implementations-D9T3un9D.d.cts +236 -0
  30. package/dist/implementations-D9T3un9D.d.ts +236 -0
  31. package/dist/index.cjs +24550 -0
  32. package/dist/index.d.cts +1729 -0
  33. package/dist/index.d.ts +1729 -0
  34. package/dist/index.js +24277 -0
  35. package/dist/learning/index.cjs +219 -0
  36. package/dist/learning/index.d.cts +104 -0
  37. package/dist/learning/index.d.ts +104 -0
  38. package/dist/learning/index.js +189 -0
  39. package/dist/negotiation/index.cjs +970 -0
  40. package/dist/negotiation/index.d.cts +610 -0
  41. package/dist/negotiation/index.d.ts +610 -0
  42. package/dist/negotiation/index.js +931 -0
  43. package/dist/skills/index.cjs +1118 -0
  44. package/dist/skills/index.d.cts +289 -0
  45. package/dist/skills/index.d.ts +289 -0
  46. package/dist/skills/index.js +1079 -0
  47. package/dist/swarm/index.cjs +5268 -0
  48. package/dist/swarm/index.d.cts +2433 -0
  49. package/dist/swarm/index.d.ts +2433 -0
  50. package/dist/swarm/index.js +5221 -0
  51. package/dist/training/index.cjs +2745 -0
  52. package/dist/training/index.d.cts +1734 -0
  53. package/dist/training/index.d.ts +1734 -0
  54. package/dist/training/index.js +2687 -0
  55. package/extract-failures.js +10 -0
  56. package/package.json +82 -0
  57. package/src/__tests__/bounty-marketplace.test.ts +374 -0
  58. package/src/__tests__/delegation.test.ts +144 -0
  59. package/src/__tests__/distributed-claimer.test.ts +147 -0
  60. package/src/__tests__/done-log-audit.test.ts +342 -0
  61. package/src/__tests__/framework.test.ts +865 -0
  62. package/src/__tests__/goal-synthesizer.test.ts +236 -0
  63. package/src/__tests__/presence.test.ts +223 -0
  64. package/src/__tests__/protocol-agent.test.ts +254 -0
  65. package/src/__tests__/revenue-splitter.test.ts +114 -0
  66. package/src/__tests__/scenario-driven-todo.test.ts +197 -0
  67. package/src/__tests__/self-improve.test.ts +349 -0
  68. package/src/__tests__/service-lifecycle.test.ts +237 -0
  69. package/src/__tests__/skill-router.test.ts +121 -0
  70. package/src/agents/AgentManifest.ts +493 -0
  71. package/src/agents/AgentRegistry.ts +475 -0
  72. package/src/agents/AgentTypes.ts +585 -0
  73. package/src/agents/AgentWalletRegistry.ts +83 -0
  74. package/src/agents/AuthenticatedCRDT.ts +388 -0
  75. package/src/agents/CapabilityMatcher.ts +453 -0
  76. package/src/agents/CrossRealityHandoff.ts +305 -0
  77. package/src/agents/CulturalMemory.ts +454 -0
  78. package/src/agents/FederatedRegistryAdapter.ts +429 -0
  79. package/src/agents/NormEngine.ts +450 -0
  80. package/src/agents/OrchestratorAgent.ts +414 -0
  81. package/src/agents/SkillWorkflowEngine.ts +472 -0
  82. package/src/agents/TaskDelegationService.ts +551 -0
  83. package/src/agents/__tests__/AgentManifest.prod.test.ts +134 -0
  84. package/src/agents/__tests__/AgentManifest.test.ts +182 -0
  85. package/src/agents/__tests__/AgentModule.test.ts +864 -0
  86. package/src/agents/__tests__/AgentRegistry.prod.test.ts +125 -0
  87. package/src/agents/__tests__/AgentRegistry.test.ts +148 -0
  88. package/src/agents/__tests__/AgentTypes.test.ts +534 -0
  89. package/src/agents/__tests__/AgentWalletRegistry.test.ts +152 -0
  90. package/src/agents/__tests__/AuthenticatedCRDT.test.ts +558 -0
  91. package/src/agents/__tests__/CapabilityMatcher.prod.test.ts +117 -0
  92. package/src/agents/__tests__/CapabilityMatcher.test.ts +178 -0
  93. package/src/agents/__tests__/CrossRealityHandoff.test.ts +402 -0
  94. package/src/agents/__tests__/CulturalMemory.test.ts +200 -0
  95. package/src/agents/__tests__/FederatedRegistryAdapter.test.ts +409 -0
  96. package/src/agents/__tests__/NormEngine.test.ts +276 -0
  97. package/src/agents/__tests__/OrchestratorAgent.test.ts +182 -0
  98. package/src/agents/__tests__/SkillWorkflowEngine.test.ts +357 -0
  99. package/src/agents/__tests__/TaskDelegationService.test.ts +446 -0
  100. package/src/agents/index.ts +107 -0
  101. package/src/agents/spatial-comms/Layer1RealTime.ts +621 -0
  102. package/src/agents/spatial-comms/Layer2A2A.ts +661 -0
  103. package/src/agents/spatial-comms/Layer3MCP.ts +651 -0
  104. package/src/agents/spatial-comms/ProtocolTypes.ts +543 -0
  105. package/src/agents/spatial-comms/SpatialCommClient.ts +483 -0
  106. package/src/agents/spatial-comms/__tests__/performance-benchmark.test.ts +465 -0
  107. package/src/agents/spatial-comms/examples/multi-agent-world-creation.ts +409 -0
  108. package/src/agents/spatial-comms/index.ts +66 -0
  109. package/src/ai/AIAdapter.ts +313 -0
  110. package/src/ai/AICopilot.ts +331 -0
  111. package/src/ai/AIOutputValidator.ts +203 -0
  112. package/src/ai/BTNodes.ts +239 -0
  113. package/src/ai/BehaviorSelector.ts +135 -0
  114. package/src/ai/BehaviorTree.ts +153 -0
  115. package/src/ai/Blackboard.ts +165 -0
  116. package/src/ai/GenerationAnalytics.ts +461 -0
  117. package/src/ai/GenerationCache.ts +265 -0
  118. package/src/ai/GoalPlanner.ts +165 -0
  119. package/src/ai/HoloScriptGenerator.ts +580 -0
  120. package/src/ai/InfluenceMap.ts +180 -0
  121. package/src/ai/NavMesh.ts +168 -0
  122. package/src/ai/PerceptionSystem.ts +178 -0
  123. package/src/ai/PromptTemplates.ts +453 -0
  124. package/src/ai/SemanticSearchService.ts +80 -0
  125. package/src/ai/StateMachine.ts +196 -0
  126. package/src/ai/SteeringBehavior.ts +150 -0
  127. package/src/ai/SteeringBehaviors.ts +244 -0
  128. package/src/ai/TrainingDataGenerator.ts +1082 -0
  129. package/src/ai/UtilityAI.ts +145 -0
  130. package/src/ai/__tests__/AIAdapter.prod.test.ts +259 -0
  131. package/src/ai/__tests__/AIAdapter.test.ts +109 -0
  132. package/src/ai/__tests__/AICopilot.prod.test.ts +341 -0
  133. package/src/ai/__tests__/AICopilot.test.ts +178 -0
  134. package/src/ai/__tests__/AIOutputValidator.prod.test.ts +226 -0
  135. package/src/ai/__tests__/AIOutputValidator.test.ts +138 -0
  136. package/src/ai/__tests__/BTNodes.prod.test.ts +391 -0
  137. package/src/ai/__tests__/BTNodes.test.ts +263 -0
  138. package/src/ai/__tests__/BehaviorSelector.prod.test.ts +129 -0
  139. package/src/ai/__tests__/BehaviorSelector.test.ts +132 -0
  140. package/src/ai/__tests__/BehaviorTree.prod.test.ts +266 -0
  141. package/src/ai/__tests__/BehaviorTree.test.ts +216 -0
  142. package/src/ai/__tests__/Blackboard.prod.test.ts +339 -0
  143. package/src/ai/__tests__/Blackboard.test.ts +183 -0
  144. package/src/ai/__tests__/GenerationAnalytics.prod.test.ts +141 -0
  145. package/src/ai/__tests__/GenerationAnalytics.test.ts +165 -0
  146. package/src/ai/__tests__/GenerationCache.prod.test.ts +144 -0
  147. package/src/ai/__tests__/GenerationCache.test.ts +171 -0
  148. package/src/ai/__tests__/GoalPlanner.prod.test.ts +189 -0
  149. package/src/ai/__tests__/GoalPlanner.test.ts +137 -0
  150. package/src/ai/__tests__/GoalPlannerDepth.prod.test.ts +217 -0
  151. package/src/ai/__tests__/HoloScriptGenerator.test.ts +125 -0
  152. package/src/ai/__tests__/InfluenceMap.prod.test.ts +146 -0
  153. package/src/ai/__tests__/InfluenceMap.test.ts +149 -0
  154. package/src/ai/__tests__/NavMesh.prod.test.ts +141 -0
  155. package/src/ai/__tests__/NavMesh.test.ts +159 -0
  156. package/src/ai/__tests__/PerceptionSystem.prod.test.ts +135 -0
  157. package/src/ai/__tests__/PerceptionSystem.test.ts +250 -0
  158. package/src/ai/__tests__/PromptTemplates.prod.test.ts +313 -0
  159. package/src/ai/__tests__/PromptTemplates.test.ts +146 -0
  160. package/src/ai/__tests__/SemanticSearch.test.ts +37 -0
  161. package/src/ai/__tests__/StateMachine.prod.test.ts +162 -0
  162. package/src/ai/__tests__/StateMachine.test.ts +163 -0
  163. package/src/ai/__tests__/SteeringBehavior.prod.test.ts +251 -0
  164. package/src/ai/__tests__/SteeringBehavior.test.ts +135 -0
  165. package/src/ai/__tests__/SteeringBehaviors.prod.test.ts +133 -0
  166. package/src/ai/__tests__/SteeringBehaviors.test.ts +151 -0
  167. package/src/ai/__tests__/TrainingDataGenerator.prod.test.ts +286 -0
  168. package/src/ai/__tests__/TrainingDataGenerator.test.ts +286 -0
  169. package/src/ai/__tests__/UtilityAI.prod.test.ts +207 -0
  170. package/src/ai/__tests__/UtilityAI.test.ts +155 -0
  171. package/src/ai/__tests__/adapters.prod.test.ts +263 -0
  172. package/src/ai/__tests__/adapters.test.ts +320 -0
  173. package/src/ai/adapters.ts +1585 -0
  174. package/src/ai/index.ts +130 -0
  175. package/src/behavior/BehaviorPresets.ts +140 -0
  176. package/src/behavior/BehaviorTree.ts +236 -0
  177. package/src/behavior/StateMachine.ts +176 -0
  178. package/src/behavior/StateTrait.ts +67 -0
  179. package/src/behavior/index.ts +8 -0
  180. package/src/behavior.ts +8 -0
  181. package/src/board/audit.ts +284 -0
  182. package/src/board/board-ops.ts +336 -0
  183. package/src/board/board-types.ts +302 -0
  184. package/src/board/index.ts +69 -0
  185. package/src/define-agent.ts +46 -0
  186. package/src/define-team.ts +33 -0
  187. package/src/delegation.ts +265 -0
  188. package/src/distributed-claimer.ts +228 -0
  189. package/src/economy/AgentBudgetEnforcer.ts +464 -0
  190. package/src/economy/BountyManager.ts +185 -0
  191. package/src/economy/CreatorRevenueAggregator.ts +460 -0
  192. package/src/economy/InvisibleWallet.ts +82 -0
  193. package/src/economy/KnowledgeMarketplace.ts +193 -0
  194. package/src/economy/PaymentWebhookService.ts +512 -0
  195. package/src/economy/RevenueSplitter.ts +156 -0
  196. package/src/economy/SubscriptionManager.ts +546 -0
  197. package/src/economy/UnifiedBudgetOptimizer.ts +635 -0
  198. package/src/economy/UsageMeter.ts +440 -0
  199. package/src/economy/_core-stubs.ts +219 -0
  200. package/src/economy/index.ts +100 -0
  201. package/src/economy/x402-facilitator.ts +1978 -0
  202. package/src/index.ts +348 -0
  203. package/src/knowledge/__tests__/knowledge-consolidator.test.ts +444 -0
  204. package/src/knowledge/__tests__/knowledge-store-vector.test.ts +291 -0
  205. package/src/knowledge/brain.ts +167 -0
  206. package/src/knowledge/consolidation.ts +581 -0
  207. package/src/knowledge/knowledge-consolidator.ts +510 -0
  208. package/src/knowledge/knowledge-store.ts +616 -0
  209. package/src/learning/MemoryConsolidator.ts +102 -0
  210. package/src/learning/MemoryScorer.ts +69 -0
  211. package/src/learning/ProceduralCompiler.ts +45 -0
  212. package/src/learning/SemanticClusterer.ts +66 -0
  213. package/src/learning/index.ts +8 -0
  214. package/src/llm/llm-adapter.ts +159 -0
  215. package/src/mesh/index.ts +309 -0
  216. package/src/negotiation/NegotiationProtocol.ts +694 -0
  217. package/src/negotiation/NegotiationTypes.ts +473 -0
  218. package/src/negotiation/VotingMechanisms.ts +691 -0
  219. package/src/negotiation/index.ts +49 -0
  220. package/src/protocol/goal-synthesizer.ts +317 -0
  221. package/src/protocol/implementations.ts +474 -0
  222. package/src/protocol/micro-phase-decomposer.ts +299 -0
  223. package/src/protocol/micro-step-decomposer.test.ts +306 -0
  224. package/src/protocol-agent.test.ts +353 -0
  225. package/src/protocol-agent.ts +670 -0
  226. package/src/self-improve/absorb-scanner.ts +252 -0
  227. package/src/self-improve/evolution-engine.ts +149 -0
  228. package/src/self-improve/framework-absorber.ts +214 -0
  229. package/src/self-improve/index.ts +50 -0
  230. package/src/self-improve/prompt-optimizer.ts +212 -0
  231. package/src/self-improve/test-generator.ts +175 -0
  232. package/src/skill-router.ts +186 -0
  233. package/src/skills/index.ts +5 -0
  234. package/src/skills/skill-md-bridge.ts +1699 -0
  235. package/src/swarm/ACOEngine.ts +261 -0
  236. package/src/swarm/CollectiveIntelligence.ts +383 -0
  237. package/src/swarm/ContributionSynthesizer.ts +481 -0
  238. package/src/swarm/LeaderElection.ts +393 -0
  239. package/src/swarm/PSOEngine.ts +206 -0
  240. package/src/swarm/QuorumPolicy.ts +173 -0
  241. package/src/swarm/SwarmCoordinator.ts +335 -0
  242. package/src/swarm/SwarmManager.ts +442 -0
  243. package/src/swarm/SwarmMembership.ts +456 -0
  244. package/src/swarm/VotingRound.ts +255 -0
  245. package/src/swarm/__tests__/ACOEngine.prod.test.ts +164 -0
  246. package/src/swarm/__tests__/ACOEngine.test.ts +117 -0
  247. package/src/swarm/__tests__/CollectiveIntelligence.prod.test.ts +296 -0
  248. package/src/swarm/__tests__/CollectiveIntelligence.test.ts +457 -0
  249. package/src/swarm/__tests__/ContributionSynthesizer.prod.test.ts +269 -0
  250. package/src/swarm/__tests__/ContributionSynthesizer.test.ts +254 -0
  251. package/src/swarm/__tests__/LeaderElection.prod.test.ts +196 -0
  252. package/src/swarm/__tests__/LeaderElection.test.ts +151 -0
  253. package/src/swarm/__tests__/PSOEngine.prod.test.ts +162 -0
  254. package/src/swarm/__tests__/PSOEngine.test.ts +106 -0
  255. package/src/swarm/__tests__/QuorumPolicy.prod.test.ts +216 -0
  256. package/src/swarm/__tests__/QuorumPolicy.test.ts +177 -0
  257. package/src/swarm/__tests__/SwarmCoordinator.prod.test.ts +186 -0
  258. package/src/swarm/__tests__/SwarmCoordinator.test.ts +167 -0
  259. package/src/swarm/__tests__/SwarmManager.prod.test.ts +308 -0
  260. package/src/swarm/__tests__/SwarmManager.test.ts +373 -0
  261. package/src/swarm/__tests__/SwarmMembership.prod.test.ts +273 -0
  262. package/src/swarm/__tests__/SwarmMembership.test.ts +264 -0
  263. package/src/swarm/__tests__/VotingRound.prod.test.ts +233 -0
  264. package/src/swarm/__tests__/VotingRound.test.ts +174 -0
  265. package/src/swarm/analytics/SwarmInspector.ts +476 -0
  266. package/src/swarm/analytics/SwarmMetrics.ts +449 -0
  267. package/src/swarm/analytics/__tests__/SwarmInspector.prod.test.ts +366 -0
  268. package/src/swarm/analytics/__tests__/SwarmInspector.test.ts +454 -0
  269. package/src/swarm/analytics/__tests__/SwarmMetrics.prod.test.ts +254 -0
  270. package/src/swarm/analytics/__tests__/SwarmMetrics.test.ts +370 -0
  271. package/src/swarm/analytics/index.ts +7 -0
  272. package/src/swarm/index.ts +69 -0
  273. package/src/swarm/messaging/BroadcastChannel.ts +509 -0
  274. package/src/swarm/messaging/GossipProtocol.ts +565 -0
  275. package/src/swarm/messaging/SwarmEventBus.ts +443 -0
  276. package/src/swarm/messaging/__tests__/BroadcastChannel.prod.test.ts +331 -0
  277. package/src/swarm/messaging/__tests__/BroadcastChannel.test.ts +333 -0
  278. package/src/swarm/messaging/__tests__/GossipProtocol.prod.test.ts +356 -0
  279. package/src/swarm/messaging/__tests__/GossipProtocol.test.ts +437 -0
  280. package/src/swarm/messaging/__tests__/SwarmEventBus.prod.test.ts +191 -0
  281. package/src/swarm/messaging/__tests__/SwarmEventBus.test.ts +247 -0
  282. package/src/swarm/messaging/index.ts +8 -0
  283. package/src/swarm/spatial/FlockingBehavior.ts +462 -0
  284. package/src/swarm/spatial/FormationController.ts +500 -0
  285. package/src/swarm/spatial/Vector3.ts +170 -0
  286. package/src/swarm/spatial/ZoneClaiming.ts +509 -0
  287. package/src/swarm/spatial/__tests__/FlockingBehavior.prod.test.ts +239 -0
  288. package/src/swarm/spatial/__tests__/FlockingBehavior.test.ts +298 -0
  289. package/src/swarm/spatial/__tests__/FormationController.prod.test.ts +240 -0
  290. package/src/swarm/spatial/__tests__/FormationController.test.ts +297 -0
  291. package/src/swarm/spatial/__tests__/Vector3.prod.test.ts +283 -0
  292. package/src/swarm/spatial/__tests__/Vector3.test.ts +224 -0
  293. package/src/swarm/spatial/__tests__/ZoneClaiming.prod.test.ts +246 -0
  294. package/src/swarm/spatial/__tests__/ZoneClaiming.test.ts +374 -0
  295. package/src/swarm/spatial/index.ts +28 -0
  296. package/src/team.ts +1245 -0
  297. package/src/training/LRScheduler.ts +377 -0
  298. package/src/training/QualityScoringPipeline.ts +139 -0
  299. package/src/training/SoftDedup.ts +461 -0
  300. package/src/training/SparsityMonitor.ts +685 -0
  301. package/src/training/SparsityMonitorTypes.ts +209 -0
  302. package/src/training/SpatialTrainingDataGenerator.ts +1526 -0
  303. package/src/training/SpatialTrainingDataTypes.ts +216 -0
  304. package/src/training/TrainingPipelineConfig.ts +215 -0
  305. package/src/training/constants.ts +94 -0
  306. package/src/training/index.ts +138 -0
  307. package/src/training/schema.ts +147 -0
  308. package/src/training/scripts/generate-novel-use-cases-dataset.ts +272 -0
  309. package/src/training/scripts/generate-spatial-dataset.ts +521 -0
  310. package/src/training/training/data/novel-use-cases.jsonl +153 -0
  311. package/src/training/training/data/spatial-reasoning-10k.jsonl +9354 -0
  312. package/src/training/trainingmonkey/TrainingMonkeyIntegration.ts +477 -0
  313. package/src/training/trainingmonkey/TrainingMonkeyTypes.ts +230 -0
  314. package/src/training/trainingmonkey/index.ts +26 -0
  315. package/src/training/trait-mappings.ts +157 -0
  316. package/src/types/core-stubs.d.ts +113 -0
  317. package/src/types.ts +304 -0
  318. package/test-output.txt +0 -0
  319. package/test-result.json +1 -0
  320. package/tsc-errors.txt +4 -0
  321. package/tsc_output.txt +0 -0
  322. package/tsconfig.json +14 -0
  323. package/tsup-learning-esm.config.ts +12 -0
  324. package/tsup.config.ts +21 -0
  325. package/typescript-errors-2.txt +0 -0
  326. package/typescript-errors.txt +22 -0
  327. package/vitest-log-utf8.txt +268 -0
  328. package/vitest-log.txt +0 -0
  329. package/vitest.config.ts +8 -0
@@ -0,0 +1,461 @@
1
+ /**
2
+ * SoftDedup - Soft Deduplication via N-gram Commonness Scoring
3
+ *
4
+ * Instead of hard-deleting duplicate training examples, SoftDedup computes
5
+ * n-gram commonness scores and assigns sampling weights. Examples with
6
+ * high-frequency n-grams (template-generated / near-duplicate content)
7
+ * receive lower sampling weights, reducing their influence during training
8
+ * without discarding them entirely.
9
+ *
10
+ * Based on training rule W.008:
11
+ * "Reweight duplicates instead of deleting them. SoftDedup uses n-gram
12
+ * commonness scores to reduce sampling weight of high-frequency data.
13
+ * 26% faster training, +1.77% accuracy vs hard dedup alone."
14
+ *
15
+ * Pipeline position: Quality Filter -> Hard Dedup (W.004) -> SoftDedup (W.008)
16
+ *
17
+ * @module training/SoftDedup
18
+ */
19
+
20
+ // =============================================================================
21
+ // TYPES
22
+ // =============================================================================
23
+
/**
 * Tunable parameters for SoftDedup scoring.
 *
 * Every field is validated when a SoftDedup instance is constructed; an
 * out-of-range value makes the constructor throw.
 */
export interface SoftDedupConfig {
  /**
   * Sizes `n` of the n-grams to extract. All sizes are extracted from each
   * example and pooled into one corpus-wide frequency map, so small sizes
   * capture local patterns and large sizes capture structural ones.
   *
   * Must contain at least one entry; each entry must be a positive integer.
   * Default: [3, 5, 7].
   */
  ngramSizes: number[];

  /**
   * Selects the n-gram unit.
   *
   * - `false` (default): character-level. The text is lowercased and a window
   *   of `n` string positions slides over it.
   * - `true`: word-level. The text is split on whitespace (case is preserved)
   *   and a window of `n` words slides over the tokens.
   */
  wordLevel: boolean;

  /**
   * Lower clamp applied to every computed sampling weight, so that no example
   * is ever excluded entirely. Must be in (0, 1]. Default: 0.1.
   */
  minWeight: number;

  /**
   * Upper clamp for sampling weights, and the weight assigned to examples with
   * a commonness score of 0. Must be in [minWeight, 1]. Default: 1.0.
   */
  maxWeight: number;

  /**
   * Divisor applied to the commonness score inside the exponential decay
   * `maxWeight * exp(-commonness / temperature)`.
   *
   * Larger values flatten the weights towards `maxWeight`; smaller values
   * downweight common examples more aggressively. Must be > 0. Default: 1.0.
   */
  temperature: number;

  /**
   * Position (as a fraction in [0, 1]) in the ascending list of unique n-gram
   * frequencies from which the "common" cut-off frequency is read.
   *
   * An n-gram counts as common when its corpus frequency is greater than or
   * equal to that cut-off; the cut-off is never lower than 2.
   * Default: 0.7 (roughly the most frequent 30% of unique n-grams).
   */
  commonThresholdPercentile: number;
}
71
+
/**
 * Per-example output of a SoftDedup run.
 */
export interface SoftDedupResult {
  /** Position of the example within the array that was scored. */
  index: number;

  /**
   * Fraction of the example's n-grams that are classified as common:
   * 0 means none are common, 1 means all of them are.
   */
  commonnessScore: number;

  /** Sampling weight derived from the commonness score, within [minWeight, maxWeight]. */
  samplingWeight: number;

  /** Raw n-gram counts behind the commonness score. */
  ngramStats: NgramStats;
}
88
+
/**
 * N-gram counts gathered for one example.
 */
export interface NgramStats {
  /** How many n-grams were extracted from the example (all configured sizes combined). */
  totalNgrams: number;

  /** How many of those n-grams met the corpus "common" frequency threshold. */
  commonNgrams: number;

  /** `commonNgrams / totalNgrams`, in [0, 1]; 0 when no n-grams were extracted. */
  commonRatio: number;
}
102
+
/**
 * Summary of the sampling weights produced by one SoftDedup run.
 */
export interface SoftDedupStats {
  /** Number of results summarised. */
  totalExamples: number;

  /** Arithmetic mean of the sampling weights. */
  meanWeight: number;

  /** Median of the sampling weights (mean of the two middle values for an even count). */
  medianWeight: number;

  /** Population standard deviation of the sampling weights (divides by the example count). */
  stdWeight: number;

  /** Count of examples whose weight equals the configured minWeight (within 1e-9). */
  atMinWeight: number;

  /** Count of examples whose weight equals the configured maxWeight (within 1e-9). */
  atMaxWeight: number;

  /** Sum of all sampling weights, i.e. the effective number of examples. */
  effectiveDatasetSize: number;

  /** `1 - effectiveDatasetSize / totalExamples`. */
  reductionRatio: number;

  /**
   * Number of distinct n-grams in the corpus.
   * Reported as 0 when the corpus statistics were not computed.
   */
  uniqueNgramsInCorpus: number;

  /**
   * Absolute corpus frequency at or above which an n-gram counts as common.
   * Reported as 0 when the corpus statistics were not computed.
   */
  commonThresholdFrequency: number;
}
137
+
138
+ // =============================================================================
139
+ // DEFAULT CONFIGURATION
140
+ // =============================================================================
141
+
/**
 * Baseline configuration used for every field a caller does not override.
 * Tuned for HoloScript/Brittney training datasets (920K-1.5M examples).
 */
export const DEFAULT_SOFTDEDUP_CONFIG: SoftDedupConfig = {
  // Character-level trigrams, 5-grams and 7-grams.
  ngramSizes: [3, 5, 7],
  wordLevel: false,

  // Sampling weights are clamped to [0.1, 1.0].
  minWeight: 0.1,
  maxWeight: 1.0,

  // Plain exp(-commonness) decay.
  temperature: 1.0,

  // Cut-off frequency is read at the 70th percentile of n-gram frequencies.
  commonThresholdPercentile: 0.7,
};
154
+
155
+ // =============================================================================
156
+ // SOFT DEDUP CLASS
157
+ // =============================================================================
158
+
/**
 * SoftDedup processor for training data.
 *
 * Computes n-gram commonness scores and assigns sampling weights
 * to training examples. Works AFTER hard dedup (W.004).
 *
 * Scoring outline:
 *  1. Extract n-grams (all configured sizes) from every example and count
 *     how often each occurs across the whole corpus. Occurrences are counted,
 *     not documents, so repetition inside one example also raises frequency.
 *  2. Derive a "common" frequency threshold from the configured percentile
 *     (never lower than 2).
 *  3. An example's commonness is the fraction of its n-grams whose corpus
 *     frequency is >= the threshold.
 *  4. weight = clamp(maxWeight * exp(-commonness / temperature)).
 *
 * @example
 * ```ts
 * const dedup = new SoftDedup();
 * const results = dedup.process([
 *   'composition MyScene { orb Player { Grabbable {} } }',
 *   'composition MyScene { orb Player { Grabbable {} } }', // duplicate
 *   'world Arena { orb Enemy { Physics { mass: 10 } } }',  // mostly unique
 * ]);
 *
 * // results[0] / results[1]: commonness near 1, so with the default
 * //   temperature the weight is near exp(-1) ~= 0.37
 * // results[2]: few common n-grams, so the weight stays close to maxWeight
 * ```
 */
export class SoftDedup {
  private readonly config: SoftDedupConfig;

  /**
   * @param config - Overrides for {@link DEFAULT_SOFTDEDUP_CONFIG}. Fields that
   *   are missing or explicitly `undefined` fall back to the default value.
   * @throws Error if the merged configuration is invalid
   */
  constructor(config: Partial<SoftDedupConfig> = {}) {
    // Drop explicit `undefined` overrides: `Partial<>` permits them, and a plain
    // spread would let them replace a valid default and later yield NaN weights.
    const overrides = Object.fromEntries(
      Object.entries(config ?? {}).filter(([, value]) => value !== undefined)
    ) as Partial<SoftDedupConfig>;

    this.config = { ...DEFAULT_SOFTDEDUP_CONFIG, ...overrides };
    this.validateConfig();

    // Detach from the caller's array (or the shared default array) so outside
    // mutation cannot change a configuration that has already been validated.
    this.config.ngramSizes = [...this.config.ngramSizes];
  }

  /**
   * Process a dataset of text examples and compute sampling weights.
   *
   * An empty input yields an empty array. A single example has nothing to be
   * compared against and is returned with commonness 0 and weight `maxWeight`;
   * the same applies to any example too short to produce an n-gram.
   *
   * @param examples - Array of text strings (training examples)
   * @returns One SoftDedupResult per example, in input order
   */
  process(examples: string[]): SoftDedupResult[] {
    if (examples.length === 0) {
      return [];
    }

    if (examples.length === 1) {
      return [this.unscoredResult(0, this.extractNgrams(examples[0]).length)];
    }

    // Step 1: corpus-wide n-gram frequencies
    const corpusFrequencies = this.buildCorpusFrequencies(examples);

    // Step 2: frequency at or above which an n-gram is "common"
    const threshold = this.computeThreshold(corpusFrequencies);

    // Step 3: score each example against the corpus
    return examples.map((example, index) => {
      const ngrams = this.extractNgrams(example);
      const totalNgrams = ngrams.length;

      if (totalNgrams === 0) {
        return this.unscoredResult(index, 0);
      }

      let commonNgrams = 0;
      for (const ngram of ngrams) {
        if ((corpusFrequencies.get(ngram) ?? 0) >= threshold) {
          commonNgrams++;
        }
      }

      // The commonness score is exactly the ratio of common n-grams.
      const commonRatio = commonNgrams / totalNgrams;

      return {
        index,
        commonnessScore: commonRatio,
        samplingWeight: this.commonnessToWeight(commonRatio),
        ngramStats: { totalNgrams, commonNgrams, commonRatio },
      };
    });
  }

  /**
   * Compute aggregate statistics for a set of SoftDedup results.
   *
   * @param results - Results previously returned by {@link SoftDedup.process}
   * @param examples - Optional corpus the results were computed from. When
   *   given (and non-empty), `uniqueNgramsInCorpus` and
   *   `commonThresholdFrequency` are computed from it with this instance's
   *   configuration; when omitted both fields are reported as 0. Supplying it
   *   re-extracts every n-gram, so it costs about as much as scoring the corpus.
   * @returns Weight statistics; all weight fields are 0 for empty `results`
   */
  computeStats(results: SoftDedupResult[], examples?: readonly string[]): SoftDedupStats {
    let uniqueNgramsInCorpus = 0;
    let commonThresholdFrequency = 0;
    if (examples !== undefined && examples.length > 0) {
      const corpusFrequencies = this.buildCorpusFrequencies(examples);
      uniqueNgramsInCorpus = corpusFrequencies.size;
      commonThresholdFrequency = this.computeThreshold(corpusFrequencies);
    }

    if (results.length === 0) {
      return {
        totalExamples: 0,
        meanWeight: 0,
        medianWeight: 0,
        stdWeight: 0,
        atMinWeight: 0,
        atMaxWeight: 0,
        effectiveDatasetSize: 0,
        reductionRatio: 0,
        uniqueNgramsInCorpus,
        commonThresholdFrequency,
      };
    }

    const weights = results.map((r) => r.samplingWeight);
    const totalExamples = results.length;
    const sum = weights.reduce((a, b) => a + b, 0);
    const meanWeight = sum / totalExamples;

    // Median (sort a copy; `weights` order is left untouched)
    const sorted = [...weights].sort((a, b) => a - b);
    const mid = Math.floor(sorted.length / 2);
    const medianWeight =
      sorted.length % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];

    // Population standard deviation (divides by N, not N - 1)
    const variance = weights.reduce((acc, w) => acc + (w - meanWeight) ** 2, 0) / totalExamples;
    const stdWeight = Math.sqrt(variance);

    // Count extremes (with small epsilon for floating point)
    const epsilon = 1e-9;
    const atMinWeight = weights.filter((w) => Math.abs(w - this.config.minWeight) < epsilon).length;
    const atMaxWeight = weights.filter((w) => Math.abs(w - this.config.maxWeight) < epsilon).length;

    const effectiveDatasetSize = sum;
    const reductionRatio = 1 - effectiveDatasetSize / totalExamples;

    return {
      totalExamples,
      meanWeight,
      medianWeight,
      stdWeight,
      atMinWeight,
      atMaxWeight,
      effectiveDatasetSize,
      reductionRatio,
      uniqueNgramsInCorpus,
      commonThresholdFrequency,
    };
  }

  /**
   * Get a copy of the current configuration.
   * The returned object (including `ngramSizes`) is detached from the instance.
   */
  getConfig(): Readonly<SoftDedupConfig> {
    return { ...this.config, ngramSizes: [...this.config.ngramSizes] };
  }

  // ===========================================================================
  // INTERNAL METHODS
  // ===========================================================================

  /**
   * Result for an example that is not scored against the corpus
   * (single-example input, or no n-grams): commonness 0, weight `maxWeight`.
   */
  private unscoredResult(index: number, totalNgrams: number): SoftDedupResult {
    return {
      index,
      commonnessScore: 0,
      samplingWeight: this.config.maxWeight,
      ngramStats: { totalNgrams, commonNgrams: 0, commonRatio: 0 },
    };
  }

  /**
   * Extract n-grams of every configured size from a text string.
   *
   * Word-level: the text is split on whitespace (case preserved) and each
   * n-gram is `n` consecutive words joined by a single space.
   * Character-level: the text is lowercased and each n-gram is a window of
   * `n` string positions (UTF-16 code units, as produced by `substring`).
   *
   * Output order is by size (in `ngramSizes` order), then by position.
   */
  private extractNgrams(text: string): string[] {
    const { ngramSizes, wordLevel } = this.config;
    const ngrams: string[] = [];

    if (wordLevel) {
      // Tokenise once; the token list does not depend on the n-gram size.
      const words = text.split(/\s+/).filter((w) => w.length > 0);
      for (const n of ngramSizes) {
        for (let i = 0; i + n <= words.length; i++) {
          ngrams.push(words.slice(i, i + n).join(' '));
        }
      }
      return ngrams;
    }

    // Lowercase once; the normalised text does not depend on the n-gram size.
    const normalized = text.toLowerCase();
    for (const n of ngramSizes) {
      for (let i = 0; i + n <= normalized.length; i++) {
        ngrams.push(normalized.substring(i, i + n));
      }
    }
    return ngrams;
  }

  /**
   * Build a frequency map of all n-grams across the entire corpus.
   * Every occurrence is counted, including repeats within one example.
   */
  private buildCorpusFrequencies(examples: readonly string[]): Map<string, number> {
    const frequencies = new Map<string, number>();

    for (const example of examples) {
      for (const ngram of this.extractNgrams(example)) {
        frequencies.set(ngram, (frequencies.get(ngram) ?? 0) + 1);
      }
    }

    return frequencies;
  }

  /**
   * Compute the frequency at or above which an n-gram is considered "common".
   *
   * Reads the value at index `floor(count * commonThresholdPercentile)` of the
   * ascending list of unique n-gram frequencies (clamped to the last index).
   * The result is never below 2, so an n-gram seen once is never common.
   * Returns 1 for an empty frequency map.
   */
  private computeThreshold(frequencies: Map<string, number>): number {
    if (frequencies.size === 0) {
      return 1;
    }

    const freqValues = Array.from(frequencies.values()).sort((a, b) => a - b);
    const percentileIndex = Math.floor(freqValues.length * this.config.commonThresholdPercentile);
    const clampedIndex = Math.min(percentileIndex, freqValues.length - 1);

    return Math.max(freqValues[clampedIndex], 2); // At least frequency 2 to be "common"
  }

  /**
   * Convert a commonness score (0-1) to a sampling weight.
   *
   * Uses exponential decay with temperature scaling:
   *   weight = maxWeight * exp(-commonnessScore / temperature)
   * then clamps to [minWeight, maxWeight].
   *
   * For scores in [0, 1] the unclamped weight lies between
   * `maxWeight * exp(-1 / temperature)` and `maxWeight`, so the `minWeight`
   * clamp only takes effect when the temperature is low enough for that lower
   * bound to fall below `minWeight`.
   */
  private commonnessToWeight(commonnessScore: number): number {
    const { minWeight, maxWeight, temperature } = this.config;

    // Exponential decay: high commonness -> low weight
    const rawWeight = maxWeight * Math.exp(-commonnessScore / temperature);

    return Math.max(minWeight, Math.min(maxWeight, rawWeight));
  }

  /**
   * Validate configuration parameters.
   *
   * Each range check is written as "not (in range)" so that NaN, for which
   * every comparison is false, is rejected instead of slipping through.
   *
   * @throws Error if configuration is invalid
   */
  private validateConfig(): void {
    const { minWeight, maxWeight, temperature, commonThresholdPercentile, ngramSizes } =
      this.config;

    if (!(minWeight > 0 && minWeight <= 1)) {
      throw new Error(`SoftDedup: minWeight must be in (0, 1], got ${minWeight}`);
    }

    if (!(maxWeight >= minWeight && maxWeight <= 1)) {
      throw new Error(`SoftDedup: maxWeight must be in [minWeight, 1], got ${maxWeight}`);
    }

    if (!(temperature > 0)) {
      throw new Error(`SoftDedup: temperature must be > 0, got ${temperature}`);
    }

    if (!(commonThresholdPercentile >= 0 && commonThresholdPercentile <= 1)) {
      throw new Error(
        `SoftDedup: commonThresholdPercentile must be in [0, 1], got ${commonThresholdPercentile}`
      );
    }

    if (ngramSizes.length === 0) {
      throw new Error('SoftDedup: ngramSizes must have at least one entry');
    }

    for (const n of ngramSizes) {
      if (!Number.isInteger(n) || n < 1) {
        throw new Error(`SoftDedup: each ngramSize must be a positive integer, got ${n}`);
      }
    }
  }
}
443
+
444
+ // =============================================================================
445
+ // FACTORY FUNCTION
446
+ // =============================================================================
447
+
/**
 * Factory helper: builds a SoftDedup processor, applying any configuration
 * overrides on top of the defaults.
 *
 * @param config - Optional overrides; omitted fields keep their default value
 * @returns A new SoftDedup instance
 * @throws Error if the resulting configuration is invalid
 *
 * @example
 * ```ts
 * const dedup = createSoftDedup({ wordLevel: true, temperature: 0.5 });
 * const results = dedup.process(myDataset);
 * const stats = dedup.computeStats(results);
 * console.log(`Effective dataset size: ${stats.effectiveDatasetSize}`);
 * ```
 */
export function createSoftDedup(config: Partial<SoftDedupConfig> = {}): SoftDedup {
  const processor = new SoftDedup(config);
  return processor;
}