libthx 0.1.8__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. {libthx-0.1.8 → libthx-0.2.1}/PKG-INFO +1 -1
  2. {libthx-0.1.8 → libthx-0.2.1}/libthx.egg-info/PKG-INFO +1 -1
  3. {libthx-0.1.8 → libthx-0.2.1}/libthx.egg-info/SOURCES.txt +33 -0
  4. {libthx-0.1.8 → libthx-0.2.1}/pyproject.toml +2 -2
  5. libthx-0.2.1/tests/test_contrastive_roundtrip.py +143 -0
  6. libthx-0.2.1/tests/test_datasets.py +78 -0
  7. libthx-0.2.1/tests/test_eval_padding.py +184 -0
  8. libthx-0.2.1/tests/test_gpu_availability.py +71 -0
  9. libthx-0.2.1/tests/test_hardware_dispatch.py +233 -0
  10. libthx-0.2.1/tests/test_kv_cache.py +125 -0
  11. libthx-0.2.1/tests/test_lora.py +513 -0
  12. libthx-0.2.1/tests/test_mamba.py +307 -0
  13. libthx-0.2.1/tests/test_registries.py +65 -0
  14. {libthx-0.1.8 → libthx-0.2.1}/theseus/base/chip.py +5 -0
  15. {libthx-0.1.8 → libthx-0.2.1}/theseus/base/hardware.py +20 -18
  16. {libthx-0.1.8 → libthx-0.2.1}/theseus/base/job.py +45 -2
  17. {libthx-0.1.8 → libthx-0.2.1}/theseus/base/topology.py +8 -3
  18. {libthx-0.1.8 → libthx-0.2.1}/theseus/cli.py +266 -27
  19. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/__init__.py +2 -0
  20. libthx-0.2.1/theseus/data/datasets/dictlearn.py +188 -0
  21. libthx-0.2.1/theseus/data/datasets/pile_injected.py +97 -0
  22. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/tokenizer.py +30 -1
  23. {libthx-0.1.8 → libthx-0.2.1}/theseus/dispatch/bootstrap.py +56 -1
  24. {libthx-0.1.8 → libthx-0.2.1}/theseus/dispatch/config.py +25 -0
  25. {libthx-0.1.8 → libthx-0.2.1}/theseus/dispatch/dispatch.py +60 -5
  26. {libthx-0.1.8 → libthx-0.2.1}/theseus/dispatch/mailbox/mailbox.py +8 -2
  27. {libthx-0.1.8 → libthx-0.2.1}/theseus/dispatch/slurm.py +10 -8
  28. {libthx-0.1.8 → libthx-0.2.1}/theseus/dispatch/solve.py +76 -39
  29. {libthx-0.1.8 → libthx-0.2.1}/theseus/dispatch/ssh.py +18 -7
  30. {libthx-0.1.8 → libthx-0.2.1}/theseus/dispatch/volcano.py +14 -4
  31. libthx-0.2.1/theseus/evaluation/base.py +1335 -0
  32. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/__init__.py +5 -0
  33. libthx-0.2.1/theseus/evaluation/datasets/alpaca.py +64 -0
  34. libthx-0.2.1/theseus/evaluation/datasets/arithmetic.py +120 -0
  35. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/ccaligned.py +20 -6
  36. libthx-0.2.1/theseus/evaluation/datasets/dictlearn.py +83 -0
  37. libthx-0.2.1/theseus/evaluation/datasets/longbench.py +32 -0
  38. libthx-0.2.1/theseus/evaluation/datasets/longhealth.py +89 -0
  39. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/mtob.py +0 -5
  40. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/pes2o.py +2 -2
  41. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/pg19.py +2 -2
  42. libthx-0.2.1/theseus/evaluation/datasets/pg19_lengthgen.py +61 -0
  43. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/pile.py +2 -2
  44. libthx-0.2.1/theseus/evaluation/datasets/pile_injected.py +33 -0
  45. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/tinystories.py +2 -2
  46. {libthx-0.1.8 → libthx-0.2.1}/theseus/experiments/__init__.py +2 -0
  47. libthx-0.2.1/theseus/experiments/continual/__init__.py +17 -0
  48. {libthx-0.1.8 → libthx-0.2.1}/theseus/experiments/continual/abcd.py +161 -113
  49. libthx-0.2.1/theseus/experiments/continual/benchmark.py +353 -0
  50. libthx-0.2.1/theseus/experiments/models/moe.py +15 -0
  51. libthx-0.2.1/theseus/experiments/mok/__init__.py +1 -0
  52. libthx-0.2.1/theseus/experiments/mok/reward.py +68 -0
  53. libthx-0.2.1/theseus/experiments/mok/smoke.py +316 -0
  54. libthx-0.2.1/theseus/inference/base.py +879 -0
  55. {libthx-0.1.8 → libthx-0.2.1}/theseus/job.py +164 -69
  56. {libthx-0.1.8 → libthx-0.2.1}/theseus/mock.py +18 -6
  57. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/attention/base.py +38 -6
  58. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/attention/forking.py +16 -1
  59. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/attention/grouped.py +99 -16
  60. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/attention/scratching.py +11 -1
  61. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/axes.py +2 -0
  62. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/block/__init__.py +4 -0
  63. libthx-0.2.1/theseus/model/block/mamba.py +309 -0
  64. libthx-0.2.1/theseus/model/block/moe.py +48 -0
  65. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/block/qwen.py +2 -0
  66. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/block/scratching.py +5 -5
  67. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/models/__init__.py +6 -0
  68. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/models/contrib/gpt_neox.py +4 -1
  69. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/models/contrib/llama.py +4 -1
  70. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/models/contrib/marin.py +4 -1
  71. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/models/contrib/qwen.py +13 -2
  72. libthx-0.2.1/theseus/model/models/hybrid.py +105 -0
  73. libthx-0.2.1/theseus/model/models/mamba.py +89 -0
  74. libthx-0.2.1/theseus/model/models/moe.py +45 -0
  75. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/models/scratchbubbles.py +59 -30
  76. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/models/thoughtbubbles.py +9 -2
  77. libthx-0.2.1/theseus/model/moe/__init__.py +4 -0
  78. libthx-0.2.1/theseus/model/moe/base.py +201 -0
  79. libthx-0.2.1/theseus/model/moe/bias_balanced.py +51 -0
  80. {libthx-0.1.8 → libthx-0.2.1}/theseus/plot.py +28 -16
  81. {libthx-0.1.8 → libthx-0.2.1}/theseus/quick.py +27 -2
  82. {libthx-0.1.8 → libthx-0.2.1}/theseus/registry.py +27 -1
  83. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/base.py +38 -32
  84. libthx-0.2.1/theseus/training/grpo.py +118 -0
  85. libthx-0.2.1/theseus/training/lora.py +577 -0
  86. libthx-0.2.1/theseus/training/ppo.py +767 -0
  87. libthx-0.2.1/theseus/training/schedules/__init__.py +19 -0
  88. libthx-0.2.1/theseus/training/schedules/cosine_rewarm.py +92 -0
  89. libthx-0.1.8/theseus/evaluation/base.py +0 -1104
  90. libthx-0.1.8/theseus/evaluation/datasets/longbench.py +0 -75
  91. libthx-0.1.8/theseus/evaluation/datasets/longhealth.py +0 -130
  92. libthx-0.1.8/theseus/experiments/continual/__init__.py +0 -5
  93. libthx-0.1.8/theseus/inference/base.py +0 -567
  94. libthx-0.1.8/theseus/training/schedules/__init__.py +0 -6
  95. {libthx-0.1.8 → libthx-0.2.1}/LICENSE +0 -0
  96. {libthx-0.1.8 → libthx-0.2.1}/README.md +0 -0
  97. {libthx-0.1.8 → libthx-0.2.1}/libthx.egg-info/dependency_links.txt +0 -0
  98. {libthx-0.1.8 → libthx-0.2.1}/libthx.egg-info/entry_points.txt +0 -0
  99. {libthx-0.1.8 → libthx-0.2.1}/libthx.egg-info/requires.txt +0 -0
  100. {libthx-0.1.8 → libthx-0.2.1}/libthx.egg-info/top_level.txt +0 -0
  101. {libthx-0.1.8 → libthx-0.2.1}/setup.cfg +0 -0
  102. {libthx-0.1.8 → libthx-0.2.1}/theseus/__init__.py +0 -0
  103. {libthx-0.1.8 → libthx-0.2.1}/theseus/base/__init__.py +0 -0
  104. {libthx-0.1.8 → libthx-0.2.1}/theseus/base/axis.py +0 -0
  105. {libthx-0.1.8 → libthx-0.2.1}/theseus/config.py +0 -0
  106. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/__init__.py +0 -0
  107. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/alpaca.py +0 -0
  108. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/bbq.py +0 -0
  109. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/ccaligned.py +0 -0
  110. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/cfq.py +0 -0
  111. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/clutrr.py +0 -0
  112. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/dataset.py +0 -0
  113. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/fever.py +0 -0
  114. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/fineweb.py +0 -0
  115. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/harmfulqa.py +0 -0
  116. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/longbench.py +0 -0
  117. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/longhealth.py +0 -0
  118. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/mmlu.py +0 -0
  119. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/mnli.py +0 -0
  120. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/mtob.py +0 -0
  121. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/pes2o.py +0 -0
  122. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/pg19.py +0 -0
  123. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/pile.py +0 -0
  124. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/pile_detoxify.py +0 -0
  125. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/qqp.py +0 -0
  126. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/redcodegen/__init__.py +0 -0
  127. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/redcodegen/hardening.py +0 -0
  128. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/siqa.py +0 -0
  129. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/squad.py +0 -0
  130. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/sst2.py +0 -0
  131. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/datasets/winogrande.py +0 -0
  132. {libthx-0.1.8 → libthx-0.2.1}/theseus/data/tokenize.py +0 -0
  133. {libthx-0.1.8 → libthx-0.2.1}/theseus/dispatch/__init__.py +0 -0
  134. {libthx-0.1.8 → libthx-0.2.1}/theseus/dispatch/mailbox/__init__.py +0 -0
  135. {libthx-0.1.8 → libthx-0.2.1}/theseus/dispatch/mailbox/sidecar.py +0 -0
  136. {libthx-0.1.8 → libthx-0.2.1}/theseus/dispatch/sync.py +0 -0
  137. {libthx-0.1.8 → libthx-0.2.1}/theseus/dispatch/tpu.py +0 -0
  138. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/__init__.py +0 -0
  139. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/bbq.py +0 -0
  140. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/blimp.py +0 -0
  141. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/cfq.py +0 -0
  142. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/clutrr.py +0 -0
  143. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/fever.py +0 -0
  144. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/mmlu.py +0 -0
  145. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/mnli.py +0 -0
  146. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/perplexity_evals.py +0 -0
  147. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/qqp.py +0 -0
  148. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/siqa.py +0 -0
  149. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/squad.py +0 -0
  150. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/sst2.py +0 -0
  151. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/datasets/winogrande.py +0 -0
  152. {libthx-0.1.8 → libthx-0.2.1}/theseus/evaluation/huggingface.py +0 -0
  153. {libthx-0.1.8 → libthx-0.2.1}/theseus/experiments/models/__init__.py +0 -0
  154. {libthx-0.1.8 → libthx-0.2.1}/theseus/experiments/models/forking.py +0 -0
  155. {libthx-0.1.8 → libthx-0.2.1}/theseus/experiments/models/gpt.py +0 -0
  156. {libthx-0.1.8 → libthx-0.2.1}/theseus/experiments/models/gpt_neox.py +0 -0
  157. {libthx-0.1.8 → libthx-0.2.1}/theseus/experiments/models/llama.py +0 -0
  158. {libthx-0.1.8 → libthx-0.2.1}/theseus/experiments/models/qwen.py +0 -0
  159. {libthx-0.1.8 → libthx-0.2.1}/theseus/experiments/redcodegen/__init__.py +0 -0
  160. {libthx-0.1.8 → libthx-0.2.1}/theseus/experiments/redcodegen/hardening.py +0 -0
  161. {libthx-0.1.8 → libthx-0.2.1}/theseus/inference/__init__.py +0 -0
  162. {libthx-0.1.8 → libthx-0.2.1}/theseus/inference/huggingface.py +0 -0
  163. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/__init__.py +0 -0
  164. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/activations/__init__.py +0 -0
  165. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/activations/swiglu.py +0 -0
  166. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/attention/__init__.py +0 -0
  167. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/attention/rope.py +0 -0
  168. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/block/block.py +0 -0
  169. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/block/forking.py +0 -0
  170. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/block/gpt_neox.py +0 -0
  171. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/block/llama.py +0 -0
  172. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/huggingface.py +0 -0
  173. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/layers/__init__.py +0 -0
  174. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/layers/layernorm.py +0 -0
  175. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/layers/mlp.py +0 -0
  176. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/layers/rmsnorm.py +0 -0
  177. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/layers/rope.py +0 -0
  178. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/masks.py +0 -0
  179. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/models/base.py +0 -0
  180. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/models/contrib/__init__.py +0 -0
  181. {libthx-0.1.8 → libthx-0.2.1}/theseus/model/module.py +0 -0
  182. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/__init__.py +0 -0
  183. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/backbone.py +0 -0
  184. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/contrastive.py +0 -0
  185. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/flywheel/__init__.py +0 -0
  186. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/flywheel/contrastive.py +0 -0
  187. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/flywheel/padded.py +0 -0
  188. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/flywheel/pmd.py +0 -0
  189. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/flywheel/strategy.py +0 -0
  190. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/huggingface.py +0 -0
  191. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/kl_divergence.py +0 -0
  192. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/optimizers/__init__.py +0 -0
  193. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/optimizers/adamw.py +0 -0
  194. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/optimizers/muon.py +0 -0
  195. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/schedules/wsd.py +0 -0
  196. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/schedules/wsds.py +0 -0
  197. {libthx-0.1.8 → libthx-0.2.1}/theseus/training/utils.py +0 -0
  198. {libthx-0.1.8 → libthx-0.2.1}/theseus/web/__init__.py +0 -0
  199. {libthx-0.1.8 → libthx-0.2.1}/theseus/web/app.py +0 -0
  200. {libthx-0.1.8 → libthx-0.2.1}/theseus/web/auth.py +0 -0
  201. {libthx-0.1.8 → libthx-0.2.1}/theseus/web/generate_password_hash.py +0 -0
  202. {libthx-0.1.8 → libthx-0.2.1}/theseus/web/models.py +0 -0
  203. {libthx-0.1.8 → libthx-0.2.1}/theseus/web/routes/__init__.py +0 -0
  204. {libthx-0.1.8 → libthx-0.2.1}/theseus/web/routes/api.py +0 -0
  205. {libthx-0.1.8 → libthx-0.2.1}/theseus/web/routes/auth.py +0 -0
  206. {libthx-0.1.8 → libthx-0.2.1}/theseus/web/routes/views.py +0 -0
  207. {libthx-0.1.8 → libthx-0.2.1}/theseus/web/services/__init__.py +0 -0
  208. {libthx-0.1.8 → libthx-0.2.1}/theseus/web/services/cache.py +0 -0
  209. {libthx-0.1.8 → libthx-0.2.1}/theseus/web/services/checkpoints.py +0 -0
  210. {libthx-0.1.8 → libthx-0.2.1}/theseus/web/services/logs.py +0 -0
  211. {libthx-0.1.8 → libthx-0.2.1}/theseus/web/services/status.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: libthx
3
- Version: 0.1.8
3
+ Version: 0.2.1
4
4
  Summary: Architecture experimentation and training infrastructure.
5
5
  Requires-Python: >=3.11
6
6
  Description-Content-Type: text/markdown
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: libthx
3
- Version: 0.1.8
3
+ Version: 0.2.1
4
4
  Summary: Architecture experimentation and training infrastructure.
5
5
  Requires-Python: >=3.11
6
6
  Description-Content-Type: text/markdown
@@ -7,6 +7,15 @@ libthx.egg-info/dependency_links.txt
7
7
  libthx.egg-info/entry_points.txt
8
8
  libthx.egg-info/requires.txt
9
9
  libthx.egg-info/top_level.txt
10
+ tests/test_contrastive_roundtrip.py
11
+ tests/test_datasets.py
12
+ tests/test_eval_padding.py
13
+ tests/test_gpu_availability.py
14
+ tests/test_hardware_dispatch.py
15
+ tests/test_kv_cache.py
16
+ tests/test_lora.py
17
+ tests/test_mamba.py
18
+ tests/test_registries.py
10
19
  theseus/__init__.py
11
20
  theseus/cli.py
12
21
  theseus/config.py
@@ -31,6 +40,7 @@ theseus/data/datasets/ccaligned.py
31
40
  theseus/data/datasets/cfq.py
32
41
  theseus/data/datasets/clutrr.py
33
42
  theseus/data/datasets/dataset.py
43
+ theseus/data/datasets/dictlearn.py
34
44
  theseus/data/datasets/fever.py
35
45
  theseus/data/datasets/fineweb.py
36
46
  theseus/data/datasets/harmfulqa.py
@@ -43,6 +53,7 @@ theseus/data/datasets/pes2o.py
43
53
  theseus/data/datasets/pg19.py
44
54
  theseus/data/datasets/pile.py
45
55
  theseus/data/datasets/pile_detoxify.py
56
+ theseus/data/datasets/pile_injected.py
46
57
  theseus/data/datasets/qqp.py
47
58
  theseus/data/datasets/siqa.py
48
59
  theseus/data/datasets/squad.py
@@ -67,11 +78,14 @@ theseus/evaluation/__init__.py
67
78
  theseus/evaluation/base.py
68
79
  theseus/evaluation/huggingface.py
69
80
  theseus/evaluation/datasets/__init__.py
81
+ theseus/evaluation/datasets/alpaca.py
82
+ theseus/evaluation/datasets/arithmetic.py
70
83
  theseus/evaluation/datasets/bbq.py
71
84
  theseus/evaluation/datasets/blimp.py
72
85
  theseus/evaluation/datasets/ccaligned.py
73
86
  theseus/evaluation/datasets/cfq.py
74
87
  theseus/evaluation/datasets/clutrr.py
88
+ theseus/evaluation/datasets/dictlearn.py
75
89
  theseus/evaluation/datasets/fever.py
76
90
  theseus/evaluation/datasets/longbench.py
77
91
  theseus/evaluation/datasets/longhealth.py
@@ -81,7 +95,9 @@ theseus/evaluation/datasets/mtob.py
81
95
  theseus/evaluation/datasets/perplexity_evals.py
82
96
  theseus/evaluation/datasets/pes2o.py
83
97
  theseus/evaluation/datasets/pg19.py
98
+ theseus/evaluation/datasets/pg19_lengthgen.py
84
99
  theseus/evaluation/datasets/pile.py
100
+ theseus/evaluation/datasets/pile_injected.py
85
101
  theseus/evaluation/datasets/qqp.py
86
102
  theseus/evaluation/datasets/siqa.py
87
103
  theseus/evaluation/datasets/squad.py
@@ -91,12 +107,17 @@ theseus/evaluation/datasets/winogrande.py
91
107
  theseus/experiments/__init__.py
92
108
  theseus/experiments/continual/__init__.py
93
109
  theseus/experiments/continual/abcd.py
110
+ theseus/experiments/continual/benchmark.py
94
111
  theseus/experiments/models/__init__.py
95
112
  theseus/experiments/models/forking.py
96
113
  theseus/experiments/models/gpt.py
97
114
  theseus/experiments/models/gpt_neox.py
98
115
  theseus/experiments/models/llama.py
116
+ theseus/experiments/models/moe.py
99
117
  theseus/experiments/models/qwen.py
118
+ theseus/experiments/mok/__init__.py
119
+ theseus/experiments/mok/reward.py
120
+ theseus/experiments/mok/smoke.py
100
121
  theseus/experiments/redcodegen/__init__.py
101
122
  theseus/experiments/redcodegen/hardening.py
102
123
  theseus/inference/__init__.py
@@ -120,6 +141,8 @@ theseus/model/block/block.py
120
141
  theseus/model/block/forking.py
121
142
  theseus/model/block/gpt_neox.py
122
143
  theseus/model/block/llama.py
144
+ theseus/model/block/mamba.py
145
+ theseus/model/block/moe.py
123
146
  theseus/model/block/qwen.py
124
147
  theseus/model/block/scratching.py
125
148
  theseus/model/layers/__init__.py
@@ -129,6 +152,9 @@ theseus/model/layers/rmsnorm.py
129
152
  theseus/model/layers/rope.py
130
153
  theseus/model/models/__init__.py
131
154
  theseus/model/models/base.py
155
+ theseus/model/models/hybrid.py
156
+ theseus/model/models/mamba.py
157
+ theseus/model/models/moe.py
132
158
  theseus/model/models/scratchbubbles.py
133
159
  theseus/model/models/thoughtbubbles.py
134
160
  theseus/model/models/contrib/__init__.py
@@ -136,12 +162,18 @@ theseus/model/models/contrib/gpt_neox.py
136
162
  theseus/model/models/contrib/llama.py
137
163
  theseus/model/models/contrib/marin.py
138
164
  theseus/model/models/contrib/qwen.py
165
+ theseus/model/moe/__init__.py
166
+ theseus/model/moe/base.py
167
+ theseus/model/moe/bias_balanced.py
139
168
  theseus/training/__init__.py
140
169
  theseus/training/backbone.py
141
170
  theseus/training/base.py
142
171
  theseus/training/contrastive.py
172
+ theseus/training/grpo.py
143
173
  theseus/training/huggingface.py
144
174
  theseus/training/kl_divergence.py
175
+ theseus/training/lora.py
176
+ theseus/training/ppo.py
145
177
  theseus/training/utils.py
146
178
  theseus/training/flywheel/__init__.py
147
179
  theseus/training/flywheel/contrastive.py
@@ -152,6 +184,7 @@ theseus/training/optimizers/__init__.py
152
184
  theseus/training/optimizers/adamw.py
153
185
  theseus/training/optimizers/muon.py
154
186
  theseus/training/schedules/__init__.py
187
+ theseus/training/schedules/cosine_rewarm.py
155
188
  theseus/training/schedules/wsd.py
156
189
  theseus/training/schedules/wsds.py
157
190
  theseus/web/__init__.py
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "libthx"
3
- version = "0.1.8"
3
+ version = "0.2.1"
4
4
  description = "Architecture experimentation and training infrastructure."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -229,7 +229,7 @@ plugins = ['pydantic.mypy']
229
229
  python_version = "3.12"
230
230
  strict = true
231
231
  ignore_missing_imports = true
232
- exclude = ["tests", "examples"]
232
+ exclude = ["tests", "examples", "theseus/dispatch/bootstrap.py"]
233
233
 
234
234
  [[tool.mypy.overrides]]
235
235
  module = "huggingface_hub.*"
@@ -0,0 +1,143 @@
1
+ """Round-trip check for contrastive tokenization + loader + async strategy.
2
+
3
+ Builds a tiny in-memory contrastive dataset, writes memmaps in a temp dir,
4
+ loads via ContrastivePaddedDataset and Strategy(Async), and verifies shapes.
5
+
6
+ Migrated from scripts/test_contrastive_roundtrip.py.
7
+ """
8
+
9
+ import json
10
+ import random
11
+ import tempfile
12
+ from pathlib import Path
13
+ from typing import List, Tuple
14
+
15
+ import numpy as np
16
+ import pytest
17
+
18
+ from theseus.data.tokenize import (
19
+ TokenizeContrastiveDatasetConfig,
20
+ _build_padded_arrays,
21
+ _encode_dataset_item,
22
+ )
23
+ from theseus.data.tokenizer import TokenizerConfig, get_tokenizer
24
+ from theseus.training.flywheel.contrastive import ContrastivePaddedDataset
25
+ from theseus.training.flywheel.strategy import DatasetStyle, Sampling, Strategy
26
+ from theseus.base.job import ExecutionSpec
27
+
28
+
29
+ PAIRS: List[Tuple[str, str]] = [
30
+ ("the quick brown fox", "the slow red fox"),
31
+ ("hello world", "hello mars"),
32
+ ("good code is readable", "good code is obscure"),
33
+ ("unit tests prevent bugs", "unit tests waste time"),
34
+ ]
35
+
36
+
37
+ class TinyContrastive:
38
+ def __init__(self, data: List[Tuple[str, str]]):
39
+ self.data = data
40
+
41
+ def __len__(self) -> int:
42
+ return len(self.data)
43
+
44
+ def __getitem__(self, idx: int) -> Tuple[str, str]:
45
+ return self.data[idx]
46
+
47
+
48
+ def _write_contrastive_data(
49
+ td_path: Path, tokenizer_cfg: TokenizerConfig, label: str
50
+ ) -> str:
51
+ """Write contrastive memmaps and return dataset name."""
52
+ name = f"tmpcontrast_{label}"
53
+ out = td_path / "data" / name
54
+ out.mkdir(parents=True, exist_ok=True)
55
+
56
+ tokenizer = get_tokenizer(tokenizer_cfg)
57
+ args = TokenizeContrastiveDatasetConfig(
58
+ name="fever", block_size=32, pad_token=0, val_pct=0.25, seed=42,
59
+ )
60
+
61
+ dataset = TinyContrastive(PAIRS)
62
+ indices = list(range(len(dataset)))
63
+ random.seed(args.seed)
64
+ random.shuffle(indices)
65
+ val_size = int(len(dataset) * args.val_pct)
66
+ splits = {"train": indices[val_size:], "val": indices[:val_size]}
67
+ shapes = {}
68
+ dtype = np.uint32
69
+
70
+ for split_name, split_indices in splits.items():
71
+ num_samples = len(split_indices)
72
+ pos_tokens = np.memmap(
73
+ out / f"{split_name}.pos.bin", dtype=dtype, mode="w+",
74
+ shape=(num_samples, args.block_size),
75
+ )
76
+ pos_mask = np.memmap(
77
+ out / f"{split_name}.pos.bin.mask", dtype=np.bool_, mode="w+",
78
+ shape=(num_samples, args.block_size),
79
+ )
80
+ neg_tokens = np.memmap(
81
+ out / f"{split_name}.neg.bin", dtype=dtype, mode="w+",
82
+ shape=(num_samples, args.block_size),
83
+ )
84
+ neg_mask = np.memmap(
85
+ out / f"{split_name}.neg.bin.mask", dtype=np.bool_, mode="w+",
86
+ shape=(num_samples, args.block_size),
87
+ )
88
+
89
+ for arr_idx, didx in enumerate(split_indices):
90
+ pos_str, neg_str = dataset[didx]
91
+ for s, target_tokens, target_mask in [
92
+ (pos_str, pos_tokens, pos_mask),
93
+ (neg_str, neg_tokens, neg_mask),
94
+ ]:
95
+ ids, mask_list = _encode_dataset_item(s, False, tokenizer, args)
96
+ t, m, *_ = _build_padded_arrays(
97
+ ids, mask_list, args.block_size, args.pad_token, dtype
98
+ )
99
+ target_tokens[arr_idx] = t
100
+ target_mask[arr_idx] = m
101
+
102
+ pos_tokens.flush()
103
+ pos_mask.flush()
104
+ neg_tokens.flush()
105
+ neg_mask.flush()
106
+ shapes[split_name] = {
107
+ "pos": [num_samples, args.block_size],
108
+ "neg": [num_samples, args.block_size],
109
+ }
110
+
111
+ with open(out / "shape.json", "w") as f:
112
+ json.dump(shapes, f)
113
+ with open(out / "config.json", "w") as f:
114
+ json.dump({}, f)
115
+
116
+ return name
117
+
118
+
119
+ class TestContrastiveRoundtrip:
120
+ def test_tiktoken_roundtrip(self):
121
+ with tempfile.TemporaryDirectory() as td:
122
+ td_path = Path(td)
123
+ tok_cfg = TokenizerConfig(backend="tiktoken", name="cl100k_base")
124
+ name = _write_contrastive_data(td_path, tok_cfg, "tiktoken")
125
+
126
+ spec = ExecutionSpec.local(root_dir=str(td_path))
127
+ ds = ContrastivePaddedDataset(spec, block_size=32, name=name, suffix="")
128
+ batch = ds.get_batch(batch_size=2, split="train")
129
+
130
+ assert batch["padding_mask_pos"].shape == (2, 32)
131
+ assert batch["padding_mask_neg"].shape == (2, 32)
132
+
133
+ # Async loader
134
+ strat = Strategy(
135
+ spec, block_size=32,
136
+ mixture=[Sampling(name=name, rate=1.0, style=DatasetStyle.CONTRASTIVE)],
137
+ )
138
+ async_loader = strat.get_async_batches(batch_size=2, split="train")
139
+ async_batch = async_loader.get_batch()
140
+ async_loader.close()
141
+
142
+ assert async_batch["padding_mask_pos"].shape == (2, 32)
143
+ assert async_batch["padding_mask_neg"].shape == (2, 32)
@@ -0,0 +1,78 @@
1
+ """Tests for new benchmark datasets and evaluations."""
2
+
3
+ import pytest
4
+
5
+
6
+ class TestPileInjected:
7
+ """Tests for the injected Pile dataset."""
8
+
9
+ def test_injected_texts_reproducible(self):
10
+ """Same seed always produces the same injected sequences."""
11
+ from theseus.data.datasets.pile_injected import (
12
+ _generate_injected_texts,
13
+ INJECTED_TEXTS,
14
+ )
15
+
16
+ texts_a = _generate_injected_texts(n_sequences=100, seed=42)
17
+ texts_b = _generate_injected_texts(n_sequences=100, seed=42)
18
+ assert texts_a == texts_b
19
+ assert texts_a == list(INJECTED_TEXTS)
20
+
21
+ def test_injected_texts_count(self):
22
+ from theseus.data.datasets.pile_injected import INJECTED_TEXTS
23
+
24
+ assert len(INJECTED_TEXTS) == 100
25
+
26
+ def test_injected_texts_nonempty(self):
27
+ from theseus.data.datasets.pile_injected import INJECTED_TEXTS
28
+
29
+ for text in INJECTED_TEXTS:
30
+ assert len(text) > 100 # each should be substantial
31
+
32
+ def test_injection_positions_sorted(self):
33
+ from theseus.data.datasets.pile_injected import INJECTION_POSITIONS
34
+
35
+ assert INJECTION_POSITIONS == sorted(INJECTION_POSITIONS)
36
+ assert len(INJECTION_POSITIONS) == 100
37
+
38
+ def test_different_seeds_different_texts(self):
39
+ from theseus.data.datasets.pile_injected import _generate_injected_texts
40
+
41
+ texts_a = _generate_injected_texts(n_sequences=10, seed=42)
42
+ texts_b = _generate_injected_texts(n_sequences=10, seed=99)
43
+ assert texts_a != texts_b
44
+
45
+
46
+ class TestPileInjectedEval:
47
+ """Tests for the injected sequence memorization evaluation."""
48
+
49
+ def test_eval_uses_same_texts(self):
50
+ from theseus.data.datasets.pile_injected import INJECTED_TEXTS
51
+ from theseus.evaluation.datasets.pile_injected import PileInjectedEval
52
+
53
+ ev = PileInjectedEval()
54
+ assert len(ev) == len(INJECTED_TEXTS)
55
+ for i in range(len(ev)):
56
+ assert ev.get(i) == INJECTED_TEXTS[i]
57
+
58
+ def test_eval_name(self):
59
+ from theseus.evaluation.datasets.pile_injected import PileInjectedEval
60
+
61
+ ev = PileInjectedEval()
62
+ assert ev.name == "pile_injected_ppl"
63
+
64
+
65
+ class TestPG19LengthGen:
66
+ """Tests for variable-length PG-19 evaluations."""
67
+
68
+ def test_eval_names_registered(self):
69
+ from theseus.registry import EVALUATIONS
70
+
71
+ for name in [
72
+ "pg19_2k_ppl",
73
+ "pg19_4k_ppl",
74
+ "pg19_8k_ppl",
75
+ "pg19_16k_ppl",
76
+ "pg19_32k_ppl",
77
+ ]:
78
+ assert name in EVALUATIONS, f"{name} not registered"
@@ -0,0 +1,184 @@
1
+ """Evaluation padding tests — verifies evaluations work when dataset
2
+ size is not a multiple of the batch unit.
3
+
4
+ Migrated from scripts/test_eval_padding.py.
5
+ """
6
+
7
+ import flax
8
+ import jax
9
+ import jax.numpy as jnp
10
+ import numpy as np
11
+ from jax.sharding import Mesh, NamedSharding, PartitionSpec as P
12
+
13
+ from theseus.base import Axis
14
+ from theseus.evaluation import (
15
+ EncodingEvaluation,
16
+ PerplexityComparisonEvaluation,
17
+ PerplexityEvaluation,
18
+ RolloutEvaluation,
19
+ )
20
+
21
+
22
+ class ToyTokenizer:
23
+ pad_token = 0
24
+
25
+ def encode(self, text: str) -> list[int]:
26
+ return [ord(ch) + 1 for ch in text]
27
+
28
+ def encode_batch(
29
+ self, text_list: list[str], allowed_special: str | None = None
30
+ ) -> list[list[int]]:
31
+ del allowed_special
32
+ return [self.encode(text) for text in text_list]
33
+
34
+ def decode_batch(self, seqs: list[list[int]]) -> list[str]:
35
+ return ["".join(chr(tok - 1) for tok in seq if tok > 0) for seq in seqs]
36
+
37
+
38
@flax.struct.dataclass
class DummyState:
    """Stand-in state object that carries a single ``params`` field."""

    # In this file the field holds either a scalar array or a sharding
    # (see DummyInference.__init__).
    params: jax.Array
41
+
42
+
43
class DummyInference:
    """Minimal stand-in for an inference engine, used to drive the evaluations.

    It exposes the attributes and methods the evaluations in this file are
    exercised against: a one-device mesh, a dummy state, ``pad``,
    ``_autoregress`` and ``forward``.
    """

    def __init__(self, batch_unit: int = 8, block_size: int = 8, rollout_token: str = "!"):
        """Set up a single-replica engine.

        Args:
            batch_unit: stored as ``per_device_batch_size``.
            block_size: stored as ``block_size``.
            rollout_token: character whose toy id (``ord + 1``) is emitted
                for every generated position in ``_autoregress``.
        """
        self.replicas = 1
        self.local_replicas = 1
        self.per_device_batch_size = batch_unit
        self.block_size = block_size
        # Build the mesh from exactly one device. Reshaping the complete
        # device list to (1,) raises ValueError as soon as the host exposes
        # more than one device (multi-GPU/TPU, or forced multi-device CPU).
        self.mesh = Mesh(np.array(jax.devices()[:1]), (Axis.BATCH,))
        scalar_sharding = NamedSharding(self.mesh, P())
        self.state = DummyState(
            params=jax.device_put(jnp.array(0, dtype=jnp.int32), scalar_sharding)
        )
        self.state_sharding = DummyState(params=scalar_sharding)
        self.key = jax.random.PRNGKey(0)
        self.vocab_size = 256
        self.rollout_token_id = ord(rollout_token) + 1

    @staticmethod
    def pad(seqs: list[list[int]], pad_token: int = 0) -> tuple[jax.Array, jax.Array]:
        """Left-pad ``seqs`` to a common length.

        Returns:
            ``(tokens, mask)``: an int32 array of padded sequences and a
            bool array that is True on real tokens and False on padding.

        Raises:
            ValueError: if ``seqs`` is empty (no maximum length exists).
        """
        max_len = max(len(seq) for seq in seqs)
        padded = [([pad_token] * (max_len - len(seq))) + seq for seq in seqs]
        masks = [([False] * (max_len - len(seq))) + ([True] * len(seq)) for seq in seqs]
        return jnp.array(padded, dtype=jnp.int32), jnp.array(masks, dtype=jnp.bool_)

    def _autoregress(self, state, key, input, input_mask, num_tokens, temperature, top_p):
        """Extend ``input`` to ``num_tokens`` columns with the rollout token.

        If ``input`` already has at least ``num_tokens`` columns it is
        truncated instead. All other arguments are ignored.
        """
        del state, key, input_mask, temperature, top_p
        extra = num_tokens - input.shape[-1]
        if extra <= 0:
            return input[:, :num_tokens]
        generated = jnp.full((input.shape[0], extra), self.rollout_token_id, dtype=jnp.int32)
        return jnp.concatenate([input, generated], axis=-1)

    def forward(self, state, params, batch, key, deterministic):
        """Return logits that put all mass on the actual next token.

        ``batch`` is unpacked as ``(x_batch, _, mask_batch)``. The target at
        each position is the following input token; the last position and
        every masked-out position target id 0.

        Returns:
            ``(logits, None, None)`` where ``logits`` is a one-hot over
            ``vocab_size`` scaled by 20.0.
        """
        del state, params, key, deterministic
        x_batch, _, mask_batch = batch
        next_tokens = jnp.roll(x_batch, -1, axis=-1)
        # roll wraps the first token into the last slot; overwrite it.
        next_tokens = next_tokens.at[:, -1].set(0)
        next_tokens = jnp.where(mask_batch, next_tokens, 0)
        logits = jax.nn.one_hot(next_tokens, self.vocab_size, dtype=jnp.float32) * 20.0
        return logits, None, None
82
+
83
+
84
class ToyRolloutEval(RolloutEvaluation):
    """Three-item rollout evaluation; every item's expected output is "!"."""

    def __init__(self):
        # (prompt, expected) pairs
        self.items = [(prompt, "!") for prompt in ("aa", "bb", "cc")]

    def __len__(self):
        return len(self.items)

    @property
    def name(self):
        return "toy_rollout"

    def get(self, indx):
        """Return the pair stored at position ``indx``."""
        return self.items[indx]

    def max_new_tokens(self, inference):
        """Always request a single new token."""
        return 1

    def clean(self, y_hat):
        """Keep only the final element of ``y_hat`` (as a slice)."""
        last_only = y_hat[-1:]
        return last_only

    def check(self, y, y_hat):
        """An item passes when the expected and produced values are equal."""
        matches = y == y_hat
        return matches
106
+
107
+
108
class ToyEncodingEval(EncodingEvaluation):
    """Three-item encoding evaluation over short lowercase strings."""

    def __init__(self):
        self.items = "abc def ghi".split()

    def __len__(self):
        return len(self.items)

    @property
    def name(self):
        return "toy_encoding"

    def get(self, indx):
        """Return the string stored at position ``indx``."""
        return self.items[indx]

    def clean(self, y_hat):
        """Pass the prediction through untouched."""
        return y_hat

    def check(self, x, y_hat):
        """An item passes when ``y_hat`` equals ``x`` minus its first element."""
        shifted_input = x[1:]
        return y_hat == shifted_input
127
+
128
+
129
class ToyPerplexityEval(PerplexityEvaluation):
    """Three-item perplexity evaluation over short lowercase strings."""

    def __init__(self):
        self.items = "abc def ghi".split()

    def __len__(self):
        return len(self.items)

    @property
    def name(self):
        return "toy_ppl"

    def get(self, indx):
        """Return the string stored at position ``indx``."""
        return self.items[indx]
142
+
143
+
144
class ToyComparisonEval(PerplexityComparisonEvaluation):
    """Three-item perplexity-comparison evaluation.

    Each item is ``(prefix, [candidate, candidate], 0)``.
    NOTE(review): the trailing 0 is presumably the index of the correct
    candidate — confirm against PerplexityComparisonEvaluation.
    """

    def __init__(self):
        self.items = [
            ("a", ["bc", "zz"], 0),
            ("d", ["ef", "yy"], 0),
            ("g", ["hi", "xx"], 0),
        ]

    def __len__(self):
        return len(self.items)

    @property
    def name(self):
        return "toy_compare"

    def get(self, indx):
        """Return the item tuple stored at position ``indx``."""
        return self.items[indx]
157
+
158
+
159
class TestEvalPadding:
    """Run each three-item toy evaluation against a batch unit of 8."""

    @staticmethod
    def _score(eval_cls):
        """Build a fresh tokenizer and engine, then score ``eval_cls()``."""
        tokenizer = ToyTokenizer()
        inference = DummyInference(batch_unit=8, block_size=8)
        evaluation = eval_cls()
        return evaluation(inference, tokenizer)

    def test_rollout_eval(self):
        assert self._score(ToyRolloutEval) == 1.0

    def test_encoding_eval(self):
        assert self._score(ToyEncodingEval) == 1.0

    def test_perplexity_eval(self):
        score = self._score(ToyPerplexityEval)
        # strictly above 0.99 and at most 1.0
        assert 0.99 < score <= 1.0

    def test_comparison_eval(self):
        assert self._score(ToyComparisonEval) == 1.0
@@ -0,0 +1,71 @@
1
+ """Tests for _check_plain_host_availability GPU memory parsing logic.
2
+
3
+ Migrated from scripts/test_gpu_availability.py.
4
+ """
5
+
6
+ from unittest.mock import MagicMock, patch
7
+ import pytest
8
+
9
+
10
def make_run_result(stdout: str, ok: bool = True) -> MagicMock:
    """Build a mock command result with ``ok``, ``stdout`` and empty ``stderr``."""
    # Keyword arguments to MagicMock are set as attributes on the mock.
    return MagicMock(ok=ok, stdout=stdout, stderr="")
16
+
17
+
18
def check(nvidia_smi_output: str, configured: int = 4) -> int:
    """Run ``_check_plain_host_availability`` against canned command output.

    ``theseus.dispatch.ssh.run`` is patched to return a successful result
    whose stdout is ``nvidia_smi_output``; the host name and timeout are
    fixed to ``"fake-host"`` and ``5.0``.
    """
    from theseus.dispatch.solve import _check_plain_host_availability

    fake_result = make_run_result(nvidia_smi_output)
    with patch("theseus.dispatch.ssh.run", return_value=fake_result):
        available = _check_plain_host_availability(
            "fake-host", configured, timeout=5.0
        )
    return available
25
+
26
+
27
class TestGPUAvailability:
    """Expected availability counts for various canned process listings.

    Rows have the form ``<gpu id>, <pid>, <process name>, <memory>``. With
    4 configured GPUs the helper is expected to return 4 when every row
    reports a memory value below 100 (or none at all), and 0 otherwise.
    NOTE(review): the memory unit is presumably MiB — confirm against
    ``_check_plain_host_availability``.
    """

    def test_no_processes(self):
        assert check("") == 4

    def test_xorg_only(self):
        rows = (
            "GPU-abc, 3374, /usr/lib/xorg/Xorg, 4\n"
            "GPU-def, 3374, /usr/lib/xorg/Xorg, 4\n"
        )
        assert check(rows) == 4

    def test_zero_memory(self):
        rows = "GPU-abc, 1234, some-daemon, 0\n"
        assert check(rows) == 4

    def test_na_memory(self):
        rows = "GPU-abc, 1234, some-process, N/A\n"
        assert check(rows) == 4

    def test_missing_memory_field(self):
        rows = "GPU-abc, 1234, some-process\n"
        assert check(rows) == 4

    def test_real_training_job(self):
        rows = "GPU-abc, 9999, python, 38000\n"
        assert check(rows) == 0

    def test_mixed_noise_and_real(self):
        rows = (
            "GPU-abc, 3374, /usr/lib/xorg/Xorg, 4\n"
            "GPU-def, 9999, python, 38000\n"
        )
        assert check(rows) == 0

    def test_exactly_at_threshold(self):
        rows = "GPU-abc, 9999, python, 100\n"
        assert check(rows) == 0

    def test_just_below_threshold(self):
        rows = "GPU-abc, 9999, some-process, 99\n"
        assert check(rows) == 4

    def test_nvidia_smi_failure(self):
        """A failed remote command must yield zero available GPUs."""
        from theseus.dispatch.solve import _check_plain_host_availability

        failed_run = make_run_result("", ok=False)
        with patch("theseus.dispatch.ssh.run", return_value=failed_run):
            result = _check_plain_host_availability("fake-host", 4, timeout=5.0)
        assert result == 0