libthx 0.2.1__tar.gz → 0.3.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (235)
  1. {libthx-0.2.1 → libthx-0.3.0}/PKG-INFO +3 -2
  2. {libthx-0.2.1 → libthx-0.3.0}/libthx.egg-info/PKG-INFO +3 -2
  3. {libthx-0.2.1 → libthx-0.3.0}/libthx.egg-info/SOURCES.txt +22 -0
  4. {libthx-0.2.1 → libthx-0.3.0}/libthx.egg-info/requires.txt +1 -0
  5. {libthx-0.2.1 → libthx-0.3.0}/pyproject.toml +12 -3
  6. {libthx-0.2.1 → libthx-0.3.0}/tests/test_kv_cache.py +54 -1
  7. libthx-0.3.0/tests/test_lact.py +625 -0
  8. libthx-0.3.0/tests/test_mamba.py +651 -0
  9. libthx-0.3.0/tests/test_vsubmit_log_fetcher.py +201 -0
  10. {libthx-0.2.1 → libthx-0.3.0}/theseus/cli.py +105 -35
  11. {libthx-0.2.1 → libthx-0.3.0}/theseus/config.py +11 -5
  12. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/__init__.py +2 -0
  13. libthx-0.3.0/theseus/data/datasets/flan.py +32 -0
  14. libthx-0.3.0/theseus/data/datasets/openr1_math.py +47 -0
  15. libthx-0.3.0/theseus/data/datasets/pes2o.py +59 -0
  16. {libthx-0.2.1 → libthx-0.3.0}/theseus/dispatch/bootstrap.py +1 -1
  17. {libthx-0.2.1 → libthx-0.3.0}/theseus/dispatch/config.py +8 -1
  18. {libthx-0.2.1 → libthx-0.3.0}/theseus/dispatch/dispatch.py +108 -55
  19. {libthx-0.2.1 → libthx-0.3.0}/theseus/dispatch/slurm.py +5 -2
  20. {libthx-0.2.1 → libthx-0.3.0}/theseus/dispatch/volcano.py +291 -203
  21. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/base.py +29 -8
  22. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/__init__.py +5 -0
  23. libthx-0.3.0/theseus/evaluation/datasets/arc_challenge.py +82 -0
  24. libthx-0.3.0/theseus/evaluation/datasets/bbh.py +174 -0
  25. libthx-0.3.0/theseus/evaluation/datasets/gsm8k.py +102 -0
  26. libthx-0.3.0/theseus/evaluation/datasets/hellaswag.py +74 -0
  27. libthx-0.3.0/theseus/evaluation/datasets/math.py +91 -0
  28. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/perplexity_evals.py +29 -0
  29. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/__init__.py +9 -0
  30. libthx-0.3.0/theseus/experiments/benchmark.py +125 -0
  31. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/continual/abcd.py +12 -5
  32. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/continual/benchmark.py +80 -4
  33. libthx-0.3.0/theseus/experiments/models/lact.py +41 -0
  34. libthx-0.3.0/theseus/experiments/models/qwen_3_5.py +55 -0
  35. libthx-0.3.0/theseus/inference/__init__.py +4 -0
  36. libthx-0.3.0/theseus/inference/ttt.py +77 -0
  37. {libthx-0.2.1 → libthx-0.3.0}/theseus/job.py +22 -11
  38. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/attention/__init__.py +2 -0
  39. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/attention/base.py +51 -3
  40. libthx-0.3.0/theseus/model/attention/gated_delta.py +458 -0
  41. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/attention/grouped.py +55 -6
  42. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/axes.py +2 -0
  43. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/block/__init__.py +5 -0
  44. libthx-0.3.0/theseus/model/block/lact.py +271 -0
  45. libthx-0.3.0/theseus/model/block/mamba.py +438 -0
  46. libthx-0.3.0/theseus/model/block/qwen_3_5.py +83 -0
  47. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/layers/__init__.py +4 -1
  48. libthx-0.3.0/theseus/model/layers/lact.py +320 -0
  49. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/layers/mlp.py +1 -1
  50. libthx-0.3.0/theseus/model/layers/mrope.py +82 -0
  51. libthx-0.3.0/theseus/model/layers/rmsnorm.py +75 -0
  52. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/masks.py +9 -2
  53. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/models/__init__.py +6 -0
  54. libthx-0.3.0/theseus/model/models/contrib/__init__.py +8 -0
  55. libthx-0.3.0/theseus/model/models/contrib/qwen_3_5.py +496 -0
  56. libthx-0.3.0/theseus/model/models/contrib/qwen_3_5_moe.py +302 -0
  57. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/models/hybrid.py +0 -7
  58. libthx-0.3.0/theseus/model/models/lact.py +79 -0
  59. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/models/mamba.py +35 -14
  60. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/models/moe.py +6 -1
  61. libthx-0.3.0/theseus/model/models/scratchbubbles.py +338 -0
  62. libthx-0.3.0/theseus/model/moe/__init__.py +5 -0
  63. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/moe/base.py +57 -6
  64. libthx-0.3.0/theseus/model/moe/shared.py +114 -0
  65. {libthx-0.2.1 → libthx-0.3.0}/theseus/plot.py +53 -2
  66. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/backbone.py +54 -3
  67. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/base.py +87 -19
  68. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/kl_divergence.py +2 -3
  69. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/ppo.py +2 -1
  70. libthx-0.2.1/tests/test_mamba.py +0 -307
  71. libthx-0.2.1/theseus/data/datasets/pes2o.py +0 -38
  72. libthx-0.2.1/theseus/inference/__init__.py +0 -3
  73. libthx-0.2.1/theseus/model/block/mamba.py +0 -309
  74. libthx-0.2.1/theseus/model/layers/rmsnorm.py +0 -31
  75. libthx-0.2.1/theseus/model/models/contrib/__init__.py +0 -6
  76. libthx-0.2.1/theseus/model/models/scratchbubbles.py +0 -154
  77. libthx-0.2.1/theseus/model/moe/__init__.py +0 -4
  78. {libthx-0.2.1 → libthx-0.3.0}/LICENSE +0 -0
  79. {libthx-0.2.1 → libthx-0.3.0}/README.md +0 -0
  80. {libthx-0.2.1 → libthx-0.3.0}/libthx.egg-info/dependency_links.txt +0 -0
  81. {libthx-0.2.1 → libthx-0.3.0}/libthx.egg-info/entry_points.txt +0 -0
  82. {libthx-0.2.1 → libthx-0.3.0}/libthx.egg-info/top_level.txt +0 -0
  83. {libthx-0.2.1 → libthx-0.3.0}/setup.cfg +0 -0
  84. {libthx-0.2.1 → libthx-0.3.0}/tests/test_contrastive_roundtrip.py +0 -0
  85. {libthx-0.2.1 → libthx-0.3.0}/tests/test_datasets.py +0 -0
  86. {libthx-0.2.1 → libthx-0.3.0}/tests/test_eval_padding.py +0 -0
  87. {libthx-0.2.1 → libthx-0.3.0}/tests/test_gpu_availability.py +0 -0
  88. {libthx-0.2.1 → libthx-0.3.0}/tests/test_hardware_dispatch.py +0 -0
  89. {libthx-0.2.1 → libthx-0.3.0}/tests/test_lora.py +0 -0
  90. {libthx-0.2.1 → libthx-0.3.0}/tests/test_registries.py +0 -0
  91. {libthx-0.2.1 → libthx-0.3.0}/theseus/__init__.py +0 -0
  92. {libthx-0.2.1 → libthx-0.3.0}/theseus/base/__init__.py +0 -0
  93. {libthx-0.2.1 → libthx-0.3.0}/theseus/base/axis.py +0 -0
  94. {libthx-0.2.1 → libthx-0.3.0}/theseus/base/chip.py +0 -0
  95. {libthx-0.2.1 → libthx-0.3.0}/theseus/base/hardware.py +0 -0
  96. {libthx-0.2.1 → libthx-0.3.0}/theseus/base/job.py +0 -0
  97. {libthx-0.2.1 → libthx-0.3.0}/theseus/base/topology.py +0 -0
  98. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/__init__.py +0 -0
  99. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/alpaca.py +0 -0
  100. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/bbq.py +0 -0
  101. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/ccaligned.py +0 -0
  102. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/cfq.py +0 -0
  103. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/clutrr.py +0 -0
  104. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/dataset.py +0 -0
  105. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/dictlearn.py +0 -0
  106. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/fever.py +0 -0
  107. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/fineweb.py +0 -0
  108. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/harmfulqa.py +0 -0
  109. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/longbench.py +0 -0
  110. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/longhealth.py +0 -0
  111. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/mmlu.py +0 -0
  112. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/mnli.py +0 -0
  113. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/mtob.py +0 -0
  114. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/pg19.py +0 -0
  115. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/pile.py +0 -0
  116. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/pile_detoxify.py +0 -0
  117. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/pile_injected.py +0 -0
  118. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/qqp.py +0 -0
  119. {libthx-0.2.1/theseus/training/flywheel → libthx-0.3.0/theseus/data/datasets/redcodegen}/__init__.py +0 -0
  120. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/redcodegen/hardening.py +0 -0
  121. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/siqa.py +0 -0
  122. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/squad.py +0 -0
  123. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/sst2.py +0 -0
  124. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/datasets/winogrande.py +0 -0
  125. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/tokenize.py +0 -0
  126. {libthx-0.2.1 → libthx-0.3.0}/theseus/data/tokenizer.py +0 -0
  127. {libthx-0.2.1 → libthx-0.3.0}/theseus/dispatch/__init__.py +0 -0
  128. {libthx-0.2.1 → libthx-0.3.0}/theseus/dispatch/mailbox/__init__.py +0 -0
  129. {libthx-0.2.1 → libthx-0.3.0}/theseus/dispatch/mailbox/mailbox.py +0 -0
  130. {libthx-0.2.1 → libthx-0.3.0}/theseus/dispatch/mailbox/sidecar.py +0 -0
  131. {libthx-0.2.1 → libthx-0.3.0}/theseus/dispatch/solve.py +0 -0
  132. {libthx-0.2.1 → libthx-0.3.0}/theseus/dispatch/ssh.py +0 -0
  133. {libthx-0.2.1 → libthx-0.3.0}/theseus/dispatch/sync.py +0 -0
  134. {libthx-0.2.1 → libthx-0.3.0}/theseus/dispatch/tpu.py +0 -0
  135. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/__init__.py +0 -0
  136. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/alpaca.py +0 -0
  137. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/arithmetic.py +0 -0
  138. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/bbq.py +0 -0
  139. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/blimp.py +0 -0
  140. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/ccaligned.py +0 -0
  141. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/cfq.py +0 -0
  142. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/clutrr.py +0 -0
  143. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/dictlearn.py +0 -0
  144. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/fever.py +0 -0
  145. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/longbench.py +0 -0
  146. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/longhealth.py +0 -0
  147. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/mmlu.py +0 -0
  148. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/mnli.py +0 -0
  149. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/mtob.py +0 -0
  150. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/pes2o.py +0 -0
  151. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/pg19.py +0 -0
  152. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/pg19_lengthgen.py +0 -0
  153. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/pile.py +0 -0
  154. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/pile_injected.py +0 -0
  155. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/qqp.py +0 -0
  156. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/siqa.py +0 -0
  157. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/squad.py +0 -0
  158. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/sst2.py +0 -0
  159. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/tinystories.py +0 -0
  160. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/datasets/winogrande.py +0 -0
  161. {libthx-0.2.1 → libthx-0.3.0}/theseus/evaluation/huggingface.py +0 -0
  162. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/continual/__init__.py +0 -0
  163. {libthx-0.2.1/theseus/model → libthx-0.3.0/theseus/experiments/models}/__init__.py +0 -0
  164. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/models/forking.py +0 -0
  165. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/models/gpt.py +0 -0
  166. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/models/gpt_neox.py +0 -0
  167. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/models/llama.py +0 -0
  168. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/models/moe.py +0 -0
  169. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/models/qwen.py +0 -0
  170. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/mok/__init__.py +0 -0
  171. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/mok/reward.py +0 -0
  172. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/mok/smoke.py +0 -0
  173. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/redcodegen/__init__.py +0 -0
  174. {libthx-0.2.1 → libthx-0.3.0}/theseus/experiments/redcodegen/hardening.py +0 -0
  175. {libthx-0.2.1 → libthx-0.3.0}/theseus/inference/base.py +0 -0
  176. {libthx-0.2.1 → libthx-0.3.0}/theseus/inference/huggingface.py +0 -0
  177. {libthx-0.2.1 → libthx-0.3.0}/theseus/mock.py +0 -0
  178. {libthx-0.2.1/theseus/experiments/models → libthx-0.3.0/theseus/model}/__init__.py +0 -0
  179. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/activations/__init__.py +0 -0
  180. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/activations/swiglu.py +0 -0
  181. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/attention/forking.py +0 -0
  182. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/attention/rope.py +0 -0
  183. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/attention/scratching.py +0 -0
  184. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/block/block.py +0 -0
  185. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/block/forking.py +0 -0
  186. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/block/gpt_neox.py +0 -0
  187. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/block/llama.py +0 -0
  188. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/block/moe.py +0 -0
  189. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/block/qwen.py +0 -0
  190. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/block/scratching.py +0 -0
  191. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/huggingface.py +0 -0
  192. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/layers/layernorm.py +0 -0
  193. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/layers/rope.py +0 -0
  194. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/models/base.py +0 -0
  195. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/models/contrib/gpt_neox.py +0 -0
  196. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/models/contrib/llama.py +0 -0
  197. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/models/contrib/marin.py +0 -0
  198. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/models/contrib/qwen.py +0 -0
  199. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/models/thoughtbubbles.py +0 -0
  200. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/module.py +0 -0
  201. {libthx-0.2.1 → libthx-0.3.0}/theseus/model/moe/bias_balanced.py +0 -0
  202. {libthx-0.2.1 → libthx-0.3.0}/theseus/quick.py +0 -0
  203. {libthx-0.2.1 → libthx-0.3.0}/theseus/registry.py +0 -0
  204. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/__init__.py +0 -0
  205. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/contrastive.py +0 -0
  206. {libthx-0.2.1/theseus/data/datasets/redcodegen → libthx-0.3.0/theseus/training/flywheel}/__init__.py +0 -0
  207. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/flywheel/contrastive.py +0 -0
  208. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/flywheel/padded.py +0 -0
  209. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/flywheel/pmd.py +0 -0
  210. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/flywheel/strategy.py +0 -0
  211. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/grpo.py +0 -0
  212. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/huggingface.py +0 -0
  213. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/lora.py +0 -0
  214. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/optimizers/__init__.py +0 -0
  215. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/optimizers/adamw.py +0 -0
  216. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/optimizers/muon.py +0 -0
  217. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/schedules/__init__.py +0 -0
  218. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/schedules/cosine_rewarm.py +0 -0
  219. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/schedules/wsd.py +0 -0
  220. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/schedules/wsds.py +0 -0
  221. {libthx-0.2.1 → libthx-0.3.0}/theseus/training/utils.py +0 -0
  222. {libthx-0.2.1 → libthx-0.3.0}/theseus/web/__init__.py +0 -0
  223. {libthx-0.2.1 → libthx-0.3.0}/theseus/web/app.py +0 -0
  224. {libthx-0.2.1 → libthx-0.3.0}/theseus/web/auth.py +0 -0
  225. {libthx-0.2.1 → libthx-0.3.0}/theseus/web/generate_password_hash.py +0 -0
  226. {libthx-0.2.1 → libthx-0.3.0}/theseus/web/models.py +0 -0
  227. {libthx-0.2.1 → libthx-0.3.0}/theseus/web/routes/__init__.py +0 -0
  228. {libthx-0.2.1 → libthx-0.3.0}/theseus/web/routes/api.py +0 -0
  229. {libthx-0.2.1 → libthx-0.3.0}/theseus/web/routes/auth.py +0 -0
  230. {libthx-0.2.1 → libthx-0.3.0}/theseus/web/routes/views.py +0 -0
  231. {libthx-0.2.1 → libthx-0.3.0}/theseus/web/services/__init__.py +0 -0
  232. {libthx-0.2.1 → libthx-0.3.0}/theseus/web/services/cache.py +0 -0
  233. {libthx-0.2.1 → libthx-0.3.0}/theseus/web/services/checkpoints.py +0 -0
  234. {libthx-0.2.1 → libthx-0.3.0}/theseus/web/services/logs.py +0 -0
  235. {libthx-0.2.1 → libthx-0.3.0}/theseus/web/services/status.py +0 -0
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: libthx
3
- Version: 0.2.1
3
+ Version: 0.3.0
4
4
  Summary: Architecture experimentation and training infrastructure.
5
- Requires-Python: >=3.11
5
+ Requires-Python: <3.14,>=3.11
6
6
  Description-Content-Type: text/markdown
7
7
  License-File: LICENSE
8
8
  Requires-Dist: click<=8.2.1
@@ -20,6 +20,7 @@ Requires-Dist: tiktoken>=0.12.0
20
20
  Requires-Dist: torchax>=0.0.11
21
21
  Requires-Dist: wandb>=0.24.1
22
22
  Requires-Dist: datasets>=4.5.0
23
+ Requires-Dist: accelerate>=1.13.0
23
24
  Provides-Extra: fever
24
25
  Requires-Dist: wikipedia>=1.4.0; extra == "fever"
25
26
  Provides-Extra: huggingface
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: libthx
3
- Version: 0.2.1
3
+ Version: 0.3.0
4
4
  Summary: Architecture experimentation and training infrastructure.
5
- Requires-Python: >=3.11
5
+ Requires-Python: <3.14,>=3.11
6
6
  Description-Content-Type: text/markdown
7
7
  License-File: LICENSE
8
8
  Requires-Dist: click<=8.2.1
@@ -20,6 +20,7 @@ Requires-Dist: tiktoken>=0.12.0
20
20
  Requires-Dist: torchax>=0.0.11
21
21
  Requires-Dist: wandb>=0.24.1
22
22
  Requires-Dist: datasets>=4.5.0
23
+ Requires-Dist: accelerate>=1.13.0
23
24
  Provides-Extra: fever
24
25
  Requires-Dist: wikipedia>=1.4.0; extra == "fever"
25
26
  Provides-Extra: huggingface
@@ -13,9 +13,11 @@ tests/test_eval_padding.py
13
13
  tests/test_gpu_availability.py
14
14
  tests/test_hardware_dispatch.py
15
15
  tests/test_kv_cache.py
16
+ tests/test_lact.py
16
17
  tests/test_lora.py
17
18
  tests/test_mamba.py
18
19
  tests/test_registries.py
20
+ tests/test_vsubmit_log_fetcher.py
19
21
  theseus/__init__.py
20
22
  theseus/cli.py
21
23
  theseus/config.py
@@ -43,12 +45,14 @@ theseus/data/datasets/dataset.py
43
45
  theseus/data/datasets/dictlearn.py
44
46
  theseus/data/datasets/fever.py
45
47
  theseus/data/datasets/fineweb.py
48
+ theseus/data/datasets/flan.py
46
49
  theseus/data/datasets/harmfulqa.py
47
50
  theseus/data/datasets/longbench.py
48
51
  theseus/data/datasets/longhealth.py
49
52
  theseus/data/datasets/mmlu.py
50
53
  theseus/data/datasets/mnli.py
51
54
  theseus/data/datasets/mtob.py
55
+ theseus/data/datasets/openr1_math.py
52
56
  theseus/data/datasets/pes2o.py
53
57
  theseus/data/datasets/pg19.py
54
58
  theseus/data/datasets/pile.py
@@ -79,7 +83,9 @@ theseus/evaluation/base.py
79
83
  theseus/evaluation/huggingface.py
80
84
  theseus/evaluation/datasets/__init__.py
81
85
  theseus/evaluation/datasets/alpaca.py
86
+ theseus/evaluation/datasets/arc_challenge.py
82
87
  theseus/evaluation/datasets/arithmetic.py
88
+ theseus/evaluation/datasets/bbh.py
83
89
  theseus/evaluation/datasets/bbq.py
84
90
  theseus/evaluation/datasets/blimp.py
85
91
  theseus/evaluation/datasets/ccaligned.py
@@ -87,8 +93,11 @@ theseus/evaluation/datasets/cfq.py
87
93
  theseus/evaluation/datasets/clutrr.py
88
94
  theseus/evaluation/datasets/dictlearn.py
89
95
  theseus/evaluation/datasets/fever.py
96
+ theseus/evaluation/datasets/gsm8k.py
97
+ theseus/evaluation/datasets/hellaswag.py
90
98
  theseus/evaluation/datasets/longbench.py
91
99
  theseus/evaluation/datasets/longhealth.py
100
+ theseus/evaluation/datasets/math.py
92
101
  theseus/evaluation/datasets/mmlu.py
93
102
  theseus/evaluation/datasets/mnli.py
94
103
  theseus/evaluation/datasets/mtob.py
@@ -105,6 +114,7 @@ theseus/evaluation/datasets/sst2.py
105
114
  theseus/evaluation/datasets/tinystories.py
106
115
  theseus/evaluation/datasets/winogrande.py
107
116
  theseus/experiments/__init__.py
117
+ theseus/experiments/benchmark.py
108
118
  theseus/experiments/continual/__init__.py
109
119
  theseus/experiments/continual/abcd.py
110
120
  theseus/experiments/continual/benchmark.py
@@ -112,9 +122,11 @@ theseus/experiments/models/__init__.py
112
122
  theseus/experiments/models/forking.py
113
123
  theseus/experiments/models/gpt.py
114
124
  theseus/experiments/models/gpt_neox.py
125
+ theseus/experiments/models/lact.py
115
126
  theseus/experiments/models/llama.py
116
127
  theseus/experiments/models/moe.py
117
128
  theseus/experiments/models/qwen.py
129
+ theseus/experiments/models/qwen_3_5.py
118
130
  theseus/experiments/mok/__init__.py
119
131
  theseus/experiments/mok/reward.py
120
132
  theseus/experiments/mok/smoke.py
@@ -123,6 +135,7 @@ theseus/experiments/redcodegen/hardening.py
123
135
  theseus/inference/__init__.py
124
136
  theseus/inference/base.py
125
137
  theseus/inference/huggingface.py
138
+ theseus/inference/ttt.py
126
139
  theseus/model/__init__.py
127
140
  theseus/model/axes.py
128
141
  theseus/model/huggingface.py
@@ -133,6 +146,7 @@ theseus/model/activations/swiglu.py
133
146
  theseus/model/attention/__init__.py
134
147
  theseus/model/attention/base.py
135
148
  theseus/model/attention/forking.py
149
+ theseus/model/attention/gated_delta.py
136
150
  theseus/model/attention/grouped.py
137
151
  theseus/model/attention/rope.py
138
152
  theseus/model/attention/scratching.py
@@ -140,19 +154,24 @@ theseus/model/block/__init__.py
140
154
  theseus/model/block/block.py
141
155
  theseus/model/block/forking.py
142
156
  theseus/model/block/gpt_neox.py
157
+ theseus/model/block/lact.py
143
158
  theseus/model/block/llama.py
144
159
  theseus/model/block/mamba.py
145
160
  theseus/model/block/moe.py
146
161
  theseus/model/block/qwen.py
162
+ theseus/model/block/qwen_3_5.py
147
163
  theseus/model/block/scratching.py
148
164
  theseus/model/layers/__init__.py
165
+ theseus/model/layers/lact.py
149
166
  theseus/model/layers/layernorm.py
150
167
  theseus/model/layers/mlp.py
168
+ theseus/model/layers/mrope.py
151
169
  theseus/model/layers/rmsnorm.py
152
170
  theseus/model/layers/rope.py
153
171
  theseus/model/models/__init__.py
154
172
  theseus/model/models/base.py
155
173
  theseus/model/models/hybrid.py
174
+ theseus/model/models/lact.py
156
175
  theseus/model/models/mamba.py
157
176
  theseus/model/models/moe.py
158
177
  theseus/model/models/scratchbubbles.py
@@ -162,9 +181,12 @@ theseus/model/models/contrib/gpt_neox.py
162
181
  theseus/model/models/contrib/llama.py
163
182
  theseus/model/models/contrib/marin.py
164
183
  theseus/model/models/contrib/qwen.py
184
+ theseus/model/models/contrib/qwen_3_5.py
185
+ theseus/model/models/contrib/qwen_3_5_moe.py
165
186
  theseus/model/moe/__init__.py
166
187
  theseus/model/moe/base.py
167
188
  theseus/model/moe/bias_balanced.py
189
+ theseus/model/moe/shared.py
168
190
  theseus/training/__init__.py
169
191
  theseus/training/backbone.py
170
192
  theseus/training/base.py
@@ -13,6 +13,7 @@ tiktoken>=0.12.0
13
13
  torchax>=0.0.11
14
14
  wandb>=0.24.1
15
15
  datasets>=4.5.0
16
+ accelerate>=1.13.0
16
17
 
17
18
  [cpu]
18
19
  jax>=0.4.23
@@ -1,9 +1,9 @@
1
1
  [project]
2
2
  name = "libthx"
3
- version = "0.2.1"
3
+ version = "0.3.0"
4
4
  description = "Architecture experimentation and training infrastructure."
5
5
  readme = "README.md"
6
- requires-python = ">=3.11"
6
+ requires-python = ">=3.11,<3.14"
7
7
  dependencies = [
8
8
  "click<=8.2.1", # mkdocs breaks otherwise...
9
9
  "flax>=0.12.2",
@@ -19,7 +19,8 @@ dependencies = [
19
19
  "tiktoken>=0.12.0",
20
20
  "torchax>=0.0.11",
21
21
  "wandb>=0.24.1",
22
- "datasets>=4.5.0"
22
+ "datasets>=4.5.0",
23
+ "accelerate>=1.13.0",
23
24
  ]
24
25
 
25
26
 
@@ -224,6 +225,14 @@ indent-style = "space"
224
225
  docstring-code-format = true
225
226
  docstring-code-line-length = 20
226
227
 
228
+ [tool.pytest.ini_options]
229
+ # ``contrib`` is not installed as a package by setuptools (only
230
+ # ``theseus*`` is — see ``[tool.setuptools.packages.find]``), but tests
231
+ # can still import from it because uv runs everything with the repo
232
+ # root as cwd. Pytest, however, doesn't add the rootdir to ``sys.path``
233
+ # by default, so we do it here.
234
+ pythonpath = ["."]
235
+
227
236
  [tool.mypy]
228
237
  plugins = ['pydantic.mypy']
229
238
  python_version = "3.12"
@@ -12,7 +12,6 @@ from typing import Any
12
12
  import numpy as np
13
13
  import jax
14
14
  import jax.numpy as jnp
15
- import pytest
16
15
  from omegaconf import OmegaConf
17
16
 
18
17
  from theseus.config import build, configuration
@@ -123,3 +122,57 @@ class TestKVCacheGPTNeoX:
123
122
  )
124
123
  with _build_config_ctx(GPTNeoX, kwargs):
125
124
  _kv_cache_parity(GPTNeoX, kwargs)
125
+
126
+
127
+ # Hybrid (full + linear/gated-delta attention). Exercises both the GQA KV
128
+ # cache and the GatedDeltaNet recurrent/conv decode cache in one model.
129
+ _QWEN35_KWARGS = dict(
130
+ n_layers=4,
131
+ n_embd=64,
132
+ n_head=4,
133
+ n_kv_head=2,
134
+ head_dim=16,
135
+ intermediate_size=128,
136
+ rope_theta=1e6,
137
+ partial_rotary_factor=0.25,
138
+ rms_norm_eps=1e-6,
139
+ block_size=32,
140
+ vocab_size=128,
141
+ dropout=0.0,
142
+ attn_dropout=0.0,
143
+ bias=False,
144
+ attention_bias=False,
145
+ layer_types=[
146
+ "linear_attention",
147
+ "full_attention",
148
+ "linear_attention",
149
+ "full_attention",
150
+ ],
151
+ linear_num_value_heads=4,
152
+ linear_num_key_heads=2,
153
+ linear_key_head_dim=16,
154
+ linear_value_head_dim=16,
155
+ linear_conv_kernel_dim=4,
156
+ )
157
+
158
+
159
+ class TestKVCacheQwen35:
160
+ def test_qwen_3_5_dense(self):
161
+ from theseus.model.models.contrib.qwen_3_5 import Qwen3_5
162
+
163
+ kwargs = dict(_QWEN35_KWARGS)
164
+ with _build_config_ctx(Qwen3_5, kwargs):
165
+ _kv_cache_parity(Qwen3_5, kwargs, atol=2e-3)
166
+
167
+ def test_qwen_3_5_moe(self):
168
+ from theseus.model.models.contrib.qwen_3_5_moe import Qwen3_5MoE
169
+
170
+ kwargs = dict(
171
+ _QWEN35_KWARGS,
172
+ num_experts=4,
173
+ num_experts_per_tok=2,
174
+ moe_intermediate_size=32,
175
+ shared_expert_intermediate_size=32,
176
+ )
177
+ with _build_config_ctx(Qwen3_5MoE, kwargs):
178
+ _kv_cache_parity(Qwen3_5MoE, kwargs, atol=2e-3)