PyPI - libthx - Versions diffs - 0.2.1__tar.gz → 0.3.0__tar.gz - Mend

libthx 0.2.1.tar.gz → 0.3.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (235) hide show

{libthx-0.2.1 → libthx-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,8 +1,8 @@
 Metadata-Version: 2.4
 Name: libthx
-Version: 0.2.1
+Version: 0.3.0
 Summary: Architecture experimentation and training infrastructure.
-Requires-Python: >=3.11
+Requires-Python: <3.14,>=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: click<=8.2.1
@@ -20,6 +20,7 @@ Requires-Dist: tiktoken>=0.12.0
 Requires-Dist: torchax>=0.0.11
 Requires-Dist: wandb>=0.24.1
 Requires-Dist: datasets>=4.5.0
+Requires-Dist: accelerate>=1.13.0
 Provides-Extra: fever
 Requires-Dist: wikipedia>=1.4.0; extra == "fever"
 Provides-Extra: huggingface

{libthx-0.2.1 → libthx-0.3.0}/libthx.egg-info/PKG-INFO RENAMED Viewed

@@ -1,8 +1,8 @@
 Metadata-Version: 2.4
 Name: libthx
-Version: 0.2.1
+Version: 0.3.0
 Summary: Architecture experimentation and training infrastructure.
-Requires-Python: >=3.11
+Requires-Python: <3.14,>=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: click<=8.2.1
@@ -20,6 +20,7 @@ Requires-Dist: tiktoken>=0.12.0
 Requires-Dist: torchax>=0.0.11
 Requires-Dist: wandb>=0.24.1
 Requires-Dist: datasets>=4.5.0
+Requires-Dist: accelerate>=1.13.0
 Provides-Extra: fever
 Requires-Dist: wikipedia>=1.4.0; extra == "fever"
 Provides-Extra: huggingface

{libthx-0.2.1 → libthx-0.3.0}/libthx.egg-info/SOURCES.txt RENAMED Viewed

@@ -13,9 +13,11 @@ tests/test_eval_padding.py
 tests/test_gpu_availability.py
 tests/test_hardware_dispatch.py
 tests/test_kv_cache.py
+tests/test_lact.py
 tests/test_lora.py
 tests/test_mamba.py
 tests/test_registries.py
+tests/test_vsubmit_log_fetcher.py
 theseus/__init__.py
 theseus/cli.py
 theseus/config.py
@@ -43,12 +45,14 @@ theseus/data/datasets/dataset.py
 theseus/data/datasets/dictlearn.py
 theseus/data/datasets/fever.py
 theseus/data/datasets/fineweb.py
+theseus/data/datasets/flan.py
 theseus/data/datasets/harmfulqa.py
 theseus/data/datasets/longbench.py
 theseus/data/datasets/longhealth.py
 theseus/data/datasets/mmlu.py
 theseus/data/datasets/mnli.py
 theseus/data/datasets/mtob.py
+theseus/data/datasets/openr1_math.py
 theseus/data/datasets/pes2o.py
 theseus/data/datasets/pg19.py
 theseus/data/datasets/pile.py
@@ -79,7 +83,9 @@ theseus/evaluation/base.py
 theseus/evaluation/huggingface.py
 theseus/evaluation/datasets/__init__.py
 theseus/evaluation/datasets/alpaca.py
+theseus/evaluation/datasets/arc_challenge.py
 theseus/evaluation/datasets/arithmetic.py
+theseus/evaluation/datasets/bbh.py
 theseus/evaluation/datasets/bbq.py
 theseus/evaluation/datasets/blimp.py
 theseus/evaluation/datasets/ccaligned.py
@@ -87,8 +93,11 @@ theseus/evaluation/datasets/cfq.py
 theseus/evaluation/datasets/clutrr.py
 theseus/evaluation/datasets/dictlearn.py
 theseus/evaluation/datasets/fever.py
+theseus/evaluation/datasets/gsm8k.py
+theseus/evaluation/datasets/hellaswag.py
 theseus/evaluation/datasets/longbench.py
 theseus/evaluation/datasets/longhealth.py
+theseus/evaluation/datasets/math.py
 theseus/evaluation/datasets/mmlu.py
 theseus/evaluation/datasets/mnli.py
 theseus/evaluation/datasets/mtob.py
@@ -105,6 +114,7 @@ theseus/evaluation/datasets/sst2.py
 theseus/evaluation/datasets/tinystories.py
 theseus/evaluation/datasets/winogrande.py
 theseus/experiments/__init__.py
+theseus/experiments/benchmark.py
 theseus/experiments/continual/__init__.py
 theseus/experiments/continual/abcd.py
 theseus/experiments/continual/benchmark.py
@@ -112,9 +122,11 @@ theseus/experiments/models/__init__.py
 theseus/experiments/models/forking.py
 theseus/experiments/models/gpt.py
 theseus/experiments/models/gpt_neox.py
+theseus/experiments/models/lact.py
 theseus/experiments/models/llama.py
 theseus/experiments/models/moe.py
 theseus/experiments/models/qwen.py
+theseus/experiments/models/qwen_3_5.py
 theseus/experiments/mok/__init__.py
 theseus/experiments/mok/reward.py
 theseus/experiments/mok/smoke.py
@@ -123,6 +135,7 @@ theseus/experiments/redcodegen/hardening.py
 theseus/inference/__init__.py
 theseus/inference/base.py
 theseus/inference/huggingface.py
+theseus/inference/ttt.py
 theseus/model/__init__.py
 theseus/model/axes.py
 theseus/model/huggingface.py
@@ -133,6 +146,7 @@ theseus/model/activations/swiglu.py
 theseus/model/attention/__init__.py
 theseus/model/attention/base.py
 theseus/model/attention/forking.py
+theseus/model/attention/gated_delta.py
 theseus/model/attention/grouped.py
 theseus/model/attention/rope.py
 theseus/model/attention/scratching.py
@@ -140,19 +154,24 @@ theseus/model/block/__init__.py
 theseus/model/block/block.py
 theseus/model/block/forking.py
 theseus/model/block/gpt_neox.py
+theseus/model/block/lact.py
 theseus/model/block/llama.py
 theseus/model/block/mamba.py
 theseus/model/block/moe.py
 theseus/model/block/qwen.py
+theseus/model/block/qwen_3_5.py
 theseus/model/block/scratching.py
 theseus/model/layers/__init__.py
+theseus/model/layers/lact.py
 theseus/model/layers/layernorm.py
 theseus/model/layers/mlp.py
+theseus/model/layers/mrope.py
 theseus/model/layers/rmsnorm.py
 theseus/model/layers/rope.py
 theseus/model/models/__init__.py
 theseus/model/models/base.py
 theseus/model/models/hybrid.py
+theseus/model/models/lact.py
 theseus/model/models/mamba.py
 theseus/model/models/moe.py
 theseus/model/models/scratchbubbles.py
@@ -162,9 +181,12 @@ theseus/model/models/contrib/gpt_neox.py
 theseus/model/models/contrib/llama.py
 theseus/model/models/contrib/marin.py
 theseus/model/models/contrib/qwen.py
+theseus/model/models/contrib/qwen_3_5.py
+theseus/model/models/contrib/qwen_3_5_moe.py
 theseus/model/moe/__init__.py
 theseus/model/moe/base.py
 theseus/model/moe/bias_balanced.py
+theseus/model/moe/shared.py
 theseus/training/__init__.py
 theseus/training/backbone.py
 theseus/training/base.py

{libthx-0.2.1 → libthx-0.3.0}/libthx.egg-info/requires.txt RENAMED Viewed

@@ -13,6 +13,7 @@ tiktoken>=0.12.0
 torchax>=0.0.11
 wandb>=0.24.1
 datasets>=4.5.0
+accelerate>=1.13.0
 [cpu]
 jax>=0.4.23

{libthx-0.2.1 → libthx-0.3.0}/pyproject.toml RENAMED Viewed

@@ -1,9 +1,9 @@
 [project]
 name = "libthx"
-version = "0.2.1"
+version = "0.3.0"
 description = "Architecture experimentation and training infrastructure."
 readme = "README.md"
-requires-python = ">=3.11"
+requires-python = ">=3.11,<3.14"
 dependencies = [
     "click<=8.2.1", # mkdocs breaks otherwise...
     "flax>=0.12.2",
@@ -19,7 +19,8 @@ dependencies = [
     "tiktoken>=0.12.0",
     "torchax>=0.0.11",
     "wandb>=0.24.1",
-    "datasets>=4.5.0"
+    "datasets>=4.5.0",
+    "accelerate>=1.13.0",
 ]
@@ -224,6 +225,14 @@ indent-style = "space"
 docstring-code-format = true
 docstring-code-line-length = 20
+[tool.pytest.ini_options]
+# ``contrib`` is not installed as a package by setuptools (only
+# ``theseus*`` is — see ``[tool.setuptools.packages.find]``), but tests
+# can still import from it because uv runs everything with the repo
+# root as cwd. Pytest, however, doesn't add the rootdir to ``sys.path``
+# by default, so we do it here.
+pythonpath = ["."]
 [tool.mypy]
 plugins = ['pydantic.mypy']
 python_version = "3.12"

{libthx-0.2.1 → libthx-0.3.0}/tests/test_kv_cache.py RENAMED Viewed

@@ -12,7 +12,6 @@ from typing import Any
 import numpy as np
 import jax
 import jax.numpy as jnp
-import pytest
 from omegaconf import OmegaConf
 from theseus.config import build, configuration
@@ -123,3 +122,57 @@ class TestKVCacheGPTNeoX:
         )
         with _build_config_ctx(GPTNeoX, kwargs):
             _kv_cache_parity(GPTNeoX, kwargs)
+# Hybrid (full + linear/gated-delta attention). Exercises both the GQA KV
+# cache and the GatedDeltaNet recurrent/conv decode cache in one model.
+_QWEN35_KWARGS = dict(
+    n_layers=4,
+    n_embd=64,
+    n_head=4,
+    n_kv_head=2,
+    head_dim=16,
+    intermediate_size=128,
+    rope_theta=1e6,
+    partial_rotary_factor=0.25,
+    rms_norm_eps=1e-6,
+    block_size=32,
+    vocab_size=128,
+    dropout=0.0,
+    attn_dropout=0.0,
+    bias=False,
+    attention_bias=False,
+    layer_types=[
+        "linear_attention",
+        "full_attention",
+        "linear_attention",
+        "full_attention",
+    ],
+    linear_num_value_heads=4,
+    linear_num_key_heads=2,
+    linear_key_head_dim=16,
+    linear_value_head_dim=16,
+    linear_conv_kernel_dim=4,
+)
+class TestKVCacheQwen35:
+    def test_qwen_3_5_dense(self):
+        from theseus.model.models.contrib.qwen_3_5 import Qwen3_5
+        kwargs = dict(_QWEN35_KWARGS)
+        with _build_config_ctx(Qwen3_5, kwargs):
+            _kv_cache_parity(Qwen3_5, kwargs, atol=2e-3)
+    def test_qwen_3_5_moe(self):
+        from theseus.model.models.contrib.qwen_3_5_moe import Qwen3_5MoE
+        kwargs = dict(
+            _QWEN35_KWARGS,
+            num_experts=4,
+            num_experts_per_tok=2,
+            moe_intermediate_size=32,
+            shared_expert_intermediate_size=32,
+        )
+        with _build_config_ctx(Qwen3_5MoE, kwargs):
+            _kv_cache_parity(Qwen3_5MoE, kwargs, atol=2e-3)

libthx 0.2.1__tar.gz → 0.3.0__tar.gz

libthx 0.2.1.tar.gz → 0.3.0.tar.gz