libthx 0.1.8__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {libthx-0.1.8 → libthx-0.2.0}/PKG-INFO +1 -1
- {libthx-0.1.8 → libthx-0.2.0}/libthx.egg-info/PKG-INFO +1 -1
- {libthx-0.1.8 → libthx-0.2.0}/libthx.egg-info/SOURCES.txt +33 -0
- {libthx-0.1.8 → libthx-0.2.0}/pyproject.toml +2 -2
- libthx-0.2.0/tests/test_contrastive_roundtrip.py +143 -0
- libthx-0.2.0/tests/test_datasets.py +78 -0
- libthx-0.2.0/tests/test_eval_padding.py +184 -0
- libthx-0.2.0/tests/test_gpu_availability.py +71 -0
- libthx-0.2.0/tests/test_hardware_dispatch.py +233 -0
- libthx-0.2.0/tests/test_kv_cache.py +125 -0
- libthx-0.2.0/tests/test_lora.py +513 -0
- libthx-0.2.0/tests/test_mamba.py +307 -0
- libthx-0.2.0/tests/test_registries.py +65 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/base/chip.py +5 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/base/hardware.py +20 -18
- {libthx-0.1.8 → libthx-0.2.0}/theseus/base/job.py +45 -2
- {libthx-0.1.8 → libthx-0.2.0}/theseus/base/topology.py +8 -3
- {libthx-0.1.8 → libthx-0.2.0}/theseus/cli.py +266 -27
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/__init__.py +2 -0
- libthx-0.2.0/theseus/data/datasets/dictlearn.py +188 -0
- libthx-0.2.0/theseus/data/datasets/pile_injected.py +97 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/tokenizer.py +30 -1
- {libthx-0.1.8 → libthx-0.2.0}/theseus/dispatch/bootstrap.py +56 -1
- {libthx-0.1.8 → libthx-0.2.0}/theseus/dispatch/config.py +25 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/dispatch/dispatch.py +60 -5
- {libthx-0.1.8 → libthx-0.2.0}/theseus/dispatch/mailbox/mailbox.py +8 -2
- {libthx-0.1.8 → libthx-0.2.0}/theseus/dispatch/slurm.py +10 -8
- {libthx-0.1.8 → libthx-0.2.0}/theseus/dispatch/solve.py +76 -39
- {libthx-0.1.8 → libthx-0.2.0}/theseus/dispatch/ssh.py +18 -7
- {libthx-0.1.8 → libthx-0.2.0}/theseus/dispatch/volcano.py +14 -4
- libthx-0.2.0/theseus/evaluation/base.py +1308 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/__init__.py +5 -0
- libthx-0.2.0/theseus/evaluation/datasets/alpaca.py +64 -0
- libthx-0.2.0/theseus/evaluation/datasets/arithmetic.py +120 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/ccaligned.py +20 -6
- libthx-0.2.0/theseus/evaluation/datasets/dictlearn.py +83 -0
- libthx-0.2.0/theseus/evaluation/datasets/longbench.py +32 -0
- libthx-0.2.0/theseus/evaluation/datasets/longhealth.py +89 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/mtob.py +0 -5
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/pes2o.py +2 -2
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/pg19.py +2 -2
- libthx-0.2.0/theseus/evaluation/datasets/pg19_lengthgen.py +61 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/pile.py +2 -2
- libthx-0.2.0/theseus/evaluation/datasets/pile_injected.py +33 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/tinystories.py +2 -2
- {libthx-0.1.8 → libthx-0.2.0}/theseus/experiments/__init__.py +2 -0
- libthx-0.2.0/theseus/experiments/continual/__init__.py +17 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/experiments/continual/abcd.py +161 -113
- libthx-0.2.0/theseus/experiments/continual/benchmark.py +353 -0
- libthx-0.2.0/theseus/experiments/models/moe.py +15 -0
- libthx-0.2.0/theseus/experiments/mok/__init__.py +1 -0
- libthx-0.2.0/theseus/experiments/mok/reward.py +96 -0
- libthx-0.2.0/theseus/experiments/mok/smoke.py +232 -0
- libthx-0.2.0/theseus/inference/base.py +851 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/job.py +164 -69
- {libthx-0.1.8 → libthx-0.2.0}/theseus/mock.py +18 -6
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/attention/base.py +38 -6
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/attention/forking.py +16 -1
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/attention/grouped.py +99 -16
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/attention/scratching.py +11 -1
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/axes.py +2 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/block/__init__.py +4 -0
- libthx-0.2.0/theseus/model/block/mamba.py +309 -0
- libthx-0.2.0/theseus/model/block/moe.py +48 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/block/qwen.py +2 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/block/scratching.py +5 -5
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/models/__init__.py +6 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/models/contrib/gpt_neox.py +4 -1
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/models/contrib/llama.py +4 -1
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/models/contrib/marin.py +4 -1
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/models/contrib/qwen.py +13 -2
- libthx-0.2.0/theseus/model/models/hybrid.py +105 -0
- libthx-0.2.0/theseus/model/models/mamba.py +89 -0
- libthx-0.2.0/theseus/model/models/moe.py +45 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/models/scratchbubbles.py +59 -30
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/models/thoughtbubbles.py +9 -2
- libthx-0.2.0/theseus/model/moe/__init__.py +4 -0
- libthx-0.2.0/theseus/model/moe/base.py +201 -0
- libthx-0.2.0/theseus/model/moe/bias_balanced.py +51 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/plot.py +19 -16
- {libthx-0.1.8 → libthx-0.2.0}/theseus/quick.py +27 -2
- {libthx-0.1.8 → libthx-0.2.0}/theseus/registry.py +27 -1
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/base.py +35 -32
- libthx-0.2.0/theseus/training/grpo.py +95 -0
- libthx-0.2.0/theseus/training/lora.py +577 -0
- libthx-0.2.0/theseus/training/ppo.py +654 -0
- libthx-0.2.0/theseus/training/schedules/__init__.py +19 -0
- libthx-0.2.0/theseus/training/schedules/cosine_rewarm.py +92 -0
- libthx-0.1.8/theseus/evaluation/base.py +0 -1104
- libthx-0.1.8/theseus/evaluation/datasets/longbench.py +0 -75
- libthx-0.1.8/theseus/evaluation/datasets/longhealth.py +0 -130
- libthx-0.1.8/theseus/experiments/continual/__init__.py +0 -5
- libthx-0.1.8/theseus/inference/base.py +0 -567
- libthx-0.1.8/theseus/training/schedules/__init__.py +0 -6
- {libthx-0.1.8 → libthx-0.2.0}/LICENSE +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/README.md +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/libthx.egg-info/dependency_links.txt +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/libthx.egg-info/entry_points.txt +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/libthx.egg-info/requires.txt +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/libthx.egg-info/top_level.txt +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/setup.cfg +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/base/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/base/axis.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/config.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/alpaca.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/bbq.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/ccaligned.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/cfq.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/clutrr.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/dataset.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/fever.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/fineweb.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/harmfulqa.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/longbench.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/longhealth.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/mmlu.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/mnli.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/mtob.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/pes2o.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/pg19.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/pile.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/pile_detoxify.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/qqp.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/redcodegen/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/redcodegen/hardening.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/siqa.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/squad.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/sst2.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/datasets/winogrande.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/data/tokenize.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/dispatch/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/dispatch/mailbox/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/dispatch/mailbox/sidecar.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/dispatch/sync.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/dispatch/tpu.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/bbq.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/blimp.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/cfq.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/clutrr.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/fever.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/mmlu.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/mnli.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/perplexity_evals.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/qqp.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/siqa.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/squad.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/sst2.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/datasets/winogrande.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/evaluation/huggingface.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/experiments/models/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/experiments/models/forking.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/experiments/models/gpt.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/experiments/models/gpt_neox.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/experiments/models/llama.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/experiments/models/qwen.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/experiments/redcodegen/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/experiments/redcodegen/hardening.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/inference/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/inference/huggingface.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/activations/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/activations/swiglu.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/attention/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/attention/rope.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/block/block.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/block/forking.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/block/gpt_neox.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/block/llama.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/huggingface.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/layers/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/layers/layernorm.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/layers/mlp.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/layers/rmsnorm.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/layers/rope.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/masks.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/models/base.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/models/contrib/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/model/module.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/backbone.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/contrastive.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/flywheel/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/flywheel/contrastive.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/flywheel/padded.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/flywheel/pmd.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/flywheel/strategy.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/huggingface.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/kl_divergence.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/optimizers/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/optimizers/adamw.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/optimizers/muon.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/schedules/wsd.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/schedules/wsds.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/training/utils.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/web/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/web/app.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/web/auth.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/web/generate_password_hash.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/web/models.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/web/routes/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/web/routes/api.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/web/routes/auth.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/web/routes/views.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/web/services/__init__.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/web/services/cache.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/web/services/checkpoints.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/web/services/logs.py +0 -0
- {libthx-0.1.8 → libthx-0.2.0}/theseus/web/services/status.py +0 -0
|
@@ -7,6 +7,15 @@ libthx.egg-info/dependency_links.txt
|
|
|
7
7
|
libthx.egg-info/entry_points.txt
|
|
8
8
|
libthx.egg-info/requires.txt
|
|
9
9
|
libthx.egg-info/top_level.txt
|
|
10
|
+
tests/test_contrastive_roundtrip.py
|
|
11
|
+
tests/test_datasets.py
|
|
12
|
+
tests/test_eval_padding.py
|
|
13
|
+
tests/test_gpu_availability.py
|
|
14
|
+
tests/test_hardware_dispatch.py
|
|
15
|
+
tests/test_kv_cache.py
|
|
16
|
+
tests/test_lora.py
|
|
17
|
+
tests/test_mamba.py
|
|
18
|
+
tests/test_registries.py
|
|
10
19
|
theseus/__init__.py
|
|
11
20
|
theseus/cli.py
|
|
12
21
|
theseus/config.py
|
|
@@ -31,6 +40,7 @@ theseus/data/datasets/ccaligned.py
|
|
|
31
40
|
theseus/data/datasets/cfq.py
|
|
32
41
|
theseus/data/datasets/clutrr.py
|
|
33
42
|
theseus/data/datasets/dataset.py
|
|
43
|
+
theseus/data/datasets/dictlearn.py
|
|
34
44
|
theseus/data/datasets/fever.py
|
|
35
45
|
theseus/data/datasets/fineweb.py
|
|
36
46
|
theseus/data/datasets/harmfulqa.py
|
|
@@ -43,6 +53,7 @@ theseus/data/datasets/pes2o.py
|
|
|
43
53
|
theseus/data/datasets/pg19.py
|
|
44
54
|
theseus/data/datasets/pile.py
|
|
45
55
|
theseus/data/datasets/pile_detoxify.py
|
|
56
|
+
theseus/data/datasets/pile_injected.py
|
|
46
57
|
theseus/data/datasets/qqp.py
|
|
47
58
|
theseus/data/datasets/siqa.py
|
|
48
59
|
theseus/data/datasets/squad.py
|
|
@@ -67,11 +78,14 @@ theseus/evaluation/__init__.py
|
|
|
67
78
|
theseus/evaluation/base.py
|
|
68
79
|
theseus/evaluation/huggingface.py
|
|
69
80
|
theseus/evaluation/datasets/__init__.py
|
|
81
|
+
theseus/evaluation/datasets/alpaca.py
|
|
82
|
+
theseus/evaluation/datasets/arithmetic.py
|
|
70
83
|
theseus/evaluation/datasets/bbq.py
|
|
71
84
|
theseus/evaluation/datasets/blimp.py
|
|
72
85
|
theseus/evaluation/datasets/ccaligned.py
|
|
73
86
|
theseus/evaluation/datasets/cfq.py
|
|
74
87
|
theseus/evaluation/datasets/clutrr.py
|
|
88
|
+
theseus/evaluation/datasets/dictlearn.py
|
|
75
89
|
theseus/evaluation/datasets/fever.py
|
|
76
90
|
theseus/evaluation/datasets/longbench.py
|
|
77
91
|
theseus/evaluation/datasets/longhealth.py
|
|
@@ -81,7 +95,9 @@ theseus/evaluation/datasets/mtob.py
|
|
|
81
95
|
theseus/evaluation/datasets/perplexity_evals.py
|
|
82
96
|
theseus/evaluation/datasets/pes2o.py
|
|
83
97
|
theseus/evaluation/datasets/pg19.py
|
|
98
|
+
theseus/evaluation/datasets/pg19_lengthgen.py
|
|
84
99
|
theseus/evaluation/datasets/pile.py
|
|
100
|
+
theseus/evaluation/datasets/pile_injected.py
|
|
85
101
|
theseus/evaluation/datasets/qqp.py
|
|
86
102
|
theseus/evaluation/datasets/siqa.py
|
|
87
103
|
theseus/evaluation/datasets/squad.py
|
|
@@ -91,12 +107,17 @@ theseus/evaluation/datasets/winogrande.py
|
|
|
91
107
|
theseus/experiments/__init__.py
|
|
92
108
|
theseus/experiments/continual/__init__.py
|
|
93
109
|
theseus/experiments/continual/abcd.py
|
|
110
|
+
theseus/experiments/continual/benchmark.py
|
|
94
111
|
theseus/experiments/models/__init__.py
|
|
95
112
|
theseus/experiments/models/forking.py
|
|
96
113
|
theseus/experiments/models/gpt.py
|
|
97
114
|
theseus/experiments/models/gpt_neox.py
|
|
98
115
|
theseus/experiments/models/llama.py
|
|
116
|
+
theseus/experiments/models/moe.py
|
|
99
117
|
theseus/experiments/models/qwen.py
|
|
118
|
+
theseus/experiments/mok/__init__.py
|
|
119
|
+
theseus/experiments/mok/reward.py
|
|
120
|
+
theseus/experiments/mok/smoke.py
|
|
100
121
|
theseus/experiments/redcodegen/__init__.py
|
|
101
122
|
theseus/experiments/redcodegen/hardening.py
|
|
102
123
|
theseus/inference/__init__.py
|
|
@@ -120,6 +141,8 @@ theseus/model/block/block.py
|
|
|
120
141
|
theseus/model/block/forking.py
|
|
121
142
|
theseus/model/block/gpt_neox.py
|
|
122
143
|
theseus/model/block/llama.py
|
|
144
|
+
theseus/model/block/mamba.py
|
|
145
|
+
theseus/model/block/moe.py
|
|
123
146
|
theseus/model/block/qwen.py
|
|
124
147
|
theseus/model/block/scratching.py
|
|
125
148
|
theseus/model/layers/__init__.py
|
|
@@ -129,6 +152,9 @@ theseus/model/layers/rmsnorm.py
|
|
|
129
152
|
theseus/model/layers/rope.py
|
|
130
153
|
theseus/model/models/__init__.py
|
|
131
154
|
theseus/model/models/base.py
|
|
155
|
+
theseus/model/models/hybrid.py
|
|
156
|
+
theseus/model/models/mamba.py
|
|
157
|
+
theseus/model/models/moe.py
|
|
132
158
|
theseus/model/models/scratchbubbles.py
|
|
133
159
|
theseus/model/models/thoughtbubbles.py
|
|
134
160
|
theseus/model/models/contrib/__init__.py
|
|
@@ -136,12 +162,18 @@ theseus/model/models/contrib/gpt_neox.py
|
|
|
136
162
|
theseus/model/models/contrib/llama.py
|
|
137
163
|
theseus/model/models/contrib/marin.py
|
|
138
164
|
theseus/model/models/contrib/qwen.py
|
|
165
|
+
theseus/model/moe/__init__.py
|
|
166
|
+
theseus/model/moe/base.py
|
|
167
|
+
theseus/model/moe/bias_balanced.py
|
|
139
168
|
theseus/training/__init__.py
|
|
140
169
|
theseus/training/backbone.py
|
|
141
170
|
theseus/training/base.py
|
|
142
171
|
theseus/training/contrastive.py
|
|
172
|
+
theseus/training/grpo.py
|
|
143
173
|
theseus/training/huggingface.py
|
|
144
174
|
theseus/training/kl_divergence.py
|
|
175
|
+
theseus/training/lora.py
|
|
176
|
+
theseus/training/ppo.py
|
|
145
177
|
theseus/training/utils.py
|
|
146
178
|
theseus/training/flywheel/__init__.py
|
|
147
179
|
theseus/training/flywheel/contrastive.py
|
|
@@ -152,6 +184,7 @@ theseus/training/optimizers/__init__.py
|
|
|
152
184
|
theseus/training/optimizers/adamw.py
|
|
153
185
|
theseus/training/optimizers/muon.py
|
|
154
186
|
theseus/training/schedules/__init__.py
|
|
187
|
+
theseus/training/schedules/cosine_rewarm.py
|
|
155
188
|
theseus/training/schedules/wsd.py
|
|
156
189
|
theseus/training/schedules/wsds.py
|
|
157
190
|
theseus/web/__init__.py
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "libthx"
|
|
3
|
-
version = "0.1.8"
|
|
3
|
+
version = "0.2.0"
|
|
4
4
|
description = "Architecture experimentation and training infrastructure."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.11"
|
|
@@ -229,7 +229,7 @@ plugins = ['pydantic.mypy']
|
|
|
229
229
|
python_version = "3.12"
|
|
230
230
|
strict = true
|
|
231
231
|
ignore_missing_imports = true
|
|
232
|
-
exclude = ["tests", "examples"]
|
|
232
|
+
exclude = ["tests", "examples", "theseus/dispatch/bootstrap.py"]
|
|
233
233
|
|
|
234
234
|
[[tool.mypy.overrides]]
|
|
235
235
|
module = "huggingface_hub.*"
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""Round-trip check for contrastive tokenization + loader + async strategy.
|
|
2
|
+
|
|
3
|
+
Builds a tiny in-memory contrastive dataset, writes memmaps in a temp dir,
|
|
4
|
+
loads via ContrastivePaddedDataset and Strategy(Async), and verifies shapes.
|
|
5
|
+
|
|
6
|
+
Migrated from scripts/test_contrastive_roundtrip.py.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import random
|
|
11
|
+
import tempfile
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import List, Tuple
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pytest
|
|
17
|
+
|
|
18
|
+
from theseus.data.tokenize import (
|
|
19
|
+
TokenizeContrastiveDatasetConfig,
|
|
20
|
+
_build_padded_arrays,
|
|
21
|
+
_encode_dataset_item,
|
|
22
|
+
)
|
|
23
|
+
from theseus.data.tokenizer import TokenizerConfig, get_tokenizer
|
|
24
|
+
from theseus.training.flywheel.contrastive import ContrastivePaddedDataset
|
|
25
|
+
from theseus.training.flywheel.strategy import DatasetStyle, Sampling, Strategy
|
|
26
|
+
from theseus.base.job import ExecutionSpec
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
PAIRS: List[Tuple[str, str]] = [
|
|
30
|
+
("the quick brown fox", "the slow red fox"),
|
|
31
|
+
("hello world", "hello mars"),
|
|
32
|
+
("good code is readable", "good code is obscure"),
|
|
33
|
+
("unit tests prevent bugs", "unit tests waste time"),
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class TinyContrastive:
|
|
38
|
+
def __init__(self, data: List[Tuple[str, str]]):
|
|
39
|
+
self.data = data
|
|
40
|
+
|
|
41
|
+
def __len__(self) -> int:
|
|
42
|
+
return len(self.data)
|
|
43
|
+
|
|
44
|
+
def __getitem__(self, idx: int) -> Tuple[str, str]:
|
|
45
|
+
return self.data[idx]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _write_contrastive_data(
|
|
49
|
+
td_path: Path, tokenizer_cfg: TokenizerConfig, label: str
|
|
50
|
+
) -> str:
|
|
51
|
+
"""Write contrastive memmaps and return dataset name."""
|
|
52
|
+
name = f"tmpcontrast_{label}"
|
|
53
|
+
out = td_path / "data" / name
|
|
54
|
+
out.mkdir(parents=True, exist_ok=True)
|
|
55
|
+
|
|
56
|
+
tokenizer = get_tokenizer(tokenizer_cfg)
|
|
57
|
+
args = TokenizeContrastiveDatasetConfig(
|
|
58
|
+
name="fever", block_size=32, pad_token=0, val_pct=0.25, seed=42,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
dataset = TinyContrastive(PAIRS)
|
|
62
|
+
indices = list(range(len(dataset)))
|
|
63
|
+
random.seed(args.seed)
|
|
64
|
+
random.shuffle(indices)
|
|
65
|
+
val_size = int(len(dataset) * args.val_pct)
|
|
66
|
+
splits = {"train": indices[val_size:], "val": indices[:val_size]}
|
|
67
|
+
shapes = {}
|
|
68
|
+
dtype = np.uint32
|
|
69
|
+
|
|
70
|
+
for split_name, split_indices in splits.items():
|
|
71
|
+
num_samples = len(split_indices)
|
|
72
|
+
pos_tokens = np.memmap(
|
|
73
|
+
out / f"{split_name}.pos.bin", dtype=dtype, mode="w+",
|
|
74
|
+
shape=(num_samples, args.block_size),
|
|
75
|
+
)
|
|
76
|
+
pos_mask = np.memmap(
|
|
77
|
+
out / f"{split_name}.pos.bin.mask", dtype=np.bool_, mode="w+",
|
|
78
|
+
shape=(num_samples, args.block_size),
|
|
79
|
+
)
|
|
80
|
+
neg_tokens = np.memmap(
|
|
81
|
+
out / f"{split_name}.neg.bin", dtype=dtype, mode="w+",
|
|
82
|
+
shape=(num_samples, args.block_size),
|
|
83
|
+
)
|
|
84
|
+
neg_mask = np.memmap(
|
|
85
|
+
out / f"{split_name}.neg.bin.mask", dtype=np.bool_, mode="w+",
|
|
86
|
+
shape=(num_samples, args.block_size),
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
for arr_idx, didx in enumerate(split_indices):
|
|
90
|
+
pos_str, neg_str = dataset[didx]
|
|
91
|
+
for s, target_tokens, target_mask in [
|
|
92
|
+
(pos_str, pos_tokens, pos_mask),
|
|
93
|
+
(neg_str, neg_tokens, neg_mask),
|
|
94
|
+
]:
|
|
95
|
+
ids, mask_list = _encode_dataset_item(s, False, tokenizer, args)
|
|
96
|
+
t, m, *_ = _build_padded_arrays(
|
|
97
|
+
ids, mask_list, args.block_size, args.pad_token, dtype
|
|
98
|
+
)
|
|
99
|
+
target_tokens[arr_idx] = t
|
|
100
|
+
target_mask[arr_idx] = m
|
|
101
|
+
|
|
102
|
+
pos_tokens.flush()
|
|
103
|
+
pos_mask.flush()
|
|
104
|
+
neg_tokens.flush()
|
|
105
|
+
neg_mask.flush()
|
|
106
|
+
shapes[split_name] = {
|
|
107
|
+
"pos": [num_samples, args.block_size],
|
|
108
|
+
"neg": [num_samples, args.block_size],
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
with open(out / "shape.json", "w") as f:
|
|
112
|
+
json.dump(shapes, f)
|
|
113
|
+
with open(out / "config.json", "w") as f:
|
|
114
|
+
json.dump({}, f)
|
|
115
|
+
|
|
116
|
+
return name
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class TestContrastiveRoundtrip:
|
|
120
|
+
def test_tiktoken_roundtrip(self):
|
|
121
|
+
with tempfile.TemporaryDirectory() as td:
|
|
122
|
+
td_path = Path(td)
|
|
123
|
+
tok_cfg = TokenizerConfig(backend="tiktoken", name="cl100k_base")
|
|
124
|
+
name = _write_contrastive_data(td_path, tok_cfg, "tiktoken")
|
|
125
|
+
|
|
126
|
+
spec = ExecutionSpec.local(root_dir=str(td_path))
|
|
127
|
+
ds = ContrastivePaddedDataset(spec, block_size=32, name=name, suffix="")
|
|
128
|
+
batch = ds.get_batch(batch_size=2, split="train")
|
|
129
|
+
|
|
130
|
+
assert batch["padding_mask_pos"].shape == (2, 32)
|
|
131
|
+
assert batch["padding_mask_neg"].shape == (2, 32)
|
|
132
|
+
|
|
133
|
+
# Async loader
|
|
134
|
+
strat = Strategy(
|
|
135
|
+
spec, block_size=32,
|
|
136
|
+
mixture=[Sampling(name=name, rate=1.0, style=DatasetStyle.CONTRASTIVE)],
|
|
137
|
+
)
|
|
138
|
+
async_loader = strat.get_async_batches(batch_size=2, split="train")
|
|
139
|
+
async_batch = async_loader.get_batch()
|
|
140
|
+
async_loader.close()
|
|
141
|
+
|
|
142
|
+
assert async_batch["padding_mask_pos"].shape == (2, 32)
|
|
143
|
+
assert async_batch["padding_mask_neg"].shape == (2, 32)
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Tests for new benchmark datasets and evaluations."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TestPileInjected:
|
|
7
|
+
"""Tests for the injected Pile dataset."""
|
|
8
|
+
|
|
9
|
+
def test_injected_texts_reproducible(self):
|
|
10
|
+
"""Same seed always produces the same injected sequences."""
|
|
11
|
+
from theseus.data.datasets.pile_injected import (
|
|
12
|
+
_generate_injected_texts,
|
|
13
|
+
INJECTED_TEXTS,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
texts_a = _generate_injected_texts(n_sequences=100, seed=42)
|
|
17
|
+
texts_b = _generate_injected_texts(n_sequences=100, seed=42)
|
|
18
|
+
assert texts_a == texts_b
|
|
19
|
+
assert texts_a == list(INJECTED_TEXTS)
|
|
20
|
+
|
|
21
|
+
def test_injected_texts_count(self):
|
|
22
|
+
from theseus.data.datasets.pile_injected import INJECTED_TEXTS
|
|
23
|
+
|
|
24
|
+
assert len(INJECTED_TEXTS) == 100
|
|
25
|
+
|
|
26
|
+
def test_injected_texts_nonempty(self):
|
|
27
|
+
from theseus.data.datasets.pile_injected import INJECTED_TEXTS
|
|
28
|
+
|
|
29
|
+
for text in INJECTED_TEXTS:
|
|
30
|
+
assert len(text) > 100 # each should be substantial
|
|
31
|
+
|
|
32
|
+
def test_injection_positions_sorted(self):
|
|
33
|
+
from theseus.data.datasets.pile_injected import INJECTION_POSITIONS
|
|
34
|
+
|
|
35
|
+
assert INJECTION_POSITIONS == sorted(INJECTION_POSITIONS)
|
|
36
|
+
assert len(INJECTION_POSITIONS) == 100
|
|
37
|
+
|
|
38
|
+
def test_different_seeds_different_texts(self):
|
|
39
|
+
from theseus.data.datasets.pile_injected import _generate_injected_texts
|
|
40
|
+
|
|
41
|
+
texts_a = _generate_injected_texts(n_sequences=10, seed=42)
|
|
42
|
+
texts_b = _generate_injected_texts(n_sequences=10, seed=99)
|
|
43
|
+
assert texts_a != texts_b
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class TestPileInjectedEval:
|
|
47
|
+
"""Tests for the injected sequence memorization evaluation."""
|
|
48
|
+
|
|
49
|
+
def test_eval_uses_same_texts(self):
|
|
50
|
+
from theseus.data.datasets.pile_injected import INJECTED_TEXTS
|
|
51
|
+
from theseus.evaluation.datasets.pile_injected import PileInjectedEval
|
|
52
|
+
|
|
53
|
+
ev = PileInjectedEval()
|
|
54
|
+
assert len(ev) == len(INJECTED_TEXTS)
|
|
55
|
+
for i in range(len(ev)):
|
|
56
|
+
assert ev.get(i) == INJECTED_TEXTS[i]
|
|
57
|
+
|
|
58
|
+
def test_eval_name(self):
|
|
59
|
+
from theseus.evaluation.datasets.pile_injected import PileInjectedEval
|
|
60
|
+
|
|
61
|
+
ev = PileInjectedEval()
|
|
62
|
+
assert ev.name == "pile_injected_ppl"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class TestPG19LengthGen:
|
|
66
|
+
"""Tests for variable-length PG-19 evaluations."""
|
|
67
|
+
|
|
68
|
+
def test_eval_names_registered(self):
|
|
69
|
+
from theseus.registry import EVALUATIONS
|
|
70
|
+
|
|
71
|
+
for name in [
|
|
72
|
+
"pg19_2k_ppl",
|
|
73
|
+
"pg19_4k_ppl",
|
|
74
|
+
"pg19_8k_ppl",
|
|
75
|
+
"pg19_16k_ppl",
|
|
76
|
+
"pg19_32k_ppl",
|
|
77
|
+
]:
|
|
78
|
+
assert name in EVALUATIONS, f"{name} not registered"
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""Evaluation padding tests — verifies evaluations work when dataset
|
|
2
|
+
size is not a multiple of the batch unit.
|
|
3
|
+
|
|
4
|
+
Migrated from scripts/test_eval_padding.py.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import flax
|
|
8
|
+
import jax
|
|
9
|
+
import jax.numpy as jnp
|
|
10
|
+
import numpy as np
|
|
11
|
+
from jax.sharding import Mesh, NamedSharding, PartitionSpec as P
|
|
12
|
+
|
|
13
|
+
from theseus.base import Axis
|
|
14
|
+
from theseus.evaluation import (
|
|
15
|
+
EncodingEvaluation,
|
|
16
|
+
PerplexityComparisonEvaluation,
|
|
17
|
+
PerplexityEvaluation,
|
|
18
|
+
RolloutEvaluation,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ToyTokenizer:
|
|
23
|
+
pad_token = 0
|
|
24
|
+
|
|
25
|
+
def encode(self, text: str) -> list[int]:
|
|
26
|
+
return [ord(ch) + 1 for ch in text]
|
|
27
|
+
|
|
28
|
+
def encode_batch(
|
|
29
|
+
self, text_list: list[str], allowed_special: str | None = None
|
|
30
|
+
) -> list[list[int]]:
|
|
31
|
+
del allowed_special
|
|
32
|
+
return [self.encode(text) for text in text_list]
|
|
33
|
+
|
|
34
|
+
def decode_batch(self, seqs: list[list[int]]) -> list[str]:
|
|
35
|
+
return ["".join(chr(tok - 1) for tok in seq if tok > 0) for seq in seqs]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@flax.struct.dataclass
|
|
39
|
+
class DummyState:
|
|
40
|
+
params: jax.Array
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class DummyInference:
    """Model-free inference stub used to drive the toy evaluations.

    It carries a one-device mesh, a scalar dummy state with matching sharding,
    and deterministic ``_autoregress`` / ``forward`` implementations:
    rollouts append a fixed token, and ``forward`` produces logits peaked on
    the token that actually follows each position in the input.
    """

    def __init__(self, batch_unit: int = 8, block_size: int = 8, rollout_token: str = "!"):
        """Set up the mesh, dummy state, PRNG key and toy vocabulary.

        Args:
            batch_unit: Stored as ``per_device_batch_size``.
            block_size: Stored as ``block_size``.
            rollout_token: Single character whose token id (``ord + 1``) is
                emitted for every generated position in ``_autoregress``.
        """
        self.replicas = 1
        self.local_replicas = 1
        self.per_device_batch_size = batch_unit
        self.block_size = block_size
        # Build the mesh from exactly one device. Reshaping the complete
        # ``jax.devices()`` list to ``(1,)`` raises ValueError on hosts that
        # expose more than one device.
        self.mesh = Mesh(np.array(jax.devices()[:1]), (Axis.BATCH,))
        scalar_sharding = NamedSharding(self.mesh, P())
        self.state = DummyState(
            params=jax.device_put(jnp.array(0, dtype=jnp.int32), scalar_sharding)
        )
        self.state_sharding = DummyState(params=scalar_sharding)
        self.key = jax.random.PRNGKey(0)
        self.vocab_size = 256
        self.rollout_token_id = ord(rollout_token) + 1

    @staticmethod
    def pad(seqs: list[list[int]], pad_token: int = 0) -> tuple[jax.Array, jax.Array]:
        """Left-pad ``seqs`` to a common length.

        Returns:
            ``(tokens, mask)``: an int32 array of the padded sequences and a
            boolean array that is True on real tokens and False on padding.

        Raises:
            ValueError: If ``seqs`` is empty (``max`` of an empty sequence).
        """
        max_len = max(len(seq) for seq in seqs)
        padded = [([pad_token] * (max_len - len(seq))) + seq for seq in seqs]
        masks = [([False] * (max_len - len(seq))) + ([True] * len(seq)) for seq in seqs]
        return jnp.array(padded, dtype=jnp.int32), jnp.array(masks, dtype=jnp.bool_)

    def _autoregress(self, state, key, input, input_mask, num_tokens, temperature, top_p):
        """Extend ``input`` along its last axis to ``num_tokens`` columns.

        Missing columns are filled with ``rollout_token_id``; when ``input`` is
        already at least ``num_tokens`` wide it is truncated instead. All other
        arguments are accepted and ignored.
        """
        del state, key, input_mask, temperature, top_p
        extra = num_tokens - input.shape[-1]
        if extra <= 0:
            return input[:, :num_tokens]
        generated = jnp.full((input.shape[0], extra), self.rollout_token_id, dtype=jnp.int32)
        return jnp.concatenate([input, generated], axis=-1)

    def forward(self, state, params, batch, key, deterministic):
        """Return ``(logits, None, None)`` with logits peaked on the next input token.

        ``batch`` is unpacked as ``(x_batch, _, mask_batch)``. For each position
        the target is the following token of ``x_batch``; the last position and
        every position where ``mask_batch`` is False target token 0. Logits are
        the one-hot target over ``vocab_size`` scaled by 20.0.
        """
        del state, params, key, deterministic
        x_batch, _, mask_batch = batch
        next_tokens = jnp.roll(x_batch, -1, axis=-1)
        # ``roll`` wraps the first token round to the end; overwrite it.
        next_tokens = next_tokens.at[:, -1].set(0)
        next_tokens = jnp.where(mask_batch, next_tokens, 0)
        logits = jax.nn.one_hot(next_tokens, self.vocab_size, dtype=jnp.float32) * 20.0
        return logits, None, None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class ToyRolloutEval(RolloutEvaluation):
    """Three-item rollout evaluation.

    Each item is a pair of strings, presumably ``(prompt, expected answer)``;
    confirm against ``RolloutEvaluation``. The second element is always ``"!"``.
    """

    def __init__(self):
        self.items = [(prompt, "!") for prompt in ("aa", "bb", "cc")]

    @property
    def name(self):
        return "toy_rollout"

    def __len__(self):
        return len(self.items)

    def get(self, indx):
        """Return the item stored at position ``indx``."""
        return self.items[indx]

    def max_new_tokens(self, inference):
        """Always 1, whatever ``inference`` is."""
        return 1

    def clean(self, y_hat):
        """Keep only the final element of ``y_hat`` (empty stays empty)."""
        return y_hat[-1:]

    def check(self, y, y_hat):
        """Return whether ``y`` equals ``y_hat``."""
        return y == y_hat
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class ToyEncodingEval(EncodingEvaluation):
    """Three-item encoding evaluation over short lowercase strings."""

    def __init__(self):
        self.items = ["abc", "def", "ghi"]

    @property
    def name(self):
        return "toy_encoding"

    def __len__(self):
        return len(self.items)

    def get(self, indx):
        """Return the string stored at position ``indx``."""
        return self.items[indx]

    def clean(self, y_hat):
        """Return ``y_hat`` unchanged."""
        return y_hat

    def check(self, x, y_hat):
        """Return whether ``y_hat`` equals ``x`` with its first element dropped."""
        expected = x[1:]
        return y_hat == expected
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class ToyPerplexityEval(PerplexityEvaluation):
    """Three-item perplexity evaluation over short lowercase strings."""

    def __init__(self):
        self.items = ["abc", "def", "ghi"]

    @property
    def name(self):
        return "toy_ppl"

    def __len__(self):
        return len(self.items)

    def get(self, indx):
        """Return the string stored at position ``indx``."""
        return self.items[indx]
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class ToyComparisonEval(PerplexityComparisonEvaluation):
    """Three-item comparison evaluation.

    Each item is ``(str, [str, str], int)``, presumably
    ``(prefix, candidate continuations, index of the correct one)``; confirm
    against ``PerplexityComparisonEvaluation``. The index is always 0.
    """

    def __init__(self):
        self.items = [
            ("a", ["bc", "zz"], 0),
            ("d", ["ef", "yy"], 0),
            ("g", ["hi", "xx"], 0),
        ]

    @property
    def name(self):
        return "toy_compare"

    def __len__(self):
        return len(self.items)

    def get(self, indx):
        """Return the item stored at position ``indx``."""
        return self.items[indx]
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class TestEvalPadding:
    """Check each evaluation's score when it has fewer items (3) than the batch unit (8)."""

    @staticmethod
    def _score(eval_cls):
        """Run a fresh ``eval_cls`` instance against the toy tokenizer and dummy inference."""
        tokenizer = ToyTokenizer()
        inference = DummyInference(batch_unit=8, block_size=8)
        evaluation = eval_cls()
        return evaluation(inference, tokenizer)

    def test_rollout_eval(self):
        assert self._score(ToyRolloutEval) == 1.0

    def test_encoding_eval(self):
        assert self._score(ToyEncodingEval) == 1.0

    def test_perplexity_eval(self):
        # This is the only score checked against a range instead of exactly 1.0.
        score = self._score(ToyPerplexityEval)
        assert 0.99 < score <= 1.0

    def test_comparison_eval(self):
        assert self._score(ToyComparisonEval) == 1.0
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Tests for _check_plain_host_availability GPU memory parsing logic.
|
|
2
|
+
|
|
3
|
+
Migrated from scripts/test_gpu_availability.py.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from unittest.mock import MagicMock, patch
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def make_run_result(stdout: str, ok: bool = True) -> MagicMock:
    """Build a mock command result exposing ``ok``, ``stdout`` and an empty ``stderr``."""
    return MagicMock(ok=ok, stdout=stdout, stderr="")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def check(nvidia_smi_output: str, configured: int = 4) -> int:
    """Run ``_check_plain_host_availability`` against canned command output.

    ``theseus.dispatch.ssh.run`` is patched to return a successful result whose
    stdout is ``nvidia_smi_output``; the host name and timeout are fixed.
    """
    from theseus.dispatch.solve import _check_plain_host_availability

    fake_result = make_run_result(nvidia_smi_output)
    with patch("theseus.dispatch.ssh.run", return_value=fake_result):
        available = _check_plain_host_availability("fake-host", configured, timeout=5.0)
    return available
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class TestGPUAvailability:
    """Expected availability counts for various process listings (4 GPUs configured).

    Each listing line has comma-separated fields; the last one, when present,
    is a memory figure.
    """

    def test_no_processes(self):
        listing = ""
        assert check(listing) == 4

    def test_xorg_only(self):
        listing = (
            "GPU-abc, 3374, /usr/lib/xorg/Xorg, 4\n"
            "GPU-def, 3374, /usr/lib/xorg/Xorg, 4\n"
        )
        assert check(listing) == 4

    def test_zero_memory(self):
        listing = "GPU-abc, 1234, some-daemon, 0\n"
        assert check(listing) == 4

    def test_na_memory(self):
        listing = "GPU-abc, 1234, some-process, N/A\n"
        assert check(listing) == 4

    def test_missing_memory_field(self):
        listing = "GPU-abc, 1234, some-process\n"
        assert check(listing) == 4

    def test_real_training_job(self):
        listing = "GPU-abc, 9999, python, 38000\n"
        assert check(listing) == 0

    def test_mixed_noise_and_real(self):
        listing = (
            "GPU-abc, 3374, /usr/lib/xorg/Xorg, 4\n"
            "GPU-def, 9999, python, 38000\n"
        )
        assert check(listing) == 0

    def test_exactly_at_threshold(self):
        # A memory figure of 100 is expected to count as busy.
        listing = "GPU-abc, 9999, python, 100\n"
        assert check(listing) == 0

    def test_just_below_threshold(self):
        # A memory figure of 99 is expected to count as idle.
        listing = "GPU-abc, 9999, some-process, 99\n"
        assert check(listing) == 4

    def test_nvidia_smi_failure(self):
        # Built by hand because ``check`` always fakes a successful command.
        from theseus.dispatch.solve import _check_plain_host_availability

        failed = make_run_result("", ok=False)
        with patch("theseus.dispatch.ssh.run", return_value=failed):
            result = _check_plain_host_availability("fake-host", 4, timeout=5.0)
        assert result == 0
|