d9d 0.1.0__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {d9d-0.1.0 → d9d-0.1.1}/PKG-INFO +21 -1
- {d9d-0.1.0 → d9d-0.1.1}/README.md +20 -0
- {d9d-0.1.0 → d9d-0.1.1}/pyproject.toml +29 -4
- {d9d-0.1.0 → d9d-0.1.1}/d9d/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/autograd/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/autograd/grad_context.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/dist_context/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/dist_context/configured.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/dist_context/device_mesh_domains.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/dist_context/log.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/dist_context/params.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/dist_ops/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/dist_ops/object.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/dist_ops/tensor.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/protocol/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/protocol/training.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/sharding/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/sharding/auto_spec.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/sharding/shard.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/sharding/spec.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/sharding/unshard.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/types/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/types/data.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/core/types/pytree.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/dataset/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/dataset/buffer_sorted.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/dataset/padding.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/dataset/sharded.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/determinism/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/determinism/seed.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/grad_norm/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/grad_norm/group.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/grad_norm/norm.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/grad_sync/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/grad_sync/bucket.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/grad_sync/placement_helper.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/grad_sync/synchronizer.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/pipeline_state/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/pipeline_state/api.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/pipeline_state/handler.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/pipeline_state/storage.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/profiling/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/profiling/profile.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/state/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/internals/state/main_process.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/cce/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/cce/cce.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/cce/main.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/general/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/general/get_int_dtype.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/gmm/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/gmm/function.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/moe/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/moe/indices_to_multihot.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/moe/permute_with_probs.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/stochastic/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/stochastic/adamw_step.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/stochastic/copy.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/stochastic/ops/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/stochastic/ops/round.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/swiglu/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/swiglu/function.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/kernel/swiglu/op.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/auto/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/auto/auto_lr_scheduler.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/auto/auto_optimizer.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/batch_maths.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/checkpointer.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/data_loader_factory.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/garbage_collector.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/gradient_clipper.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/gradient_manager.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/job_logger.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/job_profiler.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/loss_computer.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/model_stage_exporter.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/model_stage_factory.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/optimizer_factory.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/stepper.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/timeout_manager.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/component/train_task_operator.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/config/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/config/config.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/config/types.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/control/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/control/dataset_provider.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/control/lr_scheduler_provider.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/control/model_provider.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/control/optimizer_provider.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/control/task.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/run/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/run/train.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/loop/state.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/lr_scheduler/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/lr_scheduler/piecewise/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/lr_scheduler/piecewise/builder.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/lr_scheduler/piecewise/config.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/lr_scheduler/piecewise/curves.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/lr_scheduler/piecewise/engine.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/lr_scheduler/visualizer.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/metric/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/metric/abc.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/metric/impl/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/metric/impl/compose.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/metric/impl/mean.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/io/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/io/dto.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/io/module_reader.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/io/module_writer.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/io/reader.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/io/writer.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/abc.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/adapters/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/adapters/mapper.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/adapters/module.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/compose/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/compose/helper.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/compose/parallel.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/compose/sequential.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/compose/shard.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/leaf/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/leaf/dtensor.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/leaf/identity.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/leaf/rename.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/leaf/select_child.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/model_state/mapper/leaf/stack.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/base/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/base/late_init.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/attention/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/attention/grouped_query.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/attention/sdpa/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/attention/sdpa/flash.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/embedding/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/embedding/shard_token_embedding.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/ffn/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/ffn/swiglu.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/head/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/head/language_modelling.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/hidden_states_aggregator/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/hidden_states_aggregator/base.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/hidden_states_aggregator/factory.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/hidden_states_aggregator/mean.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/hidden_states_aggregator/noop.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/moe/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/moe/communications/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/moe/communications/base.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/moe/communications/deepep.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/moe/communications/naive.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/moe/grouped_experts.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/moe/grouped_linear.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/moe/layer.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/moe/router.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/positional/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/block/positional/rope.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/model/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/model/qwen3_moe/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/model/qwen3_moe/decoder_layer.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/model/qwen3_moe/model.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/model/qwen3_moe/params.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/parallelism/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/parallelism/api/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/parallelism/api/expert_parallel.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/parallelism/api/fully_sharded.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/parallelism/api/hybrid_sharded.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/parallelism/api/replicate_parallel.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/parallelism/model/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/parallelism/model/qwen3_moe.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/parallelism/style/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/parallelism/style/shard_experts.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/module/parallelism/style/to_local.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/optim/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/optim/stochastic/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/optim/stochastic/adamw.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/peft/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/peft/all/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/peft/all/config.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/peft/all/method.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/peft/applicator.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/peft/base.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/peft/full_tune/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/peft/full_tune/config.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/peft/full_tune/method.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/peft/lora/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/peft/lora/config.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/peft/lora/layer.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/peft/lora/method.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/api/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/api/module.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/api/schedule.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/api/sharding.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/factory/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/factory/config.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/factory/factory.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/factory/registry.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/component/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/component/program/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/component/program/base.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/component/program/communications.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/component/program/topology.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/component/runtime/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/component/runtime/action.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/component/runtime/communications.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/component/runtime/executor.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/component/runtime/loss.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/program/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/program/bfs.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/program/dualpipev.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/program/interleaved.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/schedule/program/zerobubblev.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/stage/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/stage/communications.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/stage/computations.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/stage/splitgrad.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/stage/stage.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/infra/stage/struct_helper.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/training/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/training/optimizer.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/pipelining/training/scheduler.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/tracker/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/tracker/base.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/tracker/factory.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/tracker/provider/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/tracker/provider/aim/__init__.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/tracker/provider/aim/config.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/tracker/provider/aim/tracker.py +0 -0
- {d9d-0.1.0 → d9d-0.1.1}/d9d/tracker/provider/null.py +0 -0
{d9d-0.1.0 → d9d-0.1.1}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: d9d
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.1
|
|
4
4
|
Summary: d9d - d[istribute]d - distributed training framework based on PyTorch that tries to be efficient yet hackable
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Maksim Afanasyev
|
|
@@ -38,12 +38,32 @@ Project-URL: Issues, https://github.com/d9d-project/d9d/issues
|
|
|
38
38
|
Project-URL: Repository, https://github.com/d9d-project/d9d
|
|
39
39
|
Description-Content-Type: text/markdown
|
|
40
40
|
|
|
41
|
+
---
|
|
42
|
+
title: Home
|
|
43
|
+
---
|
|
44
|
+
|
|
41
45
|
# The d9d Project
|
|
42
46
|
|
|
43
47
|
**d9d** is a distributed training framework built on top of PyTorch 2.0. It aims to be hackable, modular, and efficient, designed to scale from single-GPU debugging to massive clusters running 6D-Parallelism.
|
|
44
48
|
|
|
45
49
|
[LET'S START TRAINING 🚀](https://d9d-project.github.io/d9d/)
|
|
46
50
|
|
|
51
|
+
## Installation
|
|
52
|
+
|
|
53
|
+
Just use your favourite package manager:
|
|
54
|
+
```bash
|
|
55
|
+
pip install d9d
|
|
56
|
+
poetry add d9d
|
|
57
|
+
uv add d9d
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Extras
|
|
61
|
+
|
|
62
|
+
* `d9d[aim]`: [Aim](https://aimstack.io/) experiment tracker integration.
|
|
63
|
+
* `d9d[visualization]`: Plotting libraries required for some advanced visualization functionality.
|
|
64
|
+
* `d9d[moe]`: Efficient Mixture of Experts GPU kernels. You should build and install some dependencies manually before installation: [DeepEP](https://github.com/deepseek-ai/DeepEP), [grouped-gemm](https://github.com/fanshiqing/grouped_gemm/).
|
|
65
|
+
* `d9d[cce]`: Efficient Fused Cross-Entropy kernels. You should build and install some dependencies manually before installation: [Cut Cross Entropy](https://github.com/apple/ml-cross-entropy).
|
|
66
|
+
|
|
47
67
|
## Why another framework?
|
|
48
68
|
|
|
49
69
|
Distributed training frameworks such as **Megatron-LM** are monolithic in the way you run a script from the command line to train any of a set of *predefined* models, using *predefined* regimes. While powerful, these systems can be difficult to hack and integrate into novel research workflows. Their focus is often on providing a complete, end-to-end solution, which can limit flexibility for experimentally-driven research.
|
|
@@ -1,9 +1,29 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Home
|
|
3
|
+
---
|
|
4
|
+
|
|
1
5
|
# The d9d Project
|
|
2
6
|
|
|
3
7
|
**d9d** is a distributed training framework built on top of PyTorch 2.0. It aims to be hackable, modular, and efficient, designed to scale from single-GPU debugging to massive clusters running 6D-Parallelism.
|
|
4
8
|
|
|
5
9
|
[LET'S START TRAINING 🚀](https://d9d-project.github.io/d9d/)
|
|
6
10
|
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
Just use your favourite package manager:
|
|
14
|
+
```bash
|
|
15
|
+
pip install d9d
|
|
16
|
+
poetry add d9d
|
|
17
|
+
uv add d9d
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
### Extras
|
|
21
|
+
|
|
22
|
+
* `d9d[aim]`: [Aim](https://aimstack.io/) experiment tracker integration.
|
|
23
|
+
* `d9d[visualization]`: Plotting libraries required for some advanced visualization functionality.
|
|
24
|
+
* `d9d[moe]`: Efficient Mixture of Experts GPU kernels. You should build and install some dependencies manually before installation: [DeepEP](https://github.com/deepseek-ai/DeepEP), [grouped-gemm](https://github.com/fanshiqing/grouped_gemm/).
|
|
25
|
+
* `d9d[cce]`: Efficient Fused Cross-Entropy kernels. You should build and install some dependencies manually before installation: [Cut Cross Entropy](https://github.com/apple/ml-cross-entropy).
|
|
26
|
+
|
|
7
27
|
## Why another framework?
|
|
8
28
|
|
|
9
29
|
Distributed training frameworks such as **Megatron-LM** are monolithic in the way you run a script from the command line to train any of a set of *predefined* models, using *predefined* regimes. While powerful, these systems can be difficult to hack and integrate into novel research workflows. Their focus is often on providing a complete, end-to-end solution, which can limit flexibility for experimentally-driven research.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "d9d"
|
|
3
|
-
version = "0.1.0"
|
|
3
|
+
version = "0.1.1"
|
|
4
4
|
description = "d9d - d[istribute]d - distributed training framework based on PyTorch that tries to be efficient yet hackable"
|
|
5
5
|
authors = [
|
|
6
6
|
{name = "Maksim Afanasyev", email = "mr.applexz@gmail.com"}
|
|
@@ -66,9 +66,9 @@ cce = ["cut-cross-entropy"]
|
|
|
66
66
|
ruff = "^0.14.13"
|
|
67
67
|
mypy = "^1.19.1"
|
|
68
68
|
pre-commit = "^4.5.1"
|
|
69
|
+
python-semantic-release = "^10.5.3"
|
|
69
70
|
|
|
70
71
|
|
|
71
|
-
# Group containing dependencies for running tests
|
|
72
72
|
[tool.poetry.group.test.dependencies]
|
|
73
73
|
pytest = "^9.0.2"
|
|
74
74
|
coverage = "^7.13.1"
|
|
@@ -78,7 +78,6 @@ matplotlib = "^3.10.8" # for triton benchmarks
|
|
|
78
78
|
transformers = "^4.57.6" # for comparing models
|
|
79
79
|
|
|
80
80
|
|
|
81
|
-
# Group containing dependencies for building documentation website
|
|
82
81
|
[tool.poetry.group.docs.dependencies]
|
|
83
82
|
mkdocs = "^1.6.1"
|
|
84
83
|
mkdocstrings = {extras = ["python"], version = "^1.0.1"}
|
|
@@ -94,7 +93,6 @@ deep-ep = {path = "packages/deep_ep-1.2.1+9af0e0d-cp311-cp311-linux_x86_64.whl"}
|
|
|
94
93
|
nv-grouped-gemm = {path = "packages/nv_grouped_gemm-1.1.4.post8-cp311-cp311-linux_x86_64.whl"}
|
|
95
94
|
|
|
96
95
|
|
|
97
|
-
# Group containing dependencies for running example scripts
|
|
98
96
|
[tool.poetry.group.examples.dependencies]
|
|
99
97
|
datasets = "^4.5.0"
|
|
100
98
|
|
|
@@ -265,3 +263,30 @@ module = [
|
|
|
265
263
|
"d9d.module.block.moe.communications.deepep.*"
|
|
266
264
|
]
|
|
267
265
|
ignore_errors = true
|
|
266
|
+
|
|
267
|
+
[tool.semantic_release]
|
|
268
|
+
allow_zero_version = true
|
|
269
|
+
major_on_zero = true
|
|
270
|
+
version_toml = ["pyproject.toml:project.version"]
|
|
271
|
+
build_command = "pip install poetry && poetry build"
|
|
272
|
+
commit_parser = "conventional"
|
|
273
|
+
|
|
274
|
+
[tool.semantic_release.commit_parser_options]
|
|
275
|
+
minor_tags = ["feat"]
|
|
276
|
+
patch_tags = ["fix", "perf"]
|
|
277
|
+
parse_squash_commits = true
|
|
278
|
+
ignore_merge_commits = true
|
|
279
|
+
|
|
280
|
+
[tool.semantic_release.changelog]
|
|
281
|
+
exclude_commit_patterns = [
|
|
282
|
+
'''chore(?:\([^)]*?\))?: .+''',
|
|
283
|
+
'''ci(?:\([^)]*?\))?: .+''',
|
|
284
|
+
'''refactor(?:\([^)]*?\))?: .+''',
|
|
285
|
+
'''style(?:\([^)]*?\))?: .+''',
|
|
286
|
+
'''test(?:\([^)]*?\))?: .+''',
|
|
287
|
+
'''build\((?!deps\): .+)''',
|
|
288
|
+
'''Initial [Cc]ommit.*''',
|
|
289
|
+
]
|
|
290
|
+
|
|
291
|
+
[tool.semantic_release.remote]
|
|
292
|
+
type = "github"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|