d9d 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- d9d/__init__.py +0 -0
- d9d/core/__init__.py +0 -0
- d9d/core/autograd/__init__.py +7 -0
- d9d/core/autograd/grad_context.py +85 -0
- d9d/core/dist_context/__init__.py +19 -0
- d9d/core/dist_context/configured.py +215 -0
- d9d/core/dist_context/device_mesh_domains.py +185 -0
- d9d/core/dist_context/log.py +30 -0
- d9d/core/dist_context/params.py +113 -0
- d9d/core/dist_ops/__init__.py +16 -0
- d9d/core/dist_ops/object.py +68 -0
- d9d/core/dist_ops/tensor.py +192 -0
- d9d/core/protocol/__init__.py +8 -0
- d9d/core/protocol/training.py +38 -0
- d9d/core/sharding/__init__.py +15 -0
- d9d/core/sharding/auto_spec.py +66 -0
- d9d/core/sharding/shard.py +154 -0
- d9d/core/sharding/spec.py +28 -0
- d9d/core/sharding/unshard.py +117 -0
- d9d/core/types/__init__.py +12 -0
- d9d/core/types/data.py +14 -0
- d9d/core/types/pytree.py +26 -0
- d9d/dataset/__init__.py +17 -0
- d9d/dataset/buffer_sorted.py +143 -0
- d9d/dataset/padding.py +79 -0
- d9d/dataset/sharded.py +195 -0
- d9d/internals/__init__.py +0 -0
- d9d/internals/determinism/__init__.py +10 -0
- d9d/internals/determinism/seed.py +63 -0
- d9d/internals/grad_norm/__init__.py +8 -0
- d9d/internals/grad_norm/group.py +87 -0
- d9d/internals/grad_norm/norm.py +169 -0
- d9d/internals/grad_sync/__init__.py +14 -0
- d9d/internals/grad_sync/bucket.py +317 -0
- d9d/internals/grad_sync/placement_helper.py +23 -0
- d9d/internals/grad_sync/synchronizer.py +257 -0
- d9d/internals/pipeline_state/__init__.py +14 -0
- d9d/internals/pipeline_state/api.py +45 -0
- d9d/internals/pipeline_state/handler.py +111 -0
- d9d/internals/pipeline_state/storage.py +236 -0
- d9d/internals/profiling/__init__.py +7 -0
- d9d/internals/profiling/profile.py +112 -0
- d9d/internals/state/__init__.py +6 -0
- d9d/internals/state/main_process.py +44 -0
- d9d/kernel/__init__.py +0 -0
- d9d/kernel/cce/__init__.py +5 -0
- d9d/kernel/cce/cce.py +298 -0
- d9d/kernel/cce/main.py +282 -0
- d9d/kernel/general/__init__.py +5 -0
- d9d/kernel/general/get_int_dtype.py +7 -0
- d9d/kernel/gmm/__init__.py +5 -0
- d9d/kernel/gmm/function.py +78 -0
- d9d/kernel/moe/__init__.py +8 -0
- d9d/kernel/moe/indices_to_multihot.py +268 -0
- d9d/kernel/moe/permute_with_probs.py +1035 -0
- d9d/kernel/stochastic/__init__.py +11 -0
- d9d/kernel/stochastic/adamw_step.py +204 -0
- d9d/kernel/stochastic/copy.py +104 -0
- d9d/kernel/stochastic/ops/__init__.py +5 -0
- d9d/kernel/stochastic/ops/round.py +22 -0
- d9d/kernel/swiglu/__init__.py +5 -0
- d9d/kernel/swiglu/function.py +36 -0
- d9d/kernel/swiglu/op.py +167 -0
- d9d/loop/__init__.py +0 -0
- d9d/loop/auto/__init__.py +9 -0
- d9d/loop/auto/auto_lr_scheduler.py +46 -0
- d9d/loop/auto/auto_optimizer.py +196 -0
- d9d/loop/component/__init__.py +35 -0
- d9d/loop/component/batch_maths.py +106 -0
- d9d/loop/component/checkpointer.py +172 -0
- d9d/loop/component/data_loader_factory.py +258 -0
- d9d/loop/component/garbage_collector.py +94 -0
- d9d/loop/component/gradient_clipper.py +89 -0
- d9d/loop/component/gradient_manager.py +149 -0
- d9d/loop/component/job_logger.py +146 -0
- d9d/loop/component/job_profiler.py +62 -0
- d9d/loop/component/loss_computer.py +86 -0
- d9d/loop/component/model_stage_exporter.py +37 -0
- d9d/loop/component/model_stage_factory.py +261 -0
- d9d/loop/component/optimizer_factory.py +88 -0
- d9d/loop/component/stepper.py +52 -0
- d9d/loop/component/timeout_manager.py +54 -0
- d9d/loop/component/train_task_operator.py +152 -0
- d9d/loop/config/__init__.py +36 -0
- d9d/loop/config/config.py +225 -0
- d9d/loop/config/types.py +24 -0
- d9d/loop/control/__init__.py +61 -0
- d9d/loop/control/dataset_provider.py +58 -0
- d9d/loop/control/lr_scheduler_provider.py +47 -0
- d9d/loop/control/model_provider.py +162 -0
- d9d/loop/control/optimizer_provider.py +45 -0
- d9d/loop/control/task.py +304 -0
- d9d/loop/run/__init__.py +6 -0
- d9d/loop/run/train.py +355 -0
- d9d/loop/state.py +143 -0
- d9d/lr_scheduler/__init__.py +9 -0
- d9d/lr_scheduler/piecewise/__init__.py +18 -0
- d9d/lr_scheduler/piecewise/builder.py +152 -0
- d9d/lr_scheduler/piecewise/config.py +176 -0
- d9d/lr_scheduler/piecewise/curves.py +75 -0
- d9d/lr_scheduler/piecewise/engine.py +76 -0
- d9d/lr_scheduler/visualizer.py +74 -0
- d9d/metric/__init__.py +10 -0
- d9d/metric/abc.py +79 -0
- d9d/metric/impl/__init__.py +7 -0
- d9d/metric/impl/compose.py +54 -0
- d9d/metric/impl/mean.py +94 -0
- d9d/model_state/__init__.py +0 -0
- d9d/model_state/io/__init__.py +21 -0
- d9d/model_state/io/dto.py +30 -0
- d9d/model_state/io/module_reader.py +75 -0
- d9d/model_state/io/module_writer.py +123 -0
- d9d/model_state/io/reader.py +125 -0
- d9d/model_state/io/writer.py +309 -0
- d9d/model_state/mapper/__init__.py +10 -0
- d9d/model_state/mapper/abc.py +70 -0
- d9d/model_state/mapper/adapters/__init__.py +12 -0
- d9d/model_state/mapper/adapters/mapper.py +27 -0
- d9d/model_state/mapper/adapters/module.py +22 -0
- d9d/model_state/mapper/compose/__init__.py +17 -0
- d9d/model_state/mapper/compose/helper.py +22 -0
- d9d/model_state/mapper/compose/parallel.py +58 -0
- d9d/model_state/mapper/compose/sequential.py +131 -0
- d9d/model_state/mapper/compose/shard.py +36 -0
- d9d/model_state/mapper/leaf/__init__.py +18 -0
- d9d/model_state/mapper/leaf/dtensor.py +56 -0
- d9d/model_state/mapper/leaf/identity.py +23 -0
- d9d/model_state/mapper/leaf/rename.py +26 -0
- d9d/model_state/mapper/leaf/select_child.py +37 -0
- d9d/model_state/mapper/leaf/stack.py +29 -0
- d9d/module/__init__.py +0 -0
- d9d/module/base/__init__.py +7 -0
- d9d/module/base/late_init.py +10 -0
- d9d/module/block/__init__.py +0 -0
- d9d/module/block/attention/__init__.py +7 -0
- d9d/module/block/attention/grouped_query.py +139 -0
- d9d/module/block/attention/sdpa/__init__.py +5 -0
- d9d/module/block/attention/sdpa/flash.py +52 -0
- d9d/module/block/embedding/__init__.py +7 -0
- d9d/module/block/embedding/shard_token_embedding.py +103 -0
- d9d/module/block/ffn/__init__.py +5 -0
- d9d/module/block/ffn/swiglu.py +60 -0
- d9d/module/block/head/__init__.py +6 -0
- d9d/module/block/head/language_modelling.py +87 -0
- d9d/module/block/hidden_states_aggregator/__init__.py +12 -0
- d9d/module/block/hidden_states_aggregator/base.py +35 -0
- d9d/module/block/hidden_states_aggregator/factory.py +48 -0
- d9d/module/block/hidden_states_aggregator/mean.py +61 -0
- d9d/module/block/hidden_states_aggregator/noop.py +27 -0
- d9d/module/block/moe/__init__.py +13 -0
- d9d/module/block/moe/communications/__init__.py +11 -0
- d9d/module/block/moe/communications/base.py +58 -0
- d9d/module/block/moe/communications/deepep.py +300 -0
- d9d/module/block/moe/communications/naive.py +68 -0
- d9d/module/block/moe/grouped_experts.py +81 -0
- d9d/module/block/moe/grouped_linear.py +78 -0
- d9d/module/block/moe/layer.py +122 -0
- d9d/module/block/moe/router.py +103 -0
- d9d/module/block/positional/__init__.py +8 -0
- d9d/module/block/positional/rope.py +150 -0
- d9d/module/model/__init__.py +0 -0
- d9d/module/model/qwen3_moe/__init__.py +16 -0
- d9d/module/model/qwen3_moe/decoder_layer.py +110 -0
- d9d/module/model/qwen3_moe/model.py +373 -0
- d9d/module/model/qwen3_moe/params.py +69 -0
- d9d/module/parallelism/__init__.py +0 -0
- d9d/module/parallelism/api/__init__.py +18 -0
- d9d/module/parallelism/api/expert_parallel.py +36 -0
- d9d/module/parallelism/api/fully_sharded.py +43 -0
- d9d/module/parallelism/api/hybrid_sharded.py +49 -0
- d9d/module/parallelism/api/replicate_parallel.py +33 -0
- d9d/module/parallelism/model/__init__.py +0 -0
- d9d/module/parallelism/model/qwen3_moe.py +99 -0
- d9d/module/parallelism/style/__init__.py +7 -0
- d9d/module/parallelism/style/shard_experts.py +60 -0
- d9d/module/parallelism/style/to_local.py +86 -0
- d9d/optim/__init__.py +0 -0
- d9d/optim/stochastic/__init__.py +5 -0
- d9d/optim/stochastic/adamw.py +158 -0
- d9d/peft/__init__.py +13 -0
- d9d/peft/all/__init__.py +12 -0
- d9d/peft/all/config.py +31 -0
- d9d/peft/all/method.py +76 -0
- d9d/peft/applicator.py +47 -0
- d9d/peft/base.py +70 -0
- d9d/peft/full_tune/__init__.py +11 -0
- d9d/peft/full_tune/config.py +20 -0
- d9d/peft/full_tune/method.py +46 -0
- d9d/peft/lora/__init__.py +15 -0
- d9d/peft/lora/config.py +35 -0
- d9d/peft/lora/layer.py +177 -0
- d9d/peft/lora/method.py +132 -0
- d9d/pipelining/__init__.py +0 -0
- d9d/pipelining/api/__init__.py +19 -0
- d9d/pipelining/api/module.py +149 -0
- d9d/pipelining/api/schedule.py +50 -0
- d9d/pipelining/api/sharding.py +9 -0
- d9d/pipelining/factory/__init__.py +21 -0
- d9d/pipelining/factory/config.py +89 -0
- d9d/pipelining/factory/factory.py +114 -0
- d9d/pipelining/factory/registry.py +82 -0
- d9d/pipelining/infra/__init__.py +0 -0
- d9d/pipelining/infra/schedule/__init__.py +0 -0
- d9d/pipelining/infra/schedule/component/__init__.py +0 -0
- d9d/pipelining/infra/schedule/component/program/__init__.py +22 -0
- d9d/pipelining/infra/schedule/component/program/base.py +35 -0
- d9d/pipelining/infra/schedule/component/program/communications.py +203 -0
- d9d/pipelining/infra/schedule/component/program/topology.py +78 -0
- d9d/pipelining/infra/schedule/component/runtime/__init__.py +29 -0
- d9d/pipelining/infra/schedule/component/runtime/action.py +361 -0
- d9d/pipelining/infra/schedule/component/runtime/communications.py +101 -0
- d9d/pipelining/infra/schedule/component/runtime/executor.py +113 -0
- d9d/pipelining/infra/schedule/component/runtime/loss.py +55 -0
- d9d/pipelining/infra/schedule/program/__init__.py +15 -0
- d9d/pipelining/infra/schedule/program/bfs.py +86 -0
- d9d/pipelining/infra/schedule/program/dualpipev.py +234 -0
- d9d/pipelining/infra/schedule/program/interleaved.py +240 -0
- d9d/pipelining/infra/schedule/program/zerobubblev.py +227 -0
- d9d/pipelining/infra/stage/__init__.py +5 -0
- d9d/pipelining/infra/stage/communications.py +274 -0
- d9d/pipelining/infra/stage/computations.py +317 -0
- d9d/pipelining/infra/stage/splitgrad.py +377 -0
- d9d/pipelining/infra/stage/stage.py +321 -0
- d9d/pipelining/infra/stage/struct_helper.py +46 -0
- d9d/pipelining/training/__init__.py +7 -0
- d9d/pipelining/training/optimizer.py +41 -0
- d9d/pipelining/training/scheduler.py +34 -0
- d9d/tracker/__init__.py +14 -0
- d9d/tracker/base.py +124 -0
- d9d/tracker/factory.py +57 -0
- d9d/tracker/provider/__init__.py +0 -0
- d9d/tracker/provider/aim/__init__.py +0 -0
- d9d/tracker/provider/aim/config.py +23 -0
- d9d/tracker/provider/aim/tracker.py +114 -0
- d9d/tracker/provider/null.py +61 -0
- d9d-0.1.0.dist-info/METADATA +90 -0
- d9d-0.1.0.dist-info/RECORD +238 -0
- d9d-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
from collections.abc import Generator
|
|
2
|
+
from contextlib import contextmanager
|
|
3
|
+
from typing import Any, Self, TypedDict, cast
|
|
4
|
+
|
|
5
|
+
import torch
|
|
6
|
+
from aim import Distribution, Run
|
|
7
|
+
|
|
8
|
+
from d9d.tracker import BaseTracker, BaseTrackerRun, RunConfig
|
|
9
|
+
|
|
10
|
+
from .config import AimConfig
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AimState(TypedDict):
|
|
14
|
+
"""
|
|
15
|
+
State dictionary format for persisting Aim tracker state.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
restart_hash: str | None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class AimRun(BaseTrackerRun):
    """
    Active run implementation for Aim.

    Wraps the underlying `aim.Run` object to adhere to the d9d BaseTrackerRun interface.
    Every value is tracked at the step last given to `set_step` and with the
    run-level context last given to `set_context`, optionally extended per call.
    """

    def __init__(self, run: Run):
        """
        Args:
            run: The Aim run that receives every tracked value.
        """
        self._run = run
        self._step = 0
        self._context: dict[str, str] = {}

    def set_step(self, step: int):
        """
        Sets the step that subsequent `scalar` / `bins` calls are recorded at.

        Args:
            step: Step index forwarded to `aim.Run.track`.
        """
        self._step = step

    def set_context(self, context: dict[str, str]):
        """
        Sets the run-level context attached to every subsequently tracked value.

        Args:
            context: Context key/value pairs; replaces the previous run-level context.
        """
        # Store a copy so that later mutation of the caller's dict does not
        # silently change the context of values tracked afterwards.
        self._context = dict(context)

    def _resolve_context(self, context: dict[str, str] | None) -> dict[str, str]:
        """
        Builds the context for a single track call.

        Args:
            context: Optional per-call context; its keys override run-level keys.

        Returns:
            The run-level context, merged with the per-call context when given.
        """
        if context is None:
            return self._context
        return {**self._context, **context}

    def scalar(self, name: str, value: float, context: dict[str, str] | None = None):
        """
        Tracks a scalar value at the current step.

        Args:
            name: Name of the tracked sequence.
            value: Value to record.
            context: Optional per-call context merged over the run-level context.
        """
        self._run.track(
            name=name,
            value=value,
            context=self._resolve_context(context),
            step=self._step
        )

    def bins(self, name: str, values: torch.Tensor, context: dict[str, str] | None = None):
        """
        Tracks a histogram as an Aim `Distribution` at the current step.

        Args:
            name: Name of the tracked sequence.
            values: Tensor passed to Aim as the histogram; its first dimension
                defines the upper bound of the bin range.
            context: Optional per-call context merged over the run-level context.
        """
        # `Tensor.numpy()` raises for tensors that live on an accelerator or that
        # require grad, so detach and move to host memory first. For plain CPU
        # tensors both calls are no-ops.
        histogram = values.detach().cpu().numpy()

        self._run.track(
            name=name,
            value=Distribution(
                hist=histogram,
                bin_range=(0, values.shape[0])
            ),
            context=self._resolve_context(context),
            step=self._step
        )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class AimTracker(BaseTracker[AimConfig]):
    """
    Aim-based tracker implementation.

    Caches the run hash to allow experiment resumption from checkpoints.
    """

    def __init__(self, config: AimConfig):
        """
        Args:
            config: Aim settings used when a run is opened.
        """
        self._config = config

        self._restart_hash: str | None = None
        self._run: Run | None = None

    def load_state_dict(self, state_dict: dict[str, Any]) -> None:
        """
        Restores the run hash so that the next `open` re-uses the same Aim run.

        Args:
            state_dict: Dictionary in the `AimState` format.

        Raises:
            KeyError: If `state_dict` has no "restart_hash" entry.
        """
        state = cast(AimState, state_dict)
        self._restart_hash = state["restart_hash"]

    def state_dict(self) -> dict[str, Any]:
        """
        Returns:
            Dictionary in the `AimState` format holding the cached run hash.
        """
        return {
            "restart_hash": self._restart_hash
        }

    @contextmanager
    def open(self, properties: RunConfig) -> Generator[BaseTrackerRun, None, None]:
        """
        Opens an Aim run and yields it wrapped as a `BaseTrackerRun`.

        The run is created with the cached hash (if any), and its hash is cached
        again afterwards so that it ends up in `state_dict`.

        Args:
            properties: Name, description and hyperparameters to store on the run.

        Yields:
            The active run wrapper.
        """
        run = Run(
            run_hash=self._restart_hash,
            repo=self._config.repo,
            log_system_params=self._config.log_system_params,
            capture_terminal_logs=self._config.capture_terminal_logs,
            system_tracking_interval=self._config.system_tracking_interval
        )

        # Without try/finally an exception raised inside the caller's `with`
        # body (re-raised here at `yield`) would skip the cleanup and leave the
        # Aim run open.
        try:
            run.name = properties.name
            run.description = properties.description
            run["hparams"] = properties.hparams

            self._restart_hash = run.hash
            self._run = run

            yield AimRun(run)
        finally:
            # Close the local handle: it is always set here, unlike `self._run`.
            run.close()
            self._run = None

    @classmethod
    def from_config(cls, config: AimConfig) -> Self:
        """
        Args:
            config: Aim settings.

        Returns:
            A tracker built from `config`.
        """
        return cls(config)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from collections.abc import Generator
|
|
2
|
+
from contextlib import contextmanager
|
|
3
|
+
from typing import Any, Literal, Self
|
|
4
|
+
|
|
5
|
+
import torch
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
|
|
8
|
+
from d9d.tracker import BaseTracker, BaseTrackerRun, RunConfig
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class NullTrackerConfig(BaseModel):
    """
    Settings model selecting the Null (no-op) tracker.

    Attributes:
        provider: Discriminator field; the only accepted value is 'null'.
    """

    provider: Literal["null"] = "null"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class NullRun(BaseTrackerRun):
    """
    Tracking run that does nothing.

    Every call discards its arguments; useful for testing or when tracking is disabled.
    """

    def set_step(self, step: int):
        """Ignores the step."""
        return None

    def set_context(self, context: dict[str, str]):
        """Ignores the context."""
        return None

    def scalar(self, name: str, value: float, context: dict[str, str] | None = None):
        """Ignores the scalar value."""
        return None

    def bins(self, name: str, values: torch.Tensor, context: dict[str, str] | None = None):
        """Ignores the histogram values."""
        return None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class NullTracker(BaseTracker[NullTrackerConfig]):
    """
    Tracker whose runs are no-ops.

    Keeps no state and performs no IO.
    """

    @classmethod
    def from_config(cls, config: NullTrackerConfig) -> Self:
        """Builds the tracker; `config` carries nothing that needs to be stored."""
        return cls()

    @contextmanager
    def open(self, properties: RunConfig) -> Generator[BaseTrackerRun, None, None]:
        """Yields a fresh `NullRun`; `properties` is not used."""
        noop_run = NullRun()
        yield noop_run

    def load_state_dict(self, state_dict: dict[str, Any]) -> None:
        """Accepts and ignores any state."""
        return None

    def state_dict(self) -> dict[str, Any]:
        """Returns an empty state, as there is nothing to persist."""
        empty_state: dict[str, Any] = {}
        return empty_state
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: d9d
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: d9d - d[istribute]d - distributed training framework based on PyTorch that tries to be efficient yet hackable
|
|
5
|
+
License: Apache-2.0
|
|
6
|
+
Author: Maksim Afanasyev
|
|
7
|
+
Author-email: mr.applexz@gmail.com
|
|
8
|
+
Requires-Python: >=3.11,<3.15
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Education
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
+
Classifier: Topic :: Software Development
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Provides-Extra: aim
|
|
20
|
+
Provides-Extra: cce
|
|
21
|
+
Provides-Extra: moe
|
|
22
|
+
Provides-Extra: visualization
|
|
23
|
+
Requires-Dist: aim (>=3.0.0,<4.0.0) ; extra == "aim"
|
|
24
|
+
Requires-Dist: cut-cross-entropy (>=25.9.3) ; extra == "cce"
|
|
25
|
+
Requires-Dist: deep-ep (>=1.2.1) ; extra == "moe"
|
|
26
|
+
Requires-Dist: nv-grouped-gemm (>=1.1.4) ; extra == "moe"
|
|
27
|
+
Requires-Dist: plotly (>=6.0.0) ; extra == "visualization"
|
|
28
|
+
Requires-Dist: pydantic (>=2.0.0)
|
|
29
|
+
Requires-Dist: safetensors (>=0.7.0)
|
|
30
|
+
Requires-Dist: setuptools (>=70.0.0) ; extra == "aim"
|
|
31
|
+
Requires-Dist: torch (>=2.10.0)
|
|
32
|
+
Requires-Dist: torchdata (>=0.11.0)
|
|
33
|
+
Requires-Dist: tqdm (>=4.0.0)
|
|
34
|
+
Requires-Dist: triton (>=3.6.0)
|
|
35
|
+
Project-URL: Documentation, https://d9d-project.github.io/d9d
|
|
36
|
+
Project-URL: Homepage, https://d9d-project.github.io/d9d
|
|
37
|
+
Project-URL: Issues, https://github.com/d9d-project/d9d/issues
|
|
38
|
+
Project-URL: Repository, https://github.com/d9d-project/d9d
|
|
39
|
+
Description-Content-Type: text/markdown
|
|
40
|
+
|
|
41
|
+
# The d9d Project
|
|
42
|
+
|
|
43
|
+
**d9d** is a distributed training framework built on top of PyTorch 2.0. It aims to be hackable, modular, and efficient, designed to scale from single-GPU debugging to massive clusters running 6D-Parallelism.
|
|
44
|
+
|
|
45
|
+
[LET'S START TRAINING 🚀](https://d9d-project.github.io/d9d/)
|
|
46
|
+
|
|
47
|
+
## Why another framework?
|
|
48
|
+
|
|
49
|
+
Distributed training frameworks such as **Megatron-LM** are monolithic: you run a script from the command line to train one of a set of *predefined* models using *predefined* regimes. While powerful, these systems can be difficult to hack and integrate into novel research workflows. Their focus is often on providing a complete, end-to-end solution, which can limit flexibility for experimentally-driven research.
|
|
50
|
+
|
|
51
|
+
Conversely, creating your own distributed training solution from scratch is tricky. You have to implement many low-level components (like distributed checkpoints and synchronization) that are identical across setups, and manually tackle common performance bottlenecks.
|
|
52
|
+
|
|
53
|
+
**d9d** was designed to fill the gap between monolithic frameworks and homebrew setups, providing a modular yet effective solution for distributed training.
|
|
54
|
+
|
|
55
|
+
## What d9d is and isn't
|
|
56
|
+
|
|
57
|
+
In terms of **core concept**:
|
|
58
|
+
|
|
59
|
+
* **IS** a pluggable framework for implementing distributed training regimes for your deep learning models.
|
|
60
|
+
* **IS** built on clear interfaces and building blocks that may be composed and implemented in your own way.
|
|
61
|
+
* **IS NOT** an all-in-one CLI platform for setting up pre-training and post-training like **torchtitan**, **Megatron-LM**, or **torchforge**.
|
|
62
|
+
|
|
63
|
+
In terms of **codebase & engineering**:
|
|
64
|
+
|
|
65
|
+
* **IS** built on a **strong engineering foundation**: We enforce strict type-checking and rigorous linting to catch errors before execution.
|
|
66
|
+
* **IS** reliable: The framework is backed by a suite of **over 450 tests**, covering unit logic, integration flows, and End-to-End distributed scenarios.
|
|
67
|
+
* **IS** eager to use performance hacks (like **DeepEp** or custom kernels) if they improve MFU, even if they aren't PyTorch-native.
|
|
68
|
+
* **IS NOT** for legacy setups: We do not maintain backward compatibility with older PyTorch versions or hardware. We prioritize simplicity and modern APIs (like `DTensor`).
|
|
69
|
+
|
|
70
|
+
## Key Philosophies
|
|
71
|
+
|
|
72
|
+
To achieve the balance between hackability and performance, d9d adheres to specific design principles:
|
|
73
|
+
|
|
74
|
+
* **Composition over Monoliths**: We avoid "God Classes" like `DistributedDataParallel` or `ParallelDims` that assume ownership of the entire execution loop. Instead, we provide composable and extendable APIs. For instance, specific horizontal parallelism strategies for specific layers (`parallelize_replicate`, `parallelize_expert_parallel`, ...).
|
|
75
|
+
* **White-Box Modelling**: We encourage standard PyTorch code. Models are not wrapped in obscure metadata specifications; they are standard `nn.Module`s that implement lightweight protocols.
|
|
76
|
+
* **Pragmatic Efficiency**: While we prefer native PyTorch, we are eager to integrate non-native solutions if they improve MFU. For example, we implement MoE using **DeepEp** communications, reindexing kernels from **Megatron-LM**, and efficient grouped-GEMM implementations.
|
|
77
|
+
* **Graph-Based State Management**: Our IO system treats model checkpoints as directed acyclic graphs. This allows you to transform architectures (e.g., merging `q`, `k`, `v` into `qkv`) on-the-fly while streaming from disk, without massive memory overhead.
|
|
78
|
+
* **DTensors**: We mandate that distributed parameters be represented as `torch.distributed.tensor.DTensor`. This simplifies checkpointing by making them topology-aware automatically. We leverage modern PyTorch 2.0 APIs (`DeviceMesh`) as much as possible.
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Examples
|
|
83
|
+
|
|
84
|
+
### Qwen3-MoE Pretraining
|
|
85
|
+
An example showing causal LM pretraining for the Qwen3-MoE model.
|
|
86
|
+
|
|
87
|
+
WIP: MoE load balancing is currently a work in progress.
|
|
88
|
+
|
|
89
|
+
[Link](https://github.com/d9d-project/d9d/blob/main/example/qwen3_moe/pretrain.py).
|
|
90
|
+
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
d9d/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
d9d/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
d9d/core/autograd/__init__.py,sha256=uQrNtaXUMxXYyt05j4vRtiI5_pOUg0l4TXDXG3BeXyA,167
|
|
4
|
+
d9d/core/autograd/grad_context.py,sha256=nWN3BvM-GEFO4CZ8pEH5pjdYkrymOkQaNQXQF7laRPc,2891
|
|
5
|
+
d9d/core/dist_context/__init__.py,sha256=HqGt6_gplSNYnohGZl2GEmDO7fKloJVNiKBC9GXIQdM,498
|
|
6
|
+
d9d/core/dist_context/configured.py,sha256=wqVRzpOibMSYKULNLU4HynCoLWhRU2z3nNhTNZ3Y3BI,7127
|
|
7
|
+
d9d/core/dist_context/device_mesh_domains.py,sha256=pI7gW2e-tQgx0TcZOAT4aFBYiiRph6VgF03zJL6WlyE,4854
|
|
8
|
+
d9d/core/dist_context/log.py,sha256=mG-3gkZBleBTUJiOlUGDfXaSFRf5q5fGK97HybWe_E8,900
|
|
9
|
+
d9d/core/dist_context/params.py,sha256=SOgu5T5Q1PNSdM_06U-wuRHwSMxIT8_6ua3t8dTmkeg,3645
|
|
10
|
+
d9d/core/dist_ops/__init__.py,sha256=iwatM6f60NWU8oAYoHcFWvBc7HwTfsQpaaRFg9GwIDc,397
|
|
11
|
+
d9d/core/dist_ops/object.py,sha256=cKAL2M5aUeCYggYqeari-FlsMEYvZvDszhXdLTHTDlo,2006
|
|
12
|
+
d9d/core/dist_ops/tensor.py,sha256=6zqHakdQPDXUbjOQ0tu2rLvM5fsf2RP8503tQzhfciU,6165
|
|
13
|
+
d9d/core/protocol/__init__.py,sha256=amTi465uKy2kEEQfYxrfIQ3METESZluwDoBBBi2BjYE,203
|
|
14
|
+
d9d/core/protocol/training.py,sha256=i3DHGy9P9G9X8UPg45zLX-_eOrEI5Y3DjQ4A2Wix5Ec,961
|
|
15
|
+
d9d/core/sharding/__init__.py,sha256=DYkdFk8LVIssTZr-DdnlXUmz4Xd3OpsoqFLzqsFdsRg,385
|
|
16
|
+
d9d/core/sharding/auto_spec.py,sha256=M8fIm_8zqtGsAsPk8UpPZaUHdhLipjHDvFj6L7zTlLA,2283
|
|
17
|
+
d9d/core/sharding/shard.py,sha256=UhcaCgmJNwb8vhRvKi3mwJ-22zwtB4E1o0qPNn69IgE,5243
|
|
18
|
+
d9d/core/sharding/spec.py,sha256=4sA70POEdWzekBzOP0DwdUbfJa6HBr0VrLVqOTpjUWM,668
|
|
19
|
+
d9d/core/sharding/unshard.py,sha256=T1OQRZ8BwCSTf0Dw4_-N79IDDD6Fwii_-idjpurg7hY,3696
|
|
20
|
+
d9d/core/types/__init__.py,sha256=f2fX48bowRCELUEMLICLwLwi-sl_B5f9VQLfpVJCdIY,223
|
|
21
|
+
d9d/core/types/data.py,sha256=IyZtwLSlofHPzc_F-BrCQVgpBISh6wbInuCMVW92IXk,473
|
|
22
|
+
d9d/core/types/pytree.py,sha256=L93BReqv7fiegPVRzZQd63FYv986rJdMbk-bYFv2bYA,842
|
|
23
|
+
d9d/dataset/__init__.py,sha256=UhxpUpSM5gGtXaVSZ1Y5m8eEO61E5ebcYx-hF9z8T7Q,509
|
|
24
|
+
d9d/dataset/buffer_sorted.py,sha256=oiCnoKfhofo-RQszEX9UZGNEi-E9Gq-oQUbpD5-qYe8,4996
|
|
25
|
+
d9d/dataset/padding.py,sha256=MSG7tkVHjAJdUAR1BKkW_s-14mBJ-g-0GZyf-8MA0PE,2333
|
|
26
|
+
d9d/dataset/sharded.py,sha256=D699wxy0TsGsuIKwiRK4fpuJYn5CJoSddyXREdUFIYs,6923
|
|
27
|
+
d9d/internals/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
|
+
d9d/internals/determinism/__init__.py,sha256=702y3p8NFHExaCQ962JImMLKPbvEZpV3nE4MYfEOWIs,150
|
|
29
|
+
d9d/internals/determinism/seed.py,sha256=daJ85obD8bw580zCeKGqXCRZ0MesunZUTX41yD2ljGU,2271
|
|
30
|
+
d9d/internals/grad_norm/__init__.py,sha256=rS8lXo3vOLxLUY-rujevcGEQHl4gLKLfAse2gGwyoYY,217
|
|
31
|
+
d9d/internals/grad_norm/group.py,sha256=tWybTJTGYt-c_9by3wdfj6SwoTFbRC6G2U2OotFo2nI,2802
|
|
32
|
+
d9d/internals/grad_norm/norm.py,sha256=c_rQNfbErswYhaaEuFeTd-4rW20w2PxcolC4TL2kYaw,5275
|
|
33
|
+
d9d/internals/grad_sync/__init__.py,sha256=tllIQpOxQcz3bRbBFgG5FrHDzyGXWwQ-IPLKYW7cIZU,326
|
|
34
|
+
d9d/internals/grad_sync/bucket.py,sha256=G5asn2dErBvFYCBlhYlafiAay65EfeiEMTQu-ZZ-id0,8935
|
|
35
|
+
d9d/internals/grad_sync/placement_helper.py,sha256=-3Tdkrf745iz9vd68Iil4PQ-oP8eSGLi_XrjWjp0uYc,634
|
|
36
|
+
d9d/internals/grad_sync/synchronizer.py,sha256=-z3PHc9nPw5pHz9Ht5DYOWNQsJWss5e1I82w9Cc_8lE,8012
|
|
37
|
+
d9d/internals/pipeline_state/__init__.py,sha256=WYEWgFgp_8ycn7_T43DTGG_Kj2Iq_xo6lsMmyzS3a7g,359
|
|
38
|
+
d9d/internals/pipeline_state/api.py,sha256=JZCqSZuJPXHvodyrcMvTs_gxO39FMoAlmhWSLAOGDHI,1102
|
|
39
|
+
d9d/internals/pipeline_state/handler.py,sha256=MOUmv7Xvl0d7oo8Hg5G6Ayvhw8XNX-PCB7F1r7YZTtQ,3172
|
|
40
|
+
d9d/internals/pipeline_state/storage.py,sha256=4EScog_SKZfCHlTivFEJnXfMnxhX9rXfbtEGWStEoDs,6522
|
|
41
|
+
d9d/internals/profiling/__init__.py,sha256=8timWWLXgZyJjYACmdgi-SdUC-rpWKJqCcG_Fr1Boy8,110
|
|
42
|
+
d9d/internals/profiling/profile.py,sha256=vm7jgJRyRCzRW7w-ebSbaOtz6OU0ElOgwe9EIR9hp2s,3848
|
|
43
|
+
d9d/internals/state/__init__.py,sha256=8SOtDBRhP9k-WVKLTnmMkcgNUnSQY6z4LaBajD0mO2s,161
|
|
44
|
+
d9d/internals/state/main_process.py,sha256=I5CxCjZ7xoLVtqbFbBY3Y_1T6_7a89BSqlzy9YsDdzg,1533
|
|
45
|
+
d9d/kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
+
d9d/kernel/cce/__init__.py,sha256=ndtKganqw3kgjdYE0WldhbsLecBA9H_c2I-2DyAE1UQ,81
|
|
47
|
+
d9d/kernel/cce/cce.py,sha256=cL4Us9GEBPtjPrSMjOAeODOWVyBsTOQAwg2O6oVvyXE,9755
|
|
48
|
+
d9d/kernel/cce/main.py,sha256=LxheuDLO5gCHtwy6qJ6a2jMGJbXT9Ex26iXy_e0A7fk,9087
|
|
49
|
+
d9d/kernel/general/__init__.py,sha256=Gv-CuWwtHXwgjqhus_eSVJMiOtOnmbWi2uliCnGjr1s,76
|
|
50
|
+
d9d/kernel/general/get_int_dtype.py,sha256=IcRLpo3AiPJ7pJjkRYWaodf8X8XPNJBxEq5-6k-7FMQ,183
|
|
51
|
+
d9d/kernel/gmm/__init__.py,sha256=h9VaTX9wiYLvXUi00zk98CAc35Nx_dAwwny4vaheYeQ,51
|
|
52
|
+
d9d/kernel/gmm/function.py,sha256=rVRwowNsZYlLlsV47-7LQNJBxRZ1-Arj7PPYClWKSoQ,2623
|
|
53
|
+
d9d/kernel/moe/__init__.py,sha256=tLQ9bF4gDNR6MvVpS2No5RyTCl_b4lpRTPimkiEwbHA,237
|
|
54
|
+
d9d/kernel/moe/indices_to_multihot.py,sha256=xdUVsRHLT19R9JR2dnMcPqdjyHSUyt5312sU8pbLhyU,10138
|
|
55
|
+
d9d/kernel/moe/permute_with_probs.py,sha256=c244yZTEojDH7vbrTJhOTTmzQsiAKQM03ODTFgRSGcQ,35713
|
|
56
|
+
d9d/kernel/stochastic/__init__.py,sha256=yXJ5Cf8SJ3Iag_2_uAKUi2BU4xkxdIp-ontyWfEE87I,245
|
|
57
|
+
d9d/kernel/stochastic/adamw_step.py,sha256=dD7Z9tucQR6U203-XU_s3lEw7q1X_5qVkDXvgH8EOEs,6831
|
|
58
|
+
d9d/kernel/stochastic/copy.py,sha256=S5NNOp1Wxzi8KszhkCh_j5pU0dBE-wsRpl9KHWCuvzQ,3211
|
|
59
|
+
d9d/kernel/stochastic/ops/__init__.py,sha256=aOGiKLx82ZzW42fhTabz0rD7OSYyX0JIZkhM4jXNaW4,80
|
|
60
|
+
d9d/kernel/stochastic/ops/round.py,sha256=w511LzP9a_p5oJWjQmD4QHAVzvpLNK8xnsHbLkMgDnI,545
|
|
61
|
+
d9d/kernel/swiglu/__init__.py,sha256=bYHPadWelNxz9NVCYiDvm_igMZ-gnOLutSXQlVYbyLk,61
|
|
62
|
+
d9d/kernel/swiglu/function.py,sha256=KMpDFEIN9CUmbKT1Z3Eqy9hS_2cvdMxmuqD-dzHleBo,918
|
|
63
|
+
d9d/kernel/swiglu/op.py,sha256=r0AS6ckIl0R_0TeW6xDSJWgJqS2_TWa_ZrulwuOTxxo,4454
|
|
64
|
+
d9d/loop/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
65
|
+
d9d/loop/auto/__init__.py,sha256=zvpOu934BCSblPK7N03VHEKUzz4EoiEmDEhDpACm7D0,279
|
|
66
|
+
d9d/loop/auto/auto_lr_scheduler.py,sha256=53244OD5ooBpx6lch9SW0KDNVamLhPsCguxuVOKjpeY,1419
|
|
67
|
+
d9d/loop/auto/auto_optimizer.py,sha256=2823wmCdFydERuQKVu5jDMuKAjb0volA3-XNdE6ntZc,6035
|
|
68
|
+
d9d/loop/component/__init__.py,sha256=IgVUupHm-nPEkLx3BNcDClJrxwVqVm62Bo5g47o8QJM,1090
|
|
69
|
+
d9d/loop/component/batch_maths.py,sha256=IYWf952-8aOuoZXVKWPgK7vgPYnn0It3QpXK-tg4cho,3962
|
|
70
|
+
d9d/loop/component/checkpointer.py,sha256=3yhjMutYzeQLxkHYzMe1wpKcNpIeVAv_wtMpQjQAa5Y,5794
|
|
71
|
+
d9d/loop/component/data_loader_factory.py,sha256=XOI9_XLNmqjMkmQguPM4g_-LbhANNS73hGPE_6fCyOQ,8029
|
|
72
|
+
d9d/loop/component/garbage_collector.py,sha256=t1wHN4HvaaDfvhnBPfSsq5_7THd_OuYO8tuUA95-UGQ,2867
|
|
73
|
+
d9d/loop/component/gradient_clipper.py,sha256=0nDGELrwczlqCpHojeC4N3j7dMUhELnod2pWTY1Hv3g,3078
|
|
74
|
+
d9d/loop/component/gradient_manager.py,sha256=rtuDooke4HeoBoMXr_YqxSYecZHSzr-pAbXGmSsl10Q,4976
|
|
75
|
+
d9d/loop/component/job_logger.py,sha256=CZx9EGnILwGQ8mqTaKstjVp_n_5_UO5fQnTEWuJAdsM,5183
|
|
76
|
+
d9d/loop/component/job_profiler.py,sha256=qTK9LUAbLzuvwfkA_UyOVUMsO5s6cy61x7ZeuWQmiCQ,1835
|
|
77
|
+
d9d/loop/component/loss_computer.py,sha256=J1SBzFRjyDZ7QbE7rfMvxyzLjtod5E0NynwNJqJ-qlE,2845
|
|
78
|
+
d9d/loop/component/model_stage_exporter.py,sha256=g1f7WDmsJ6MAN_H8ymh4B3MHIURibUhO_3Mvespwfpc,1332
|
|
79
|
+
d9d/loop/component/model_stage_factory.py,sha256=e1BpmjMwfot9V1pI5drcHCmrAvMJTyb0AwXfu_2MgTM,9831
|
|
80
|
+
d9d/loop/component/optimizer_factory.py,sha256=fjqknSmlYbcxVogO2YgZht6-YhcMvfIh0AExFQ-aDdw,3636
|
|
81
|
+
d9d/loop/component/stepper.py,sha256=5IM-5uvXswfB3---Kk_Dpi2iNBjmOCWqDXGl9v263wU,1804
|
|
82
|
+
d9d/loop/component/timeout_manager.py,sha256=S7bF2iTQVkfvvhzR6wUVXuD2OCTzOMkzFt95eCs-MUE,1679
|
|
83
|
+
d9d/loop/component/train_task_operator.py,sha256=cpeF_16H6Cj5WdLYogr34NI-ko8Q2MO-BSKzsgf_H-o,5438
|
|
84
|
+
d9d/loop/config/__init__.py,sha256=jjikLZrT8zqGcq_LD02T1qd1np0O4j0zaDKI_XUF0_g,824
|
|
85
|
+
d9d/loop/config/config.py,sha256=Pzb2c1f_z66_0KFyc_L-_abeK3rxezg_KNMtCY8BXR8,7039
|
|
86
|
+
d9d/loop/config/types.py,sha256=JZ0A8-pytCV65drry17vmfWWzT9cyzQ6V4uU_ercru0,652
|
|
87
|
+
d9d/loop/control/__init__.py,sha256=x_42CRVmwh1RKfh238QalrycFY7wDa_L7pY8wFnqgI0,1758
|
|
88
|
+
d9d/loop/control/dataset_provider.py,sha256=KELIAbkVthBP78XUE3E3T3M8zUL1l0ra3Ezr6_FDInU,1605
|
|
89
|
+
d9d/loop/control/lr_scheduler_provider.py,sha256=0hxJxIhvlFl-z22rZpeWryQ79LZK8VTU6hx8-oeAQUw,1177
|
|
90
|
+
d9d/loop/control/model_provider.py,sha256=SAIFJwF8rMk4YGEAzFVTmWkIqPBjlrUxvqgK8F_jo-M,4628
|
|
91
|
+
d9d/loop/control/optimizer_provider.py,sha256=M7DL_6298avKGsjto7j4VparfXQcIHvZ2CiYfSlV7T0,1039
|
|
92
|
+
d9d/loop/control/task.py,sha256=YC3Itf_qo7WZmPrX3TVVIaoYVUsf18j56majdO6icjk,8150
|
|
93
|
+
d9d/loop/run/__init__.py,sha256=wQlVo3TayCwoHaONBsfwsAPBu3uue8ckHaM4YDqIkMc,106
|
|
94
|
+
d9d/loop/run/train.py,sha256=XbCa9r7xpVbVPb0dnD9uutPDDfdOZi1JhtD7dQwehOw,12244
|
|
95
|
+
d9d/loop/state.py,sha256=s0Iz6W8iHO9md_3QqbQouqyVz1r7vt8xvcj1AoLqtvU,5017
|
|
96
|
+
d9d/lr_scheduler/__init__.py,sha256=ZQke_m2Zl1adfFGp3wCvYQo-CO_c3bpVePrVGAERhfU,140
|
|
97
|
+
d9d/lr_scheduler/piecewise/__init__.py,sha256=8SPGeMnZSF40pPjfEjkhAd5fb4e8fYjeNdF-pRuC1lM,496
|
|
98
|
+
d9d/lr_scheduler/piecewise/builder.py,sha256=94ufa-aoL90xUIGu_W2XwkWPIkE53KHeUfNvwK61zew,4974
|
|
99
|
+
d9d/lr_scheduler/piecewise/config.py,sha256=lljITKE8gpVyzEIfONzmYbhRSXZSpgieqIFH3yxnoec,4699
|
|
100
|
+
d9d/lr_scheduler/piecewise/curves.py,sha256=LG2GzEYKJhiF9701D8lzxvtO4pzvhSObVEHMiayVCTE,1995
|
|
101
|
+
d9d/lr_scheduler/piecewise/engine.py,sha256=ZV1L4mRDxZABIPhCR8BmwGZ8qzr2z4NsNSJZl9-hcXY,2054
|
|
102
|
+
d9d/lr_scheduler/visualizer.py,sha256=CcaSOewQbM__IGlLNkzRzyvj0X9BAgCEFaWGu55aCmM,2083
|
|
103
|
+
d9d/metric/__init__.py,sha256=VTByKUezSl_xOjpt0FPsRl_zzAXTu3pBHWYrCbocPa8,115
|
|
104
|
+
d9d/metric/abc.py,sha256=Nkd6dhS-KhAf5MWacYCsjmBgqsUeEm-UgA5KPHkv2Rs,2485
|
|
105
|
+
d9d/metric/impl/__init__.py,sha256=6xHkax_7Tglg5EUHnrluo7Pa7kcBp4l4fZ-LRT7x83A,133
|
|
106
|
+
d9d/metric/impl/compose.py,sha256=oQyp3mmku-m_SXqDK5WbjF9Gv0HSsUQVfi9gmLwNuHI,1693
|
|
107
|
+
d9d/metric/impl/mean.py,sha256=Lan6eDNUK617OMk4p99pFA1rGe1f1YwL0e51MB0SoA0,2832
|
|
108
|
+
d9d/model_state/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
109
|
+
d9d/model_state/io/__init__.py,sha256=3RXRA5RLHS3hCQlvhRsy9q-KrGSNuh0r6kAppak0EA4,542
|
|
110
|
+
d9d/model_state/io/dto.py,sha256=NY_C8sU9QoeXJyRqiGYjl7W-pfUUYSPe1L_Z_dvctkE,704
|
|
111
|
+
d9d/model_state/io/module_reader.py,sha256=3-xOLPbdm20Ssm-E8quluCmL5Cwc70eWkUOYNe6bkB4,2779
|
|
112
|
+
d9d/model_state/io/module_writer.py,sha256=TG7kVb_YS90U7Hv7oHqqzYx8pp6q6ba1TTu3E5AHJK8,4373
|
|
113
|
+
d9d/model_state/io/reader.py,sha256=WACBaeVrezQUWU-xKqwIo_e08MeyEy-kr4anDTYQFMk,4546
|
|
114
|
+
d9d/model_state/io/writer.py,sha256=5TpK2EMepKbQ6hOy_stgV7j33OzafQWw6czVwCsmkyQ,11621
|
|
115
|
+
d9d/model_state/mapper/__init__.py,sha256=EVjhWQF8WLLtJp_Z7irGJomw5R0TYbA9x4_dttt0rVg,178
|
|
116
|
+
d9d/model_state/mapper/abc.py,sha256=yXI7FDblBWrcSJb-_rPWHRARJSbw2JrojQI_ENluBFg,2634
|
|
117
|
+
d9d/model_state/mapper/adapters/__init__.py,sha256=T5utmHgdzAVb-RcoJMN3U126EJb7CQX98uS5nCrnsrY,362
|
|
118
|
+
d9d/model_state/mapper/adapters/mapper.py,sha256=rqROLlPux0qzU078fvu8nJ67cooGwe0POOtAQSIzNss,1083
|
|
119
|
+
d9d/model_state/mapper/adapters/module.py,sha256=DM6VYVR97OWhfRUE1I7MaobPnF3pZErkkg-i9l7nS5c,775
|
|
120
|
+
d9d/model_state/mapper/compose/__init__.py,sha256=-i0nYcl3iUHTW7d6eErk9VMh33-GBYXVpMdYXC9wa8s,469
|
|
121
|
+
d9d/model_state/mapper/compose/helper.py,sha256=oPDJdJb5brqbWubVEnArE-UNJd572j33lqbyw3TrmlM,637
|
|
122
|
+
d9d/model_state/mapper/compose/parallel.py,sha256=2zU8fEGhOKprM6bQfjbYI0oB8FA2gC0mTwwyvhrur8Y,2234
|
|
123
|
+
d9d/model_state/mapper/compose/sequential.py,sha256=CNLFadwlvoPy0iZsI3Q4GKCq5weGkw5zRUtjqFnoNLI,5548
|
|
124
|
+
d9d/model_state/mapper/compose/shard.py,sha256=ah2ONYdpxmPBJS4OssSnAzb0Wki1YQdrHOUsgwB3UEs,1484
|
|
125
|
+
d9d/model_state/mapper/leaf/__init__.py,sha256=-S2w0jFXsSq5NiMUCO8w6PJAHO_IQpR2IAv10O7OoIQ,570
|
|
126
|
+
d9d/model_state/mapper/leaf/dtensor.py,sha256=uDYltxXprQJfE1e22V06dpVJfoxi5kBHDQpvd-Gzl0U,1861
|
|
127
|
+
d9d/model_state/mapper/leaf/identity.py,sha256=mZZAdurT42V6dy4964vCxEP4i-sXgiZfOBZOIyYKjTo,603
|
|
128
|
+
d9d/model_state/mapper/leaf/rename.py,sha256=obsPpw9Y1SOGW-DZUX2scxFC2mAs8V7ivVEh19YUihw,739
|
|
129
|
+
d9d/model_state/mapper/leaf/select_child.py,sha256=QxBft_g9WfBds_f8FTpFK6pLDMeumgBehhMDH6Kr3SQ,1161
|
|
130
|
+
d9d/model_state/mapper/leaf/stack.py,sha256=TZ3ulpahcJn3qbKErzCJOIEt65mOLooxCmqktZ7F3Qw,1017
|
|
131
|
+
d9d/module/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
132
|
+
d9d/module/base/__init__.py,sha256=vl05xyGyB-jQV_S50NvgR--jfThueRSl3vssLXRNh3U,178
|
|
133
|
+
d9d/module/base/late_init.py,sha256=-Re3QCZcJO6vzBxN7FTj5r1H0hzPX6_wyIgEud9s-eo,292
|
|
134
|
+
d9d/module/block/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
135
|
+
d9d/module/block/attention/__init__.py,sha256=hT32obgT6bRpUAJIFmDgfqtR5P9zwTS6UiiYL9v2hMk,141
|
|
136
|
+
d9d/module/block/attention/grouped_query.py,sha256=4lqZfQgLNt0GkQ2GueQ9SJTKuV76qT_skJ41coHmlls,4814
|
|
137
|
+
d9d/module/block/attention/sdpa/__init__.py,sha256=YijNzSFZedbMjGYGz0aE8AMriADadYffcjgP1oNbYzg,60
|
|
138
|
+
d9d/module/block/attention/sdpa/flash.py,sha256=0YHd0Mtd7-Hrx-VORNcdZVmMHwE_N8bkScSoxfCNlQY,1925
|
|
139
|
+
d9d/module/block/embedding/__init__.py,sha256=tNRPw0FMryV_SCRlf5YVhyTUGtqlezB1aBB0pwN0ZfM,163
|
|
140
|
+
d9d/module/block/embedding/shard_token_embedding.py,sha256=PeaW_ekbhsYxf03dzlzIoW7K8tsT5JSs4b3pNWtZ_Hc,3519
|
|
141
|
+
d9d/module/block/ffn/__init__.py,sha256=3WbxF0WP1Kp1Z5_qIVuYjmuDiNiiUNX3r3_rMstf8nQ,55
|
|
142
|
+
d9d/module/block/ffn/swiglu.py,sha256=bIAVs3mNOJo_vvCV9p9MkmLkY8UpHAqo68rkHJmYbP0,1598
|
|
143
|
+
d9d/module/block/head/__init__.py,sha256=9fhLQSXXOycz-dN2Pym-r9bzZCqsb1VTFBrS67BHMGc,147
|
|
144
|
+
d9d/module/block/head/language_modelling.py,sha256=TKOvxPIYAQqiVAzRMORI3e1zB0UNKj54hznvAblGfP4,3071
|
|
145
|
+
d9d/module/block/hidden_states_aggregator/__init__.py,sha256=oyi_9ZXn-0gkw6YQ8uzEyNOJupkuaimqbQkQUgKDHYc,305
|
|
146
|
+
d9d/module/block/hidden_states_aggregator/base.py,sha256=Ty1_UlOOV7DuIPbpcjGnzjxrMAWoEfsZQCeiySF7mIE,1261
|
|
147
|
+
d9d/module/block/hidden_states_aggregator/factory.py,sha256=fmar06v-oMwmo-zEdFpFDCGC5ic6x1LGEgyzVUO_e04,1511
|
|
148
|
+
d9d/module/block/hidden_states_aggregator/mean.py,sha256=MN9x2TMaGTHrAkft9Wsh3u8jF9nXgkArPXjcZBStM60,2125
|
|
149
|
+
d9d/module/block/hidden_states_aggregator/noop.py,sha256=dqqOZ-ZCfOYQJXU7rxUE-nLtPca9G_dcd2RACsqW6Dw,654
|
|
150
|
+
d9d/module/block/moe/__init__.py,sha256=oL0Li4raYSufCQY_3llP_RBYXOnQi3_cmoEib8YcqDE,310
|
|
151
|
+
d9d/module/block/moe/communications/__init__.py,sha256=cud1BcwcozsGENkQ70DtIPTXUE1G2o-5E_Okg8ffOC0,330
|
|
152
|
+
d9d/module/block/moe/communications/base.py,sha256=zZq6IBMG86Eo2j0EZZGgkMj3bJ3sj2rH-SszhbcqHeQ,2107
|
|
153
|
+
d9d/module/block/moe/communications/deepep.py,sha256=PRTCCEWnGT3u9XB_3V3cbupDGdhsrZkFk44sIrBOn-I,8809
|
|
154
|
+
d9d/module/block/moe/communications/naive.py,sha256=jQU8u9z8ooFiEN18Wc8uwqq505Fa9c18My5SP03vNAE,2196
|
|
155
|
+
d9d/module/block/moe/grouped_experts.py,sha256=0bMDGPyXINmDrcIbxJeAS8NwI0ZlUFfxRaQ7sg0Lq7Q,2782
|
|
156
|
+
d9d/module/block/moe/grouped_linear.py,sha256=eVMvwnholSL-m5lDrGfsK0kYcGYbbPqkuH80JAOao9k,2529
|
|
157
|
+
d9d/module/block/moe/layer.py,sha256=UhKuRLAFLtDyv9o7-9SxjEi_N_TaG9AkWITuPR6qqgI,4331
|
|
158
|
+
d9d/module/block/moe/router.py,sha256=7l7C2Hr3AINlOvyfVRA---fEXMT6IHptPX-w0eS_fxw,3291
|
|
159
|
+
d9d/module/block/positional/__init__.py,sha256=RsPCbuRgl3UW7Fow5dRf014MuwntakJuQiW-XezWYxw,236
|
|
160
|
+
d9d/module/block/positional/rope.py,sha256=SB9cCcUTQXQOjdebGcn3v7nWDgMU8vfyNiZQY4NyAbE,4954
|
|
161
|
+
d9d/module/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
162
|
+
d9d/module/model/qwen3_moe/__init__.py,sha256=dQbZpQYSFtoq0cSLnX4OHNTiEBDxEmmY9_BMUB9iXtQ,384
|
|
163
|
+
d9d/module/model/qwen3_moe/decoder_layer.py,sha256=Pu8J8qi3ZdYLQ5UG1LmkcBDRceSPJLFbQn9wIjiMAlI,3334
|
|
164
|
+
d9d/module/model/qwen3_moe/model.py,sha256=KHDX1XApNRcm-0hiNRhIKPwS-D5E2J9biiBVnVkRso4,14025
|
|
165
|
+
d9d/module/model/qwen3_moe/params.py,sha256=OlnDLVeUWCydkyCnrbzdGhQiuVY9YgQWSTmFTLoQjsg,2480
|
|
166
|
+
d9d/module/parallelism/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
167
|
+
d9d/module/parallelism/api/__init__.py,sha256=KCiPpSlL2yy0-KN1B-zex6v0oQo-ep7mXupKBKEkvtQ,558
|
|
168
|
+
d9d/module/parallelism/api/expert_parallel.py,sha256=tid3TXadzeUhPRfVcYMP7eynSq2oMSkzrA3gQ3LxfAw,1399
|
|
169
|
+
d9d/module/parallelism/api/fully_sharded.py,sha256=K0ttq70F6IF0dmvFGxR4PwcIb9Qpw7LKx5DkQQR2dw8,1643
|
|
170
|
+
d9d/module/parallelism/api/hybrid_sharded.py,sha256=230Rg8KG9SfOF9Iv17oW1GVsdajc5UEuJztmWvyNsKQ,1715
|
|
171
|
+
d9d/module/parallelism/api/replicate_parallel.py,sha256=HHMxLybsxtX7yaTx6ek1u8hU3Yo0p8yK7TGf4YBqpgk,1246
|
|
172
|
+
d9d/module/parallelism/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
173
|
+
d9d/module/parallelism/model/qwen3_moe.py,sha256=eKC9DkCMrnzemvcdryhkkZ3l5Q0Do3i2A9PhwvZqRhQ,3575
|
|
174
|
+
d9d/module/parallelism/style/__init__.py,sha256=ICUJJs6V-W3AWQ9gTY4fRDQHKzG18EzfaisSHEgHL1w,169
|
|
175
|
+
d9d/module/parallelism/style/shard_experts.py,sha256=wm8x7OVfRJl7oe8quQ3no3qu0VLyJsPUwXMkQohzJa0,2158
|
|
176
|
+
d9d/module/parallelism/style/to_local.py,sha256=xCwBKhbnybElPXhivf3qb92YddxWa5X55pGFfkrh4YE,3273
|
|
177
|
+
d9d/optim/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
178
|
+
d9d/optim/stochastic/__init__.py,sha256=H4sJvNTHs27gIJ8lR9h8AM0j6BrnPlVlgOi8LiDid4c,72
|
|
179
|
+
d9d/optim/stochastic/adamw.py,sha256=XNTUtm3Z4YAPqZFKJsD09cTWmKd0i5EnrJ-sC-1WnQc,5530
|
|
180
|
+
d9d/peft/__init__.py,sha256=XTgJcW9v4sQTkur7lcMZ0BkJaqWU9BruotImwK1fCss,322
|
|
181
|
+
d9d/peft/all/__init__.py,sha256=ID2lcBOD5-4W4sL7lsSM37psqxV4-EfsU2NXO3D6nWY,243
|
|
182
|
+
d9d/peft/all/config.py,sha256=YvCsWGDU2TRO0eIRk5-kTbX9V9DArob5KxxXK7HxGoc,765
|
|
183
|
+
d9d/peft/all/method.py,sha256=2IztonETLROCOTxt61nJ4knEwXRVcQxAi1ptgRn80BA,2117
|
|
184
|
+
d9d/peft/applicator.py,sha256=jrqEwKY9EbLwo_qZHaQhTubVDgv8okQmZdcd6PBS1BI,1393
|
|
185
|
+
d9d/peft/base.py,sha256=kPImZaZETnIhew3_MhbViYrXvZJG16uraqEDZpr26XI,1729
|
|
186
|
+
d9d/peft/full_tune/__init__.py,sha256=Z4U8Nbqt6kZJZqNlDV8kX-m2YW9QjLJ5dc0BxvpLru8,195
|
|
187
|
+
d9d/peft/full_tune/config.py,sha256=ZdYDFn1DVmKdibk502WvU9rdAmJj0NKopPzOzmhAJYM,494
|
|
188
|
+
d9d/peft/full_tune/method.py,sha256=Hut8VHMfX05RnmSS6lJzkSv49uB4BpwagQEus5KX4d4,1265
|
|
189
|
+
d9d/peft/lora/__init__.py,sha256=nr3R8uJlCkt2mmZYpFtWiBfRDdpUtaU9zx2r5Nl8NYo,294
|
|
190
|
+
d9d/peft/lora/config.py,sha256=KElokzzMJeh91SwRrxj-ZTzSLHF8ZVSb8ehNfYKFFpg,801
|
|
191
|
+
d9d/peft/lora/layer.py,sha256=7i3QGR4QveJ_Tk_yNl_QZuLzK-l1y9xp5O0N6DRtOrE,4990
|
|
192
|
+
d9d/peft/lora/method.py,sha256=psYYkSvL0telO2ULM8dtDTlf4H87lTtn_4hlB07M-iw,4123
|
|
193
|
+
d9d/pipelining/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
194
|
+
d9d/pipelining/api/__init__.py,sha256=6vwCq9PiJWkREzoM68yO-UySf2YEy6BK14oph3IOuHo,439
|
|
195
|
+
d9d/pipelining/api/module.py,sha256=WhDa4GTGoaL00XMoPAQB-XdjcmGiL_U0T9mTwRSMBN8,5280
|
|
196
|
+
d9d/pipelining/api/schedule.py,sha256=m6FCe1Yh2cOYGxShirq3HakDlRnQyAIwePA5-UU_xxE,1629
|
|
197
|
+
d9d/pipelining/api/sharding.py,sha256=XhaRarZOHf6nDqwnzLvz0SPh9VHi6kDiQkkx_gRNoHY,204
|
|
198
|
+
d9d/pipelining/factory/__init__.py,sha256=_tN9unIv9bHojC26MKt34W9ra-zMdGrfS0i0a4JyurU,602
|
|
199
|
+
d9d/pipelining/factory/config.py,sha256=MW8ARW0gPZGN9jsPPq-nTSFaGYMKPDwIBO06juy4DTE,2565
|
|
200
|
+
d9d/pipelining/factory/factory.py,sha256=E_eUyvYP6YXMzy8QdFUoBfpFUkA39eFk7zeYsidqGzU,3920
|
|
201
|
+
d9d/pipelining/factory/registry.py,sha256=k8nBSM2vlYE_fAo4pV7zzX9iqOopLJ0YugeieYwwU10,3163
|
|
202
|
+
d9d/pipelining/infra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
203
|
+
d9d/pipelining/infra/schedule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
204
|
+
d9d/pipelining/infra/schedule/component/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
205
|
+
d9d/pipelining/infra/schedule/component/program/__init__.py,sha256=c_FBMR7y2fo8Hbzxu23fBXgnfJ0PYXoFHnvS78tuPKo,572
|
|
206
|
+
d9d/pipelining/infra/schedule/component/program/base.py,sha256=tVJNJr6IWSg-qT72Nci39Z4eCX0ggO5oJcJocwJFOOk,1010
|
|
207
|
+
d9d/pipelining/infra/schedule/component/program/communications.py,sha256=2Q-4LCVWgB1TAdf1-GcxRTo8GZuh4ljlHyVusTl25Qo,6893
|
|
208
|
+
d9d/pipelining/infra/schedule/component/program/topology.py,sha256=4c47yx_9-tx37ySCzdK-DXpJi0e42xC_Fkdg2Z2cwQI,2364
|
|
209
|
+
d9d/pipelining/infra/schedule/component/runtime/__init__.py,sha256=XouJZVjEfekcoeRWyzjGHD0D8yRg90JaXVeqojRnJPI,633
|
|
210
|
+
d9d/pipelining/infra/schedule/component/runtime/action.py,sha256=8Rgtk5pZjXCbsjoJxhArkEFIeNzY1ZWvESCgLnA8Hyg,10733
|
|
211
|
+
d9d/pipelining/infra/schedule/component/runtime/communications.py,sha256=X1vSYnBHY4uBB0TvAApiSbdibJxxwNV-EZ3l_LXRnC4,3344
|
|
212
|
+
d9d/pipelining/infra/schedule/component/runtime/executor.py,sha256=QAbvdA6vfS909jririg_Rw_FK2H6aZUYyAILGOpGHU8,4392
|
|
213
|
+
d9d/pipelining/infra/schedule/component/runtime/loss.py,sha256=DGATscTXF2fKG7M2UiagpnVYvUI3DSmKKOMH1QLvH0k,1646
|
|
214
|
+
d9d/pipelining/infra/schedule/program/__init__.py,sha256=991C1CTiAoC93hxsZxTJoBLBFUa-9phcNIIH8SUw4vA,447
|
|
215
|
+
d9d/pipelining/infra/schedule/program/bfs.py,sha256=wiTGaFaUJ2O9nAB3qTtgH1AM8NZrZbF3v5bqDY5VQvw,3092
|
|
216
|
+
d9d/pipelining/infra/schedule/program/dualpipev.py,sha256=GDuNHmqjOjTHgee18EE4py_9p97pBg91BJDcYh5HlSo,7875
|
|
217
|
+
d9d/pipelining/infra/schedule/program/interleaved.py,sha256=6aGQsFTY_Jv3NU19lNenax_U4ji5pAO0dxXR_uRKfq0,8655
|
|
218
|
+
d9d/pipelining/infra/schedule/program/zerobubblev.py,sha256=eWEeUcp-XItGmFKAP4-TmVVXpLTkfXAz4Mfsm2NvJ7g,7219
|
|
219
|
+
d9d/pipelining/infra/stage/__init__.py,sha256=q_6-TTpeg55mSIhzM0aux5FSsB31SQfanDy6pau5Kwk,68
|
|
220
|
+
d9d/pipelining/infra/stage/communications.py,sha256=h72UrnTK15AaJC-rb00cKrTu1JL6ny1TycH9tmuAzlc,9865
|
|
221
|
+
d9d/pipelining/infra/stage/computations.py,sha256=F1LCCMAdqU7iXHnLGjbHn6WsU0aKAq7ctZb70W1GUXc,10426
|
|
222
|
+
d9d/pipelining/infra/stage/splitgrad.py,sha256=xJkbhy6uKDxVAUyW-LUQhJQ55DRRKe5_8_V0GC0eroc,13166
|
|
223
|
+
d9d/pipelining/infra/stage/stage.py,sha256=Ac7lAERDIUvnYNaY5UTGOOLMrVUd5-KSbxMSNRpShuE,11521
|
|
224
|
+
d9d/pipelining/infra/stage/struct_helper.py,sha256=y9gTbKUmmsDHO6YCXPSKAj_Xq-vg6yoqHEmRkLVpekE,1361
|
|
225
|
+
d9d/pipelining/training/__init__.py,sha256=mjxKtgXNU59QfgaxQrcahEG-kv6xwsNJBkBdq6FWJT0,154
|
|
226
|
+
d9d/pipelining/training/optimizer.py,sha256=7VZ4b8I8CC0DnW4h-QYEhrf9jZm5LFsYUpei1Q2lkR4,1322
|
|
227
|
+
d9d/pipelining/training/scheduler.py,sha256=QBDu4Z9P7Vqgt1R8AOJvhEISH4BChysjwtFRCZWyEfw,1147
|
|
228
|
+
d9d/tracker/__init__.py,sha256=KShifEoIeGYksmAMMxm8jcuyaw9WsX4OY0wuU0DlhdM,323
|
|
229
|
+
d9d/tracker/base.py,sha256=keS0UsgbffYT4mf8_LfZc-FueOsujePHdlBF4OIfPA4,3492
|
|
230
|
+
d9d/tracker/factory.py,sha256=RVE_NZu9Tn9sI_1QEl3b-DPRvikg-nxksOZ_AcxK6Yw,1660
|
|
231
|
+
d9d/tracker/provider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
232
|
+
d9d/tracker/provider/aim/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
233
|
+
d9d/tracker/provider/aim/config.py,sha256=7-Kgt2XhdNRhgxLYEWpB27uhgB-RBiPrGvH6S2L3rIM,672
|
|
234
|
+
d9d/tracker/provider/aim/tracker.py,sha256=g34BdjIYEftEwuK2oTKKzHLN3dpG7-i14zhXIF_onIo,3110
|
|
235
|
+
d9d/tracker/provider/null.py,sha256=c1nvUaOz8RbRY8XzwSPTi7t0lSsmdlwGAYfYgprwaf8,1440
|
|
236
|
+
d9d-0.1.0.dist-info/METADATA,sha256=NyAv5QdE_TtWzvNWcWormljU9O3DzQ8yAEp_8XZJv9s,5681
|
|
237
|
+
d9d-0.1.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
238
|
+
d9d-0.1.0.dist-info/RECORD,,
|