d9d 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. d9d/__init__.py +0 -0
  2. d9d/core/__init__.py +0 -0
  3. d9d/core/autograd/__init__.py +7 -0
  4. d9d/core/autograd/grad_context.py +85 -0
  5. d9d/core/dist_context/__init__.py +19 -0
  6. d9d/core/dist_context/configured.py +215 -0
  7. d9d/core/dist_context/device_mesh_domains.py +185 -0
  8. d9d/core/dist_context/log.py +30 -0
  9. d9d/core/dist_context/params.py +113 -0
  10. d9d/core/dist_ops/__init__.py +16 -0
  11. d9d/core/dist_ops/object.py +68 -0
  12. d9d/core/dist_ops/tensor.py +192 -0
  13. d9d/core/protocol/__init__.py +8 -0
  14. d9d/core/protocol/training.py +38 -0
  15. d9d/core/sharding/__init__.py +15 -0
  16. d9d/core/sharding/auto_spec.py +66 -0
  17. d9d/core/sharding/shard.py +154 -0
  18. d9d/core/sharding/spec.py +28 -0
  19. d9d/core/sharding/unshard.py +117 -0
  20. d9d/core/types/__init__.py +12 -0
  21. d9d/core/types/data.py +14 -0
  22. d9d/core/types/pytree.py +26 -0
  23. d9d/dataset/__init__.py +17 -0
  24. d9d/dataset/buffer_sorted.py +143 -0
  25. d9d/dataset/padding.py +79 -0
  26. d9d/dataset/sharded.py +195 -0
  27. d9d/internals/__init__.py +0 -0
  28. d9d/internals/determinism/__init__.py +10 -0
  29. d9d/internals/determinism/seed.py +63 -0
  30. d9d/internals/grad_norm/__init__.py +8 -0
  31. d9d/internals/grad_norm/group.py +87 -0
  32. d9d/internals/grad_norm/norm.py +169 -0
  33. d9d/internals/grad_sync/__init__.py +14 -0
  34. d9d/internals/grad_sync/bucket.py +317 -0
  35. d9d/internals/grad_sync/placement_helper.py +23 -0
  36. d9d/internals/grad_sync/synchronizer.py +257 -0
  37. d9d/internals/pipeline_state/__init__.py +14 -0
  38. d9d/internals/pipeline_state/api.py +45 -0
  39. d9d/internals/pipeline_state/handler.py +111 -0
  40. d9d/internals/pipeline_state/storage.py +236 -0
  41. d9d/internals/profiling/__init__.py +7 -0
  42. d9d/internals/profiling/profile.py +112 -0
  43. d9d/internals/state/__init__.py +6 -0
  44. d9d/internals/state/main_process.py +44 -0
  45. d9d/kernel/__init__.py +0 -0
  46. d9d/kernel/cce/__init__.py +5 -0
  47. d9d/kernel/cce/cce.py +298 -0
  48. d9d/kernel/cce/main.py +282 -0
  49. d9d/kernel/general/__init__.py +5 -0
  50. d9d/kernel/general/get_int_dtype.py +7 -0
  51. d9d/kernel/gmm/__init__.py +5 -0
  52. d9d/kernel/gmm/function.py +78 -0
  53. d9d/kernel/moe/__init__.py +8 -0
  54. d9d/kernel/moe/indices_to_multihot.py +268 -0
  55. d9d/kernel/moe/permute_with_probs.py +1035 -0
  56. d9d/kernel/stochastic/__init__.py +11 -0
  57. d9d/kernel/stochastic/adamw_step.py +204 -0
  58. d9d/kernel/stochastic/copy.py +104 -0
  59. d9d/kernel/stochastic/ops/__init__.py +5 -0
  60. d9d/kernel/stochastic/ops/round.py +22 -0
  61. d9d/kernel/swiglu/__init__.py +5 -0
  62. d9d/kernel/swiglu/function.py +36 -0
  63. d9d/kernel/swiglu/op.py +167 -0
  64. d9d/loop/__init__.py +0 -0
  65. d9d/loop/auto/__init__.py +9 -0
  66. d9d/loop/auto/auto_lr_scheduler.py +46 -0
  67. d9d/loop/auto/auto_optimizer.py +196 -0
  68. d9d/loop/component/__init__.py +35 -0
  69. d9d/loop/component/batch_maths.py +106 -0
  70. d9d/loop/component/checkpointer.py +172 -0
  71. d9d/loop/component/data_loader_factory.py +258 -0
  72. d9d/loop/component/garbage_collector.py +94 -0
  73. d9d/loop/component/gradient_clipper.py +89 -0
  74. d9d/loop/component/gradient_manager.py +149 -0
  75. d9d/loop/component/job_logger.py +146 -0
  76. d9d/loop/component/job_profiler.py +62 -0
  77. d9d/loop/component/loss_computer.py +86 -0
  78. d9d/loop/component/model_stage_exporter.py +37 -0
  79. d9d/loop/component/model_stage_factory.py +261 -0
  80. d9d/loop/component/optimizer_factory.py +88 -0
  81. d9d/loop/component/stepper.py +52 -0
  82. d9d/loop/component/timeout_manager.py +54 -0
  83. d9d/loop/component/train_task_operator.py +152 -0
  84. d9d/loop/config/__init__.py +36 -0
  85. d9d/loop/config/config.py +225 -0
  86. d9d/loop/config/types.py +24 -0
  87. d9d/loop/control/__init__.py +61 -0
  88. d9d/loop/control/dataset_provider.py +58 -0
  89. d9d/loop/control/lr_scheduler_provider.py +47 -0
  90. d9d/loop/control/model_provider.py +162 -0
  91. d9d/loop/control/optimizer_provider.py +45 -0
  92. d9d/loop/control/task.py +304 -0
  93. d9d/loop/run/__init__.py +6 -0
  94. d9d/loop/run/train.py +355 -0
  95. d9d/loop/state.py +143 -0
  96. d9d/lr_scheduler/__init__.py +9 -0
  97. d9d/lr_scheduler/piecewise/__init__.py +18 -0
  98. d9d/lr_scheduler/piecewise/builder.py +152 -0
  99. d9d/lr_scheduler/piecewise/config.py +176 -0
  100. d9d/lr_scheduler/piecewise/curves.py +75 -0
  101. d9d/lr_scheduler/piecewise/engine.py +76 -0
  102. d9d/lr_scheduler/visualizer.py +74 -0
  103. d9d/metric/__init__.py +10 -0
  104. d9d/metric/abc.py +79 -0
  105. d9d/metric/impl/__init__.py +7 -0
  106. d9d/metric/impl/compose.py +54 -0
  107. d9d/metric/impl/mean.py +94 -0
  108. d9d/model_state/__init__.py +0 -0
  109. d9d/model_state/io/__init__.py +21 -0
  110. d9d/model_state/io/dto.py +30 -0
  111. d9d/model_state/io/module_reader.py +75 -0
  112. d9d/model_state/io/module_writer.py +123 -0
  113. d9d/model_state/io/reader.py +125 -0
  114. d9d/model_state/io/writer.py +309 -0
  115. d9d/model_state/mapper/__init__.py +10 -0
  116. d9d/model_state/mapper/abc.py +70 -0
  117. d9d/model_state/mapper/adapters/__init__.py +12 -0
  118. d9d/model_state/mapper/adapters/mapper.py +27 -0
  119. d9d/model_state/mapper/adapters/module.py +22 -0
  120. d9d/model_state/mapper/compose/__init__.py +17 -0
  121. d9d/model_state/mapper/compose/helper.py +22 -0
  122. d9d/model_state/mapper/compose/parallel.py +58 -0
  123. d9d/model_state/mapper/compose/sequential.py +131 -0
  124. d9d/model_state/mapper/compose/shard.py +36 -0
  125. d9d/model_state/mapper/leaf/__init__.py +18 -0
  126. d9d/model_state/mapper/leaf/dtensor.py +56 -0
  127. d9d/model_state/mapper/leaf/identity.py +23 -0
  128. d9d/model_state/mapper/leaf/rename.py +26 -0
  129. d9d/model_state/mapper/leaf/select_child.py +37 -0
  130. d9d/model_state/mapper/leaf/stack.py +29 -0
  131. d9d/module/__init__.py +0 -0
  132. d9d/module/base/__init__.py +7 -0
  133. d9d/module/base/late_init.py +10 -0
  134. d9d/module/block/__init__.py +0 -0
  135. d9d/module/block/attention/__init__.py +7 -0
  136. d9d/module/block/attention/grouped_query.py +139 -0
  137. d9d/module/block/attention/sdpa/__init__.py +5 -0
  138. d9d/module/block/attention/sdpa/flash.py +52 -0
  139. d9d/module/block/embedding/__init__.py +7 -0
  140. d9d/module/block/embedding/shard_token_embedding.py +103 -0
  141. d9d/module/block/ffn/__init__.py +5 -0
  142. d9d/module/block/ffn/swiglu.py +60 -0
  143. d9d/module/block/head/__init__.py +6 -0
  144. d9d/module/block/head/language_modelling.py +87 -0
  145. d9d/module/block/hidden_states_aggregator/__init__.py +12 -0
  146. d9d/module/block/hidden_states_aggregator/base.py +35 -0
  147. d9d/module/block/hidden_states_aggregator/factory.py +48 -0
  148. d9d/module/block/hidden_states_aggregator/mean.py +61 -0
  149. d9d/module/block/hidden_states_aggregator/noop.py +27 -0
  150. d9d/module/block/moe/__init__.py +13 -0
  151. d9d/module/block/moe/communications/__init__.py +11 -0
  152. d9d/module/block/moe/communications/base.py +58 -0
  153. d9d/module/block/moe/communications/deepep.py +300 -0
  154. d9d/module/block/moe/communications/naive.py +68 -0
  155. d9d/module/block/moe/grouped_experts.py +81 -0
  156. d9d/module/block/moe/grouped_linear.py +78 -0
  157. d9d/module/block/moe/layer.py +122 -0
  158. d9d/module/block/moe/router.py +103 -0
  159. d9d/module/block/positional/__init__.py +8 -0
  160. d9d/module/block/positional/rope.py +150 -0
  161. d9d/module/model/__init__.py +0 -0
  162. d9d/module/model/qwen3_moe/__init__.py +16 -0
  163. d9d/module/model/qwen3_moe/decoder_layer.py +110 -0
  164. d9d/module/model/qwen3_moe/model.py +373 -0
  165. d9d/module/model/qwen3_moe/params.py +69 -0
  166. d9d/module/parallelism/__init__.py +0 -0
  167. d9d/module/parallelism/api/__init__.py +18 -0
  168. d9d/module/parallelism/api/expert_parallel.py +36 -0
  169. d9d/module/parallelism/api/fully_sharded.py +43 -0
  170. d9d/module/parallelism/api/hybrid_sharded.py +49 -0
  171. d9d/module/parallelism/api/replicate_parallel.py +33 -0
  172. d9d/module/parallelism/model/__init__.py +0 -0
  173. d9d/module/parallelism/model/qwen3_moe.py +99 -0
  174. d9d/module/parallelism/style/__init__.py +7 -0
  175. d9d/module/parallelism/style/shard_experts.py +60 -0
  176. d9d/module/parallelism/style/to_local.py +86 -0
  177. d9d/optim/__init__.py +0 -0
  178. d9d/optim/stochastic/__init__.py +5 -0
  179. d9d/optim/stochastic/adamw.py +158 -0
  180. d9d/peft/__init__.py +13 -0
  181. d9d/peft/all/__init__.py +12 -0
  182. d9d/peft/all/config.py +31 -0
  183. d9d/peft/all/method.py +76 -0
  184. d9d/peft/applicator.py +47 -0
  185. d9d/peft/base.py +70 -0
  186. d9d/peft/full_tune/__init__.py +11 -0
  187. d9d/peft/full_tune/config.py +20 -0
  188. d9d/peft/full_tune/method.py +46 -0
  189. d9d/peft/lora/__init__.py +15 -0
  190. d9d/peft/lora/config.py +35 -0
  191. d9d/peft/lora/layer.py +177 -0
  192. d9d/peft/lora/method.py +132 -0
  193. d9d/pipelining/__init__.py +0 -0
  194. d9d/pipelining/api/__init__.py +19 -0
  195. d9d/pipelining/api/module.py +149 -0
  196. d9d/pipelining/api/schedule.py +50 -0
  197. d9d/pipelining/api/sharding.py +9 -0
  198. d9d/pipelining/factory/__init__.py +21 -0
  199. d9d/pipelining/factory/config.py +89 -0
  200. d9d/pipelining/factory/factory.py +114 -0
  201. d9d/pipelining/factory/registry.py +82 -0
  202. d9d/pipelining/infra/__init__.py +0 -0
  203. d9d/pipelining/infra/schedule/__init__.py +0 -0
  204. d9d/pipelining/infra/schedule/component/__init__.py +0 -0
  205. d9d/pipelining/infra/schedule/component/program/__init__.py +22 -0
  206. d9d/pipelining/infra/schedule/component/program/base.py +35 -0
  207. d9d/pipelining/infra/schedule/component/program/communications.py +203 -0
  208. d9d/pipelining/infra/schedule/component/program/topology.py +78 -0
  209. d9d/pipelining/infra/schedule/component/runtime/__init__.py +29 -0
  210. d9d/pipelining/infra/schedule/component/runtime/action.py +361 -0
  211. d9d/pipelining/infra/schedule/component/runtime/communications.py +101 -0
  212. d9d/pipelining/infra/schedule/component/runtime/executor.py +113 -0
  213. d9d/pipelining/infra/schedule/component/runtime/loss.py +55 -0
  214. d9d/pipelining/infra/schedule/program/__init__.py +15 -0
  215. d9d/pipelining/infra/schedule/program/bfs.py +86 -0
  216. d9d/pipelining/infra/schedule/program/dualpipev.py +234 -0
  217. d9d/pipelining/infra/schedule/program/interleaved.py +240 -0
  218. d9d/pipelining/infra/schedule/program/zerobubblev.py +227 -0
  219. d9d/pipelining/infra/stage/__init__.py +5 -0
  220. d9d/pipelining/infra/stage/communications.py +274 -0
  221. d9d/pipelining/infra/stage/computations.py +317 -0
  222. d9d/pipelining/infra/stage/splitgrad.py +377 -0
  223. d9d/pipelining/infra/stage/stage.py +321 -0
  224. d9d/pipelining/infra/stage/struct_helper.py +46 -0
  225. d9d/pipelining/training/__init__.py +7 -0
  226. d9d/pipelining/training/optimizer.py +41 -0
  227. d9d/pipelining/training/scheduler.py +34 -0
  228. d9d/tracker/__init__.py +14 -0
  229. d9d/tracker/base.py +124 -0
  230. d9d/tracker/factory.py +57 -0
  231. d9d/tracker/provider/__init__.py +0 -0
  232. d9d/tracker/provider/aim/__init__.py +0 -0
  233. d9d/tracker/provider/aim/config.py +23 -0
  234. d9d/tracker/provider/aim/tracker.py +114 -0
  235. d9d/tracker/provider/null.py +61 -0
  236. d9d-0.1.0.dist-info/METADATA +90 -0
  237. d9d-0.1.0.dist-info/RECORD +238 -0
  238. d9d-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,114 @@
1
+ from collections.abc import Generator
2
+ from contextlib import contextmanager
3
+ from typing import Any, Self, TypedDict, cast
4
+
5
+ import torch
6
+ from aim import Distribution, Run
7
+
8
+ from d9d.tracker import BaseTracker, BaseTrackerRun, RunConfig
9
+
10
+ from .config import AimConfig
11
+
12
+
13
+ class AimState(TypedDict):
14
+ """
15
+ State dictionary format for persisting Aim tracker state.
16
+ """
17
+
18
+ restart_hash: str | None
19
+
20
+
21
class AimRun(BaseTrackerRun):
    """
    Active run implementation for Aim.

    Wraps the underlying `aim.Run` object to adhere to the d9d BaseTrackerRun interface.
    Every tracked value is recorded with the current step and with the run-level
    context merged with the optional per-call context.
    """

    def __init__(self, run: Run):
        self._run = run
        self._step = 0
        self._context: dict[str, str] = {}

    def set_step(self, step: int):
        """Sets the step that is attached to every subsequently tracked value."""
        self._step = step

    def set_context(self, context: dict[str, str]):
        """Replaces the run-level context that is merged into every tracked value."""
        self._context = context

    def _resolve_context(self, context: dict[str, str] | None) -> dict[str, str]:
        """Merges the run-level context with a per-call one (per-call keys win)."""
        if context is None:
            return self._context
        return {**self._context, **context}

    def scalar(self, name: str, value: float, context: dict[str, str] | None = None):
        """
        Tracks a scalar value at the current step.

        Args:
            name: Name of the tracked sequence.
            value: Value to record.
            context: Optional extra context; its keys override the run-level context.
        """

        self._run.track(
            name=name,
            value=value,
            context=self._resolve_context(context),
            step=self._step
        )

    def bins(self, name: str, values: torch.Tensor, context: dict[str, str] | None = None):
        """
        Tracks a 1-D tensor as an Aim `Distribution` at the current step.

        Args:
            name: Name of the tracked sequence.
            values: Tensor passed to Aim as the histogram; its first dimension
                size is used as the upper bound of the bin range.
            context: Optional extra context; its keys override the run-level context.
        """

        # `Tensor.numpy()` raises for tensors that require grad or do not live on the CPU,
        # so detach and move to host memory first (both are no-ops for plain CPU tensors).
        hist = values.detach().cpu().numpy()

        self._run.track(
            name=name,
            value=Distribution(
                hist=hist,
                bin_range=(0, values.shape[0])
            ),
            context=self._resolve_context(context),
            step=self._step
        )
67
+
68
+
69
class AimTracker(BaseTracker[AimConfig]):
    """
    Aim-based tracker implementation.

    Caches the run hash to allow experiment resumption from checkpoints.
    """

    def __init__(self, config: AimConfig):
        self._config = config

        self._restart_hash: str | None = None
        self._run: Run | None = None

    def load_state_dict(self, state_dict: dict[str, Any]) -> None:
        """Restores the cached run hash from a dictionary produced by `state_dict`."""
        state = cast(AimState, state_dict)
        self._restart_hash = state["restart_hash"]

    def state_dict(self) -> dict[str, Any]:
        """Returns the cached run hash in the `AimState` layout."""
        return {
            "restart_hash": self._restart_hash
        }

    @contextmanager
    def open(self, properties: RunConfig) -> Generator[BaseTrackerRun, None, None]:
        """
        Opens an Aim run and yields it wrapped into `AimRun`.

        The run is created with the cached hash (if any), and its hash is cached
        again afterwards so that it ends up in `state_dict`. The run is closed when
        the context exits, including when the body raises.

        Args:
            properties: Name, description and hyperparameters to store on the run.

        Yields:
            The active tracker run.
        """

        run = Run(
            run_hash=self._restart_hash,
            repo=self._config.repo,
            log_system_params=self._config.log_system_params,
            capture_terminal_logs=self._config.capture_terminal_logs,
            system_tracking_interval=self._config.system_tracking_interval
        )

        # Everything after the run is created is guarded so that a failure while
        # configuring the run or inside the caller's `with` body still closes it.
        try:
            run.name = properties.name
            run.description = properties.description
            run["hparams"] = properties.hparams

            self._restart_hash = run.hash
            self._run = run

            yield AimRun(run)
        finally:
            run.close()
            self._run = None

    @classmethod
    def from_config(cls, config: AimConfig) -> Self:
        """Builds the tracker from its configuration object."""
        return cls(config)
@@ -0,0 +1,61 @@
1
+ from collections.abc import Generator
2
+ from contextlib import contextmanager
3
+ from typing import Any, Literal, Self
4
+
5
+ import torch
6
+ from pydantic import BaseModel
7
+
8
+ from d9d.tracker import BaseTracker, BaseTrackerRun, RunConfig
9
+
10
+
11
class NullTrackerConfig(BaseModel):
    """
    Settings model for the no-op ("null") tracker.

    Attributes:
        provider: Discriminator field; the only accepted value (and the default) is 'null'.
    """

    provider: Literal["null"] = "null"
20
+
21
+
22
class NullRun(BaseTrackerRun):
    """
    Tracking run that does nothing.

    Every call is accepted and its arguments are dropped; useful for testing or
    when tracking is disabled.
    """

    def set_step(self, step: int):
        """Ignores the step."""
        return None

    def set_context(self, context: dict[str, str]):
        """Ignores the context."""
        return None

    def scalar(self, name: str, value: float, context: dict[str, str] | None = None):
        """Ignores the scalar value."""
        return None

    def bins(self, name: str, values: torch.Tensor, context: dict[str, str] | None = None):
        """Ignores the tensor."""
        return None
40
+
41
+
42
class NullTracker(BaseTracker[NullTrackerConfig]):
    """
    Tracker that produces no-op runs.

    Holds no state and performs no IO.
    """

    @classmethod
    def from_config(cls, config: NullTrackerConfig) -> Self:
        """Builds the tracker; the configuration carries nothing that is used here."""
        return cls()

    def state_dict(self) -> dict[str, Any]:
        """Returns an empty dictionary since there is nothing to persist."""
        return {}

    def load_state_dict(self, state_dict: dict[str, Any]) -> None:
        """Ignores the given state."""
        return None

    @contextmanager
    def open(self, properties: RunConfig) -> Generator[BaseTrackerRun, None, None]:
        """Yields a fresh `NullRun`; the run properties are not used."""
        noop_run = NullRun()
        yield noop_run
@@ -0,0 +1,90 @@
1
+ Metadata-Version: 2.4
2
+ Name: d9d
3
+ Version: 0.1.0
4
+ Summary: d9d - d[istribute]d - distributed training framework based on PyTorch that tries to be efficient yet hackable
5
+ License: Apache-2.0
6
+ Author: Maksim Afanasyev
7
+ Author-email: mr.applexz@gmail.com
8
+ Requires-Python: >=3.11,<3.15
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Education
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Topic :: Scientific/Engineering
14
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Classifier: Topic :: Software Development
17
+ Classifier: Topic :: Software Development :: Libraries
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Provides-Extra: aim
20
+ Provides-Extra: cce
21
+ Provides-Extra: moe
22
+ Provides-Extra: visualization
23
+ Requires-Dist: aim (>=3.0.0,<4.0.0) ; extra == "aim"
24
+ Requires-Dist: cut-cross-entropy (>=25.9.3) ; extra == "cce"
25
+ Requires-Dist: deep-ep (>=1.2.1) ; extra == "moe"
26
+ Requires-Dist: nv-grouped-gemm (>=1.1.4) ; extra == "moe"
27
+ Requires-Dist: plotly (>=6.0.0) ; extra == "visualization"
28
+ Requires-Dist: pydantic (>=2.0.0)
29
+ Requires-Dist: safetensors (>=0.7.0)
30
+ Requires-Dist: setuptools (>=70.0.0) ; extra == "aim"
31
+ Requires-Dist: torch (>=2.10.0)
32
+ Requires-Dist: torchdata (>=0.11.0)
33
+ Requires-Dist: tqdm (>=4.0.0)
34
+ Requires-Dist: triton (>=3.6.0)
35
+ Project-URL: Documentation, https://d9d-project.github.io/d9d
36
+ Project-URL: Homepage, https://d9d-project.github.io/d9d
37
+ Project-URL: Issues, https://github.com/d9d-project/d9d/issues
38
+ Project-URL: Repository, https://github.com/d9d-project/d9d
39
+ Description-Content-Type: text/markdown
40
+
41
+ # The d9d Project
42
+
43
+ **d9d** is a distributed training framework built on top of PyTorch 2.0. It aims to be hackable, modular, and efficient, designed to scale from single-GPU debugging to massive clusters running 6D-Parallelism.
44
+
45
+ [LET'S START TRAINING 🚀](https://d9d-project.github.io/d9d/)
46
+
47
+ ## Why another framework?
48
+
49
+ Distributed training frameworks such as **Megatron-LM** are monolithic: you run a script from the command line to train one of a set of *predefined* models using *predefined* regimes. While powerful, these systems can be difficult to hack and integrate into novel research workflows. Their focus is often on providing a complete, end-to-end solution, which can limit flexibility for experimentally driven research.
50
+
51
+ Conversely, creating your own distributed training solution from scratch is tricky. You have to implement many low-level components (like distributed checkpoints and synchronization) that are identical across setups, and manually tackle common performance bottlenecks.
52
+
53
+ **d9d** was designed to fill the gap between monolithic frameworks and homebrew setups, providing a modular yet effective solution for distributed training.
54
+
55
+ ## What d9d is and isn't
56
+
57
+ In terms of **core concept**:
58
+
59
+ * **IS** a pluggable framework for implementing distributed training regimes for your deep learning models.
60
+ * **IS** built on clear interfaces and building blocks that may be composed and implemented in your own way.
61
+ * **IS NOT** an all-in-one CLI platform for setting up pre-training and post-training like **torchtitan**, **Megatron-LM**, or **torchforge**.
62
+
63
+ In terms of **codebase & engineering**:
64
+
65
+ * **IS** built on a **strong engineering foundation**: We enforce strict type-checking and rigorous linting to catch errors before execution.
66
+ * **IS** reliable: The framework is backed by a suite of **over 450 tests**, covering unit logic, integration flows, and End-to-End distributed scenarios.
67
+ * **IS** eager to use performance hacks (like **DeepEP** or custom kernels) if they improve MFU, even if they aren't PyTorch-native.
68
+ * **IS NOT** for legacy setups: We do not maintain backward compatibility with older PyTorch versions or hardware. We prioritize simplicity and modern APIs (like `DTensor`).
69
+
70
+ ## Key Philosophies
71
+
72
+ To achieve the balance between hackability and performance, d9d adheres to specific design principles:
73
+
74
+ * **Composition over Monoliths**: We avoid "God Classes" like `DistributedDataParallel` or `ParallelDims` that assume ownership of the entire execution loop. Instead, we provide composable and extendable APIs. For instance, specific horizontal parallelism strategies for specific layers (`parallelize_replicate`, `parallelize_expert_parallel`, ...).
75
+ * **White-Box Modelling**: We encourage standard PyTorch code. Models are not wrapped in obscure metadata specifications; they are standard `nn.Module`s that implement lightweight protocols.
76
+ * **Pragmatic Efficiency**: While we prefer native PyTorch, we are eager to integrate non-native solutions if they improve MFU. For example, we implement MoE using **DeepEP** communications, reindexing kernels from **Megatron-LM**, and efficient grouped-GEMM implementations.
77
+ * **Graph-Based State Management**: Our IO system treats model checkpoints as directed acyclic graphs. This allows you to transform architectures (e.g., merging `q`, `k`, `v` into `qkv`) on-the-fly while streaming from disk, without massive memory overhead.
78
+ * **DTensors**: We mandate that distributed parameters be represented as `torch.distributed.tensor.DTensor`. This simplifies checkpointing by making them topology-aware automatically. We leverage modern PyTorch 2.0 APIs (`DeviceMesh`) as much as possible.
79
+
80
+ ---
81
+
82
+ ## Examples
83
+
84
+ ### Qwen3-MoE Pretraining
85
+ An example showing causal LM pretraining for the Qwen3-MoE model.
86
+
87
+ WIP: MoE load balancing is currently work in progress.
88
+
89
+ [Link](https://github.com/d9d-project/d9d/blob/main/example/qwen3_moe/pretrain.py).
90
+
@@ -0,0 +1,238 @@
1
+ d9d/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ d9d/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ d9d/core/autograd/__init__.py,sha256=uQrNtaXUMxXYyt05j4vRtiI5_pOUg0l4TXDXG3BeXyA,167
4
+ d9d/core/autograd/grad_context.py,sha256=nWN3BvM-GEFO4CZ8pEH5pjdYkrymOkQaNQXQF7laRPc,2891
5
+ d9d/core/dist_context/__init__.py,sha256=HqGt6_gplSNYnohGZl2GEmDO7fKloJVNiKBC9GXIQdM,498
6
+ d9d/core/dist_context/configured.py,sha256=wqVRzpOibMSYKULNLU4HynCoLWhRU2z3nNhTNZ3Y3BI,7127
7
+ d9d/core/dist_context/device_mesh_domains.py,sha256=pI7gW2e-tQgx0TcZOAT4aFBYiiRph6VgF03zJL6WlyE,4854
8
+ d9d/core/dist_context/log.py,sha256=mG-3gkZBleBTUJiOlUGDfXaSFRf5q5fGK97HybWe_E8,900
9
+ d9d/core/dist_context/params.py,sha256=SOgu5T5Q1PNSdM_06U-wuRHwSMxIT8_6ua3t8dTmkeg,3645
10
+ d9d/core/dist_ops/__init__.py,sha256=iwatM6f60NWU8oAYoHcFWvBc7HwTfsQpaaRFg9GwIDc,397
11
+ d9d/core/dist_ops/object.py,sha256=cKAL2M5aUeCYggYqeari-FlsMEYvZvDszhXdLTHTDlo,2006
12
+ d9d/core/dist_ops/tensor.py,sha256=6zqHakdQPDXUbjOQ0tu2rLvM5fsf2RP8503tQzhfciU,6165
13
+ d9d/core/protocol/__init__.py,sha256=amTi465uKy2kEEQfYxrfIQ3METESZluwDoBBBi2BjYE,203
14
+ d9d/core/protocol/training.py,sha256=i3DHGy9P9G9X8UPg45zLX-_eOrEI5Y3DjQ4A2Wix5Ec,961
15
+ d9d/core/sharding/__init__.py,sha256=DYkdFk8LVIssTZr-DdnlXUmz4Xd3OpsoqFLzqsFdsRg,385
16
+ d9d/core/sharding/auto_spec.py,sha256=M8fIm_8zqtGsAsPk8UpPZaUHdhLipjHDvFj6L7zTlLA,2283
17
+ d9d/core/sharding/shard.py,sha256=UhcaCgmJNwb8vhRvKi3mwJ-22zwtB4E1o0qPNn69IgE,5243
18
+ d9d/core/sharding/spec.py,sha256=4sA70POEdWzekBzOP0DwdUbfJa6HBr0VrLVqOTpjUWM,668
19
+ d9d/core/sharding/unshard.py,sha256=T1OQRZ8BwCSTf0Dw4_-N79IDDD6Fwii_-idjpurg7hY,3696
20
+ d9d/core/types/__init__.py,sha256=f2fX48bowRCELUEMLICLwLwi-sl_B5f9VQLfpVJCdIY,223
21
+ d9d/core/types/data.py,sha256=IyZtwLSlofHPzc_F-BrCQVgpBISh6wbInuCMVW92IXk,473
22
+ d9d/core/types/pytree.py,sha256=L93BReqv7fiegPVRzZQd63FYv986rJdMbk-bYFv2bYA,842
23
+ d9d/dataset/__init__.py,sha256=UhxpUpSM5gGtXaVSZ1Y5m8eEO61E5ebcYx-hF9z8T7Q,509
24
+ d9d/dataset/buffer_sorted.py,sha256=oiCnoKfhofo-RQszEX9UZGNEi-E9Gq-oQUbpD5-qYe8,4996
25
+ d9d/dataset/padding.py,sha256=MSG7tkVHjAJdUAR1BKkW_s-14mBJ-g-0GZyf-8MA0PE,2333
26
+ d9d/dataset/sharded.py,sha256=D699wxy0TsGsuIKwiRK4fpuJYn5CJoSddyXREdUFIYs,6923
27
+ d9d/internals/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
+ d9d/internals/determinism/__init__.py,sha256=702y3p8NFHExaCQ962JImMLKPbvEZpV3nE4MYfEOWIs,150
29
+ d9d/internals/determinism/seed.py,sha256=daJ85obD8bw580zCeKGqXCRZ0MesunZUTX41yD2ljGU,2271
30
+ d9d/internals/grad_norm/__init__.py,sha256=rS8lXo3vOLxLUY-rujevcGEQHl4gLKLfAse2gGwyoYY,217
31
+ d9d/internals/grad_norm/group.py,sha256=tWybTJTGYt-c_9by3wdfj6SwoTFbRC6G2U2OotFo2nI,2802
32
+ d9d/internals/grad_norm/norm.py,sha256=c_rQNfbErswYhaaEuFeTd-4rW20w2PxcolC4TL2kYaw,5275
33
+ d9d/internals/grad_sync/__init__.py,sha256=tllIQpOxQcz3bRbBFgG5FrHDzyGXWwQ-IPLKYW7cIZU,326
34
+ d9d/internals/grad_sync/bucket.py,sha256=G5asn2dErBvFYCBlhYlafiAay65EfeiEMTQu-ZZ-id0,8935
35
+ d9d/internals/grad_sync/placement_helper.py,sha256=-3Tdkrf745iz9vd68Iil4PQ-oP8eSGLi_XrjWjp0uYc,634
36
+ d9d/internals/grad_sync/synchronizer.py,sha256=-z3PHc9nPw5pHz9Ht5DYOWNQsJWss5e1I82w9Cc_8lE,8012
37
+ d9d/internals/pipeline_state/__init__.py,sha256=WYEWgFgp_8ycn7_T43DTGG_Kj2Iq_xo6lsMmyzS3a7g,359
38
+ d9d/internals/pipeline_state/api.py,sha256=JZCqSZuJPXHvodyrcMvTs_gxO39FMoAlmhWSLAOGDHI,1102
39
+ d9d/internals/pipeline_state/handler.py,sha256=MOUmv7Xvl0d7oo8Hg5G6Ayvhw8XNX-PCB7F1r7YZTtQ,3172
40
+ d9d/internals/pipeline_state/storage.py,sha256=4EScog_SKZfCHlTivFEJnXfMnxhX9rXfbtEGWStEoDs,6522
41
+ d9d/internals/profiling/__init__.py,sha256=8timWWLXgZyJjYACmdgi-SdUC-rpWKJqCcG_Fr1Boy8,110
42
+ d9d/internals/profiling/profile.py,sha256=vm7jgJRyRCzRW7w-ebSbaOtz6OU0ElOgwe9EIR9hp2s,3848
43
+ d9d/internals/state/__init__.py,sha256=8SOtDBRhP9k-WVKLTnmMkcgNUnSQY6z4LaBajD0mO2s,161
44
+ d9d/internals/state/main_process.py,sha256=I5CxCjZ7xoLVtqbFbBY3Y_1T6_7a89BSqlzy9YsDdzg,1533
45
+ d9d/kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
+ d9d/kernel/cce/__init__.py,sha256=ndtKganqw3kgjdYE0WldhbsLecBA9H_c2I-2DyAE1UQ,81
47
+ d9d/kernel/cce/cce.py,sha256=cL4Us9GEBPtjPrSMjOAeODOWVyBsTOQAwg2O6oVvyXE,9755
48
+ d9d/kernel/cce/main.py,sha256=LxheuDLO5gCHtwy6qJ6a2jMGJbXT9Ex26iXy_e0A7fk,9087
49
+ d9d/kernel/general/__init__.py,sha256=Gv-CuWwtHXwgjqhus_eSVJMiOtOnmbWi2uliCnGjr1s,76
50
+ d9d/kernel/general/get_int_dtype.py,sha256=IcRLpo3AiPJ7pJjkRYWaodf8X8XPNJBxEq5-6k-7FMQ,183
51
+ d9d/kernel/gmm/__init__.py,sha256=h9VaTX9wiYLvXUi00zk98CAc35Nx_dAwwny4vaheYeQ,51
52
+ d9d/kernel/gmm/function.py,sha256=rVRwowNsZYlLlsV47-7LQNJBxRZ1-Arj7PPYClWKSoQ,2623
53
+ d9d/kernel/moe/__init__.py,sha256=tLQ9bF4gDNR6MvVpS2No5RyTCl_b4lpRTPimkiEwbHA,237
54
+ d9d/kernel/moe/indices_to_multihot.py,sha256=xdUVsRHLT19R9JR2dnMcPqdjyHSUyt5312sU8pbLhyU,10138
55
+ d9d/kernel/moe/permute_with_probs.py,sha256=c244yZTEojDH7vbrTJhOTTmzQsiAKQM03ODTFgRSGcQ,35713
56
+ d9d/kernel/stochastic/__init__.py,sha256=yXJ5Cf8SJ3Iag_2_uAKUi2BU4xkxdIp-ontyWfEE87I,245
57
+ d9d/kernel/stochastic/adamw_step.py,sha256=dD7Z9tucQR6U203-XU_s3lEw7q1X_5qVkDXvgH8EOEs,6831
58
+ d9d/kernel/stochastic/copy.py,sha256=S5NNOp1Wxzi8KszhkCh_j5pU0dBE-wsRpl9KHWCuvzQ,3211
59
+ d9d/kernel/stochastic/ops/__init__.py,sha256=aOGiKLx82ZzW42fhTabz0rD7OSYyX0JIZkhM4jXNaW4,80
60
+ d9d/kernel/stochastic/ops/round.py,sha256=w511LzP9a_p5oJWjQmD4QHAVzvpLNK8xnsHbLkMgDnI,545
61
+ d9d/kernel/swiglu/__init__.py,sha256=bYHPadWelNxz9NVCYiDvm_igMZ-gnOLutSXQlVYbyLk,61
62
+ d9d/kernel/swiglu/function.py,sha256=KMpDFEIN9CUmbKT1Z3Eqy9hS_2cvdMxmuqD-dzHleBo,918
63
+ d9d/kernel/swiglu/op.py,sha256=r0AS6ckIl0R_0TeW6xDSJWgJqS2_TWa_ZrulwuOTxxo,4454
64
+ d9d/loop/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
65
+ d9d/loop/auto/__init__.py,sha256=zvpOu934BCSblPK7N03VHEKUzz4EoiEmDEhDpACm7D0,279
66
+ d9d/loop/auto/auto_lr_scheduler.py,sha256=53244OD5ooBpx6lch9SW0KDNVamLhPsCguxuVOKjpeY,1419
67
+ d9d/loop/auto/auto_optimizer.py,sha256=2823wmCdFydERuQKVu5jDMuKAjb0volA3-XNdE6ntZc,6035
68
+ d9d/loop/component/__init__.py,sha256=IgVUupHm-nPEkLx3BNcDClJrxwVqVm62Bo5g47o8QJM,1090
69
+ d9d/loop/component/batch_maths.py,sha256=IYWf952-8aOuoZXVKWPgK7vgPYnn0It3QpXK-tg4cho,3962
70
+ d9d/loop/component/checkpointer.py,sha256=3yhjMutYzeQLxkHYzMe1wpKcNpIeVAv_wtMpQjQAa5Y,5794
71
+ d9d/loop/component/data_loader_factory.py,sha256=XOI9_XLNmqjMkmQguPM4g_-LbhANNS73hGPE_6fCyOQ,8029
72
+ d9d/loop/component/garbage_collector.py,sha256=t1wHN4HvaaDfvhnBPfSsq5_7THd_OuYO8tuUA95-UGQ,2867
73
+ d9d/loop/component/gradient_clipper.py,sha256=0nDGELrwczlqCpHojeC4N3j7dMUhELnod2pWTY1Hv3g,3078
74
+ d9d/loop/component/gradient_manager.py,sha256=rtuDooke4HeoBoMXr_YqxSYecZHSzr-pAbXGmSsl10Q,4976
75
+ d9d/loop/component/job_logger.py,sha256=CZx9EGnILwGQ8mqTaKstjVp_n_5_UO5fQnTEWuJAdsM,5183
76
+ d9d/loop/component/job_profiler.py,sha256=qTK9LUAbLzuvwfkA_UyOVUMsO5s6cy61x7ZeuWQmiCQ,1835
77
+ d9d/loop/component/loss_computer.py,sha256=J1SBzFRjyDZ7QbE7rfMvxyzLjtod5E0NynwNJqJ-qlE,2845
78
+ d9d/loop/component/model_stage_exporter.py,sha256=g1f7WDmsJ6MAN_H8ymh4B3MHIURibUhO_3Mvespwfpc,1332
79
+ d9d/loop/component/model_stage_factory.py,sha256=e1BpmjMwfot9V1pI5drcHCmrAvMJTyb0AwXfu_2MgTM,9831
80
+ d9d/loop/component/optimizer_factory.py,sha256=fjqknSmlYbcxVogO2YgZht6-YhcMvfIh0AExFQ-aDdw,3636
81
+ d9d/loop/component/stepper.py,sha256=5IM-5uvXswfB3---Kk_Dpi2iNBjmOCWqDXGl9v263wU,1804
82
+ d9d/loop/component/timeout_manager.py,sha256=S7bF2iTQVkfvvhzR6wUVXuD2OCTzOMkzFt95eCs-MUE,1679
83
+ d9d/loop/component/train_task_operator.py,sha256=cpeF_16H6Cj5WdLYogr34NI-ko8Q2MO-BSKzsgf_H-o,5438
84
+ d9d/loop/config/__init__.py,sha256=jjikLZrT8zqGcq_LD02T1qd1np0O4j0zaDKI_XUF0_g,824
85
+ d9d/loop/config/config.py,sha256=Pzb2c1f_z66_0KFyc_L-_abeK3rxezg_KNMtCY8BXR8,7039
86
+ d9d/loop/config/types.py,sha256=JZ0A8-pytCV65drry17vmfWWzT9cyzQ6V4uU_ercru0,652
87
+ d9d/loop/control/__init__.py,sha256=x_42CRVmwh1RKfh238QalrycFY7wDa_L7pY8wFnqgI0,1758
88
+ d9d/loop/control/dataset_provider.py,sha256=KELIAbkVthBP78XUE3E3T3M8zUL1l0ra3Ezr6_FDInU,1605
89
+ d9d/loop/control/lr_scheduler_provider.py,sha256=0hxJxIhvlFl-z22rZpeWryQ79LZK8VTU6hx8-oeAQUw,1177
90
+ d9d/loop/control/model_provider.py,sha256=SAIFJwF8rMk4YGEAzFVTmWkIqPBjlrUxvqgK8F_jo-M,4628
91
+ d9d/loop/control/optimizer_provider.py,sha256=M7DL_6298avKGsjto7j4VparfXQcIHvZ2CiYfSlV7T0,1039
92
+ d9d/loop/control/task.py,sha256=YC3Itf_qo7WZmPrX3TVVIaoYVUsf18j56majdO6icjk,8150
93
+ d9d/loop/run/__init__.py,sha256=wQlVo3TayCwoHaONBsfwsAPBu3uue8ckHaM4YDqIkMc,106
94
+ d9d/loop/run/train.py,sha256=XbCa9r7xpVbVPb0dnD9uutPDDfdOZi1JhtD7dQwehOw,12244
95
+ d9d/loop/state.py,sha256=s0Iz6W8iHO9md_3QqbQouqyVz1r7vt8xvcj1AoLqtvU,5017
96
+ d9d/lr_scheduler/__init__.py,sha256=ZQke_m2Zl1adfFGp3wCvYQo-CO_c3bpVePrVGAERhfU,140
97
+ d9d/lr_scheduler/piecewise/__init__.py,sha256=8SPGeMnZSF40pPjfEjkhAd5fb4e8fYjeNdF-pRuC1lM,496
98
+ d9d/lr_scheduler/piecewise/builder.py,sha256=94ufa-aoL90xUIGu_W2XwkWPIkE53KHeUfNvwK61zew,4974
99
+ d9d/lr_scheduler/piecewise/config.py,sha256=lljITKE8gpVyzEIfONzmYbhRSXZSpgieqIFH3yxnoec,4699
100
+ d9d/lr_scheduler/piecewise/curves.py,sha256=LG2GzEYKJhiF9701D8lzxvtO4pzvhSObVEHMiayVCTE,1995
101
+ d9d/lr_scheduler/piecewise/engine.py,sha256=ZV1L4mRDxZABIPhCR8BmwGZ8qzr2z4NsNSJZl9-hcXY,2054
102
+ d9d/lr_scheduler/visualizer.py,sha256=CcaSOewQbM__IGlLNkzRzyvj0X9BAgCEFaWGu55aCmM,2083
103
+ d9d/metric/__init__.py,sha256=VTByKUezSl_xOjpt0FPsRl_zzAXTu3pBHWYrCbocPa8,115
104
+ d9d/metric/abc.py,sha256=Nkd6dhS-KhAf5MWacYCsjmBgqsUeEm-UgA5KPHkv2Rs,2485
105
+ d9d/metric/impl/__init__.py,sha256=6xHkax_7Tglg5EUHnrluo7Pa7kcBp4l4fZ-LRT7x83A,133
106
+ d9d/metric/impl/compose.py,sha256=oQyp3mmku-m_SXqDK5WbjF9Gv0HSsUQVfi9gmLwNuHI,1693
107
+ d9d/metric/impl/mean.py,sha256=Lan6eDNUK617OMk4p99pFA1rGe1f1YwL0e51MB0SoA0,2832
108
+ d9d/model_state/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
109
+ d9d/model_state/io/__init__.py,sha256=3RXRA5RLHS3hCQlvhRsy9q-KrGSNuh0r6kAppak0EA4,542
110
+ d9d/model_state/io/dto.py,sha256=NY_C8sU9QoeXJyRqiGYjl7W-pfUUYSPe1L_Z_dvctkE,704
111
+ d9d/model_state/io/module_reader.py,sha256=3-xOLPbdm20Ssm-E8quluCmL5Cwc70eWkUOYNe6bkB4,2779
112
+ d9d/model_state/io/module_writer.py,sha256=TG7kVb_YS90U7Hv7oHqqzYx8pp6q6ba1TTu3E5AHJK8,4373
113
+ d9d/model_state/io/reader.py,sha256=WACBaeVrezQUWU-xKqwIo_e08MeyEy-kr4anDTYQFMk,4546
114
+ d9d/model_state/io/writer.py,sha256=5TpK2EMepKbQ6hOy_stgV7j33OzafQWw6czVwCsmkyQ,11621
115
+ d9d/model_state/mapper/__init__.py,sha256=EVjhWQF8WLLtJp_Z7irGJomw5R0TYbA9x4_dttt0rVg,178
116
+ d9d/model_state/mapper/abc.py,sha256=yXI7FDblBWrcSJb-_rPWHRARJSbw2JrojQI_ENluBFg,2634
117
+ d9d/model_state/mapper/adapters/__init__.py,sha256=T5utmHgdzAVb-RcoJMN3U126EJb7CQX98uS5nCrnsrY,362
118
+ d9d/model_state/mapper/adapters/mapper.py,sha256=rqROLlPux0qzU078fvu8nJ67cooGwe0POOtAQSIzNss,1083
119
+ d9d/model_state/mapper/adapters/module.py,sha256=DM6VYVR97OWhfRUE1I7MaobPnF3pZErkkg-i9l7nS5c,775
120
+ d9d/model_state/mapper/compose/__init__.py,sha256=-i0nYcl3iUHTW7d6eErk9VMh33-GBYXVpMdYXC9wa8s,469
121
+ d9d/model_state/mapper/compose/helper.py,sha256=oPDJdJb5brqbWubVEnArE-UNJd572j33lqbyw3TrmlM,637
122
+ d9d/model_state/mapper/compose/parallel.py,sha256=2zU8fEGhOKprM6bQfjbYI0oB8FA2gC0mTwwyvhrur8Y,2234
123
+ d9d/model_state/mapper/compose/sequential.py,sha256=CNLFadwlvoPy0iZsI3Q4GKCq5weGkw5zRUtjqFnoNLI,5548
124
+ d9d/model_state/mapper/compose/shard.py,sha256=ah2ONYdpxmPBJS4OssSnAzb0Wki1YQdrHOUsgwB3UEs,1484
125
+ d9d/model_state/mapper/leaf/__init__.py,sha256=-S2w0jFXsSq5NiMUCO8w6PJAHO_IQpR2IAv10O7OoIQ,570
126
+ d9d/model_state/mapper/leaf/dtensor.py,sha256=uDYltxXprQJfE1e22V06dpVJfoxi5kBHDQpvd-Gzl0U,1861
127
+ d9d/model_state/mapper/leaf/identity.py,sha256=mZZAdurT42V6dy4964vCxEP4i-sXgiZfOBZOIyYKjTo,603
128
+ d9d/model_state/mapper/leaf/rename.py,sha256=obsPpw9Y1SOGW-DZUX2scxFC2mAs8V7ivVEh19YUihw,739
129
+ d9d/model_state/mapper/leaf/select_child.py,sha256=QxBft_g9WfBds_f8FTpFK6pLDMeumgBehhMDH6Kr3SQ,1161
130
+ d9d/model_state/mapper/leaf/stack.py,sha256=TZ3ulpahcJn3qbKErzCJOIEt65mOLooxCmqktZ7F3Qw,1017
131
+ d9d/module/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
132
+ d9d/module/base/__init__.py,sha256=vl05xyGyB-jQV_S50NvgR--jfThueRSl3vssLXRNh3U,178
133
+ d9d/module/base/late_init.py,sha256=-Re3QCZcJO6vzBxN7FTj5r1H0hzPX6_wyIgEud9s-eo,292
134
+ d9d/module/block/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
135
+ d9d/module/block/attention/__init__.py,sha256=hT32obgT6bRpUAJIFmDgfqtR5P9zwTS6UiiYL9v2hMk,141
136
+ d9d/module/block/attention/grouped_query.py,sha256=4lqZfQgLNt0GkQ2GueQ9SJTKuV76qT_skJ41coHmlls,4814
137
+ d9d/module/block/attention/sdpa/__init__.py,sha256=YijNzSFZedbMjGYGz0aE8AMriADadYffcjgP1oNbYzg,60
138
+ d9d/module/block/attention/sdpa/flash.py,sha256=0YHd0Mtd7-Hrx-VORNcdZVmMHwE_N8bkScSoxfCNlQY,1925
139
+ d9d/module/block/embedding/__init__.py,sha256=tNRPw0FMryV_SCRlf5YVhyTUGtqlezB1aBB0pwN0ZfM,163
140
+ d9d/module/block/embedding/shard_token_embedding.py,sha256=PeaW_ekbhsYxf03dzlzIoW7K8tsT5JSs4b3pNWtZ_Hc,3519
141
+ d9d/module/block/ffn/__init__.py,sha256=3WbxF0WP1Kp1Z5_qIVuYjmuDiNiiUNX3r3_rMstf8nQ,55
142
+ d9d/module/block/ffn/swiglu.py,sha256=bIAVs3mNOJo_vvCV9p9MkmLkY8UpHAqo68rkHJmYbP0,1598
143
+ d9d/module/block/head/__init__.py,sha256=9fhLQSXXOycz-dN2Pym-r9bzZCqsb1VTFBrS67BHMGc,147
144
+ d9d/module/block/head/language_modelling.py,sha256=TKOvxPIYAQqiVAzRMORI3e1zB0UNKj54hznvAblGfP4,3071
145
+ d9d/module/block/hidden_states_aggregator/__init__.py,sha256=oyi_9ZXn-0gkw6YQ8uzEyNOJupkuaimqbQkQUgKDHYc,305
146
+ d9d/module/block/hidden_states_aggregator/base.py,sha256=Ty1_UlOOV7DuIPbpcjGnzjxrMAWoEfsZQCeiySF7mIE,1261
147
+ d9d/module/block/hidden_states_aggregator/factory.py,sha256=fmar06v-oMwmo-zEdFpFDCGC5ic6x1LGEgyzVUO_e04,1511
148
+ d9d/module/block/hidden_states_aggregator/mean.py,sha256=MN9x2TMaGTHrAkft9Wsh3u8jF9nXgkArPXjcZBStM60,2125
149
+ d9d/module/block/hidden_states_aggregator/noop.py,sha256=dqqOZ-ZCfOYQJXU7rxUE-nLtPca9G_dcd2RACsqW6Dw,654
150
+ d9d/module/block/moe/__init__.py,sha256=oL0Li4raYSufCQY_3llP_RBYXOnQi3_cmoEib8YcqDE,310
151
+ d9d/module/block/moe/communications/__init__.py,sha256=cud1BcwcozsGENkQ70DtIPTXUE1G2o-5E_Okg8ffOC0,330
152
+ d9d/module/block/moe/communications/base.py,sha256=zZq6IBMG86Eo2j0EZZGgkMj3bJ3sj2rH-SszhbcqHeQ,2107
153
+ d9d/module/block/moe/communications/deepep.py,sha256=PRTCCEWnGT3u9XB_3V3cbupDGdhsrZkFk44sIrBOn-I,8809
154
+ d9d/module/block/moe/communications/naive.py,sha256=jQU8u9z8ooFiEN18Wc8uwqq505Fa9c18My5SP03vNAE,2196
155
+ d9d/module/block/moe/grouped_experts.py,sha256=0bMDGPyXINmDrcIbxJeAS8NwI0ZlUFfxRaQ7sg0Lq7Q,2782
156
+ d9d/module/block/moe/grouped_linear.py,sha256=eVMvwnholSL-m5lDrGfsK0kYcGYbbPqkuH80JAOao9k,2529
157
+ d9d/module/block/moe/layer.py,sha256=UhKuRLAFLtDyv9o7-9SxjEi_N_TaG9AkWITuPR6qqgI,4331
158
+ d9d/module/block/moe/router.py,sha256=7l7C2Hr3AINlOvyfVRA---fEXMT6IHptPX-w0eS_fxw,3291
159
+ d9d/module/block/positional/__init__.py,sha256=RsPCbuRgl3UW7Fow5dRf014MuwntakJuQiW-XezWYxw,236
160
+ d9d/module/block/positional/rope.py,sha256=SB9cCcUTQXQOjdebGcn3v7nWDgMU8vfyNiZQY4NyAbE,4954
161
+ d9d/module/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
+ d9d/module/model/qwen3_moe/__init__.py,sha256=dQbZpQYSFtoq0cSLnX4OHNTiEBDxEmmY9_BMUB9iXtQ,384
163
+ d9d/module/model/qwen3_moe/decoder_layer.py,sha256=Pu8J8qi3ZdYLQ5UG1LmkcBDRceSPJLFbQn9wIjiMAlI,3334
164
+ d9d/module/model/qwen3_moe/model.py,sha256=KHDX1XApNRcm-0hiNRhIKPwS-D5E2J9biiBVnVkRso4,14025
165
+ d9d/module/model/qwen3_moe/params.py,sha256=OlnDLVeUWCydkyCnrbzdGhQiuVY9YgQWSTmFTLoQjsg,2480
166
+ d9d/module/parallelism/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
167
+ d9d/module/parallelism/api/__init__.py,sha256=KCiPpSlL2yy0-KN1B-zex6v0oQo-ep7mXupKBKEkvtQ,558
168
+ d9d/module/parallelism/api/expert_parallel.py,sha256=tid3TXadzeUhPRfVcYMP7eynSq2oMSkzrA3gQ3LxfAw,1399
169
+ d9d/module/parallelism/api/fully_sharded.py,sha256=K0ttq70F6IF0dmvFGxR4PwcIb9Qpw7LKx5DkQQR2dw8,1643
170
+ d9d/module/parallelism/api/hybrid_sharded.py,sha256=230Rg8KG9SfOF9Iv17oW1GVsdajc5UEuJztmWvyNsKQ,1715
171
+ d9d/module/parallelism/api/replicate_parallel.py,sha256=HHMxLybsxtX7yaTx6ek1u8hU3Yo0p8yK7TGf4YBqpgk,1246
172
+ d9d/module/parallelism/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
173
+ d9d/module/parallelism/model/qwen3_moe.py,sha256=eKC9DkCMrnzemvcdryhkkZ3l5Q0Do3i2A9PhwvZqRhQ,3575
174
+ d9d/module/parallelism/style/__init__.py,sha256=ICUJJs6V-W3AWQ9gTY4fRDQHKzG18EzfaisSHEgHL1w,169
175
+ d9d/module/parallelism/style/shard_experts.py,sha256=wm8x7OVfRJl7oe8quQ3no3qu0VLyJsPUwXMkQohzJa0,2158
176
+ d9d/module/parallelism/style/to_local.py,sha256=xCwBKhbnybElPXhivf3qb92YddxWa5X55pGFfkrh4YE,3273
177
+ d9d/optim/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
178
+ d9d/optim/stochastic/__init__.py,sha256=H4sJvNTHs27gIJ8lR9h8AM0j6BrnPlVlgOi8LiDid4c,72
179
+ d9d/optim/stochastic/adamw.py,sha256=XNTUtm3Z4YAPqZFKJsD09cTWmKd0i5EnrJ-sC-1WnQc,5530
180
+ d9d/peft/__init__.py,sha256=XTgJcW9v4sQTkur7lcMZ0BkJaqWU9BruotImwK1fCss,322
181
+ d9d/peft/all/__init__.py,sha256=ID2lcBOD5-4W4sL7lsSM37psqxV4-EfsU2NXO3D6nWY,243
182
+ d9d/peft/all/config.py,sha256=YvCsWGDU2TRO0eIRk5-kTbX9V9DArob5KxxXK7HxGoc,765
183
+ d9d/peft/all/method.py,sha256=2IztonETLROCOTxt61nJ4knEwXRVcQxAi1ptgRn80BA,2117
184
+ d9d/peft/applicator.py,sha256=jrqEwKY9EbLwo_qZHaQhTubVDgv8okQmZdcd6PBS1BI,1393
185
+ d9d/peft/base.py,sha256=kPImZaZETnIhew3_MhbViYrXvZJG16uraqEDZpr26XI,1729
186
+ d9d/peft/full_tune/__init__.py,sha256=Z4U8Nbqt6kZJZqNlDV8kX-m2YW9QjLJ5dc0BxvpLru8,195
187
+ d9d/peft/full_tune/config.py,sha256=ZdYDFn1DVmKdibk502WvU9rdAmJj0NKopPzOzmhAJYM,494
188
+ d9d/peft/full_tune/method.py,sha256=Hut8VHMfX05RnmSS6lJzkSv49uB4BpwagQEus5KX4d4,1265
189
+ d9d/peft/lora/__init__.py,sha256=nr3R8uJlCkt2mmZYpFtWiBfRDdpUtaU9zx2r5Nl8NYo,294
190
+ d9d/peft/lora/config.py,sha256=KElokzzMJeh91SwRrxj-ZTzSLHF8ZVSb8ehNfYKFFpg,801
191
+ d9d/peft/lora/layer.py,sha256=7i3QGR4QveJ_Tk_yNl_QZuLzK-l1y9xp5O0N6DRtOrE,4990
192
+ d9d/peft/lora/method.py,sha256=psYYkSvL0telO2ULM8dtDTlf4H87lTtn_4hlB07M-iw,4123
193
+ d9d/pipelining/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
194
+ d9d/pipelining/api/__init__.py,sha256=6vwCq9PiJWkREzoM68yO-UySf2YEy6BK14oph3IOuHo,439
195
+ d9d/pipelining/api/module.py,sha256=WhDa4GTGoaL00XMoPAQB-XdjcmGiL_U0T9mTwRSMBN8,5280
196
+ d9d/pipelining/api/schedule.py,sha256=m6FCe1Yh2cOYGxShirq3HakDlRnQyAIwePA5-UU_xxE,1629
197
+ d9d/pipelining/api/sharding.py,sha256=XhaRarZOHf6nDqwnzLvz0SPh9VHi6kDiQkkx_gRNoHY,204
198
+ d9d/pipelining/factory/__init__.py,sha256=_tN9unIv9bHojC26MKt34W9ra-zMdGrfS0i0a4JyurU,602
199
+ d9d/pipelining/factory/config.py,sha256=MW8ARW0gPZGN9jsPPq-nTSFaGYMKPDwIBO06juy4DTE,2565
200
+ d9d/pipelining/factory/factory.py,sha256=E_eUyvYP6YXMzy8QdFUoBfpFUkA39eFk7zeYsidqGzU,3920
201
+ d9d/pipelining/factory/registry.py,sha256=k8nBSM2vlYE_fAo4pV7zzX9iqOopLJ0YugeieYwwU10,3163
202
+ d9d/pipelining/infra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
203
+ d9d/pipelining/infra/schedule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
204
+ d9d/pipelining/infra/schedule/component/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
205
+ d9d/pipelining/infra/schedule/component/program/__init__.py,sha256=c_FBMR7y2fo8Hbzxu23fBXgnfJ0PYXoFHnvS78tuPKo,572
206
+ d9d/pipelining/infra/schedule/component/program/base.py,sha256=tVJNJr6IWSg-qT72Nci39Z4eCX0ggO5oJcJocwJFOOk,1010
207
+ d9d/pipelining/infra/schedule/component/program/communications.py,sha256=2Q-4LCVWgB1TAdf1-GcxRTo8GZuh4ljlHyVusTl25Qo,6893
208
+ d9d/pipelining/infra/schedule/component/program/topology.py,sha256=4c47yx_9-tx37ySCzdK-DXpJi0e42xC_Fkdg2Z2cwQI,2364
209
+ d9d/pipelining/infra/schedule/component/runtime/__init__.py,sha256=XouJZVjEfekcoeRWyzjGHD0D8yRg90JaXVeqojRnJPI,633
210
+ d9d/pipelining/infra/schedule/component/runtime/action.py,sha256=8Rgtk5pZjXCbsjoJxhArkEFIeNzY1ZWvESCgLnA8Hyg,10733
211
+ d9d/pipelining/infra/schedule/component/runtime/communications.py,sha256=X1vSYnBHY4uBB0TvAApiSbdibJxxwNV-EZ3l_LXRnC4,3344
212
+ d9d/pipelining/infra/schedule/component/runtime/executor.py,sha256=QAbvdA6vfS909jririg_Rw_FK2H6aZUYyAILGOpGHU8,4392
213
+ d9d/pipelining/infra/schedule/component/runtime/loss.py,sha256=DGATscTXF2fKG7M2UiagpnVYvUI3DSmKKOMH1QLvH0k,1646
214
+ d9d/pipelining/infra/schedule/program/__init__.py,sha256=991C1CTiAoC93hxsZxTJoBLBFUa-9phcNIIH8SUw4vA,447
215
+ d9d/pipelining/infra/schedule/program/bfs.py,sha256=wiTGaFaUJ2O9nAB3qTtgH1AM8NZrZbF3v5bqDY5VQvw,3092
216
+ d9d/pipelining/infra/schedule/program/dualpipev.py,sha256=GDuNHmqjOjTHgee18EE4py_9p97pBg91BJDcYh5HlSo,7875
217
+ d9d/pipelining/infra/schedule/program/interleaved.py,sha256=6aGQsFTY_Jv3NU19lNenax_U4ji5pAO0dxXR_uRKfq0,8655
218
+ d9d/pipelining/infra/schedule/program/zerobubblev.py,sha256=eWEeUcp-XItGmFKAP4-TmVVXpLTkfXAz4Mfsm2NvJ7g,7219
219
+ d9d/pipelining/infra/stage/__init__.py,sha256=q_6-TTpeg55mSIhzM0aux5FSsB31SQfanDy6pau5Kwk,68
220
+ d9d/pipelining/infra/stage/communications.py,sha256=h72UrnTK15AaJC-rb00cKrTu1JL6ny1TycH9tmuAzlc,9865
221
+ d9d/pipelining/infra/stage/computations.py,sha256=F1LCCMAdqU7iXHnLGjbHn6WsU0aKAq7ctZb70W1GUXc,10426
222
+ d9d/pipelining/infra/stage/splitgrad.py,sha256=xJkbhy6uKDxVAUyW-LUQhJQ55DRRKe5_8_V0GC0eroc,13166
223
+ d9d/pipelining/infra/stage/stage.py,sha256=Ac7lAERDIUvnYNaY5UTGOOLMrVUd5-KSbxMSNRpShuE,11521
224
+ d9d/pipelining/infra/stage/struct_helper.py,sha256=y9gTbKUmmsDHO6YCXPSKAj_Xq-vg6yoqHEmRkLVpekE,1361
225
+ d9d/pipelining/training/__init__.py,sha256=mjxKtgXNU59QfgaxQrcahEG-kv6xwsNJBkBdq6FWJT0,154
226
+ d9d/pipelining/training/optimizer.py,sha256=7VZ4b8I8CC0DnW4h-QYEhrf9jZm5LFsYUpei1Q2lkR4,1322
227
+ d9d/pipelining/training/scheduler.py,sha256=QBDu4Z9P7Vqgt1R8AOJvhEISH4BChysjwtFRCZWyEfw,1147
228
+ d9d/tracker/__init__.py,sha256=KShifEoIeGYksmAMMxm8jcuyaw9WsX4OY0wuU0DlhdM,323
229
+ d9d/tracker/base.py,sha256=keS0UsgbffYT4mf8_LfZc-FueOsujePHdlBF4OIfPA4,3492
230
+ d9d/tracker/factory.py,sha256=RVE_NZu9Tn9sI_1QEl3b-DPRvikg-nxksOZ_AcxK6Yw,1660
231
+ d9d/tracker/provider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
232
+ d9d/tracker/provider/aim/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
233
+ d9d/tracker/provider/aim/config.py,sha256=7-Kgt2XhdNRhgxLYEWpB27uhgB-RBiPrGvH6S2L3rIM,672
234
+ d9d/tracker/provider/aim/tracker.py,sha256=g34BdjIYEftEwuK2oTKKzHLN3dpG7-i14zhXIF_onIo,3110
235
+ d9d/tracker/provider/null.py,sha256=c1nvUaOz8RbRY8XzwSPTi7t0lSsmdlwGAYfYgprwaf8,1440
236
+ d9d-0.1.0.dist-info/METADATA,sha256=NyAv5QdE_TtWzvNWcWormljU9O3DzQ8yAEp_8XZJv9s,5681
237
+ d9d-0.1.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
238
+ d9d-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 2.2.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any