orbit-torch 0.0.4a1__py3-none-any.whl → 0.1.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. orbit/__init__.py +3 -1
  2. orbit/callback.py +4 -3
  3. orbit/dataset/__init__.py +1 -0
  4. orbit/dataset/cogn.py +138 -0
  5. orbit/dataset/data/cogn_en.jsonl +45 -0
  6. orbit/dataset/data/cogn_zh.jsonl +113 -0
  7. orbit/engine.py +210 -146
  8. orbit/kit/__init__.py +2 -0
  9. orbit/kit/interface.py +154 -0
  10. orbit/kit/wrapper.py +157 -0
  11. orbit/model/__init__.py +5 -0
  12. orbit/model/base.py +125 -0
  13. orbit/model/block/__init__.py +34 -0
  14. orbit/model/block/attention.py +265 -0
  15. orbit/model/block/bio.py +537 -0
  16. orbit/model/block/codebook.py +122 -0
  17. orbit/model/block/conv.py +505 -0
  18. orbit/model/block/embedding.py +252 -0
  19. orbit/model/block/film.py +176 -0
  20. orbit/model/block/fusion.py +335 -0
  21. orbit/model/block/gate.py +334 -0
  22. orbit/model/block/lora.py +776 -0
  23. orbit/model/block/mlp.py +68 -0
  24. orbit/model/block/moe.py +94 -0
  25. orbit/model/block/tcn.py +99 -0
  26. orbit/model/config.py +62 -0
  27. orbit/model/kit/__init__.py +6 -0
  28. orbit/model/kit/discriminator.py +46 -0
  29. orbit/model/kit/losses.py +193 -0
  30. orbit/model/motif/__init__.py +0 -0
  31. orbit/model/motif/vision/__init__.py +0 -0
  32. orbit/model/motif/vision/v1.py +645 -0
  33. orbit/model/registry.py +53 -0
  34. orbit/optim/__init__.py +2 -2
  35. orbit/optim/sam.py +10 -3
  36. orbit/plugin/__init__.py +12 -8
  37. orbit/plugin/board.py +1 -2
  38. orbit/plugin/checkpoint.py +137 -62
  39. orbit/plugin/classification.py +2 -2
  40. orbit/plugin/display_model.py +1 -2
  41. orbit/plugin/early_stopping.py +1 -2
  42. orbit/plugin/ema.py +1 -2
  43. orbit/plugin/gradient_accumulation.py +1 -2
  44. orbit/plugin/lora.py +346 -0
  45. orbit/plugin/memory_estimator.py +1 -2
  46. orbit/plugin/warmup.py +1 -2
  47. orbit/utils/__init__.py +24 -1
  48. orbit/utils/cuda.py +10 -0
  49. orbit/utils/freeze.py +61 -17
  50. orbit/utils/image.py +164 -0
  51. orbit/utils/initialization.py +184 -94
  52. orbit/utils/layer_io.py +66 -7
  53. orbit/utils/lora.py +480 -0
  54. orbit/utils/moe.py +55 -0
  55. orbit/utils/seed.py +3 -19
  56. orbit/utils/sft.py +93 -0
  57. orbit_torch-0.1.0b1.dist-info/METADATA +208 -0
  58. orbit_torch-0.1.0b1.dist-info/RECORD +65 -0
  59. orbit_torch-0.0.4a1.dist-info/METADATA +0 -25
  60. orbit_torch-0.0.4a1.dist-info/RECORD +0 -29
  61. {orbit_torch-0.0.4a1.dist-info → orbit_torch-0.1.0b1.dist-info}/WHEEL +0 -0
  62. {orbit_torch-0.0.4a1.dist-info → orbit_torch-0.1.0b1.dist-info}/top_level.txt +0 -0
orbit/model/block/conv.py (new file)
@@ -0,0 +1,505 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import math
+ from typing import Tuple, Union
+
+ from orbit.model import BaseBlock, register_model
+
+
+ def calculate_causal_layer(step: int, kernel_size: int = 3) -> Tuple[int, int]:
+     '''
+     Compute the number of layers and the receptive field required for a causal convolution stack.
+
+     Args:
+         step (int): Target sequence length or number of time steps.
+         kernel_size (int, optional): Convolution kernel size. Defaults to 3.
+
+     Returns:
+         tuple[int, int]:
+             - L (int): Number of layers required.
+             - R (int): Resulting receptive field size.
+
+     Raises:
+         ValueError: If kernel_size <= 1.
+     '''
+     if kernel_size <= 1:
+         raise ValueError('kernel_size must be greater than 1')
+     L = math.ceil(math.log2((step - 1) / (kernel_size - 1) + 1))
+     R = 1 + (kernel_size - 1) * (2 ** L - 1)
+     return int(L), R
+
+
+ @register_model()
+ class CausalConv1d(BaseBlock):
+     '''
+     Causal 1D convolution layer.
+
+     Implemented with dilated convolution and causal padding, so the output at each
+     time step depends only on the current and past inputs, never on the future.
+     Commonly used for sequence modeling and waveform generation (e.g. WaveNet).
+     '''
+
+     def __init__(
+         self,
+         in_channels: int,
+         out_channels: int,
+         kernel_size: int = 3,
+         dilation: int = 1,
+         norm: str = None,
+         activation: str = 'leaky_relu',
+         leaky_relu: float = 0.1,
+         use_res: bool = True,
+         dropout: float = 0.2
+     ):
+         '''
+         Initialize the CausalConv1d module.
+
+         Args:
+             in_channels (int): Number of input channels.
+             out_channels (int): Number of output channels.
+             kernel_size (int, optional): Convolution kernel size. Defaults to 3.
+             dilation (int, optional): Dilation factor. Defaults to 1.
+             norm (str, optional): Normalization type. Defaults to None.
+             activation (str, optional): Activation type. Defaults to 'leaky_relu'.
+             leaky_relu (float, optional): Negative slope for LeakyReLU (only used when activation='leaky_relu'). Defaults to 0.1.
+             use_res (bool, optional): Whether to use a residual connection. Defaults to True.
+             dropout (float, optional): Dropout probability. Defaults to 0.2.
+         '''
+         super(CausalConv1d, self).__init__()
+
+         self.padding = (kernel_size - 1) * dilation
+         self.use_res = use_res
+
+         self.block = ConvBlock(
+             in_channels=in_channels,
+             out_channels=out_channels,
+             kernel_size=kernel_size,
+             stride=1,
+             padding=0,
+             dilation=dilation,
+             dim=1,
+             norm=norm,
+             activation=activation,
+             dropout=dropout
+         )
+
+         if activation == 'leaky_relu' and isinstance(self.block.act, nn.LeakyReLU) and leaky_relu != 0.1:
+             self.block.act = nn.LeakyReLU(leaky_relu, inplace=True)
+
+         self.block.conv = nn.utils.parametrizations.weight_norm(self.block.conv)
+
+         self.downsample = None
+         if use_res and in_channels != out_channels:
+             self.downsample = ConvBlock(
+                 in_channels=in_channels,
+                 out_channels=out_channels,
+                 kernel_size=1,
+                 padding=0,
+                 dim=1,
+                 norm=None,
+                 activation=None,
+                 dropout=0.0
+             )
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         '''
+         Forward pass.
+
+         Args:
+             x (torch.Tensor): Input tensor. Shape: [Batch, in_channels, Seq_Len]
+
+         Returns:
+             torch.Tensor: Output tensor. Shape: [Batch, out_channels, Seq_Len]
+         '''
+         residual = x
+         x = F.pad(x, (self.padding, 0))
+         x = self.block(x)
+
+         if self.use_res:
+             if self.downsample is not None:
+                 residual = self.downsample(residual)
+             x = x + residual
+
+         return x
+
+     @staticmethod
+     def auto_block(
+         in_channels: int,
+         out_channels: int,
+         step: int,
+         kernel_size: int = 3,
+         norm: str = None,
+         activation: str = 'leaky_relu',
+         leaky_relu: float = 0.1,
+         use_res: bool = True,
+         dropout: float = 0.2
+     ) -> nn.Sequential:
+         '''
+         Automatically build a multi-layer causal convolution stack covering the given number of time steps.
+
+         The required number of layers and dilation factors are derived from the target step,
+         and an nn.Sequential model is built. The dilation factor grows exponentially with
+         depth (1, 2, 4, 8, ...).
+
+         Args:
+             in_channels (int): Number of input channels.
+             out_channels (int): Number of output channels.
+             step (int): Number of time steps (receptive field) to cover.
+             kernel_size (int, optional): Convolution kernel size. Defaults to 3.
+             norm (str, optional): Normalization type. Defaults to None.
+             activation (str, optional): Activation type. Defaults to 'leaky_relu'.
+             leaky_relu (float, optional): Negative slope for LeakyReLU. Defaults to 0.1.
+             use_res (bool, optional): Whether to use residual connections. Defaults to True.
+             dropout (float, optional): Dropout probability. Defaults to 0.2.
+
+         Returns:
+             nn.Sequential: Sequential model containing multiple CausalConv1d layers.
+         '''
+         layers, _ = calculate_causal_layer(step, kernel_size)
+         model = []
+         for i in range(layers):
+             dilation = 2 ** i
+             in_ch = in_channels if i == 0 else out_channels
+
+             model.append(CausalConv1d(
+                 in_channels=in_ch,
+                 out_channels=out_channels,
+                 kernel_size=kernel_size,
+                 dilation=dilation,
+                 norm=norm,
+                 activation=activation,
+                 leaky_relu=leaky_relu,
+                 use_res=use_res,
+                 dropout=dropout
+             ))
+
+         return nn.Sequential(*model)
+
+
+ @register_model()
+ class ConvBlock(BaseBlock):
+     '''
+     Generic convolution block (Conv-Norm-Act-Dropout).
+
+     Supports 1D, 2D, and 3D convolutions, along with several normalization and activation options.
+     '''
+
+     def __init__(
+         self,
+         in_channels: int,
+         out_channels: int,
+         kernel_size: Union[int, Tuple[int, ...]] = 3,
+         stride: Union[int, Tuple[int, ...]] = 1,
+         padding: Union[int, Tuple[int, ...], str] = 0,
+         dilation: Union[int, Tuple[int, ...]] = 1,
+         groups: int = 1,
+         bias: bool = True,
+         dim: int = 2,
+         norm: str = 'batch',
+         activation: str = 'relu',
+         dropout: float = 0.0,
+         pre_norm: bool = False,
+     ):
+         '''
+         Initialize the ConvBlock.
+
+         Args:
+             in_channels (int): Number of input channels.
+             out_channels (int): Number of output channels.
+             kernel_size (Union[int, Tuple[int, ...]], optional): Convolution kernel size. Defaults to 3.
+             stride (Union[int, Tuple[int, ...]], optional): Stride. Defaults to 1.
+             padding (Union[int, Tuple[int, ...], str], optional): Padding. Defaults to 0.
+             dilation (Union[int, Tuple[int, ...]], optional): Dilation factor. Defaults to 1.
+             groups (int, optional): Number of groups for grouped convolution. Defaults to 1.
+             bias (bool, optional): Whether to use a bias term. Defaults to True.
+             dim (int, optional): Convolution dimensionality (1, 2, or 3). Defaults to 2.
+             norm (str, optional): Normalization type ('batch', 'group', 'layer', 'instance', None). Defaults to 'batch'.
+             activation (str, optional): Activation type ('relu', 'leaky_relu', 'gelu', 'silu', 'tanh', 'sigmoid', None). Defaults to 'relu'.
+             dropout (float, optional): Dropout probability. Defaults to 0.0.
+             pre_norm (bool, optional): Whether to use the pre-norm (Norm-Act-Conv) ordering. Defaults to False (Conv-Norm-Act).
+         '''
+         super(ConvBlock, self).__init__()
+
+         self.pre_norm = pre_norm
+
+         if dim == 1:
+             conv_class = nn.Conv1d
+         elif dim == 2:
+             conv_class = nn.Conv2d
+         elif dim == 3:
+             conv_class = nn.Conv3d
+         else:
+             raise ValueError(f'Unsupported dimension: {dim}')
+
+         self.conv = conv_class(
+             in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias=bias
+         )
+
+         self.norm = None
+         if norm is not None:
+             norm_channels = in_channels if pre_norm else out_channels
+             if norm == 'batch':
+                 if dim == 1: self.norm = nn.BatchNorm1d(norm_channels)
+                 elif dim == 2: self.norm = nn.BatchNorm2d(norm_channels)
+                 elif dim == 3: self.norm = nn.BatchNorm3d(norm_channels)
+             elif norm == 'group':
+                 num_groups = 32 if norm_channels % 32 == 0 else min(norm_channels, 8)
+                 self.norm = nn.GroupNorm(num_groups, norm_channels)
+             elif norm == 'layer':
+                 self.norm = nn.GroupNorm(1, norm_channels)
+             elif norm == 'instance':
+                 if dim == 1: self.norm = nn.InstanceNorm1d(norm_channels)
+                 elif dim == 2: self.norm = nn.InstanceNorm2d(norm_channels)
+                 elif dim == 3: self.norm = nn.InstanceNorm3d(norm_channels)
+             else:
+                 raise ValueError(f'Unsupported normalization: {norm}')
+
+         self.act = None
+         if activation is not None:
+             if activation == 'relu':
+                 self.act = nn.ReLU(inplace=True)
+             elif activation == 'leaky_relu':
+                 self.act = nn.LeakyReLU(0.1, inplace=True)
+             elif activation == 'gelu':
+                 self.act = nn.GELU()
+             elif activation == 'silu':
+                 self.act = nn.SiLU(inplace=True)
+             elif activation == 'tanh':
+                 self.act = nn.Tanh()
+             elif activation == 'sigmoid':
+                 self.act = nn.Sigmoid()
+             else:
+                 raise ValueError(f'Unsupported activation: {activation}')
+
+         self.dropout = nn.Dropout(dropout) if dropout > 0 else None
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         '''
+         Forward pass.
+
+         Args:
+             x (torch.Tensor): Input tensor.
+
+         Returns:
+             torch.Tensor: Output tensor.
+         '''
+         if self.pre_norm:
+             if self.norm: x = self.norm(x)
+             if self.act: x = self.act(x)
+             x = self.conv(x)
+             if self.dropout: x = self.dropout(x)
+         else:
+             x = self.conv(x)
+             if self.norm: x = self.norm(x)
+             if self.act: x = self.act(x)
+             if self.dropout: x = self.dropout(x)
+
+         return x
+
+
+ @register_model()
+ class DepthwiseSeparableConv(BaseBlock):
+     '''
+     Depthwise separable convolution block.
+
+     Consists of a depthwise (per-channel) convolution followed by a pointwise (1x1) convolution.
+     Greatly reduces parameter count and compute while preserving feature-extraction capacity.
+     '''
+
+     def __init__(
+         self,
+         in_channels: int,
+         out_channels: int,
+         kernel_size: Union[int, Tuple[int, ...]] = 3,
+         stride: Union[int, Tuple[int, ...]] = 1,
+         padding: Union[int, Tuple[int, ...], str] = 1,
+         dilation: Union[int, Tuple[int, ...]] = 1,
+         bias: bool = False,
+         dim: int = 2,
+         norm: str = 'batch',
+         activation: str = 'relu',
+         dropout: float = 0.0,
+         use_res: bool = True,
+     ):
+         '''
+         Initialize the DepthwiseSeparableConv.
+
+         Args:
+             in_channels (int): Number of input channels.
+             out_channels (int): Number of output channels.
+             kernel_size: Convolution kernel size.
+             stride: Stride.
+             padding: Padding.
+             dilation: Dilation factor.
+             bias (bool): Whether to use a bias term.
+             dim (int): Convolution dimensionality (1, 2, or 3).
+             norm (str): Normalization type.
+             activation (str): Activation type.
+             dropout (float): Dropout probability.
+             use_res (bool): Whether to use a residual connection when the input and output channels match and stride is 1.
+         '''
+         super(DepthwiseSeparableConv, self).__init__()
+
+         self.depthwise = ConvBlock(
+             in_channels=in_channels,
+             out_channels=in_channels,
+             kernel_size=kernel_size,
+             stride=stride,
+             padding=padding,
+             dilation=dilation,
+             groups=in_channels,
+             bias=bias,
+             dim=dim,
+             norm=norm,
+             activation=activation,
+             dropout=0.0
+         )
+
+         self.pointwise = ConvBlock(
+             in_channels=in_channels,
+             out_channels=out_channels,
+             kernel_size=1,
+             stride=1,
+             padding=0,
+             bias=bias,
+             dim=dim,
+             norm=norm,
+             activation=activation,
+             dropout=dropout
+         )
+
+         self.use_res = use_res and (in_channels == out_channels) and (stride == 1)
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         '''
+         Forward pass.
+         '''
+         identity = x
+
+         out = self.depthwise(x)
+         out = self.pointwise(out)
+
+         if self.use_res:
+             out += identity
+
+         return out
+
+
+ @register_model()
+ class ResBasicBlock(BaseBlock):
+     '''
+     Residual basic block.
+
+     Composed of two convolution layers; supports both the standard ResNet and the pre-activation ResNet variants.
+     '''
+
+     def __init__(
+         self,
+         in_channels: int,
+         out_channels: int,
+         kernel_size: Union[int, Tuple[int, ...]] = 3,
+         stride: Union[int, Tuple[int, ...]] = 1,
+         padding: Union[int, Tuple[int, ...], str] = 1,
+         dilation: Union[int, Tuple[int, ...]] = 1,
+         groups: int = 1,
+         bias: bool = False,
+         dim: int = 2,
+         norm: str = 'batch',
+         activation: str = 'relu',
+         dropout: float = 0.0,
+         variant: str = 'original',
+     ):
+         '''
+         Initialize the ResBasicBlock.
+
+         Args:
+             in_channels (int): Number of input channels.
+             out_channels (int): Number of output channels.
+             kernel_size (Union[int, Tuple[int, ...]], optional): Convolution kernel size. Defaults to 3.
+             stride (Union[int, Tuple[int, ...]], optional): Stride. Defaults to 1.
+             padding (Union[int, Tuple[int, ...], str], optional): Padding. Defaults to 1.
+             dilation (Union[int, Tuple[int, ...]], optional): Dilation factor. Defaults to 1.
+             groups (int, optional): Number of groups for grouped convolution. Defaults to 1.
+             bias (bool, optional): Whether to use a bias term. Defaults to False.
+             dim (int, optional): Convolution dimensionality (1, 2, or 3). Defaults to 2.
+             norm (str, optional): Normalization type. Defaults to 'batch'.
+             activation (str, optional): Activation type. Defaults to 'relu'.
+             dropout (float, optional): Dropout probability. Defaults to 0.0.
+             variant (str, optional): Block variant ('original', 'pre_act'). Defaults to 'original'.
+         '''
+         super(ResBasicBlock, self).__init__()
+
+         self.variant = variant
+         self.activation = activation
+
+         self.act = None
+         if variant == 'original' and activation is not None:
+             if activation == 'relu':
+                 self.act = nn.ReLU(inplace=True)
+             elif activation == 'leaky_relu':
+                 self.act = nn.LeakyReLU(0.1, inplace=True)
+             elif activation == 'gelu':
+                 self.act = nn.GELU()
+             elif activation == 'silu':
+                 self.act = nn.SiLU(inplace=True)
+             elif activation == 'tanh':
+                 self.act = nn.Tanh()
+             elif activation == 'sigmoid':
+                 self.act = nn.Sigmoid()
+
+         if variant == 'original':
+             # Conv1: Conv-Norm-Act
+             self.conv1 = ConvBlock(
+                 in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias, dim, norm, activation, dropout, pre_norm=False
+             )
+             # Conv2: Conv-Norm
+             self.conv2 = ConvBlock(
+                 out_channels, out_channels, kernel_size, 1, padding, dilation, groups, bias, dim, norm, activation=None, dropout=dropout, pre_norm=False
+             )
+         elif variant == 'pre_act':
+             # Conv1: Norm-Act-Conv
+             self.conv1 = ConvBlock(
+                 in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias, dim, norm, activation, dropout, pre_norm=True
+             )
+             # Conv2: Norm-Act-Conv
+             self.conv2 = ConvBlock(
+                 out_channels, out_channels, kernel_size, 1, padding, dilation, groups, bias, dim, norm, activation, dropout, pre_norm=True
+             )
+         else:
+             raise ValueError(f'Unsupported variant: {variant}')
+
+         self.downsample = None
+         if stride != 1 or in_channels != out_channels:
+             if variant == 'original':
+                 self.downsample = ConvBlock(
+                     in_channels, out_channels, kernel_size=1, stride=stride, padding=0, dim=dim, norm=norm, activation=None, bias=bias, pre_norm=False
+                 )
+             elif variant == 'pre_act':
+                 self.downsample = ConvBlock(
+                     in_channels, out_channels, kernel_size=1, stride=stride, padding=0, dim=dim, norm=None, activation=None, bias=bias, pre_norm=False
+                 )
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         '''
+         Forward pass.
+
+         Args:
+             x (torch.Tensor): Input tensor.
+
+         Returns:
+             torch.Tensor: Output tensor.
+         '''
+         identity = x
+
+         out = self.conv1(x)
+         out = self.conv2(out)
+
+         if self.downsample is not None:
+             identity = self.downsample(x)
+
+         out += identity
+
+         if self.variant == 'original' and self.act is not None:
+             out = self.act(out)
+
+         return out
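
For orientation, the sketch below shows how the blocks added in conv.py might be used. It is a minimal example, not part of the package: it assumes the classes are imported directly from orbit.model.block.conv (the new orbit/model/block/__init__.py may re-export them under a shorter path, which this diff does not show), and that BaseBlock subclasses can be instantiated like ordinary nn.Module objects.

import torch
from orbit.model.block.conv import CausalConv1d, ResBasicBlock, calculate_causal_layer

# Receptive-field arithmetic: with kernel_size=3, covering 128 steps needs
# L = ceil(log2((128 - 1) / 2 + 1)) = 7 layers, giving R = 1 + 2 * (2**7 - 1) = 255.
layers, receptive_field = calculate_causal_layer(step=128, kernel_size=3)

# Dilated causal stack (dilations 1, 2, 4, ...) whose receptive field covers 128 steps.
tcn = CausalConv1d.auto_block(in_channels=16, out_channels=32, step=128)
wave = torch.randn(4, 16, 128)               # [batch, channels, seq_len]
print(tcn(wave).shape)                       # torch.Size([4, 32, 128]); causal padding keeps seq_len

# Pre-activation residual block with a 1x1 projection shortcut (stride 2, channel change).
block = ResBasicBlock(in_channels=64, out_channels=128, stride=2, variant='pre_act')
image = torch.randn(2, 64, 56, 56)
print(block(image).shape)                    # torch.Size([2, 128, 28, 28])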