omnigenome 0.3.0a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- omnigenome/__init__.py +29 -44
- omnigenome/auto/auto_bench/__init__.py +0 -1
- omnigenome/auto/auto_bench/auto_bench.py +24 -14
- omnigenome/auto/auto_train/__init__.py +0 -1
- omnigenome/auto/auto_train/auto_train.py +11 -12
- omnigenome/auto/bench_hub/__init__.py +0 -1
- omnigenome/auto/bench_hub/bench_hub.py +1 -1
- omnigenome/cli/__init__.py +0 -1
- omnigenome/cli/commands/__init__.py +0 -1
- omnigenome/cli/commands/base.py +10 -10
- omnigenome/cli/commands/bench/__init__.py +0 -1
- omnigenome/cli/commands/bench/bench_cli.py +10 -10
- omnigenome/cli/commands/rna/__init__.py +0 -1
- omnigenome/cli/commands/rna/rna_design.py +10 -11
- omnigenome/src/__init__.py +0 -1
- omnigenome/src/abc/__init__.py +0 -1
- omnigenome/src/abc/abstract_dataset.py +38 -19
- omnigenome/src/abc/abstract_metric.py +7 -7
- omnigenome/src/abc/abstract_model.py +15 -14
- omnigenome/src/abc/abstract_tokenizer.py +9 -7
- omnigenome/src/dataset/omni_dataset.py +16 -14
- omnigenome/src/lora/__init__.py +0 -1
- omnigenome/src/lora/lora_model.py +47 -41
- omnigenome/src/metric/classification_metric.py +11 -11
- omnigenome/src/metric/metric.py +19 -19
- omnigenome/src/metric/ranking_metric.py +15 -15
- omnigenome/src/metric/regression_metric.py +18 -18
- omnigenome/src/misc/utils.py +214 -150
- omnigenome/src/model/augmentation/__init__.py +0 -1
- omnigenome/src/model/augmentation/model.py +17 -17
- omnigenome/src/model/classification/__init__.py +0 -1
- omnigenome/src/model/classification/model.py +28 -32
- omnigenome/src/model/embedding/__init__.py +0 -1
- omnigenome/src/model/embedding/model.py +35 -35
- omnigenome/src/model/mlm/__init__.py +0 -1
- omnigenome/src/model/mlm/model.py +13 -13
- omnigenome/src/model/module_utils.py +17 -17
- omnigenome/src/model/regression/__init__.py +0 -1
- omnigenome/src/model/regression/model.py +72 -77
- omnigenome/src/model/regression/resnet.py +32 -32
- omnigenome/src/model/rna_design/__init__.py +0 -1
- omnigenome/src/model/rna_design/model.py +168 -118
- omnigenome/src/model/seq2seq/__init__.py +0 -1
- omnigenome/src/model/seq2seq/model.py +4 -4
- omnigenome/src/tokenizer/bpe_tokenizer.py +27 -27
- omnigenome/src/tokenizer/kmers_tokenizer.py +22 -22
- omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +11 -11
- omnigenome/src/trainer/accelerate_trainer.py +40 -32
- omnigenome/src/trainer/hf_trainer.py +8 -8
- omnigenome/src/trainer/trainer.py +37 -25
- omnigenome/utility/dataset_hub/__init__.py +0 -1
- omnigenome/utility/dataset_hub/dataset_hub.py +13 -13
- omnigenome/utility/ensemble.py +26 -26
- omnigenome/utility/hub_utils.py +8 -8
- omnigenome/utility/model_hub/__init__.py +0 -1
- omnigenome/utility/model_hub/model_hub.py +26 -25
- omnigenome/utility/pipeline_hub/__init__.py +0 -1
- omnigenome/utility/pipeline_hub/pipeline.py +49 -49
- omnigenome/utility/pipeline_hub/pipeline_hub.py +17 -17
- {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/METADATA +3 -3
- omnigenome-0.3.1a0.dist-info/RECORD +78 -0
- {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/top_level.txt +0 -1
- omnigenome-0.3.0a0.dist-info/RECORD +0 -85
- tests/__init__.py +0 -9
- tests/conftest.py +0 -160
- tests/test_dataset_patterns.py +0 -291
- tests/test_examples_syntax.py +0 -83
- tests/test_model_loading.py +0 -183
- tests/test_rna_functions.py +0 -255
- tests/test_training_patterns.py +0 -302
- {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/WHEEL +0 -0
- {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/entry_points.txt +0 -0
- {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/licenses/LICENSE +0 -0
omnigenome/src/model/regression/resnet.py (+32 -32; every hunk below is a whitespace-only change to a blank line inside a docstring)

@@ -23,14 +23,14 @@ from typing import Type, Callable, Union, List, Optional
 def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
     """
     3x3 convolution with padding.
-
+
     Args:
         in_planes (int): Number of input channels
         out_planes (int): Number of output channels
         stride (int): Stride for the convolution (default: 1)
         groups (int): Number of groups for grouped convolution (default: 1)
         dilation (int): Dilation factor for the convolution (default: 1)
-
+
     Returns:
         nn.Conv2d: 3x3 convolution layer
     """

@@ -49,12 +49,12 @@ def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
 def conv1x1(in_planes, out_planes, stride=1):
     """
     1x1 convolution.
-
+
     Args:
         in_planes (int): Number of input channels
         out_planes (int): Number of output channels
         stride (int): Stride for the convolution (default: 1)
-
+
     Returns:
         nn.Conv2d: 1x1 convolution layer
     """

@@ -64,14 +64,14 @@ def conv1x1(in_planes, out_planes, stride=1):
 def conv5x5(in_planes, out_planes, stride=1, groups=1, dilation=1):
     """
     5x5 convolution with padding.
-
+
     Args:
         in_planes (int): Number of input channels
         out_planes (int): Number of output channels
         stride (int): Stride for the convolution (default: 1)
         groups (int): Number of groups for grouped convolution (default: 1)
         dilation (int): Dilation factor for the convolution (default: 1)
-
+
     Returns:
         nn.Conv2d: 5x5 convolution layer
     """

@@ -90,10 +90,10 @@ def conv5x5(in_planes, out_planes, stride=1, groups=1, dilation=1):
 class BasicBlock(nn.Module):
     """
     Basic ResNet block for genomic sequence processing.
-
+
     This block implements a basic residual connection with two convolutions
     and is optimized for processing genomic sequence data with layer normalization.
-
+
     Attributes:
         expansion (int): Expansion factor for the block (default: 1)
         conv1: First 3x3 convolution layer

@@ -105,7 +105,7 @@ class BasicBlock(nn.Module):
         downsample: Downsampling layer for residual connection
         stride: Stride for the convolutions
     """
-
+
     expansion: int = 1

     def __init__(

@@ -121,7 +121,7 @@ class BasicBlock(nn.Module):
     ) -> None:
         """
         Initialize the BasicBlock.
-
+
         Args:
             inplanes (int): Number of input channels
             planes (int): Number of output channels

@@ -130,7 +130,7 @@ class BasicBlock(nn.Module):
             groups (int): Number of groups for grouped convolution (default: 1)
             dilation (int): Dilation factor for convolutions (default: 1)
             norm_layer: Normalization layer type (default: None, uses LayerNorm)
-
+
         Raises:
             NotImplementedError: If dilation > 1 is specified
         """

@@ -154,10 +154,10 @@ class BasicBlock(nn.Module):
     def forward(self, x: Tensor) -> Tensor:
         """
         Forward pass through the BasicBlock.
-
+
         Args:
             x (Tensor): Input tensor [batch_size, channels, height, width]
-
+
         Returns:
             Tensor: Output tensor with same shape as input
         """

@@ -188,11 +188,11 @@ class BasicBlock(nn.Module):
 class Bottleneck(nn.Module):
     """
     Bottleneck ResNet block for genomic sequence processing.
-
+
     This block implements a bottleneck residual connection with three convolutions
     (1x1, 3x3, 1x1) and is designed for deeper networks. It's adapted from
     the original ResNet V1.5 implementation.
-
+
     Attributes:
         expansion (int): Expansion factor for the block (default: 4)
         conv1: First 1x1 convolution layer

@@ -205,7 +205,7 @@ class Bottleneck(nn.Module):
         downsample: Downsampling layer for residual connection
         stride: Stride for the convolutions
     """
-
+
     # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
     # while original implementation places the stride at the first 1x1 convolution(self.conv1)
     # according to "Deep residual learning for image recognition" https://arxiv.org/abs/1512.03385.

@@ -227,7 +227,7 @@ class Bottleneck(nn.Module):
     ) -> None:
         """
         Initialize the Bottleneck block.
-
+
         Args:
             inplanes (int): Number of input channels
             planes (int): Number of output channels

@@ -256,10 +256,10 @@ class Bottleneck(nn.Module):
     def forward(self, x: Tensor) -> Tensor:
         """
         Forward pass through the Bottleneck block.
-
+
         Args:
             x (Tensor): Input tensor [batch_size, channels, height, width]
-
+
         Returns:
             Tensor: Output tensor with same shape as input
         """

@@ -288,11 +288,11 @@ class Bottleneck(nn.Module):
 class ResNet(nn.Module):
     """
     ResNet architecture adapted for genomic sequence analysis.
-
+
     This ResNet implementation is specifically designed for processing genomic
     sequences and their structural representations. It uses layer normalization
     instead of batch normalization and is optimized for genomic data characteristics.
-
+
     Attributes:
         _norm_layer: Normalization layer type
         inplanes: Number of input channels for the first layer

@@ -319,7 +319,7 @@ class ResNet(nn.Module):
     ) -> None:
         """
         Initialize the ResNet architecture.
-
+
         Args:
             channels (int): Number of input channels
             block: Type of ResNet block (BasicBlock or Bottleneck)

@@ -329,7 +329,7 @@ class ResNet(nn.Module):
             width_per_group (int): Width per group for bottleneck blocks (default: 1)
             replace_stride_with_dilation: Whether to replace stride with dilation (default: None)
             norm_layer: Normalization layer type (default: None, uses LayerNorm)
-
+
         Raises:
             ValueError: If replace_stride_with_dilation is not None or a 3-element tuple
         """

@@ -379,14 +379,14 @@ class ResNet(nn.Module):
     ) -> nn.Sequential:
         """
         Create a layer of ResNet blocks.
-
+
         Args:
             block: Type of ResNet block to use
             planes (int): Number of output channels for the layer
             blocks (int): Number of blocks in the layer
             stride (int): Stride for the first block (default: 1)
             dilate (bool): Whether to use dilation (default: False)
-
+
         Returns:
             nn.Sequential: Sequential container of ResNet blocks
         """

@@ -433,10 +433,10 @@ class ResNet(nn.Module):
    def _forward_impl(self, x: Tensor) -> Tensor:
         """
         Forward pass implementation.
-
+
         Args:
             x (Tensor): Input tensor [batch_size, channels, height, width]
-
+
         Returns:
             Tensor: Output tensor after processing through ResNet
         """

@@ -456,10 +456,10 @@ class ResNet(nn.Module):
     def forward(self, x: Tensor) -> Tensor:
         """
         Forward pass through the ResNet.
-
+
         Args:
             x (Tensor): Input tensor [batch_size, channels, height, width]
-
+
         Returns:
             Tensor: Output tensor after processing through ResNet
         """

@@ -469,14 +469,14 @@ class ResNet(nn.Module):
 def resnet_b16(channels=128, bbn=16):
     """
     Create a ResNet-B16 model for genomic sequence analysis.
-
+
     This function creates a ResNet model with 16 basic blocks, optimized
     for processing genomic sequences and their structural representations.
-
+
     Args:
         channels (int): Number of input channels (default: 128)
         bbn (int): Number of basic blocks (default: 16)
-
+
     Returns:
         ResNet: Configured ResNet model
     """
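The docstrings touched above describe the public surface of resnet.py: conv3x3/conv1x1/conv5x5 build nn.Conv2d layers, BasicBlock and Bottleneck are LayerNorm-based residual blocks, and resnet_b16(channels=128, bbn=16) assembles the full model. A minimal usage sketch follows, based only on what those docstrings state; the import path is taken from the wheel's file layout above, while the batch size and 32x32 spatial size are illustrative assumptions (the diff does not say which spatial sizes the model accepts).

import torch

# Assumed import path, matching the module location listed in the wheel.
from omnigenome.src.model.regression.resnet import resnet_b16

# Build the model with the documented defaults: 128 input channels,
# 16 basic blocks (per the resnet_b16 docstring in the diff above).
model = resnet_b16(channels=128, bbn=16)

# forward() documents an input of shape [batch_size, channels, height, width];
# the batch of 2 and the 32x32 spatial size here are arbitrary choices.
x = torch.randn(2, 128, 32, 32)
out = model(x)
print(out.shape)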