omnigenome 0.3.0a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. omnigenome/__init__.py +29 -44
  2. omnigenome/auto/auto_bench/__init__.py +0 -1
  3. omnigenome/auto/auto_bench/auto_bench.py +24 -14
  4. omnigenome/auto/auto_train/__init__.py +0 -1
  5. omnigenome/auto/auto_train/auto_train.py +11 -12
  6. omnigenome/auto/bench_hub/__init__.py +0 -1
  7. omnigenome/auto/bench_hub/bench_hub.py +1 -1
  8. omnigenome/cli/__init__.py +0 -1
  9. omnigenome/cli/commands/__init__.py +0 -1
  10. omnigenome/cli/commands/base.py +10 -10
  11. omnigenome/cli/commands/bench/__init__.py +0 -1
  12. omnigenome/cli/commands/bench/bench_cli.py +10 -10
  13. omnigenome/cli/commands/rna/__init__.py +0 -1
  14. omnigenome/cli/commands/rna/rna_design.py +10 -11
  15. omnigenome/src/__init__.py +0 -1
  16. omnigenome/src/abc/__init__.py +0 -1
  17. omnigenome/src/abc/abstract_dataset.py +38 -19
  18. omnigenome/src/abc/abstract_metric.py +7 -7
  19. omnigenome/src/abc/abstract_model.py +15 -14
  20. omnigenome/src/abc/abstract_tokenizer.py +9 -7
  21. omnigenome/src/dataset/omni_dataset.py +16 -14
  22. omnigenome/src/lora/__init__.py +0 -1
  23. omnigenome/src/lora/lora_model.py +47 -41
  24. omnigenome/src/metric/classification_metric.py +11 -11
  25. omnigenome/src/metric/metric.py +19 -19
  26. omnigenome/src/metric/ranking_metric.py +15 -15
  27. omnigenome/src/metric/regression_metric.py +18 -18
  28. omnigenome/src/misc/utils.py +214 -150
  29. omnigenome/src/model/augmentation/__init__.py +0 -1
  30. omnigenome/src/model/augmentation/model.py +17 -17
  31. omnigenome/src/model/classification/__init__.py +0 -1
  32. omnigenome/src/model/classification/model.py +28 -32
  33. omnigenome/src/model/embedding/__init__.py +0 -1
  34. omnigenome/src/model/embedding/model.py +35 -35
  35. omnigenome/src/model/mlm/__init__.py +0 -1
  36. omnigenome/src/model/mlm/model.py +13 -13
  37. omnigenome/src/model/module_utils.py +17 -17
  38. omnigenome/src/model/regression/__init__.py +0 -1
  39. omnigenome/src/model/regression/model.py +72 -77
  40. omnigenome/src/model/regression/resnet.py +32 -32
  41. omnigenome/src/model/rna_design/__init__.py +0 -1
  42. omnigenome/src/model/rna_design/model.py +168 -118
  43. omnigenome/src/model/seq2seq/__init__.py +0 -1
  44. omnigenome/src/model/seq2seq/model.py +4 -4
  45. omnigenome/src/tokenizer/bpe_tokenizer.py +27 -27
  46. omnigenome/src/tokenizer/kmers_tokenizer.py +22 -22
  47. omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +11 -11
  48. omnigenome/src/trainer/accelerate_trainer.py +40 -32
  49. omnigenome/src/trainer/hf_trainer.py +8 -8
  50. omnigenome/src/trainer/trainer.py +37 -25
  51. omnigenome/utility/dataset_hub/__init__.py +0 -1
  52. omnigenome/utility/dataset_hub/dataset_hub.py +13 -13
  53. omnigenome/utility/ensemble.py +26 -26
  54. omnigenome/utility/hub_utils.py +8 -8
  55. omnigenome/utility/model_hub/__init__.py +0 -1
  56. omnigenome/utility/model_hub/model_hub.py +26 -25
  57. omnigenome/utility/pipeline_hub/__init__.py +0 -1
  58. omnigenome/utility/pipeline_hub/pipeline.py +49 -49
  59. omnigenome/utility/pipeline_hub/pipeline_hub.py +17 -17
  60. {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/METADATA +3 -3
  61. omnigenome-0.3.1a0.dist-info/RECORD +78 -0
  62. {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/top_level.txt +0 -1
  63. omnigenome-0.3.0a0.dist-info/RECORD +0 -85
  64. tests/__init__.py +0 -9
  65. tests/conftest.py +0 -160
  66. tests/test_dataset_patterns.py +0 -291
  67. tests/test_examples_syntax.py +0 -83
  68. tests/test_model_loading.py +0 -183
  69. tests/test_rna_functions.py +0 -255
  70. tests/test_training_patterns.py +0 -302
  71. {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/WHEEL +0 -0
  72. {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/entry_points.txt +0 -0
  73. {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/licenses/LICENSE +0 -0
@@ -23,14 +23,14 @@ from typing import Type, Callable, Union, List, Optional
23
23
  def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
24
24
  """
25
25
  3x3 convolution with padding.
26
-
26
+
27
27
  Args:
28
28
  in_planes (int): Number of input channels
29
29
  out_planes (int): Number of output channels
30
30
  stride (int): Stride for the convolution (default: 1)
31
31
  groups (int): Number of groups for grouped convolution (default: 1)
32
32
  dilation (int): Dilation factor for the convolution (default: 1)
33
-
33
+
34
34
  Returns:
35
35
  nn.Conv2d: 3x3 convolution layer
36
36
  """
@@ -49,12 +49,12 @@ def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
49
49
  def conv1x1(in_planes, out_planes, stride=1):
50
50
  """
51
51
  1x1 convolution.
52
-
52
+
53
53
  Args:
54
54
  in_planes (int): Number of input channels
55
55
  out_planes (int): Number of output channels
56
56
  stride (int): Stride for the convolution (default: 1)
57
-
57
+
58
58
  Returns:
59
59
  nn.Conv2d: 1x1 convolution layer
60
60
  """
@@ -64,14 +64,14 @@ def conv1x1(in_planes, out_planes, stride=1):
64
64
  def conv5x5(in_planes, out_planes, stride=1, groups=1, dilation=1):
65
65
  """
66
66
  5x5 convolution with padding.
67
-
67
+
68
68
  Args:
69
69
  in_planes (int): Number of input channels
70
70
  out_planes (int): Number of output channels
71
71
  stride (int): Stride for the convolution (default: 1)
72
72
  groups (int): Number of groups for grouped convolution (default: 1)
73
73
  dilation (int): Dilation factor for the convolution (default: 1)
74
-
74
+
75
75
  Returns:
76
76
  nn.Conv2d: 5x5 convolution layer
77
77
  """
@@ -90,10 +90,10 @@ def conv5x5(in_planes, out_planes, stride=1, groups=1, dilation=1):
90
90
  class BasicBlock(nn.Module):
91
91
  """
92
92
  Basic ResNet block for genomic sequence processing.
93
-
93
+
94
94
  This block implements a basic residual connection with two convolutions
95
95
  and is optimized for processing genomic sequence data with layer normalization.
96
-
96
+
97
97
  Attributes:
98
98
  expansion (int): Expansion factor for the block (default: 1)
99
99
  conv1: First 3x3 convolution layer
@@ -105,7 +105,7 @@ class BasicBlock(nn.Module):
105
105
  downsample: Downsampling layer for residual connection
106
106
  stride: Stride for the convolutions
107
107
  """
108
-
108
+
109
109
  expansion: int = 1
110
110
 
111
111
  def __init__(
@@ -121,7 +121,7 @@ class BasicBlock(nn.Module):
121
121
  ) -> None:
122
122
  """
123
123
  Initialize the BasicBlock.
124
-
124
+
125
125
  Args:
126
126
  inplanes (int): Number of input channels
127
127
  planes (int): Number of output channels
@@ -130,7 +130,7 @@ class BasicBlock(nn.Module):
130
130
  groups (int): Number of groups for grouped convolution (default: 1)
131
131
  dilation (int): Dilation factor for convolutions (default: 1)
132
132
  norm_layer: Normalization layer type (default: None, uses LayerNorm)
133
-
133
+
134
134
  Raises:
135
135
  NotImplementedError: If dilation > 1 is specified
136
136
  """
@@ -154,10 +154,10 @@ class BasicBlock(nn.Module):
154
154
  def forward(self, x: Tensor) -> Tensor:
155
155
  """
156
156
  Forward pass through the BasicBlock.
157
-
157
+
158
158
  Args:
159
159
  x (Tensor): Input tensor [batch_size, channels, height, width]
160
-
160
+
161
161
  Returns:
162
162
  Tensor: Output tensor with same shape as input
163
163
  """
@@ -188,11 +188,11 @@ class BasicBlock(nn.Module):
188
188
  class Bottleneck(nn.Module):
189
189
  """
190
190
  Bottleneck ResNet block for genomic sequence processing.
191
-
191
+
192
192
  This block implements a bottleneck residual connection with three convolutions
193
193
  (1x1, 3x3, 1x1) and is designed for deeper networks. It's adapted from
194
194
  the original ResNet V1.5 implementation.
195
-
195
+
196
196
  Attributes:
197
197
  expansion (int): Expansion factor for the block (default: 4)
198
198
  conv1: First 1x1 convolution layer
@@ -205,7 +205,7 @@ class Bottleneck(nn.Module):
205
205
  downsample: Downsampling layer for residual connection
206
206
  stride: Stride for the convolutions
207
207
  """
208
-
208
+
209
209
  # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
210
210
  # while original implementation places the stride at the first 1x1 convolution(self.conv1)
211
211
  # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
@@ -227,7 +227,7 @@ class Bottleneck(nn.Module):
227
227
  ) -> None:
228
228
  """
229
229
  Initialize the Bottleneck block.
230
-
230
+
231
231
  Args:
232
232
  inplanes (int): Number of input channels
233
233
  planes (int): Number of output channels
@@ -256,10 +256,10 @@ class Bottleneck(nn.Module):
256
256
  def forward(self, x: Tensor) -> Tensor:
257
257
  """
258
258
  Forward pass through the Bottleneck block.
259
-
259
+
260
260
  Args:
261
261
  x (Tensor): Input tensor [batch_size, channels, height, width]
262
-
262
+
263
263
  Returns:
264
264
  Tensor: Output tensor with same shape as input
265
265
  """
@@ -288,11 +288,11 @@ class Bottleneck(nn.Module):
288
288
  class ResNet(nn.Module):
289
289
  """
290
290
  ResNet architecture adapted for genomic sequence analysis.
291
-
291
+
292
292
  This ResNet implementation is specifically designed for processing genomic
293
293
  sequences and their structural representations. It uses layer normalization
294
294
  instead of batch normalization and is optimized for genomic data characteristics.
295
-
295
+
296
296
  Attributes:
297
297
  _norm_layer: Normalization layer type
298
298
  inplanes: Number of input channels for the first layer
@@ -319,7 +319,7 @@ class ResNet(nn.Module):
319
319
  ) -> None:
320
320
  """
321
321
  Initialize the ResNet architecture.
322
-
322
+
323
323
  Args:
324
324
  channels (int): Number of input channels
325
325
  block: Type of ResNet block (BasicBlock or Bottleneck)
@@ -329,7 +329,7 @@ class ResNet(nn.Module):
329
329
  width_per_group (int): Width per group for bottleneck blocks (default: 1)
330
330
  replace_stride_with_dilation: Whether to replace stride with dilation (default: None)
331
331
  norm_layer: Normalization layer type (default: None, uses LayerNorm)
332
-
332
+
333
333
  Raises:
334
334
  ValueError: If replace_stride_with_dilation is not None or a 3-element tuple
335
335
  """
@@ -379,14 +379,14 @@ class ResNet(nn.Module):
379
379
  ) -> nn.Sequential:
380
380
  """
381
381
  Create a layer of ResNet blocks.
382
-
382
+
383
383
  Args:
384
384
  block: Type of ResNet block to use
385
385
  planes (int): Number of output channels for the layer
386
386
  blocks (int): Number of blocks in the layer
387
387
  stride (int): Stride for the first block (default: 1)
388
388
  dilate (bool): Whether to use dilation (default: False)
389
-
389
+
390
390
  Returns:
391
391
  nn.Sequential: Sequential container of ResNet blocks
392
392
  """
@@ -433,10 +433,10 @@ class ResNet(nn.Module):
433
433
  def _forward_impl(self, x: Tensor) -> Tensor:
434
434
  """
435
435
  Forward pass implementation.
436
-
436
+
437
437
  Args:
438
438
  x (Tensor): Input tensor [batch_size, channels, height, width]
439
-
439
+
440
440
  Returns:
441
441
  Tensor: Output tensor after processing through ResNet
442
442
  """
@@ -456,10 +456,10 @@ class ResNet(nn.Module):
456
456
  def forward(self, x: Tensor) -> Tensor:
457
457
  """
458
458
  Forward pass through the ResNet.
459
-
459
+
460
460
  Args:
461
461
  x (Tensor): Input tensor [batch_size, channels, height, width]
462
-
462
+
463
463
  Returns:
464
464
  Tensor: Output tensor after processing through ResNet
465
465
  """
@@ -469,14 +469,14 @@ class ResNet(nn.Module):
469
469
  def resnet_b16(channels=128, bbn=16):
470
470
  """
471
471
  Create a ResNet-B16 model for genomic sequence analysis.
472
-
472
+
473
473
  This function creates a ResNet model with 16 basic blocks, optimized
474
474
  for processing genomic sequences and their structural representations.
475
-
475
+
476
476
  Args:
477
477
  channels (int): Number of input channels (default: 128)
478
478
  bbn (int): Number of basic blocks (default: 16)
479
-
479
+
480
480
  Returns:
481
481
  ResNet: Configured ResNet model
482
482
  """
@@ -9,4 +9,3 @@
9
9
  """
10
10
  This package contains modules for RNA design models.
11
11
  """
12
-