omnigenome 0.3.0a1__py3-none-any.whl → 0.3.3a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of omnigenome might be problematic. Click here for more details.

Files changed (79) hide show
  1. omnigenome/__init__.py +252 -258
  2. {omnigenome-0.3.0a1.dist-info → omnigenome-0.3.3a0.dist-info}/METADATA +10 -10
  3. omnigenome-0.3.3a0.dist-info/RECORD +7 -0
  4. omnigenome/auto/__init__.py +0 -3
  5. omnigenome/auto/auto_bench/__init__.py +0 -12
  6. omnigenome/auto/auto_bench/auto_bench.py +0 -484
  7. omnigenome/auto/auto_bench/auto_bench_cli.py +0 -230
  8. omnigenome/auto/auto_bench/auto_bench_config.py +0 -216
  9. omnigenome/auto/auto_bench/config_check.py +0 -34
  10. omnigenome/auto/auto_train/__init__.py +0 -13
  11. omnigenome/auto/auto_train/auto_train.py +0 -430
  12. omnigenome/auto/auto_train/auto_train_cli.py +0 -222
  13. omnigenome/auto/bench_hub/__init__.py +0 -12
  14. omnigenome/auto/bench_hub/bench_hub.py +0 -25
  15. omnigenome/cli/__init__.py +0 -13
  16. omnigenome/cli/commands/__init__.py +0 -13
  17. omnigenome/cli/commands/base.py +0 -83
  18. omnigenome/cli/commands/bench/__init__.py +0 -13
  19. omnigenome/cli/commands/bench/bench_cli.py +0 -202
  20. omnigenome/cli/commands/rna/__init__.py +0 -13
  21. omnigenome/cli/commands/rna/rna_design.py +0 -178
  22. omnigenome/cli/omnigenome_cli.py +0 -128
  23. omnigenome/src/__init__.py +0 -12
  24. omnigenome/src/abc/__init__.py +0 -12
  25. omnigenome/src/abc/abstract_dataset.py +0 -622
  26. omnigenome/src/abc/abstract_metric.py +0 -114
  27. omnigenome/src/abc/abstract_model.py +0 -689
  28. omnigenome/src/abc/abstract_tokenizer.py +0 -267
  29. omnigenome/src/dataset/__init__.py +0 -16
  30. omnigenome/src/dataset/omni_dataset.py +0 -435
  31. omnigenome/src/lora/__init__.py +0 -13
  32. omnigenome/src/lora/lora_model.py +0 -294
  33. omnigenome/src/metric/__init__.py +0 -15
  34. omnigenome/src/metric/classification_metric.py +0 -184
  35. omnigenome/src/metric/metric.py +0 -199
  36. omnigenome/src/metric/ranking_metric.py +0 -142
  37. omnigenome/src/metric/regression_metric.py +0 -191
  38. omnigenome/src/misc/__init__.py +0 -3
  39. omnigenome/src/misc/utils.py +0 -499
  40. omnigenome/src/model/__init__.py +0 -19
  41. omnigenome/src/model/augmentation/__init__.py +0 -12
  42. omnigenome/src/model/augmentation/model.py +0 -219
  43. omnigenome/src/model/classification/__init__.py +0 -12
  44. omnigenome/src/model/classification/model.py +0 -642
  45. omnigenome/src/model/embedding/__init__.py +0 -12
  46. omnigenome/src/model/embedding/model.py +0 -263
  47. omnigenome/src/model/mlm/__init__.py +0 -12
  48. omnigenome/src/model/mlm/model.py +0 -177
  49. omnigenome/src/model/module_utils.py +0 -232
  50. omnigenome/src/model/regression/__init__.py +0 -12
  51. omnigenome/src/model/regression/model.py +0 -786
  52. omnigenome/src/model/regression/resnet.py +0 -483
  53. omnigenome/src/model/rna_design/__init__.py +0 -12
  54. omnigenome/src/model/rna_design/model.py +0 -469
  55. omnigenome/src/model/seq2seq/__init__.py +0 -12
  56. omnigenome/src/model/seq2seq/model.py +0 -44
  57. omnigenome/src/tokenizer/__init__.py +0 -16
  58. omnigenome/src/tokenizer/bpe_tokenizer.py +0 -226
  59. omnigenome/src/tokenizer/kmers_tokenizer.py +0 -247
  60. omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +0 -249
  61. omnigenome/src/trainer/__init__.py +0 -14
  62. omnigenome/src/trainer/accelerate_trainer.py +0 -739
  63. omnigenome/src/trainer/hf_trainer.py +0 -75
  64. omnigenome/src/trainer/trainer.py +0 -579
  65. omnigenome/utility/__init__.py +0 -3
  66. omnigenome/utility/dataset_hub/__init__.py +0 -13
  67. omnigenome/utility/dataset_hub/dataset_hub.py +0 -178
  68. omnigenome/utility/ensemble.py +0 -324
  69. omnigenome/utility/hub_utils.py +0 -517
  70. omnigenome/utility/model_hub/__init__.py +0 -12
  71. omnigenome/utility/model_hub/model_hub.py +0 -231
  72. omnigenome/utility/pipeline_hub/__init__.py +0 -12
  73. omnigenome/utility/pipeline_hub/pipeline.py +0 -483
  74. omnigenome/utility/pipeline_hub/pipeline_hub.py +0 -129
  75. omnigenome-0.3.0a1.dist-info/RECORD +0 -78
  76. {omnigenome-0.3.0a1.dist-info → omnigenome-0.3.3a0.dist-info}/WHEEL +0 -0
  77. {omnigenome-0.3.0a1.dist-info → omnigenome-0.3.3a0.dist-info}/entry_points.txt +0 -0
  78. {omnigenome-0.3.0a1.dist-info → omnigenome-0.3.3a0.dist-info}/licenses/LICENSE +0 -0
  79. {omnigenome-0.3.0a1.dist-info → omnigenome-0.3.3a0.dist-info}/top_level.txt +0 -0
@@ -1,469 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: model.py
3
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
4
- # github: https://github.com/yangheng95
5
- # huggingface: https://huggingface.co/yangheng
6
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
7
- # Copyright (C) 2019-2024. All Rights Reserved.
8
- """
9
- RNA design model using masked language modeling and evolutionary algorithms.
10
-
11
- This module provides an RNA design model that combines masked language modeling
12
- with evolutionary algorithms to design RNA sequences that fold into specific
13
- target structures. It uses a multi-objective optimization approach to balance
14
- structure similarity and thermodynamic stability.
15
- """
16
- import random
17
- import numpy as np
18
- import torch
19
- import autocuda
20
- from transformers import AutoModelForMaskedLM, AutoTokenizer
21
- from concurrent.futures import ProcessPoolExecutor, as_completed
22
- import ViennaRNA
23
- from scipy.spatial.distance import hamming
24
- import warnings
25
- import os
26
-
27
- from omnigenome.src.misc.utils import fprint
28
-
29
-
30
class OmniModelForRNADesign(torch.nn.Module):
    """
    RNA design model combining a masked language model with an evolutionary loop.

    A population of candidate sequences is repeatedly recombined (crossover),
    re-sampled through the masked language model (mutation), folded with
    ViennaRNA, and ranked by two objectives that are both minimised: the
    Hamming distance between the predicted and the target dot-bracket
    structure, and the minimum free energy (MFE) of the fold.

    Attributes:
        device: Device the language model is moved to.
        parallel: Whether structure prediction may use a process pool.
        tokenizer: Tokenizer loaded with ``AutoTokenizer.from_pretrained``.
        model: Masked language model loaded with
            ``AutoModelForMaskedLM.from_pretrained``.
    """

    def __init__(
        self,
        model="yangheng/OmniGenome-186M",
        device=None,
        parallel=False,
        *args,
        **kwargs,
    ):
        """
        Load the tokenizer and the masked language model.

        Args:
            model (str): Model name or path passed to ``from_pretrained``.
            device: Target device; when None, ``autocuda.auto_cuda()`` picks one.
            parallel (bool): Use a process pool for structure prediction.
            *args: Forwarded to ``torch.nn.Module.__init__``.
            **kwargs: Forwarded to ``torch.nn.Module.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.device = autocuda.auto_cuda() if device is None else device
        self.parallel = parallel
        self.tokenizer = AutoTokenizer.from_pretrained(model)
        self.model = AutoModelForMaskedLM.from_pretrained(model, trust_remote_code=True)
        # The weights are always cast to half precision, whatever the device.
        self.model.to(self.device).to(torch.float16)

    @staticmethod
    def _random_bp_span(bp_span=None):
        """
        Return ``bp_span`` unchanged, or a random integer in [1, 10] when it is None.

        Args:
            bp_span (int, optional): Fixed base pair span.

        Returns:
            int: The given span, or a random one.
        """
        if bp_span is None:
            return random.randint(1, 10)
        return bp_span

    @staticmethod
    def _longest_bp_span(structure):
        """
        Return the peak value of a running bracket counter over ``structure``.

        The counter goes up by one on "(", down by one (never below zero) on
        ")", and is reset to zero by every other character.

        Args:
            structure (str): RNA structure in dot-bracket notation.

        Returns:
            int: Highest value the counter reached (0 for an empty string).
        """
        max_span = 0
        current_span = 0

        for char in structure:
            if char == "(":
                current_span += 1
                max_span = max(max_span, current_span)
            elif char == ")":
                current_span = max(0, current_span - 1)
            else:
                current_span = 0

        return max_span

    @staticmethod
    def _predict_structure_single(sequence, bp_span=-1):
        """
        Fold one sequence with ViennaRNA (also used as the process-pool worker).

        Args:
            sequence (str): RNA sequence to fold.
            bp_span (int): Accepted for interface compatibility; the current
                implementation does not use it.

        Returns:
            tuple: ``(structure, mfe)``. If folding raises, a warning is emitted
            and an all-dot structure with an MFE of 0.0 is returned instead.
        """
        try:
            return ViennaRNA.fold(sequence)
        except Exception as e:
            warnings.warn(f"Failed to fold sequence {sequence}: {e}")
            return ("." * len(sequence), 0.0)

    def _predict_structure(self, sequences, bp_span=-1):
        """
        Fold several sequences, returning results in the order of ``sequences``.

        Args:
            sequences (list): RNA sequences to fold.
            bp_span (int): Forwarded to ``_predict_structure_single``.

        Returns:
            list: One ``(structure, mfe)`` tuple per input sequence, aligned
            index-for-index with ``sequences``.
        """
        if not self.parallel or len(sequences) <= 1:
            return [self._predict_structure_single(seq, bp_span) for seq in sequences]

        try:
            # os.cpu_count() may return None; treat that as a single CPU.
            # The pool is capped at 8 workers.
            max_workers = max(1, min(os.cpu_count() or 1, len(sequences), 8))

            # Futures complete in arbitrary order, so every result is stored
            # at the index of the sequence it belongs to. Callers zip the
            # results against the input sequences and rely on this alignment.
            results = [None] * len(sequences)

            with ProcessPoolExecutor(max_workers=max_workers) as executor:
                future_to_index = {
                    executor.submit(self._predict_structure_single, seq, bp_span): idx
                    for idx, seq in enumerate(sequences)
                }

                for future in as_completed(future_to_index):
                    idx = future_to_index[future]
                    try:
                        results[idx] = future.result()
                    except Exception as e:
                        seq = sequences[idx]
                        warnings.warn(f"Failed to process sequence {seq}: {e}")
                        # Fallback to dot structure
                        results[idx] = ("." * len(seq), 0.0)

            return results

        except Exception as e:
            warnings.warn(f"Parallel processing failed, falling back to sequential: {e}")
            return [self._predict_structure_single(seq, bp_span) for seq in sequences]

    def _init_population(self, structure, num_population):
        """
        Build the initial population of uniformly random sequences.

        Args:
            structure (str): Target RNA structure; fixes the sequence length.
            num_population (int): Number of sequences to generate.

        Returns:
            list: ``(sequence, bp_span)`` tuples; every entry carries the same
            ``bp_span`` computed by ``_longest_bp_span(structure)``.
        """
        population = []
        bp_span = self._longest_bp_span(structure)

        for _ in range(num_population):
            sequence = "".join(random.choice("ACGU") for _ in range(len(structure)))
            population.append((sequence, bp_span))

        return population

    def _mlm_mutate(self, population, structure, mutation_ratio):
        """
        Mutate the population by masking positions and letting the MLM refill them.

        Args:
            population (list): ``(sequence, bp_span)`` tuples.
            structure (str): Target RNA structure.
            mutation_ratio (float): Fraction of positions to mask per sequence.

        Returns:
            list: ``(sequence, bp_span)`` tuples with the decoded MLM output;
            each sequence is truncated or right-padded with "A" to the length
            of ``structure``.
        """

        def mutate(sequence, mutation_rate):
            # Replace a random subset of positions with the tokenizer's mask token.
            masked_sequence = list(sequence)
            num_mutations = int(len(sequence) * mutation_rate)
            mutation_positions = random.sample(range(len(sequence)), num_mutations)

            for pos in mutation_positions:
                masked_sequence[pos] = self.tokenizer.mask_token

            return "".join(masked_sequence)

        mlm_inputs = [mutate(sequence, mutation_ratio) for sequence, _ in population]

        predicted_tokens = self._mlm_predict(mlm_inputs, structure)

        mutated_population = []
        for i, (_, bp_span) in enumerate(population):
            new_sequence = self.tokenizer.decode(predicted_tokens[i], skip_special_tokens=True)
            # Force the decoded text to the target length.
            if len(new_sequence) != len(structure):
                new_sequence = new_sequence[:len(structure)].ljust(len(structure), "A")
            mutated_population.append((new_sequence, bp_span))

        return mutated_population

    def _crossover(self, population, num_points=3):
        """
        Recombine randomly chosen parent pairs with multi-point crossover.

        Args:
            population (list): ``(sequence, bp_span)`` tuples of equal length.
            num_points (int): Number of crossover points drawn per child.

        Returns:
            list: Twice as many ``(sequence, bp_span)`` tuples as the input
            (two complementary children per slot). A population with fewer
            than two members is returned unchanged.
        """
        if len(population) < 2:
            return population

        num_sequences = len(population)
        masks = np.zeros((num_sequences, len(population[0][0])), dtype=bool)

        # Crossover points are drawn independently and are not sorted, so a
        # segment whose start lies past its end selects nothing.
        crossover_points = np.random.randint(0, len(population[0][0]), (num_sequences, num_points))

        # Two parents per child, sampled with replacement.
        parent_indices = np.random.randint(0, num_sequences, (num_sequences, 2))

        for i in range(num_sequences):
            for j in range(num_points):
                if j == 0:
                    masks[i, :crossover_points[i, j]] = True
                else:
                    last_point = crossover_points[i, j-1]
                    masks[i, last_point:crossover_points[i, j]] = j % 2 == 0

            # Handle the last segment
            last_point = crossover_points[i, -1]
            masks[i, last_point:] = num_points % 2 == 0

        # True mask positions take parent 0 for child 1 and parent 1 for child 2.
        population_array = np.array([list(seq[0]) for seq in population])
        child1_array = np.where(
            masks,
            population_array[parent_indices[:, 0]],
            population_array[parent_indices[:, 1]],
        )
        child2_array = np.where(
            masks,
            population_array[parent_indices[:, 1]],
            population_array[parent_indices[:, 0]],
        )

        return [
            ("".join(child), bp_span)
            for child, (_, bp_span) in zip(child1_array, population)
        ] + [
            ("".join(child), bp_span)
            for child, (_, bp_span) in zip(child2_array, population)
        ]

    def _evaluate_structure_fitness(self, sequences, structure):
        """
        Fold every candidate, score it against the target, and rank by Pareto front.

        Args:
            sequences (list): ``(sequence, bp_span)`` tuples to evaluate.
            structure (str): Target RNA structure.

        Returns:
            list: ``(sequence, bp_span, score, mfe)`` tuples ordered front by
            front, where ``score`` is the Hamming distance between the target
            and the predicted structure (0 means identical).
        """
        seq_list = [seq for seq, _ in sequences]

        # Relies on _predict_structure returning results in input order.
        structures_mfe = self._predict_structure(seq_list)

        sorted_population = []
        for (seq, bp_span), (ss, mfe) in zip(sequences, structures_mfe):
            score = hamming(list(structure), list(ss))
            sorted_population.append((seq, bp_span, score, mfe))

        fronts = self._non_dominated_sorting(
            [x[2] for x in sorted_population], [x[3] for x in sorted_population]
        )
        return self._select_next_generation(sorted_population, fronts)

    @staticmethod
    def _non_dominated_sorting(scores, mfe_values):
        """
        Partition solution indices into successive non-dominated fronts.

        Solution p dominates q only when p is strictly smaller in BOTH
        objectives; ties in either objective mean neither dominates.

        Args:
            scores (list): Structure distance per solution (lower is better).
            mfe_values (list): Minimum free energy per solution (lower is better).

        Returns:
            list: List of fronts, each a list of indices; empty for empty input.
        """
        num_solutions = len(scores)
        domination_count = [0] * num_solutions
        dominated_solutions = [[] for _ in range(num_solutions)]
        fronts = [[]]

        for p in range(num_solutions):
            for q in range(num_solutions):
                if scores[p] < scores[q] and mfe_values[p] < mfe_values[q]:
                    dominated_solutions[p].append(q)
                elif scores[q] < scores[p] and mfe_values[q] < mfe_values[p]:
                    domination_count[p] += 1

            if domination_count[p] == 0:
                fronts[0].append(p)

        i = 0
        while len(fronts[i]) > 0:
            next_front = []
            for p in fronts[i]:
                for q in dominated_solutions[p]:
                    domination_count[q] -= 1
                    if domination_count[q] == 0:
                        next_front.append(q)
            i += 1
            fronts.append(next_front)

        # The loop always ends on an empty front; drop that trailing placeholder.
        if not fronts[-1]:
            fronts.pop(-1)

        return fronts

    @staticmethod
    def _select_next_generation(next_generation, fronts):
        """
        Flatten the fronts into one list ordered from the first front onwards.

        Args:
            next_generation (list): Evaluated population tuples.
            fronts (list): Fronts of indices into ``next_generation``.

        Returns:
            list: Population entries in front order, at most
            ``len(next_generation)`` of them.
        """
        sorted_population = []
        for front in fronts:
            front_population = [next_generation[i] for i in front]
            sorted_population.extend(front_population)
            if len(sorted_population) >= len(next_generation):
                break

        return sorted_population[: len(next_generation)]

    def _mlm_predict(self, mlm_inputs, structure):
        """
        Run the masked language model and take the arg-max token at each position.

        Args:
            mlm_inputs (list): Masked input sequences.
            structure (str): Target RNA structure; fixes the output width.

        Returns:
            torch.Tensor: Predicted token IDs, one row per input, restricted to
            positions ``1 .. len(structure)`` of the model output.
        """
        batch_size = 8
        all_outputs = []

        with torch.no_grad():
            for i in range(0, len(mlm_inputs), batch_size):
                # No padding is requested, so the tokenized inputs of a batch
                # must already share one length.
                inputs = self.tokenizer(
                    mlm_inputs[i: i + batch_size],
                    padding=False,
                    max_length=1024,
                    truncation=True,
                    return_tensors="pt",
                )
                inputs = {
                    key: value.to(self.model.device) for key, value in inputs.items()
                }
                outputs = self.model(**inputs)[0].argmax(dim=-1)
                all_outputs.append(outputs)

        # Position 0 is skipped — presumably a leading special token; confirm
        # against the tokenizer in use.
        return torch.cat(all_outputs, dim=0)[:, 1 : 1 + len(structure)]

    def design(
        self, structure, mutation_ratio=0.5, num_population=100, num_generation=100
    ):
        """
        Search for RNA sequences whose predicted fold matches ``structure``.

        Args:
            structure (str): Target RNA structure in dot-bracket notation.
            mutation_ratio (float): Fraction of positions masked per mutation.
            num_population (int): Population size kept after each generation.
            num_generation (int): Maximum number of generations.

        Returns:
            list | str: As soon as a generation contains sequences with a
            structure distance of 0, the list of those sequences. If no
            generation produces one, the first sequence (a single string) of
            the final population.
        """
        population = self._init_population(structure, num_population)
        population = self._mlm_mutate(population, structure, mutation_ratio)

        for generation_id in range(num_generation):
            next_generation = self._crossover(population)
            next_generation = self._mlm_mutate(
                next_generation, structure, mutation_ratio
            )
            next_generation = self._evaluate_structure_fitness(
                next_generation, structure
            )[:num_population]

            # Stop early once any candidate reproduces the target exactly.
            candidate_sequences = [
                seq for seq, bp_span, score, mfe in next_generation if score == 0
            ]
            if candidate_sequences:
                return candidate_sequences

            population = [
                (seq, bp_span) for seq, bp_span, score, mfe in next_generation
            ]

        return population[0][0]
458
-
459
-
460
- # Example usage
461
if __name__ == "__main__":
    # Demo run: load the model, then search for sequences folding into a
    # small hairpin. design() returns either a list of exact matches or a
    # single fallback sequence, so the printed value may be either shape.
    design_settings = {
        "structure": "(((....)))",
        "mutation_ratio": 0.5,
        "num_population": 100,
        "num_generation": 100,
    }
    model = OmniModelForRNADesign(model="anonymous8/OmniGenome-186M")
    best_sequence = model.design(**design_settings)
    fprint(f"Best RNA sequence: {best_sequence}")
@@ -1,12 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: __init__.py
3
- # time: 22:21 08/04/2024
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
- # github: https://github.com/yangheng95
6
- # huggingface: https://huggingface.co/yangheng
7
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
8
- # Copyright (C) 2019-2024. All Rights Reserved.
9
- """
10
- This package contains modules for sequence-to-sequence models.
11
- """
12
-
@@ -1,44 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: model.py
3
- # time: 11:40 14/04/2024
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
- # github: https://github.com/yangheng95
6
- # huggingface: https://huggingface.co/yangheng
7
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
8
- # Copyright (C) 2019-2024. All Rights Reserved.
9
- """
10
- Sequence-to-sequence model for genomic sequences.
11
-
12
- This module provides a sequence-to-sequence model implementation for genomic
13
- sequences. It's designed for tasks where the input and output are both
14
- sequences, such as sequence translation, structure prediction, or sequence
15
- transformation tasks.
16
- """
17
-
18
- from ...abc.abstract_model import OmniModel
19
-
20
-
21
class OmniModelForSeq2Seq(OmniModel):
    """
    Sequence-to-sequence model for genomic sequences.

    This class adds no behaviour of its own: construction is delegated
    unchanged to ``OmniModel``, and every other method is inherited from it.
    It serves as the named entry point for tasks whose input and output are
    both sequences.
    """

    def __init__(self, config_or_model, tokenizer, *args, **kwargs):
        """
        Build the model by forwarding every argument to ``OmniModel``.

        Args:
            config_or_model: Model configuration or pre-trained model.
            tokenizer: Tokenizer used to process input sequences.
            *args: Extra positional arguments passed through to the base class.
            **kwargs: Extra keyword arguments passed through to the base class.
        """
        super(OmniModelForSeq2Seq, self).__init__(
            config_or_model, tokenizer, *args, **kwargs
        )
@@ -1,16 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: __init__.py
3
- # time: 18:05 08/04/2024
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
- # github: https://github.com/yangheng95
6
- # huggingface: https://huggingface.co/yangheng
7
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
8
- # Copyright (C) 2019-2024. All Rights Reserved.
9
- """
10
- This package contains tokenizer implementations.
11
- """
12
-
13
-
14
- from .bpe_tokenizer import OmniBPETokenizer
15
- from .kmers_tokenizer import OmniKmersTokenizer
16
- from .single_nucleotide_tokenizer import OmniSingleNucleotideTokenizer