evoseq 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
evoseq-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Hideaki Mizoue
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
evoseq-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,230 @@
1
+ Metadata-Version: 2.4
2
+ Name: evoseq
3
+ Version: 0.1.0
4
+ Summary: Genome analysis toolkit powered by Evo
5
+ Author: Hideaki Mizoue
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/mizomizo1/EvoSeq
8
+ Project-URL: Repository, https://github.com/mizomizo1/EvoSeq
9
+ Project-URL: Issues, https://github.com/mizomizo1/EvoSeq/issues
10
+ Keywords: bioinformatics,genomics,evo2,variant-scoring
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: pandas>=2.2
22
+ Requires-Dist: numpy>=2.0
23
+ Requires-Dist: biopython>=1.85
24
+ Requires-Dist: tqdm>=4.66
25
+ Provides-Extra: evo2
26
+ Requires-Dist: evo2; extra == "evo2"
27
+ Requires-Dist: torch; extra == "evo2"
28
+ Dynamic: license-file
29
+
30
+ # EvoSeq
31
+
32
+ EvoSeq is a small Colab-friendly toolkit for preparing paired reference/mutant
33
+ FASTA files and scoring variants with Evo2.
34
+
35
+ It is designed for the common research workflow where positive datasets have a
36
+ `manifest.tsv`, negative datasets may only have paired FASTA files, and the same
37
+ Evo2 model should stay loaded once per Colab runtime.
38
+
39
+ ## Install
40
+
41
+ For local testing from this repository:
42
+
43
+ ```bash
44
+ pip install -e .
45
+ ```
46
+
47
+ For Evo2 scoring dependencies:
48
+
49
+ ```bash
50
+ pip install -e ".[evo2]"
51
+ ```
52
+
53
+ In Google Colab, Evo2 often needs a runtime-specific install. Use this before
54
+ scoring:
55
+
56
+ ```bash
57
+ pip uninstall -y torchvision
58
+ pip install -q torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128
59
+ pip install -q flash-attn==2.8.0.post2 --no-build-isolation
60
+ pip install -q evo2
61
+ pip install -e .
62
+ ```
63
+
64
+ After a GitHub Release is tagged, users can install a specific version directly:
65
+
66
+ ```bash
67
+ pip install "git+https://github.com/mizomizo1/EvoSeq.git@v0.1.0"
68
+ ```
69
+
70
+ For Evo2 scoring in Colab, install Evo2 and GPU dependencies in the runtime that
71
+ matches your model. The preprocessing step only needs the base dependencies.
72
+
73
+ ## Debug / Test
74
+
75
+ Run the local workflow tests without Evo2, torch, or flash-attn:
76
+
77
+ ```bash
78
+ python -m unittest discover -s tests -v
79
+ ```
80
+
81
+ These tests cover preprocessing, folder discovery, score-table export with a
82
+ fake scorer, and the missing Evo2 dependency message. Real Evo2 scoring still
83
+ requires a Colab GPU runtime with `torch`, `flash-attn`, and `evo2` installed.
84
+
85
+ ## Quick Start: Preprocessing Files
86
+
87
+ Put files anywhere, for example in `test/`, and pass the files directly:
88
+
89
+ ```python
90
+ from evoseq.preprocess import preprocess_files
91
+
92
+ evo_df, paths = preprocess_files(
93
+ reference_fasta_path="test/evo2_reference.fasta",
94
+ mutant_fasta_path="test/evo2_mutant.fasta",
95
+ manifest_path="auto",
96
+ )
97
+ ```
98
+
99
+ By default, outputs are written next to the input files:
100
+ `test/evoseq_preprocess_output/`.
101
+
102
+ You can also be explicit:
103
+
104
+ ```python
105
+ evo_df, paths = preprocess_files(
106
+ reference_fasta_path="test/evo2_reference.fasta",
107
+ mutant_fasta_path="test/evo2_mutant.fasta",
108
+ output_dir="outputs/preprocessing",
109
+ )
110
+ ```
111
+
112
+ Outputs include:
113
+
114
+ - `evo2_pairs.tsv`: one row per variant with `ref_seq` and `mut_seq`
115
+ - `evo2_reference.fa`
116
+ - `evo2_mutant.fa`
117
+ - `evo2_all.fa`
118
+ - `preprocessing_report.tsv`
119
+
120
+ `manifest.tsv` is optional. When present, metadata are merged by `record_id`.
121
+ When absent, metadata are inferred from FASTA IDs when possible.
122
+
123
+ ## Quick Start: Preprocessing a Folder
124
+
125
+ If your folder contains paired FASTA files, EvoSeq can discover them:
126
+
127
+ ```python
128
+ from evoseq.preprocess import preprocess_folder
129
+
130
+ evo_df, paths = preprocess_folder("test")
131
+ ```
132
+
133
+ ## Quick Start: Evo2 Scoring
134
+
135
+ ```python
136
+ from evoseq.scoring import score_pairs_file
137
+
138
+ result_df, result_paths = score_pairs_file(
139
+ pairs_path="test/evoseq_preprocess_output/evo2_pairs.tsv",
140
+ model_name="evo2_7b",
141
+ batch_size=8,
142
+ )
143
+ ```
144
+
145
+ By default, outputs are written next to the pair table:
146
+ `test/evoseq_preprocess_output/evoseq_scoring_output/`.
147
+
148
+ Use `output_dir="outputs/scoring"` if you want a project-level result folder. Outputs include:
149
+
150
+ - `evo2_variant_scores_unique.tsv`
151
+ - `evo2_variant_scores_manifest.tsv` when a manifest is available
152
+ - `environment_info.tsv`
153
+ - `scoring_report.tsv`
154
+
155
+ Reference sequences are scored once per unique sequence and reused. This is
156
+ useful when many variants share the same reference window.
157
+
158
+ ## Model Handling
159
+
160
+ EvoSeq caches the loaded Evo2 model inside the Python process:
161
+
162
+ ```python
163
+ from evoseq.scoring import Evo2Scorer
164
+
165
+ scorer = Evo2Scorer(model_name="evo2_7b", device="cuda:0")
166
+ scores = scorer.score_sequences(["ACGTACGT"])
167
+ ```
168
+
169
+ Calling another scoring function with the same model reuses it. Attempting to
170
+ load a different Evo2 model in the same runtime raises an explicit error by
171
+ default, because loading multiple large models often exhausts Colab GPU memory.
172
+ Restart the runtime when switching from 7B to 20B.
173
+
174
+ Common model names:
175
+
176
+ - `evo2_7b`
177
+ - `evo2_7b_base`
178
+ - `evo2_20b`
179
+
180
+ For local model weights:
181
+
182
+ ```python
183
+ score_evo2_pairs(
184
+ base_dir=".",
185
+ model_name="evo2_20b",
186
+ local_path="/content/drive/MyDrive/Models/evo2_20b.pt",
187
+ )
188
+ ```
189
+
190
+ ## TOML Config
191
+
192
+ Copy `evoseq.example.toml`, edit the input paths/model, and run:
193
+
194
+ ```python
195
+ from evoseq import run_from_config
196
+
197
+ outputs = run_from_config("evoseq.example.toml")
198
+ ```
199
+
200
+ or:
201
+
202
+ ```bash
203
+ evoseq-run evoseq.example.toml
204
+ ```
205
+
206
+ ## Per-Base Log-Probabilities
207
+
208
+ ```python
209
+ from evoseq.scoring import export_perbase_logprobs
210
+
211
+ path = export_perbase_logprobs(
212
+ fasta_path="test/representative_perbase.fasta",
213
+ model_name="evo2_7b",
214
+ center=4096,
215
+ half_window=320,
216
+ )
217
+ ```
218
+
219
+ By default, this writes `test/evoseq_perbase_output/perbase_logprobs.tsv`.
220
+
221
+ ## Reproducibility
222
+
223
+ EvoSeq writes small TSV reports for methods sections and reruns:
224
+
225
+ - input paths and output paths
226
+ - number of variants and unique reference sequences
227
+ - model name, batch size, device, elapsed time
228
+ - Python, PyTorch, CUDA, GPU, NumPy, pandas, Biopython, and Evo2 versions
229
+
230
+ These files are meant to be saved with each analysis directory.
evoseq-0.1.0/README.md ADDED
@@ -0,0 +1,201 @@
1
+ # EvoSeq
2
+
3
+ EvoSeq is a small Colab-friendly toolkit for preparing paired reference/mutant
4
+ FASTA files and scoring variants with Evo2.
5
+
6
+ It is designed for the common research workflow where positive datasets have a
7
+ `manifest.tsv`, negative datasets may only have paired FASTA files, and the same
8
+ Evo2 model should stay loaded once per Colab runtime.
9
+
10
+ ## Install
11
+
12
+ For local testing from this repository:
13
+
14
+ ```bash
15
+ pip install -e .
16
+ ```
17
+
18
+ For Evo2 scoring dependencies:
19
+
20
+ ```bash
21
+ pip install -e ".[evo2]"
22
+ ```
23
+
24
+ In Google Colab, Evo2 often needs a runtime-specific install. Use this before
25
+ scoring:
26
+
27
+ ```bash
28
+ pip uninstall -y torchvision
29
+ pip install -q torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128
30
+ pip install -q flash-attn==2.8.0.post2 --no-build-isolation
31
+ pip install -q evo2
32
+ pip install -e .
33
+ ```
34
+
35
+ After a GitHub Release is tagged, users can install a specific version directly:
36
+
37
+ ```bash
38
+ pip install "git+https://github.com/mizomizo1/EvoSeq.git@v0.1.0"
39
+ ```
40
+
41
+ For Evo2 scoring in Colab, install Evo2 and GPU dependencies in the runtime that
42
+ matches your model. The preprocessing step only needs the base dependencies.
43
+
44
+ ## Debug / Test
45
+
46
+ Run the local workflow tests without Evo2, torch, or flash-attn:
47
+
48
+ ```bash
49
+ python -m unittest discover -s tests -v
50
+ ```
51
+
52
+ These tests cover preprocessing, folder discovery, score-table export with a
53
+ fake scorer, and the missing Evo2 dependency message. Real Evo2 scoring still
54
+ requires a Colab GPU runtime with `torch`, `flash-attn`, and `evo2` installed.
55
+
56
+ ## Quick Start: Preprocessing Files
57
+
58
+ Put files anywhere, for example in `test/`, and pass the files directly:
59
+
60
+ ```python
61
+ from evoseq.preprocess import preprocess_files
62
+
63
+ evo_df, paths = preprocess_files(
64
+ reference_fasta_path="test/evo2_reference.fasta",
65
+ mutant_fasta_path="test/evo2_mutant.fasta",
66
+ manifest_path="auto",
67
+ )
68
+ ```
69
+
70
+ By default, outputs are written next to the input files:
71
+ `test/evoseq_preprocess_output/`.
72
+
73
+ You can also be explicit:
74
+
75
+ ```python
76
+ evo_df, paths = preprocess_files(
77
+ reference_fasta_path="test/evo2_reference.fasta",
78
+ mutant_fasta_path="test/evo2_mutant.fasta",
79
+ output_dir="outputs/preprocessing",
80
+ )
81
+ ```
82
+
83
+ Outputs include:
84
+
85
+ - `evo2_pairs.tsv`: one row per variant with `ref_seq` and `mut_seq`
86
+ - `evo2_reference.fa`
87
+ - `evo2_mutant.fa`
88
+ - `evo2_all.fa`
89
+ - `preprocessing_report.tsv`
90
+
91
+ `manifest.tsv` is optional. When present, metadata are merged by `record_id`.
92
+ When absent, metadata are inferred from FASTA IDs when possible.
93
+
94
+ ## Quick Start: Preprocessing a Folder
95
+
96
+ If your folder contains paired FASTA files, EvoSeq can discover them:
97
+
98
+ ```python
99
+ from evoseq.preprocess import preprocess_folder
100
+
101
+ evo_df, paths = preprocess_folder("test")
102
+ ```
103
+
104
+ ## Quick Start: Evo2 Scoring
105
+
106
+ ```python
107
+ from evoseq.scoring import score_pairs_file
108
+
109
+ result_df, result_paths = score_pairs_file(
110
+ pairs_path="test/evoseq_preprocess_output/evo2_pairs.tsv",
111
+ model_name="evo2_7b",
112
+ batch_size=8,
113
+ )
114
+ ```
115
+
116
+ By default, outputs are written next to the pair table:
117
+ `test/evoseq_preprocess_output/evoseq_scoring_output/`.
118
+
119
+ Use `output_dir="outputs/scoring"` if you want a project-level result folder. Outputs include:
120
+
121
+ - `evo2_variant_scores_unique.tsv`
122
+ - `evo2_variant_scores_manifest.tsv` when a manifest is available
123
+ - `environment_info.tsv`
124
+ - `scoring_report.tsv`
125
+
126
+ Reference sequences are scored once per unique sequence and reused. This is
127
+ useful when many variants share the same reference window.
128
+
129
+ ## Model Handling
130
+
131
+ EvoSeq caches the loaded Evo2 model inside the Python process:
132
+
133
+ ```python
134
+ from evoseq.scoring import Evo2Scorer
135
+
136
+ scorer = Evo2Scorer(model_name="evo2_7b", device="cuda:0")
137
+ scores = scorer.score_sequences(["ACGTACGT"])
138
+ ```
139
+
140
+ Calling another scoring function with the same model reuses it. Attempting to
141
+ load a different Evo2 model in the same runtime raises an explicit error by
142
+ default, because loading multiple large models often exhausts Colab GPU memory.
143
+ Restart the runtime when switching from 7B to 20B.
144
+
145
+ Common model names:
146
+
147
+ - `evo2_7b`
148
+ - `evo2_7b_base`
149
+ - `evo2_20b`
150
+
151
+ For local model weights:
152
+
153
+ ```python
154
+ score_evo2_pairs(
155
+ base_dir=".",
156
+ model_name="evo2_20b",
157
+ local_path="/content/drive/MyDrive/Models/evo2_20b.pt",
158
+ )
159
+ ```
160
+
161
+ ## TOML Config
162
+
163
+ Copy `evoseq.example.toml`, edit the input paths/model, and run:
164
+
165
+ ```python
166
+ from evoseq import run_from_config
167
+
168
+ outputs = run_from_config("evoseq.example.toml")
169
+ ```
170
+
171
+ or:
172
+
173
+ ```bash
174
+ evoseq-run evoseq.example.toml
175
+ ```
176
+
177
+ ## Per-Base Log-Probabilities
178
+
179
+ ```python
180
+ from evoseq.scoring import export_perbase_logprobs
181
+
182
+ path = export_perbase_logprobs(
183
+ fasta_path="test/representative_perbase.fasta",
184
+ model_name="evo2_7b",
185
+ center=4096,
186
+ half_window=320,
187
+ )
188
+ ```
189
+
190
+ By default, this writes `test/evoseq_perbase_output/perbase_logprobs.tsv`.
191
+
192
+ ## Reproducibility
193
+
194
+ EvoSeq writes small TSV reports for methods sections and reruns:
195
+
196
+ - input paths and output paths
197
+ - number of variants and unique reference sequences
198
+ - model name, batch size, device, elapsed time
199
+ - Python, PyTorch, CUDA, GPU, NumPy, pandas, Biopython, and Evo2 versions
200
+
201
+ These files are meant to be saved with each analysis directory.
@@ -0,0 +1,3 @@
1
+ from .config import load_config, run_from_config
2
+
3
+ __all__ = ["load_config", "run_from_config"]
@@ -0,0 +1,21 @@
1
+ import argparse
2
+
3
+ from .config import run_from_config
4
+
5
+
6
def main(argv=None):
    """Command-line entry point: run the workflow described by a TOML config.

    Args:
        argv: Optional list of argument strings handed to argparse. When
            ``None``, argparse falls back to the process command line.

    The function prints a completion banner followed by every output entry
    whose key ends in ``"_paths"``; each such entry is iterated as a mapping
    of name to path. Other entries returned by ``run_from_config`` are not
    printed.
    """
    cli = argparse.ArgumentParser(description="Run EvoSeq from a TOML config.")
    cli.add_argument("config", help="Path to an EvoSeq TOML config file.")
    options = cli.parse_args(argv)

    results = run_from_config(options.config)
    print("EvoSeq run completed.")

    # Report only the "*_paths" groups, in the order they were produced.
    path_groups = (
        (group, files) for group, files in results.items() if group.endswith("_paths")
    )
    for group, files in path_groups:
        print(f"{group}:")
        for label, location in files.items():
            print(f" {label}: {location}")


if __name__ == "__main__":
    main()
@@ -0,0 +1,107 @@
1
+ from pathlib import Path
2
+ import tomllib
3
+
4
+ from .preprocess import preprocess_files, preprocess_folder
5
+ from .scoring import export_perbase_logprobs, score_pairs_file
6
+
7
+
8
+ def _none_if_blank(value):
9
+ return None if value == "" else value
10
+
11
+
12
+ def load_config(path):
13
+ with open(path, "rb") as fh:
14
+ return tomllib.load(fh)
15
+
16
+
17
def run_from_config(path):
    """Run the preprocess, scoring and per-base stages selected by a TOML file.

    The config is read from the ``[project]``, ``[preprocess]``, ``[scoring]``
    and ``[perbase]`` tables. Preprocessing runs unless ``enabled`` is set to
    false; scoring and per-base export run only when ``enabled`` is true.

    Args:
        path: Path to the TOML config file.

    Returns:
        A dict that always contains ``"config_path"`` and, depending on which
        stages ran, ``"preprocess_df"``/``"preprocess_paths"``,
        ``"scoring_df"``/``"scoring_paths"`` and ``"perbase_path"``.

    Raises:
        ValueError: Scoring is enabled but no pair table is configured and
            preprocessing did not provide one.
        KeyError: Per-base export is enabled without a ``fasta_path`` entry.
    """
    config = load_config(path)

    project_section = config.get("project", {})
    input_dir = project_section.get("input_dir", project_section.get("base_dir", "."))

    pre_cfg = config.get("preprocess", {})
    score_cfg = config.get("scoring", {})
    perbase_cfg = config.get("perbase", {})

    outputs = {}

    if pre_cfg.get("enabled", True):
        ref_fasta = _none_if_blank(pre_cfg.get("reference_fasta_path"))
        mut_fasta = _none_if_blank(pre_cfg.get("mutant_fasta_path"))

        # Keyword arguments accepted by both preprocessing entry points.
        # "out_dir" is honoured as an alternative key for "output_dir".
        shared_kwargs = {
            "reference_fasta_path": ref_fasta,
            "mutant_fasta_path": mut_fasta,
            "manifest_path": pre_cfg.get("manifest_path", "auto"),
            "output_dir": _none_if_blank(
                pre_cfg.get("output_dir", pre_cfg.get("out_dir"))
            ),
            "strict_manifest": pre_cfg.get("strict_manifest", False),
            "progress": pre_cfg.get("progress", True),
        }

        if ref_fasta and mut_fasta:
            # Both FASTA files are named explicitly: no folder discovery.
            evo_df, saved = preprocess_files(**shared_kwargs)
        else:
            # At least one FASTA is missing: work from the input folder.
            evo_df, saved = preprocess_folder(
                input_dir=input_dir,
                dataset_type=pre_cfg.get("dataset_type", "auto"),
                window_size=pre_cfg.get("window_size"),
                **shared_kwargs,
            )
        outputs["preprocess_df"] = evo_df
        outputs["preprocess_paths"] = saved

    if score_cfg.get("enabled", False):
        pairs_path = _none_if_blank(score_cfg.get("pairs_path"))
        if not pairs_path:
            # Fall back to the pair table written by the preprocess stage.
            pairs_path = outputs.get("preprocess_paths", {}).get("pairs")
        if not pairs_path:
            raise ValueError(
                "scoring.enabled is true, but no pairs_path was provided and "
                "preprocessing did not produce a pair table."
            )

        # "result_dir" is honoured as an alternative key for "output_dir".
        scoring_out_dir = _none_if_blank(
            score_cfg.get("output_dir", score_cfg.get("result_dir"))
        )
        result_df, paths = score_pairs_file(
            pairs_path=pairs_path,
            output_dir=scoring_out_dir,
            manifest_path=score_cfg.get("manifest_path", "auto"),
            model_name=score_cfg.get("model_name", "evo2_7b"),
            device=score_cfg.get("device", "cuda:0"),
            local_path=_none_if_blank(score_cfg.get("local_path")),
            batch_size=score_cfg.get("batch_size", 8),
            force_reload=score_cfg.get("force_reload", False),
            require_recommended_gpu=score_cfg.get("require_recommended_gpu", True),
            progress=score_cfg.get("progress", True),
        )
        outputs["scoring_df"] = result_df
        outputs["scoring_paths"] = paths

    if perbase_cfg.get("enabled", False):
        # "fasta_path" is mandatory for this stage; a missing key raises KeyError.
        outputs["perbase_path"] = export_perbase_logprobs(
            fasta_path=perbase_cfg["fasta_path"],
            output_path=_none_if_blank(perbase_cfg.get("output_path")),
            output_dir=_none_if_blank(perbase_cfg.get("output_dir")),
            model_name=perbase_cfg.get("model_name", "evo2_7b"),
            device=perbase_cfg.get("device", "cuda:0"),
            center=perbase_cfg.get("center", 4096),
            half_window=perbase_cfg.get("half_window", 320),
            local_path=_none_if_blank(perbase_cfg.get("local_path")),
            progress=perbase_cfg.get("progress", True),
        )

    outputs["config_path"] = Path(path)
    return outputs
@@ -0,0 +1,43 @@
1
+ from pathlib import Path
2
+
3
+
4
def common_parent(paths):
    """Return the deepest directory shared by the parents of ``paths``.

    Falsy entries (``None``, ``""``) are ignored. Each remaining path is
    user-expanded and resolved, and its parent directory is taken.

    Args:
        paths: Iterable of path-like values, possibly containing blanks.

    Returns:
        The current working directory when no usable path is given, the
        single parent when there is exactly one, and otherwise the common
        ancestor of all parents as computed by ``os.path.commonpath``.
    """
    parent_dirs = [
        Path(entry).expanduser().resolve().parent for entry in paths if entry
    ]

    if len(parent_dirs) > 1:
        import os

        return Path(os.path.commonpath(list(map(str, parent_dirs))))

    return parent_dirs[0] if parent_dirs else Path.cwd()
14
+
15
+
16
def default_output_dir(kind, *input_paths, base_dir=None):
    """Build the default output directory for one workflow stage.

    Args:
        kind: Stage label such as ``"preprocess"``, ``"scoring"`` or
            ``"perbase"``; any other label yields ``evoseq_<kind>_output``.
        *input_paths: Input files whose common parent hosts the output
            folder when ``base_dir`` is not given.
        base_dir: Optional directory that overrides the input-derived parent.

    Returns:
        A ``Path`` ending in the stage-specific folder name. Nothing is
        created on disk by this function.
    """
    known_folders = {
        "preprocess": "evoseq_preprocess_output",
        "scoring": "evoseq_scoring_output",
        "perbase": "evoseq_perbase_output",
    }
    if kind in known_folders:
        folder = known_folders[kind]
    else:
        folder = f"evoseq_{kind}_output"

    parent = Path(base_dir) if base_dir else common_parent(input_paths)
    return parent / folder
28
+
29
+
30
def ensure_output_dir(path, fallback="/content/evoseq_output"):
    """Create ``path`` and confirm it is writable, else use ``fallback``.

    Writability is checked by writing and then removing a small
    ``.write_test`` file inside the directory.

    Args:
        path: Desired output directory.
        fallback: Directory used when ``path`` cannot be created or written.

    Returns:
        ``Path(path)`` on success. When an ``OSError`` occurs, two warning
        lines are printed, the fallback directory is created, and
        ``Path(fallback)`` is returned instead.
    """
    target = Path(path)
    try:
        target.mkdir(parents=True, exist_ok=True)
        probe = target / ".write_test"
        probe.write_text("ok")
        probe.unlink(missing_ok=True)
    except OSError as exc:
        alternate = Path(fallback)
        print(f"Warning: cannot use output directory {target} ({exc}).")
        print(f"Using fallback output directory: {alternate}")
        # Errors creating the fallback itself are deliberately not caught here.
        alternate.mkdir(parents=True, exist_ok=True)
        return alternate
    return target
@@ -0,0 +1,13 @@
1
+ from .pipeline import (
2
+ prepare_evo2_input,
3
+ preprocess_files,
4
+ preprocess_folder,
5
+ preprocess_from_base_dir,
6
+ )
7
+
8
+ __all__ = [
9
+ "prepare_evo2_input",
10
+ "preprocess_files",
11
+ "preprocess_folder",
12
+ "preprocess_from_base_dir",
13
+ ]