sai-pg 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sai/__init__.py +2 -0
- sai/__main__.py +6 -3
- sai/configs/__init__.py +24 -0
- sai/configs/global_config.py +83 -0
- sai/configs/ploidy_config.py +94 -0
- sai/configs/pop_config.py +82 -0
- sai/configs/stat_config.py +220 -0
- sai/{utils/generators → generators}/chunk_generator.py +1 -1
- sai/{utils/generators → generators}/window_generator.py +81 -37
- sai/{utils/multiprocessing → multiprocessing}/mp_manager.py +2 -2
- sai/{utils/multiprocessing → multiprocessing}/mp_pool.py +2 -2
- sai/parsers/outlier_parser.py +4 -3
- sai/parsers/score_parser.py +8 -119
- sai/{utils/preprocessors → preprocessors}/chunk_preprocessor.py +21 -15
- sai/preprocessors/feature_preprocessor.py +236 -0
- sai/registries/__init__.py +22 -0
- sai/registries/generic_registry.py +89 -0
- sai/registries/stat_registry.py +30 -0
- sai/sai.py +124 -220
- sai/stats/__init__.py +11 -0
- sai/stats/danc_statistic.py +83 -0
- sai/stats/dd_statistic.py +77 -0
- sai/stats/df_statistic.py +84 -0
- sai/stats/dplus_statistic.py +86 -0
- sai/stats/fd_statistic.py +92 -0
- sai/stats/generic_statistic.py +93 -0
- sai/stats/q_statistic.py +104 -0
- sai/stats/stat_utils.py +259 -0
- sai/stats/u_statistic.py +99 -0
- sai/utils/utils.py +213 -142
- {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/METADATA +3 -14
- sai_pg-1.1.0.dist-info/RECORD +70 -0
- {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/WHEEL +1 -1
- sai_pg-1.1.0.dist-info/top_level.txt +2 -0
- tests/configs/test_global_config.py +163 -0
- tests/configs/test_ploidy_config.py +93 -0
- tests/configs/test_pop_config.py +90 -0
- tests/configs/test_stat_config.py +171 -0
- tests/generators/test_chunk_generator.py +51 -0
- tests/generators/test_window_generator.py +164 -0
- tests/multiprocessing/test_mp_manager.py +92 -0
- tests/multiprocessing/test_mp_pool.py +79 -0
- tests/parsers/test_argument_validation.py +133 -0
- tests/parsers/test_outlier_parser.py +53 -0
- tests/parsers/test_score_parser.py +63 -0
- tests/preprocessors/test_chunk_preprocessor.py +79 -0
- tests/preprocessors/test_feature_preprocessor.py +223 -0
- tests/registries/test_registries.py +74 -0
- tests/stats/test_danc_statistic.py +51 -0
- tests/stats/test_dd_statistic.py +45 -0
- tests/stats/test_df_statistic.py +73 -0
- tests/stats/test_dplus_statistic.py +79 -0
- tests/stats/test_fd_statistic.py +68 -0
- tests/stats/test_q_statistic.py +268 -0
- tests/stats/test_stat_utils.py +354 -0
- tests/stats/test_u_statistic.py +233 -0
- tests/test___main__.py +51 -0
- tests/test_sai.py +102 -0
- tests/utils/test_utils.py +511 -0
- sai/parsers/plot_parser.py +0 -152
- sai/stats/features.py +0 -302
- sai/utils/preprocessors/feature_preprocessor.py +0 -211
- sai_pg-1.0.1.dist-info/RECORD +0 -30
- sai_pg-1.0.1.dist-info/top_level.txt +0 -1
- /sai/{utils/generators → generators}/__init__.py +0 -0
- /sai/{utils/generators → generators}/data_generator.py +0 -0
- /sai/{utils/multiprocessing → multiprocessing}/__init__.py +0 -0
- /sai/{utils/preprocessors → preprocessors}/__init__.py +0 -0
- /sai/{utils/preprocessors → preprocessors}/data_preprocessor.py +0 -0
- {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/entry_points.txt +0 -0
- {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,164 @@
|
|
1
|
+
# Copyright 2025 Xin Huang
|
2
|
+
#
|
3
|
+
# GNU General Public License v3.0
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, please see
|
17
|
+
#
|
18
|
+
# https://www.gnu.org/licenses/gpl-3.0.en.html
|
19
|
+
|
20
|
+
|
21
|
+
import pytest
|
22
|
+
from sai.generators import WindowGenerator
|
23
|
+
from sai.configs import PloidyConfig
|
24
|
+
|
25
|
+
|
26
|
+
@pytest.fixture
def test_generator():
    """Provide a WindowGenerator built from the files under ``tests/data``.

    The generator targets chromosome "21" with a window length of 1000 and
    a step of 500; every population in the ploidy mapping has ploidy 2.
    """
    population_ploidies = {
        "ref": {"ref1": 2},
        "tgt": {"tgt1": 2, "tgt2": 2},
        "src": {"src1": 2, "src2": 2},
    }
    generator_kwargs = dict(
        vcf_file="tests/data/test.data.vcf",
        chr_name="21",
        ref_ind_file="tests/data/test.ref.ind.list",
        tgt_ind_file="tests/data/test.tgt.ind.list",
        src_ind_file="tests/data/test.src.ind.list",
        out_ind_file=None,
        win_len=1000,
        win_step=500,
        ploidy_config=PloidyConfig(population_ploidies),
    )
    return WindowGenerator(**generator_kwargs)
|
49
|
+
|
50
|
+
|
51
|
+
def test_initialization(test_generator):
    """The window length and step passed to the fixture are stored as given."""
    stored = (test_generator.win_len, test_generator.win_step)
    assert stored == (1000, 500)
|
55
|
+
|
56
|
+
|
57
|
+
def test_window_generator(test_generator):
    """Exhaust the generator and check the window count and first window's keys."""
    windows = list(test_generator.get())

    # The test data is expected to produce exactly this many windows.
    assert len(windows) == 380

    head = windows[0]
    for key in ("chr_name", "start", "end", "ref_pop"):
        assert key in head
    assert head["ref_pop"] == "ref1"
    for key in (
        "tgt_pop",
        "src_pop_list",
        "ref_gts",
        "tgt_gts",
        "src_gts_list",
        "ploidy_config",
    ):
        assert key in head
|
77
|
+
|
78
|
+
|
79
|
+
def test_none_window_generator(test_generator):
    """With ``ref_data`` cleared, windows are still yielded but carry no genotypes."""
    test_generator.ref_data = None

    windows = list(test_generator.get())
    assert len(windows) == 380

    head = windows[0]
    for genotype_key in ("ref_gts", "tgt_gts", "src_gts_list"):
        assert head[genotype_key] is None

    # The ploidy configuration is still attached to the window.
    ploidy = head["ploidy_config"]
    for group in ("ref", "tgt", "src"):
        assert ploidy.get_ploidy(group)[0] == 2

    assert len(head["pos"]) == 0
|
93
|
+
|
94
|
+
|
95
|
+
def test_len(test_generator):
    """``len()`` of the single-source generator reports 380 windows."""
    window_count = len(test_generator)
    assert window_count == 380
|
98
|
+
|
99
|
+
|
100
|
+
@pytest.fixture
def test_generator_two_sources():
    """Provide a WindowGenerator configured with ``num_src=2``.

    Uses the same input files, chromosome, window length/step and ploidy
    mapping as the single-source fixture; only ``num_src`` is added.
    """
    population_ploidies = {
        "ref": {"ref1": 2},
        "tgt": {"tgt1": 2, "tgt2": 2},
        "src": {"src1": 2, "src2": 2},
    }
    generator_kwargs = dict(
        vcf_file="tests/data/test.data.vcf",
        chr_name="21",
        ref_ind_file="tests/data/test.ref.ind.list",
        tgt_ind_file="tests/data/test.tgt.ind.list",
        src_ind_file="tests/data/test.src.ind.list",
        out_ind_file=None,
        win_len=1000,
        win_step=500,
        num_src=2,
        ploidy_config=PloidyConfig(population_ploidies),
    )
    return WindowGenerator(**generator_kwargs)
|
124
|
+
|
125
|
+
|
126
|
+
def test_initialization_two_sources(test_generator_two_sources):
    """The two-source generator keeps its window, ploidy and ``num_src`` settings."""
    gen = test_generator_two_sources

    assert gen.win_len == 1000
    assert gen.win_step == 500
    for group in ("ref", "tgt", "src"):
        assert gen.ploidy_config.get_ploidy(group)[0] == 2
    assert gen.num_src == 2
|
134
|
+
|
135
|
+
|
136
|
+
def test_window_generator_with_two_sources(test_generator_two_sources):
    """The first window of a two-source run has all keys and two source entries."""
    windows = list(test_generator_two_sources.get())

    # At least one window must have been produced.
    assert len(windows) > 0
    head = windows[0]

    required_keys = (
        "start",
        "end",
        "ref_pop",
        "tgt_pop",
        "src_pop_list",
        "ref_gts",
        "tgt_gts",
        "src_gts_list",
        "ploidy_config",
    )
    for key in required_keys:
        assert key in head

    # Both the population list and the genotype list hold one entry per source.
    assert len(head["src_pop_list"]) == 2
    assert len(head["src_gts_list"]) == 2
|
160
|
+
|
161
|
+
|
162
|
+
def test_len_two_sources(test_generator_two_sources):
    """``len()`` of the two-source generator is a positive window count."""
    window_count = len(test_generator_two_sources)
    assert window_count > 0
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# Copyright 2025 Xin Huang
|
2
|
+
#
|
3
|
+
# GNU General Public License v3.0
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, please see
|
17
|
+
#
|
18
|
+
# https://www.gnu.org/licenses/gpl-3.0.en.html
|
19
|
+
|
20
|
+
|
21
|
+
from sai.multiprocessing import mp_manager
|
22
|
+
from sai.preprocessors import DataPreprocessor
|
23
|
+
from sai.generators import DataGenerator
|
24
|
+
|
25
|
+
|
26
|
+
class TmpDataPreprocessor(DataPreprocessor):
    """Minimal preprocessor used to drive ``mp_manager`` in tests."""

    def run(self, rep):
        """Return ``rep`` wrapped in a one-element list."""
        wrapped = [rep]
        return wrapped

    def process_items(self, items):
        """Print the collected items to stdout so the test can capture them."""
        print(items)
|
32
|
+
|
33
|
+
|
34
|
+
class TmpDataGenerator(DataGenerator):
    """Generator yielding ``nrep`` consecutive ``{"rep": i}`` dictionaries."""

    def __init__(self, start_rep=0, nrep=5):
        """Remember the first rep index and how many reps to yield."""
        self.start_rep = start_rep
        self.nrep = nrep

    def get(self):
        """Yield one ``{"rep": i}`` dict per index, starting at ``start_rep``."""
        stop = self.start_rep + self.nrep
        yield from ({"rep": index} for index in range(self.start_rep, stop))

    def __len__(self):
        """Return the number of reps this generator yields."""
        return self.nrep
|
45
|
+
|
46
|
+
|
47
|
+
class FailureDataPreprocessor(DataPreprocessor):
    """Preprocessor whose ``run`` always raises, to exercise the failure path."""

    def run(self, rep):
        """Raise unconditionally; ``rep`` is ignored."""
        failure = Exception("Simulating failure by stopping.")
        raise failure

    def process_items(self, items):
        """Print the collected items to stdout."""
        print(items)
|
53
|
+
|
54
|
+
|
55
|
+
def test_mp_manager(capfd):
    """Run ``mp_manager`` over five reps with two processes and check stdout."""
    mp_manager(
        data_processor=TmpDataPreprocessor(),
        data_generator=TmpDataGenerator(nrep=5),
        nprocess=2,
    )

    # TmpDataPreprocessor.process_items prints the items it receives.
    printed = capfd.readouterr().out.strip()

    wanted = {"[0, 1, 2, 3, 4]"}
    assert {printed} == wanted, "The output does not match the expected results."
|
75
|
+
|
76
|
+
|
77
|
+
def test_mp_manager_failure(capfd):
    """A failing preprocessor surfaces its error and triggers the shutdown messages."""
    mp_manager(
        data_processor=FailureDataPreprocessor(),
        data_generator=TmpDataGenerator(nrep=5),
        nprocess=2,
    )

    captured = capfd.readouterr()
    stdout_text, stderr_text = captured.out, captured.err

    # The simulated exception message is expected on stderr ...
    assert "Simulating failure by stopping." in stderr_text
    # ... and the shutdown notices on stdout.
    for notice in (
        "did not complete successfully. Initiating shutdown.",
        "All workers are terminated.",
    ):
        assert notice in stdout_text
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# Copyright 2025 Xin Huang
|
2
|
+
#
|
3
|
+
# GNU General Public License v3.0
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, please see
|
17
|
+
#
|
18
|
+
# https://www.gnu.org/licenses/gpl-3.0.en.html
|
19
|
+
|
20
|
+
|
21
|
+
import pytest
|
22
|
+
from sai.multiprocessing.mp_pool import mp_pool, mp_worker
|
23
|
+
|
24
|
+
|
25
|
+
class DataPreprocessor:
    """Stand-in data processor for exercising the pool helpers."""

    def run(self, x: int) -> int:
        """Return ``x`` raised to the power of two."""
        squared = pow(x, 2)
        return squared

    def process_items(self, results: list) -> None:
        """Keep the given results on ``final_results`` for later inspection."""
        self.final_results = results
|
35
|
+
|
36
|
+
|
37
|
+
class DataGenerator:
    """Stand-in data generator that wraps a list of values."""

    def __init__(self, data: list):
        """Store the values to be yielded by :meth:`get`."""
        self.data = data

    def get(self):
        """Yield each stored value as a ``{"x": value}`` dictionary."""
        yield from ({"x": value} for value in self.data)
|
47
|
+
|
48
|
+
|
49
|
+
@pytest.mark.parametrize(
    "params, expected",
    [
        ((DataPreprocessor(), {"x": base}), square)
        for base, square in ((2, 4), (3, 9), (4, 16))
    ],
)
def test_mp_worker(params, expected):
    """``mp_worker`` returns the processor's result for the given keyword dict."""
    outcome = mp_worker(params)
    assert outcome == expected
|
60
|
+
|
61
|
+
|
62
|
+
def test_mp_pool():
    """``mp_pool`` hands the squared inputs to the processor's ``process_items``."""
    processor = DataPreprocessor()
    source = DataGenerator([1, 2, 3, 4, 5])

    mp_pool(processor, source, 2)

    # process_items is what sets final_results on the mock processor.
    assert hasattr(processor, "final_results")
    # Sort before comparing so the check does not depend on result order.
    ordered = sorted(processor.final_results)
    assert ordered == [1, 4, 9, 16, 25]
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# Copyright 2025 Xin Huang
|
2
|
+
#
|
3
|
+
# GNU General Public License v3.0
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, please see
|
17
|
+
#
|
18
|
+
# https://www.gnu.org/licenses/gpl-3.0.en.html
|
19
|
+
|
20
|
+
|
21
|
+
import argparse
|
22
|
+
import pytest
|
23
|
+
from sai.parsers.argument_validation import positive_int
|
24
|
+
from sai.parsers.argument_validation import positive_number
|
25
|
+
from sai.parsers.argument_validation import between_zero_and_one
|
26
|
+
from sai.parsers.argument_validation import existed_file
|
27
|
+
from sai.parsers.argument_validation import validate_stat_type
|
28
|
+
|
29
|
+
|
30
|
+
def test_positive_int():
    """``positive_int`` parses positive integers and rejects everything else."""
    assert positive_int("5") == 5

    # Zero, a negative value and a non-integer string, with the expected message.
    rejected = (
        ("0", "0 is not a positive integer"),
        ("-1", "-1 is not a positive integer"),
        ("abc", "abc is not a valid integer"),
    )
    for raw, message in rejected:
        with pytest.raises(argparse.ArgumentTypeError, match=message):
            positive_int(raw)
|
47
|
+
|
48
|
+
|
49
|
+
def test_positive_number():
    """``positive_number`` parses positive numbers and rejects everything else.

    ``pytest.raises(match=...)`` treats its argument as a regular expression,
    so the decimal point in the expected message is escaped to require a
    literal ``.`` rather than any character.
    """
    # Valid positive number
    assert positive_number("3.14") == 3.14

    # Not a positive number (zero)
    with pytest.raises(argparse.ArgumentTypeError, match="0 is not a positive number"):
        positive_number("0")

    # Negative number
    with pytest.raises(
        argparse.ArgumentTypeError, match=r"-2\.5 is not a positive number"
    ):
        positive_number("-2.5")

    # Non-numeric string
    with pytest.raises(argparse.ArgumentTypeError, match="xyz is not a valid number"):
        positive_number("xyz")
|
66
|
+
|
67
|
+
|
68
|
+
def test_between_zero_and_one():
    """``between_zero_and_one`` accepts values in [0, 1] and rejects the rest.

    ``pytest.raises(match=...)`` treats its argument as a regular expression,
    so decimal points in the expected messages are escaped to require a
    literal ``.`` rather than any character.
    """
    # Values within range, including both end points
    assert between_zero_and_one("0.5") == 0.5
    assert between_zero_and_one("0") == 0
    assert between_zero_and_one("1") == 1

    # Values out of range
    with pytest.raises(
        argparse.ArgumentTypeError, match=r"1\.5 is not between 0 and 1"
    ):
        between_zero_and_one("1.5")

    with pytest.raises(
        argparse.ArgumentTypeError, match=r"-0\.1 is not between 0 and 1"
    ):
        between_zero_and_one("-0.1")

    # Non-numeric string
    with pytest.raises(
        argparse.ArgumentTypeError, match="not_a_number is not a valid number"
    ):
        between_zero_and_one("not_a_number")
|
86
|
+
|
87
|
+
|
88
|
+
def test_existed_file(tmp_path):
    """``existed_file`` returns an existing path unchanged and rejects a missing one."""
    present = tmp_path / "temp.txt"
    present.write_text("This is a test file.")
    present_path = str(present)

    # An existing file is passed through as the same string.
    assert existed_file(present_path) == present_path

    # A path that does not exist is rejected with an argparse error.
    missing_message = "non_existent_file is not found"
    with pytest.raises(argparse.ArgumentTypeError, match=missing_message):
        existed_file("non_existent_file")
|
101
|
+
|
102
|
+
|
103
|
+
def test_valid_inputs():
    """Well-formed statistic type strings are returned unchanged."""
    for stat_type in ("U50", "Q05", "Q95", "Q99"):
        assert validate_stat_type(stat_type) == stat_type
|
108
|
+
|
109
|
+
|
110
|
+
def test_invalid_inputs():
    """Malformed statistic type strings raise ``argparse.ArgumentTypeError``."""
    malformed = ("U", "Q", "Q5", "U100", "Q100", "X50", "Qabc", "")
    for stat_type in malformed:
        with pytest.raises(argparse.ArgumentTypeError):
            validate_stat_type(stat_type)
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# Copyright 2025 Xin Huang
|
2
|
+
#
|
3
|
+
# GNU General Public License v3.0
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, please see
|
17
|
+
#
|
18
|
+
# https://www.gnu.org/licenses/gpl-3.0.en.html
|
19
|
+
|
20
|
+
|
21
|
+
import pytest
|
22
|
+
import argparse
|
23
|
+
from sai.parsers.outlier_parser import add_outlier_parser
|
24
|
+
|
25
|
+
|
26
|
+
@pytest.fixture
def parser():
    """Return an argument parser with the ``outlier`` sub-command registered."""
    root = argparse.ArgumentParser()
    # The chosen sub-command name is stored on the parsed namespace as ``command``.
    command_parsers = root.add_subparsers(dest="command")
    add_outlier_parser(command_parsers)
    return root
|
33
|
+
|
34
|
+
|
35
|
+
def test_add_outlier_parser(parser):
    """Parse an ``outlier`` command line and check every resulting attribute."""
    argv = [
        "outlier",
        "--score",
        "tests/data/example.scores",
        "--output-prefix",
        "output/test_outliers",
        "--quantile",
        "0.95",
    ]
    args = parser.parse_args(argv)

    wanted = {
        "command": "outlier",
        "score": "tests/data/example.scores",
        "output_prefix": "output/test_outliers",
        "quantile": 0.95,
    }
    for attribute, value in wanted.items():
        assert getattr(args, attribute) == value
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Copyright 2025 Xin Huang
|
2
|
+
#
|
3
|
+
# GNU General Public License v3.0
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, please see
|
17
|
+
#
|
18
|
+
# https://www.gnu.org/licenses/gpl-3.0.en.html
|
19
|
+
|
20
|
+
|
21
|
+
import pytest
|
22
|
+
import argparse
|
23
|
+
from sai.parsers.score_parser import add_score_parser
|
24
|
+
|
25
|
+
|
26
|
+
@pytest.fixture
def parser():
    """Return an argument parser with the ``score`` sub-command registered."""
    root = argparse.ArgumentParser()
    # The chosen sub-command name is stored on the parsed namespace as ``command``.
    command_parsers = root.add_subparsers(dest="command")
    add_score_parser(command_parsers)
    return root
|
33
|
+
|
34
|
+
|
35
|
+
def test_add_score_parser(parser):
    """Parse a ``score`` command line and check every resulting attribute."""
    argv = [
        "score",
        "--vcf",
        "tests/data/example.vcf",
        "--chr-name",
        "chr1",
        "--win-len",
        "50000",
        "--win-step",
        "10000",
        "--output",
        "output/results.tsv",
        "--config",
        "tests/data/test_config.yaml",
    ]
    args = parser.parse_args(argv)

    assert args.command == "score"
    assert args.vcf == "tests/data/example.vcf"
    assert args.chr_name == "chr1"
    assert (args.win_len, args.win_step) == (50000, 10000)
    # No ancestral-allele option was supplied on the command line.
    assert args.anc_alleles is None
    assert args.output == "output/results.tsv"
    assert args.config == "tests/data/test_config.yaml"
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# Copyright 2025 Xin Huang
|
2
|
+
#
|
3
|
+
# GNU General Public License v3.0
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, please see
|
17
|
+
#
|
18
|
+
# https://www.gnu.org/licenses/gpl-3.0.en.html
|
19
|
+
|
20
|
+
|
21
|
+
import pytest
|
22
|
+
import yaml
|
23
|
+
import sai.stats
|
24
|
+
from sai.preprocessors import ChunkPreprocessor
|
25
|
+
from sai.configs import PloidyConfig, StatConfig
|
26
|
+
|
27
|
+
|
28
|
+
@pytest.fixture
|
29
|
+
def example_data(tmp_path):
|
30
|
+
# Define example file paths
|
31
|
+
pytest.example_vcf = "tests/data/example.vcf"
|
32
|
+
pytest.example_ref_ind_list = "tests/data/example.ref.ind.list"
|
33
|
+
pytest.example_tgt_ind_list = "tests/data/example.tgt.ind.list"
|
34
|
+
pytest.example_src_ind_list = "tests/data/example.src.ind.list"
|
35
|
+
pytest.example_config = "tests/data/example.config.yaml"
|
36
|
+
|
37
|
+
# Create a temporary output file path for the score function
|
38
|
+
temp_output_file = tmp_path / "output.tsv"
|
39
|
+
|
40
|
+
return {
|
41
|
+
"vcf_file": pytest.example_vcf,
|
42
|
+
"ref_ind_file": pytest.example_ref_ind_list,
|
43
|
+
"tgt_ind_file": pytest.example_tgt_ind_list,
|
44
|
+
"src_ind_file": pytest.example_src_ind_list,
|
45
|
+
"config": pytest.example_config,
|
46
|
+
"output_file": str(temp_output_file),
|
47
|
+
"output_dir": tmp_path,
|
48
|
+
}
|
49
|
+
|
50
|
+
|
51
|
+
def test_chunk_preprocessor(example_data):
    """Run ``ChunkPreprocessor`` on one window of the example data and check ``Q``.

    The statistic and ploidy settings are read from the example YAML config.
    The expected value is compared with ``pytest.approx`` because ``Q`` is a
    computed floating-point statistic and exact equality is brittle.
    """
    # Read the config with an explicit encoding so the test does not depend
    # on the platform's default locale.
    with open(example_data["config"], "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

    stat_config = StatConfig(config["statistics"])
    ploidy_config = PloidyConfig(config["ploidies"])

    preprocessor = ChunkPreprocessor(
        vcf_file=example_data["vcf_file"],
        ref_ind_file=example_data["ref_ind_file"],
        tgt_ind_file=example_data["tgt_ind_file"],
        src_ind_file=example_data["src_ind_file"],
        out_ind_file=None,
        win_len=6666,
        win_step=6666,
        num_src=1,
        anc_allele_file=None,
        output_file=example_data["output_file"],
        stat_config=stat_config,
        ploidy_config=ploidy_config,
    )

    # The processed range matches the window length, so index 0 is the
    # window under test.
    results = preprocessor.run(
        chr_name="21",
        start=0,
        end=6666,
    )

    assert results[0]["Q"] == pytest.approx(0.9)
|