sai-pg 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. sai/__init__.py +2 -0
  2. sai/__main__.py +6 -3
  3. sai/configs/__init__.py +24 -0
  4. sai/configs/global_config.py +83 -0
  5. sai/configs/ploidy_config.py +94 -0
  6. sai/configs/pop_config.py +82 -0
  7. sai/configs/stat_config.py +220 -0
  8. sai/{utils/generators → generators}/chunk_generator.py +2 -8
  9. sai/{utils/generators → generators}/window_generator.py +82 -37
  10. sai/{utils/multiprocessing → multiprocessing}/mp_manager.py +2 -2
  11. sai/{utils/multiprocessing → multiprocessing}/mp_pool.py +2 -2
  12. sai/parsers/outlier_parser.py +4 -3
  13. sai/parsers/score_parser.py +8 -119
  14. sai/{utils/preprocessors → preprocessors}/chunk_preprocessor.py +21 -15
  15. sai/preprocessors/feature_preprocessor.py +236 -0
  16. sai/registries/__init__.py +22 -0
  17. sai/registries/generic_registry.py +89 -0
  18. sai/registries/stat_registry.py +30 -0
  19. sai/sai.py +124 -220
  20. sai/stats/__init__.py +11 -0
  21. sai/stats/danc_statistic.py +83 -0
  22. sai/stats/dd_statistic.py +77 -0
  23. sai/stats/df_statistic.py +84 -0
  24. sai/stats/dplus_statistic.py +86 -0
  25. sai/stats/fd_statistic.py +92 -0
  26. sai/stats/generic_statistic.py +93 -0
  27. sai/stats/q_statistic.py +104 -0
  28. sai/stats/stat_utils.py +259 -0
  29. sai/stats/u_statistic.py +99 -0
  30. sai/utils/utils.py +220 -143
  31. {sai_pg-1.0.0.dist-info → sai_pg-1.1.0.dist-info}/METADATA +3 -14
  32. sai_pg-1.1.0.dist-info/RECORD +70 -0
  33. {sai_pg-1.0.0.dist-info → sai_pg-1.1.0.dist-info}/WHEEL +1 -1
  34. sai_pg-1.1.0.dist-info/top_level.txt +2 -0
  35. tests/configs/test_global_config.py +163 -0
  36. tests/configs/test_ploidy_config.py +93 -0
  37. tests/configs/test_pop_config.py +90 -0
  38. tests/configs/test_stat_config.py +171 -0
  39. tests/generators/test_chunk_generator.py +51 -0
  40. tests/generators/test_window_generator.py +164 -0
  41. tests/multiprocessing/test_mp_manager.py +92 -0
  42. tests/multiprocessing/test_mp_pool.py +79 -0
  43. tests/parsers/test_argument_validation.py +133 -0
  44. tests/parsers/test_outlier_parser.py +53 -0
  45. tests/parsers/test_score_parser.py +63 -0
  46. tests/preprocessors/test_chunk_preprocessor.py +79 -0
  47. tests/preprocessors/test_feature_preprocessor.py +223 -0
  48. tests/registries/test_registries.py +74 -0
  49. tests/stats/test_danc_statistic.py +51 -0
  50. tests/stats/test_dd_statistic.py +45 -0
  51. tests/stats/test_df_statistic.py +73 -0
  52. tests/stats/test_dplus_statistic.py +79 -0
  53. tests/stats/test_fd_statistic.py +68 -0
  54. tests/stats/test_q_statistic.py +268 -0
  55. tests/stats/test_stat_utils.py +354 -0
  56. tests/stats/test_u_statistic.py +233 -0
  57. tests/test___main__.py +51 -0
  58. tests/test_sai.py +102 -0
  59. tests/utils/test_utils.py +511 -0
  60. sai/parsers/plot_parser.py +0 -152
  61. sai/stats/features.py +0 -302
  62. sai/utils/preprocessors/feature_preprocessor.py +0 -211
  63. sai_pg-1.0.0.dist-info/RECORD +0 -30
  64. sai_pg-1.0.0.dist-info/top_level.txt +0 -1
  65. /sai/{utils/generators → generators}/__init__.py +0 -0
  66. /sai/{utils/generators → generators}/data_generator.py +0 -0
  67. /sai/{utils/multiprocessing → multiprocessing}/__init__.py +0 -0
  68. /sai/{utils/preprocessors → preprocessors}/__init__.py +0 -0
  69. /sai/{utils/preprocessors → preprocessors}/data_preprocessor.py +0 -0
  70. {sai_pg-1.0.0.dist-info → sai_pg-1.1.0.dist-info}/entry_points.txt +0 -0
  71. {sai_pg-1.0.0.dist-info → sai_pg-1.1.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,164 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import pytest
22
+ from sai.generators import WindowGenerator
23
+ from sai.configs import PloidyConfig
24
+
25
+
26
@pytest.fixture
def test_generator():
    """Provide a WindowGenerator built from the bundled test data files.

    The generator reads chromosome "21" from ``tests/data/test.data.vcf``
    with a window length of 1000 and a step of 500. Every population
    listed in the ploidy mapping is configured with ploidy 2.
    """
    ploidies = {
        "ref": {"ref1": 2},
        "tgt": {"tgt1": 2, "tgt2": 2},
        "src": {"src1": 2, "src2": 2},
    }
    ploidy_config = PloidyConfig(ploidies)

    return WindowGenerator(
        vcf_file="tests/data/test.data.vcf",
        chr_name="21",
        ref_ind_file="tests/data/test.ref.ind.list",
        tgt_ind_file="tests/data/test.tgt.ind.list",
        src_ind_file="tests/data/test.src.ind.list",
        out_ind_file=None,
        win_len=1000,  # window length used throughout these tests
        win_step=500,  # step between consecutive window starts
        ploidy_config=ploidy_config,
    )
49
+
50
+
51
def test_initialization(test_generator):
    """The generator keeps the window length and step it was built with."""
    expected_attrs = (("win_len", 1000), ("win_step", 500))
    for attr_name, expected_value in expected_attrs:
        assert getattr(test_generator, attr_name) == expected_value
55
+
56
+
57
def test_window_generator(test_generator):
    """Iterating the generator yields 380 windows with the expected keys."""
    windows = list(test_generator.get())

    # The test data set is expected to produce exactly this many windows.
    assert len(windows) == 380

    # Only the first window is inspected for its layout.
    window = windows[0]
    required_keys = (
        "chr_name",
        "start",
        "end",
        "ref_pop",
        "tgt_pop",
        "src_pop_list",
        "ref_gts",
        "tgt_gts",
        "src_gts_list",
        "ploidy_config",
    )
    for key in required_keys:
        assert key in window

    assert window["ref_pop"] == "ref1"
77
+
78
+
79
def test_none_window_generator(test_generator):
    """With ``ref_data`` cleared, windows are still emitted but carry no genotypes."""
    test_generator.ref_data = None

    windows = list(test_generator.get())
    assert len(windows) == 380

    window = windows[0]

    # All genotype entries of the first window must be None.
    for gts_key in ("ref_gts", "tgt_gts", "src_gts_list"):
        assert window[gts_key] is None

    # The ploidy configuration is still attached and reports ploidy 2.
    window_ploidy = window["ploidy_config"]
    for group in ("ref", "tgt", "src"):
        assert window_ploidy.get_ploidy(group)[0] == 2

    assert len(window["pos"]) == 0
93
+
94
+
95
def test_len(test_generator):
    """``len()`` of the generator reports the expected window count."""
    expected_windows = 380
    assert len(test_generator) == expected_windows
98
+
99
+
100
@pytest.fixture
def test_generator_two_sources():
    """Provide a WindowGenerator configured with ``num_src=2``.

    Uses the same test data files, chromosome, window length (1000) and
    step (500) as the single-source setup, and the same all-ploidy-2
    population mapping.
    """
    ploidies = {
        "ref": {"ref1": 2},
        "tgt": {"tgt1": 2, "tgt2": 2},
        "src": {"src1": 2, "src2": 2},
    }
    ploidy_config = PloidyConfig(ploidies)

    return WindowGenerator(
        vcf_file="tests/data/test.data.vcf",
        chr_name="21",
        ref_ind_file="tests/data/test.ref.ind.list",
        tgt_ind_file="tests/data/test.tgt.ind.list",
        src_ind_file="tests/data/test.src.ind.list",
        out_ind_file=None,
        win_len=1000,  # window length used throughout these tests
        win_step=500,  # step between consecutive window starts
        num_src=2,  # exercise two-source combinations
        ploidy_config=ploidy_config,
    )
124
+
125
+
126
def test_initialization_two_sources(test_generator_two_sources):
    """The two-source generator keeps its window, ploidy and num_src settings."""
    generator = test_generator_two_sources

    assert generator.win_len == 1000
    assert generator.win_step == 500

    # The first ploidy entry of every population group is 2.
    for group in ("ref", "tgt", "src"):
        assert generator.ploidy_config.get_ploidy(group)[0] == 2

    assert generator.num_src == 2
134
+
135
+
136
def test_window_generator_with_two_sources(test_generator_two_sources):
    """Windows from the two-source generator carry two source populations."""
    windows = list(test_generator_two_sources.get())

    # At least one window must have been produced.
    assert windows
    window = windows[0]

    required_keys = (
        "start",
        "end",
        "ref_pop",
        "tgt_pop",
        "src_pop_list",
        "ref_gts",
        "tgt_gts",
        "src_gts_list",
        "ploidy_config",
    )
    for key in required_keys:
        assert key in window

    # Both the population list and the genotype list hold exactly two entries.
    for paired_key in ("src_pop_list", "src_gts_list"):
        assert len(window[paired_key]) == 2
160
+
161
+
162
def test_len_two_sources(test_generator_two_sources):
    """``len()`` of the two-source generator reports at least one window."""
    window_count = len(test_generator_two_sources)
    assert window_count >= 1
@@ -0,0 +1,92 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ from sai.multiprocessing import mp_manager
22
+ from sai.preprocessors import DataPreprocessor
23
+ from sai.generators import DataGenerator
24
+
25
+
26
class TmpDataPreprocessor(DataPreprocessor):
    """Minimal preprocessor used to observe what mp_manager collects."""

    def run(self, rep):
        """Return the given repetition index wrapped in a one-element list."""
        wrapped = [rep]
        return wrapped

    def process_items(self, items):
        """Print the collected items so the test can read them from stdout."""
        print(items)
32
+
33
+
34
class TmpDataGenerator(DataGenerator):
    """Generator yielding ``nrep`` consecutive repetition indices."""

    def __init__(self, start_rep=0, nrep=5):
        """Store the first repetition index and the number of repetitions."""
        self.start_rep = start_rep
        self.nrep = nrep

    def get(self):
        """Yield one ``{"rep": index}`` dict per repetition."""
        stop_rep = self.start_rep + self.nrep
        for rep_index in range(self.start_rep, stop_rep):
            yield {"rep": rep_index}

    def __len__(self):
        """Return the number of repetitions this generator produces."""
        return self.nrep
45
+
46
+
47
class FailureDataPreprocessor(DataPreprocessor):
    """Preprocessor whose ``run`` always raises, to exercise failure handling."""

    def run(self, rep):
        """Raise unconditionally to simulate a failing worker."""
        failure_message = "Simulating failure by stopping."
        raise Exception(failure_message)

    def process_items(self, items):
        """Print the collected items."""
        print(items)
53
+
54
+
55
def test_mp_manager(capfd):
    """mp_manager hands all five repetition results to ``process_items``."""
    worker_count = 2
    repetitions = 5

    mp_manager(
        data_processor=TmpDataPreprocessor(),
        data_generator=TmpDataGenerator(nrep=repetitions),
        nprocess=worker_count,
    )

    # TmpDataPreprocessor.process_items prints the items it is given, so the
    # captured stdout (stripped) is compared with the expected text.
    printed = capfd.readouterr().out.strip()
    expected = "[0, 1, 2, 3, 4]"

    assert printed == expected, "The output does not match the expected results."
75
+
76
+
77
def test_mp_manager_failure(capfd):
    """A failing preprocessor results in error output and shutdown messages."""
    worker_count = 2

    mp_manager(
        data_processor=FailureDataPreprocessor(),
        data_generator=TmpDataGenerator(nrep=5),
        nprocess=worker_count,
    )

    # capfd captures both stdout and stderr.
    captured = capfd.readouterr()
    stdout_text = captured.out
    stderr_text = captured.err

    # The raised message is expected on stderr.
    assert "Simulating failure by stopping." in stderr_text

    # The shutdown notices are expected on stdout.
    expected_notices = (
        "did not complete successfully. Initiating shutdown.",
        "All workers are terminated.",
    )
    for notice in expected_notices:
        assert notice in stdout_text
@@ -0,0 +1,79 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import pytest
22
+ from sai.multiprocessing.mp_pool import mp_pool, mp_worker
23
+
24
+
25
class DataPreprocessor:
    """Stand-in data processor that squares its input and records results."""

    def run(self, x: int) -> int:
        """Return ``x`` raised to the power of two."""
        return pow(x, 2)

    def process_items(self, results: list) -> None:
        """Keep the given results on the instance as ``final_results``."""
        setattr(self, "final_results", results)
35
+
36
+
37
class DataGenerator:
    """Stand-in data generator wrapping a list of values."""

    def __init__(self, data: list):
        self.data = data

    def get(self):
        """Yield each stored value as a ``{"x": value}`` dictionary."""
        for value in self.data:
            payload = {"x": value}
            yield payload
47
+
48
+
49
@pytest.mark.parametrize(
    "params, expected",
    [
        ((DataPreprocessor(), {"x": 2}), 4),
        ((DataPreprocessor(), {"x": 3}), 9),
        ((DataPreprocessor(), {"x": 4}), 16),
    ],
)
def test_mp_worker(params, expected):
    """mp_worker returns the expected value for each (processor, kwargs) pair."""
    outcome = mp_worker(params)
    assert outcome == expected
60
+
61
+
62
def test_mp_pool():
    """mp_pool processes every generated item and passes results to the processor."""
    processor = DataPreprocessor()
    generator = DataGenerator([1, 2, 3, 4, 5])
    worker_count = 2

    mp_pool(processor, generator, worker_count)

    # process_items must have been called, which sets final_results.
    assert hasattr(processor, "final_results")

    # Results are sorted before comparing with the squares of 1..5.
    expected_squares = [1, 4, 9, 16, 25]
    assert sorted(processor.final_results) == expected_squares
@@ -0,0 +1,133 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import argparse
22
+ import pytest
23
+ from sai.parsers.argument_validation import positive_int
24
+ from sai.parsers.argument_validation import positive_number
25
+ from sai.parsers.argument_validation import between_zero_and_one
26
+ from sai.parsers.argument_validation import existed_file
27
+ from sai.parsers.argument_validation import validate_stat_type
28
+
29
+
30
def test_positive_int():
    """positive_int accepts "5" and rejects zero, negatives and non-integers."""
    assert positive_int("5") == 5

    # (raw input, expected error message pattern)
    rejected = (
        ("0", "0 is not a positive integer"),
        ("-1", "-1 is not a positive integer"),
        ("abc", "abc is not a valid integer"),
    )
    for raw_value, message in rejected:
        with pytest.raises(argparse.ArgumentTypeError, match=message):
            positive_int(raw_value)
47
+
48
+
49
def test_positive_number():
    """positive_number accepts "3.14" and rejects zero, negatives and non-numbers."""
    assert positive_number("3.14") == 3.14

    # (raw input, expected error message pattern)
    rejected = (
        ("0", "0 is not a positive number"),
        ("-2.5", "-2.5 is not a positive number"),
        ("xyz", "xyz is not a valid number"),
    )
    for raw_value, message in rejected:
        with pytest.raises(argparse.ArgumentTypeError, match=message):
            positive_number(raw_value)
66
+
67
+
68
def test_between_zero_and_one():
    """between_zero_and_one accepts 0, 0.5 and 1 and rejects everything else tested."""
    # Both end points are accepted along with an interior value.
    accepted = (("0.5", 0.5), ("0", 0), ("1", 1))
    for raw_value, parsed in accepted:
        assert between_zero_and_one(raw_value) == parsed

    # (raw input, expected error message pattern)
    rejected = (
        ("1.5", "1.5 is not between 0 and 1"),
        ("-0.1", "-0.1 is not between 0 and 1"),
        ("not_a_number", "not_a_number is not a valid number"),
    )
    for raw_value, message in rejected:
        with pytest.raises(argparse.ArgumentTypeError, match=message):
            between_zero_and_one(raw_value)
86
+
87
+
88
def test_existed_file(tmp_path):
    """existed_file returns an existing path unchanged and rejects a missing one."""
    # Create a real file inside pytest's temporary directory.
    existing = tmp_path / "temp.txt"
    existing.write_text("This is a test file.")
    existing_str = str(existing)

    assert existed_file(existing_str) == existing_str

    missing = "non_existent_file"
    with pytest.raises(
        argparse.ArgumentTypeError, match="non_existent_file is not found"
    ):
        existed_file(missing)
101
+
102
+
103
def test_valid_inputs():
    """validate_stat_type returns each accepted statistic name unchanged."""
    for stat_name in ("U50", "Q05", "Q95", "Q99"):
        assert validate_stat_type(stat_name) == stat_name
108
+
109
+
110
def test_invalid_inputs():
    """validate_stat_type raises ArgumentTypeError for each malformed name."""
    malformed = ("U", "Q", "Q5", "U100", "Q100", "X50", "Qabc", "")
    for stat_name in malformed:
        with pytest.raises(argparse.ArgumentTypeError):
            validate_stat_type(stat_name)
@@ -0,0 +1,53 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import pytest
22
+ import argparse
23
+ from sai.parsers.outlier_parser import add_outlier_parser
24
+
25
+
26
@pytest.fixture
def parser():
    """Provide an ArgumentParser with the 'outlier' subcommand registered."""
    root = argparse.ArgumentParser()
    # The chosen subcommand name is stored under ``args.command``.
    add_outlier_parser(root.add_subparsers(dest="command"))
    return root
33
+
34
+
35
def test_add_outlier_parser(parser):
    """The 'outlier' subcommand parses its score, output prefix and quantile."""
    argv = [
        "outlier",
        "--score",
        "tests/data/example.scores",
        "--output-prefix",
        "output/test_outliers",
        "--quantile",
        "0.95",
    ]
    args = parser.parse_args(argv)

    expected = {
        "command": "outlier",
        "score": "tests/data/example.scores",
        "output_prefix": "output/test_outliers",
        "quantile": 0.95,
    }
    for attr_name, value in expected.items():
        assert getattr(args, attr_name) == value
@@ -0,0 +1,63 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import pytest
22
+ import argparse
23
+ from sai.parsers.score_parser import add_score_parser
24
+
25
+
26
@pytest.fixture
def parser():
    """Provide an ArgumentParser with the 'score' subcommand registered."""
    root = argparse.ArgumentParser()
    # The chosen subcommand name is stored under ``args.command``.
    add_score_parser(root.add_subparsers(dest="command"))
    return root
33
+
34
+
35
def test_add_score_parser(parser):
    """The 'score' subcommand parses its options; anc_alleles stays None when omitted."""
    argv = [
        "score",
        "--vcf",
        "tests/data/example.vcf",
        "--chr-name",
        "chr1",
        "--win-len",
        "50000",
        "--win-step",
        "10000",
        "--output",
        "output/results.tsv",
        "--config",
        "tests/data/test_config.yaml",
    ]
    args = parser.parse_args(argv)

    expected = {
        "command": "score",
        "vcf": "tests/data/example.vcf",
        "chr_name": "chr1",
        "win_len": 50000,
        "win_step": 10000,
        "output": "output/results.tsv",
        "config": "tests/data/test_config.yaml",
    }
    for attr_name, value in expected.items():
        assert getattr(args, attr_name) == value

    # No ancestral-allele option was passed on the command line.
    assert args.anc_alleles is None
@@ -0,0 +1,79 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import pytest
22
+ import yaml
23
+ import sai.stats
24
+ from sai.preprocessors import ChunkPreprocessor
25
+ from sai.configs import PloidyConfig, StatConfig
26
+
27
+
28
@pytest.fixture
def example_data(tmp_path):
    """Provide input paths and a temporary output location for the test.

    Returns a dict with the keys ``vcf_file``, ``ref_ind_file``,
    ``tgt_ind_file``, ``src_ind_file``, ``config``, ``output_file`` (a
    string path inside ``tmp_path``) and ``output_dir`` (``tmp_path``
    itself).

    The paths are held as plain values and are not stored as attributes
    on the ``pytest`` module, so the fixture leaves no global state behind.
    """
    # The output file lives in pytest's per-test temporary directory.
    temp_output_file = tmp_path / "output.tsv"

    return {
        "vcf_file": "tests/data/example.vcf",
        "ref_ind_file": "tests/data/example.ref.ind.list",
        "tgt_ind_file": "tests/data/example.tgt.ind.list",
        "src_ind_file": "tests/data/example.src.ind.list",
        "config": "tests/data/example.config.yaml",
        "output_file": str(temp_output_file),
        "output_dir": tmp_path,
    }
49
+
50
+
51
def test_chunk_preprocessor(example_data):
    """Run ChunkPreprocessor on one chunk and check the first result's Q value.

    The statistic and ploidy settings are read from the example YAML config
    (its ``statistics`` and ``ploidies`` sections). A single chunk covering
    positions 0-6666 of chromosome "21" is processed.
    """
    # An explicit encoding keeps the read independent of the platform default.
    with open(example_data["config"], "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

    stat_config = StatConfig(config["statistics"])
    ploidy_config = PloidyConfig(config["ploidies"])

    preprocessor = ChunkPreprocessor(
        vcf_file=example_data["vcf_file"],
        ref_ind_file=example_data["ref_ind_file"],
        tgt_ind_file=example_data["tgt_ind_file"],
        src_ind_file=example_data["src_ind_file"],
        out_ind_file=None,
        win_len=6666,
        win_step=6666,
        num_src=1,
        anc_allele_file=None,
        output_file=example_data["output_file"],
        stat_config=stat_config,
        ploidy_config=ploidy_config,
    )

    results = preprocessor.run(
        chr_name="21",
        start=0,
        end=6666,
    )

    # The statistic is a computed float, so compare with a tolerance rather
    # than exact equality to avoid failures from rounding differences.
    assert results[0]["Q"] == pytest.approx(0.9)