sai-pg 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sai/__init__.py +2 -0
- sai/__main__.py +6 -3
- sai/configs/__init__.py +24 -0
- sai/configs/global_config.py +83 -0
- sai/configs/ploidy_config.py +94 -0
- sai/configs/pop_config.py +82 -0
- sai/configs/stat_config.py +220 -0
- sai/{utils/generators → generators}/chunk_generator.py +1 -1
- sai/{utils/generators → generators}/window_generator.py +81 -37
- sai/{utils/multiprocessing → multiprocessing}/mp_manager.py +2 -2
- sai/{utils/multiprocessing → multiprocessing}/mp_pool.py +2 -2
- sai/parsers/outlier_parser.py +4 -3
- sai/parsers/score_parser.py +8 -119
- sai/{utils/preprocessors → preprocessors}/chunk_preprocessor.py +21 -15
- sai/preprocessors/feature_preprocessor.py +236 -0
- sai/registries/__init__.py +22 -0
- sai/registries/generic_registry.py +89 -0
- sai/registries/stat_registry.py +30 -0
- sai/sai.py +124 -220
- sai/stats/__init__.py +11 -0
- sai/stats/danc_statistic.py +83 -0
- sai/stats/dd_statistic.py +77 -0
- sai/stats/df_statistic.py +84 -0
- sai/stats/dplus_statistic.py +86 -0
- sai/stats/fd_statistic.py +92 -0
- sai/stats/generic_statistic.py +93 -0
- sai/stats/q_statistic.py +104 -0
- sai/stats/stat_utils.py +259 -0
- sai/stats/u_statistic.py +99 -0
- sai/utils/utils.py +213 -142
- {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/METADATA +3 -14
- sai_pg-1.1.0.dist-info/RECORD +70 -0
- {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/WHEEL +1 -1
- sai_pg-1.1.0.dist-info/top_level.txt +2 -0
- tests/configs/test_global_config.py +163 -0
- tests/configs/test_ploidy_config.py +93 -0
- tests/configs/test_pop_config.py +90 -0
- tests/configs/test_stat_config.py +171 -0
- tests/generators/test_chunk_generator.py +51 -0
- tests/generators/test_window_generator.py +164 -0
- tests/multiprocessing/test_mp_manager.py +92 -0
- tests/multiprocessing/test_mp_pool.py +79 -0
- tests/parsers/test_argument_validation.py +133 -0
- tests/parsers/test_outlier_parser.py +53 -0
- tests/parsers/test_score_parser.py +63 -0
- tests/preprocessors/test_chunk_preprocessor.py +79 -0
- tests/preprocessors/test_feature_preprocessor.py +223 -0
- tests/registries/test_registries.py +74 -0
- tests/stats/test_danc_statistic.py +51 -0
- tests/stats/test_dd_statistic.py +45 -0
- tests/stats/test_df_statistic.py +73 -0
- tests/stats/test_dplus_statistic.py +79 -0
- tests/stats/test_fd_statistic.py +68 -0
- tests/stats/test_q_statistic.py +268 -0
- tests/stats/test_stat_utils.py +354 -0
- tests/stats/test_u_statistic.py +233 -0
- tests/test___main__.py +51 -0
- tests/test_sai.py +102 -0
- tests/utils/test_utils.py +511 -0
- sai/parsers/plot_parser.py +0 -152
- sai/stats/features.py +0 -302
- sai/utils/preprocessors/feature_preprocessor.py +0 -211
- sai_pg-1.0.1.dist-info/RECORD +0 -30
- sai_pg-1.0.1.dist-info/top_level.txt +0 -1
- /sai/{utils/generators → generators}/__init__.py +0 -0
- /sai/{utils/generators → generators}/data_generator.py +0 -0
- /sai/{utils/multiprocessing → multiprocessing}/__init__.py +0 -0
- /sai/{utils/preprocessors → preprocessors}/__init__.py +0 -0
- /sai/{utils/preprocessors → preprocessors}/data_preprocessor.py +0 -0
- {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/entry_points.txt +0 -0
- {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,70 @@
|
|
1
|
+
sai/__init__.py,sha256=vOz5CvYolyCQkKMzSK98WzFFsb6XW3Q7QTMfavmiFVA,747
|
2
|
+
sai/__main__.py,sha256=WBtgdvCLW5aRF-z7O30n3PFZxdXNEGwjPvmEHmkTb3Y,2174
|
3
|
+
sai/sai.py,sha256=zL0b4d730wAWE214nIcjZ_A8H07XxHeSlOKk-1LOfuU,7510
|
4
|
+
sai/configs/__init__.py,sha256=V1uPUfwcjb9FlLBcmVSB6q7_29IlntD60HLeW7GDzTc,876
|
5
|
+
sai/configs/global_config.py,sha256=afGJ0Nq9pq6yMIuhT6HmeaM4NevoVmEP5iuZJdfF6j0,3200
|
6
|
+
sai/configs/ploidy_config.py,sha256=V51RulhspASeDxRye6gaBRTWrYPWTfRYngLpmdiG1TQ,3339
|
7
|
+
sai/configs/pop_config.py,sha256=p_JUMN-UYSdIkaPLjoEQD620Y36JkmLVRX-_TeJaG7c,2659
|
8
|
+
sai/configs/stat_config.py,sha256=WbHiGvDm6o1m6uEWXQ9sJfeqQUZiRFOgfNrcNeKO4H0,8037
|
9
|
+
sai/generators/__init__.py,sha256=hpE4PUQIOZQXzdpSx7dEllecDoOfxIWXNu1-WHa_VcM,858
|
10
|
+
sai/generators/chunk_generator.py,sha256=wL4ZX1yqFpCnQjQF4Wds5Gewo29VE4fG-UkSaGh8TUs,4313
|
11
|
+
sai/generators/data_generator.py,sha256=bVz9KPjJSL4becTsVZ-zH4i40y5UDA0JJOtzbwf5n84,1635
|
12
|
+
sai/generators/window_generator.py,sha256=XuVEIvI5ns9hfJEWXz9lFcAvld3sWnVWDyDk9dZWZ4M,11088
|
13
|
+
sai/multiprocessing/__init__.py,sha256=mI_iVjOJschKjIcneo99utCMVjR0K3UpEjA_PMcmLm4,790
|
14
|
+
sai/multiprocessing/mp_manager.py,sha256=6JACefm5em_FCXcAn-JRpo2WmVVVzJG6zEWijAyixnk,10251
|
15
|
+
sai/multiprocessing/mp_pool.py,sha256=tdUMOVAPHUmlFavm2-xRNHntCq09Qu1653SeXtsvFEs,2261
|
16
|
+
sai/parsers/__init__.py,sha256=ZLPiBk86c9R8ZFx9y5VF7Up2v4JS8WsEMfsVNlj7nXY,724
|
17
|
+
sai/parsers/argument_validation.py,sha256=OzkE9Ayr3KCB8rSP8mXnfx4aNOeJuEEFfit9q7RbURU,4374
|
18
|
+
sai/parsers/outlier_parser.py,sha256=tQomTsQ16Fqimuw97vbTX3_N2yl5lGRZRrBbF1hALV8,2403
|
19
|
+
sai/parsers/score_parser.py,sha256=eAKqSluXJlLm5-k-7H6dU3bD-RUsubSjqD3I9NKl5AU,4429
|
20
|
+
sai/preprocessors/__init__.py,sha256=q25uXnq4jTrIVBbl1HFv3ZgEV0d79fCMp4tTZrq4vsM,878
|
21
|
+
sai/preprocessors/chunk_preprocessor.py,sha256=YAzhlqkBtwR5RGDOMBDLOT0YSxQcnfnl0YQ3w5InfOA,5601
|
22
|
+
sai/preprocessors/data_preprocessor.py,sha256=XSQN_kXUfyCtIpqIf7TcqWIPcSk8kynHPH-KY9gh5VY,3595
|
23
|
+
sai/preprocessors/feature_preprocessor.py,sha256=o7qhdJY_5gKcdoRJwY5MUrP2p1lU4gG5oCIXJvgWh0U,8990
|
24
|
+
sai/registries/__init__.py,sha256=6_IVIghxB2LN2ukve-8stfs_9_JzvDJMg7DQ4MW8Bnc,812
|
25
|
+
sai/registries/generic_registry.py,sha256=qat9xlCaCab_ZyvfUq22TT_Pq2wuuULoUWFFwjwwoDA,2459
|
26
|
+
sai/registries/stat_registry.py,sha256=3BHHO_mOd802Yy_RG_uXKiJTsCcYuayFNUYL9NJ6UIQ,919
|
27
|
+
sai/stats/__init__.py,sha256=CNP5QmjO-GVYWJI6RRumseAhT0P16lKOJNXSY9SxBGk,1072
|
28
|
+
sai/stats/danc_statistic.py,sha256=peMn4nUdek6sksyYJkzxusnsXdIYsYyx8IC2vJWya-s,2878
|
29
|
+
sai/stats/dd_statistic.py,sha256=vZmQ_8P25dhg2M9TBcJIRuP6onoxdQ6W8-fRdd6UAyY,2730
|
30
|
+
sai/stats/df_statistic.py,sha256=-MK9GxrCk5qBODw4-1jWiFGoVqzNFtMRBFkrMFgjlAo,2996
|
31
|
+
sai/stats/dplus_statistic.py,sha256=7WUm-h0MY2P-VhIxKaVfRRFEJmGNufAbjMEWkOpEdfo,3202
|
32
|
+
sai/stats/fd_statistic.py,sha256=_uapDkTrpp4xXfOE_qNVTsludtYsk7wEYdHqZcFsC60,3360
|
33
|
+
sai/stats/generic_statistic.py,sha256=YMG7P7fTgq6_fC8anziJHYZWgLvLvXSzGcP0SarWhz4,3409
|
34
|
+
sai/stats/q_statistic.py,sha256=N1oLZxuu6TxdP_2RlnzfwXCNAhKV1woCVJYWBDOpxHA,4382
|
35
|
+
sai/stats/stat_utils.py,sha256=NrukP7mZC3GUr6psSVdoK2Adc6U_7Eynv5BnRbNwWc0,9385
|
36
|
+
sai/stats/u_statistic.py,sha256=ezMrUtD2KLej2nTCH4kI_IzH3Z3acNX9WoyUtAYRWgU,3956
|
37
|
+
sai/utils/__init__.py,sha256=B3ZcC1ALSWieGHPiqXKBFQRTrnlTX4TaHc3tCx9fj0w,782
|
38
|
+
sai/utils/genomic_dataclasses.py,sha256=HBYp2dehdW_y3Pd6Un8XFMnN1Odg1EiZb9ci1syIibU,1443
|
39
|
+
sai/utils/utils.py,sha256=DggdghqERdK_bqb-dxX0kGvlqdS89QdOUwBWZxScxDk,25445
|
40
|
+
sai_pg-1.1.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
41
|
+
tests/test___main__.py,sha256=b9-O8rzeYJSsnpO0pjxk8ftfshji0VMq7l34T2DdBw8,1913
|
42
|
+
tests/test_sai.py,sha256=Q544XurG5wRZqVPlQ8EOKX05UceKl5EYPuV7i8RIHzA,3042
|
43
|
+
tests/configs/test_global_config.py,sha256=NYqMFQK3qOnmmrZ-S7lvRmi9UFwPwApyS0AnvWpJLrM,4485
|
44
|
+
tests/configs/test_ploidy_config.py,sha256=KbWNYQUNYZdRsDgkpk-d2Fm3Ip8Zy57KBmE6_xWMgVQ,2609
|
45
|
+
tests/configs/test_pop_config.py,sha256=pu7FXFE17x1uH9HLDScQ0XcztIS-tS7mcvm9tWTa3zc,2670
|
46
|
+
tests/configs/test_stat_config.py,sha256=BWBYGMjegC1bh83acmg5X3UpCiYI4dT_G9Z_OWzZL9Q,4868
|
47
|
+
tests/generators/test_chunk_generator.py,sha256=V6cFufLcKmsAp9krTNPR3yaQWA6rM-G3HYY0O3wsTx4,1583
|
48
|
+
tests/generators/test_window_generator.py,sha256=tp-OUc3ROi-wx6c6V39cXH3quneOZOekllzN-1xTMR0,5756
|
49
|
+
tests/multiprocessing/test_mp_manager.py,sha256=KYd3MVDPw3egw_mQ2jy9s3_VuJ-uxHfHKoatkxKoi00,2706
|
50
|
+
tests/multiprocessing/test_mp_pool.py,sha256=oCIC_BrKsgVpXevFyZ92sSU6SwVLa-jFEZZVg8nIMMk,2261
|
51
|
+
tests/parsers/test_argument_validation.py,sha256=Q2WDGxDWjHM3-cJrtjzGGwm2qdI-kFHxU5f1_OFHGW0,4184
|
52
|
+
tests/parsers/test_outlier_parser.py,sha256=0_wWIRMQ4qJRKuGfCNkukZrWg9sP6dqzAuMXMYyM6DM,1642
|
53
|
+
tests/parsers/test_score_parser.py,sha256=-potixrJhMBm7pIf7R4WFQmdKv6s3wV_7rICO1sj6M4,1921
|
54
|
+
tests/preprocessors/test_chunk_preprocessor.py,sha256=AAQBSN5KH3huXoSi-KvKouV839GbTjDfy29rIUz8qi0,2557
|
55
|
+
tests/preprocessors/test_feature_preprocessor.py,sha256=nIncvpPJEr88iAPRhv0teL2mqirrZLgjeIA1LPSARmQ,6743
|
56
|
+
tests/registries/test_registries.py,sha256=UPMj2tH435AprUBdFW767g77yj2kztchFHiVJvW8SUM,1710
|
57
|
+
tests/stats/test_danc_statistic.py,sha256=WOA8GK3FvwWT9wD50v9BMLGc_gi7tGI1u5oRflW69i8,1643
|
58
|
+
tests/stats/test_dd_statistic.py,sha256=yhBdTQyCdYcQse_DrAEaK-OPAitCG99KGDcyDSr4Xzs,1356
|
59
|
+
tests/stats/test_df_statistic.py,sha256=kJi58bUs5QQGsS7Z2AmTv4ukrHjaGXHwXcjehOxFOqM,2246
|
60
|
+
tests/stats/test_dplus_statistic.py,sha256=vIm7DjUZLZMsUWZj1qaV_AyRTnA7r5bjEtiHMoxLzXs,2476
|
61
|
+
tests/stats/test_fd_statistic.py,sha256=jq7DGTlHbaIkCEA6KLrUV0vH4Th0gumoKUFWAM15vV8,2042
|
62
|
+
tests/stats/test_q_statistic.py,sha256=b-YC66aA_NMNXOXlnot3vU_Lc9z0WGP19YJLp6MCcUc,7943
|
63
|
+
tests/stats/test_stat_utils.py,sha256=n4nNCuiG79LOsk7mnpQPbivjgF9vnlWxEdzpJRjCZu0,10646
|
64
|
+
tests/stats/test_u_statistic.py,sha256=V_vejLUCKIlFsVtOUEROwNoqNXO7x8rHaxJX0fyGssA,6659
|
65
|
+
tests/utils/test_utils.py,sha256=yaoCuDYIqC4mFaJISZ8Juhze2NhUfRSQkGJBBlIJtxg,15673
|
66
|
+
sai_pg-1.1.0.dist-info/METADATA,sha256=DG6BjlA--aZsetXCZVEZbny_vXCIoW7NtQrJLwgiMHI,1581
|
67
|
+
sai_pg-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
68
|
+
sai_pg-1.1.0.dist-info/entry_points.txt,sha256=uK34frE2UhEDNa5ISbGOtjF2HGAXF5uG_EgMocesEPs,42
|
69
|
+
sai_pg-1.1.0.dist-info/top_level.txt,sha256=8-AsrbtORPvVD9w1Is6vyxKgJBuuYNrwii1UNi-8Lr4,10
|
70
|
+
sai_pg-1.1.0.dist-info/RECORD,,
|
@@ -0,0 +1,163 @@
|
|
1
|
+
# Copyright 2025 Xin Huang
|
2
|
+
#
|
3
|
+
# GNU General Public License v3.0
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, please see
|
17
|
+
#
|
18
|
+
# https://www.gnu.org/licenses/gpl-3.0.en.html
|
19
|
+
|
20
|
+
|
21
|
+
import pytest
|
22
|
+
from sai.configs.global_config import GlobalConfig
|
23
|
+
from sai.configs.stat_config import StatConfig
|
24
|
+
from sai.configs.ploidy_config import PloidyConfig
|
25
|
+
from sai.configs.pop_config import PopConfig
|
26
|
+
|
27
|
+
|
28
|
+
def test_global_config_valid(tmp_path):
    """Build a GlobalConfig from mutually consistent sub-configs and read it back.

    The statistics, the ploidies and the population files all refer to
    popA (ref), popB (tgt) and popC (src), so construction must succeed.
    """
    # (group, file name, file content) for each population file to create.
    fixtures = (
        ("ref", "ref.txt", "popA sample1\npopA sample2"),
        ("tgt", "tgt.txt", "popB sample3\npopB sample4"),
        ("src", "src.txt", "popC sample5\npopC sample6"),
    )
    pop_paths = {}
    for group, file_name, content in fixtures:
        path = tmp_path / file_name
        path.write_text(content)
        pop_paths[group] = str(path)

    q_parameters = {
        "ref": {"popA": 0.3},
        "tgt": {"popB": 0.95},
        "src": {"popC": "=1"},
    }
    statistics = StatConfig.model_validate({"Q": q_parameters})

    ploidies = PloidyConfig.model_validate(
        {
            "ref": {"popA": 2},
            "tgt": {"popB": 2},
            "src": {"popC": 2},
        }
    )

    populations = PopConfig.model_validate(pop_paths)

    cfg = GlobalConfig(
        statistics=statistics,
        ploidies=ploidies,
        populations=populations,
    )

    # Each accessor must hand back exactly what was supplied above.
    assert cfg.statistics.get_parameters("Q")["ref"]["popA"] == 0.3
    assert cfg.ploidies.get_ploidy("ref", "popA") == 2
    assert cfg.populations.get_population("ref") == pop_paths["ref"]
|
72
|
+
|
73
|
+
|
74
|
+
def test_global_config_invalid_ploidy(tmp_path):
    """GlobalConfig rejects a statistics population that has no ploidy entry.

    The Q statistic uses popB as its target population, but the ploidy
    section only defines popD under "tgt"; a ValueError naming popB is
    expected.
    """
    # (group, file name, file content) for each population file to create.
    fixtures = (
        ("ref", "ref.txt", "popA sample1\npopA sample2"),
        ("tgt", "tgt.txt", "popB sample3\npopB sample4"),
        ("src", "src.txt", "popC sample5\npopC sample6"),
    )
    pop_paths = {}
    for group, file_name, content in fixtures:
        path = tmp_path / file_name
        path.write_text(content)
        pop_paths[group] = str(path)

    q_parameters = {
        "ref": {"popA": 0.3},
        "tgt": {"popB": 0.95},
        "src": {"popC": "=1"},
    }
    statistics = StatConfig.model_validate({"Q": q_parameters})

    # "tgt" deliberately lists popD instead of popB.
    ploidies = PloidyConfig.model_validate(
        {
            "ref": {"popA": 2},
            "tgt": {"popD": 2},
            "src": {"popC": 2},
        }
    )

    populations = PopConfig.model_validate(pop_paths)

    expected_message = r"Population 'popB' used in statistics\[Q\]\[tgt\] is not defined in ploidies\[tgt\]"
    with pytest.raises(ValueError, match=expected_message):
        GlobalConfig(
            statistics=statistics,
            ploidies=ploidies,
            populations=populations,
        )
|
118
|
+
|
119
|
+
|
120
|
+
def test_global_config_invalid_population(tmp_path):
    """GlobalConfig rejects a statistics population missing from its population file.

    Statistics and ploidies both use popB as the target population, but the
    "tgt" population file only lists popD; a ValueError naming popB and the
    "tgt" group is expected.
    """
    # (group, file name, file content); the tgt file deliberately lists popD.
    fixtures = (
        ("ref", "ref.txt", "popA sample1\npopA sample2"),
        ("tgt", "tgt.txt", "popD sample3\npopD sample4"),
        ("src", "src.txt", "popC sample5\npopC sample6"),
    )
    pop_paths = {}
    for group, file_name, content in fixtures:
        path = tmp_path / file_name
        path.write_text(content)
        pop_paths[group] = str(path)

    q_parameters = {
        "ref": {"popA": 0.3},
        "tgt": {"popB": 0.95},
        "src": {"popC": "=1"},
    }
    statistics = StatConfig.model_validate({"Q": q_parameters})

    ploidies = PloidyConfig.model_validate(
        {
            "ref": {"popA": 2},
            "tgt": {"popB": 2},
            "src": {"popC": 2},
        }
    )

    populations = PopConfig.model_validate(pop_paths)

    expected_message = r"Population 'popB' used in statistics\[Q\]\[tgt\] is not found in the population file for group 'tgt'"
    with pytest.raises(ValueError, match=expected_message):
        GlobalConfig(
            statistics=statistics,
            ploidies=ploidies,
            populations=populations,
        )
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# Copyright 2025 Xin Huang
|
2
|
+
#
|
3
|
+
# GNU General Public License v3.0
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, please see
|
17
|
+
#
|
18
|
+
# https://www.gnu.org/licenses/gpl-3.0.en.html
|
19
|
+
|
20
|
+
|
21
|
+
import pytest
|
22
|
+
from pydantic import ValidationError
|
23
|
+
from sai.configs import PloidyConfig
|
24
|
+
|
25
|
+
|
26
|
+
def test_valid_config():
    """PloidyConfig accepts ref/tgt/src/outgroup and returns the stored ploidies."""
    ploidies = {
        "ref": {"popA": 2},
        "tgt": {"popB": 2},
        "src": {"popC": 2, "popD": 4},
        "outgroup": {"popE": 1},
    }

    cfg = PloidyConfig(ploidies)

    assert cfg.get_ploidy("ref", "popA") == 2
    assert cfg.get_ploidy("src", "popD") == 4
|
37
|
+
|
38
|
+
|
39
|
+
def test_missing_required_keys():
    """Omitting the "tgt" group raises a ValidationError about missing keys."""
    # No "tgt" entry in this payload.
    without_tgt = {
        "ref": {"popA": 2},
        "src": {"popC": 2},
    }

    with pytest.raises(ValidationError, match="Missing required ploidy keys"):
        PloidyConfig(without_tgt)
|
47
|
+
|
48
|
+
|
49
|
+
def test_extra_keys():
    """An unknown top-level group ("badkey") raises a ValidationError."""
    with_unknown_group = {
        "ref": {"popA": 2},
        "tgt": {"popB": 2},
        "src": {"popC": 2},
        "badkey": {"popD": 3},
    }

    with pytest.raises(ValidationError, match="Unsupported ploidy keys"):
        PloidyConfig(with_unknown_group)
|
59
|
+
|
60
|
+
|
61
|
+
def test_invalid_ploidy_values():
    """Zero and negative ploidy values are each rejected with a ValidationError."""
    # First payload: a zero ploidy under "tgt".
    zero_ploidy = {
        "ref": {"popA": 2},
        "tgt": {"popB": 0},
        "src": {"popC": 1},
        "outgroup": {"popD": 1},
    }
    # Second payload: negative ploidies under "src".
    negative_ploidy = {
        "ref": {"popA": 2},
        "tgt": {"popB": 2},
        "src": {"popC": -2, "popD": -3},
        "outgroup": {"popE": 1},
    }

    for payload in (zero_ploidy, negative_ploidy):
        with pytest.raises(ValidationError, match="must be a positive integer"):
            PloidyConfig(payload)
|
81
|
+
|
82
|
+
|
83
|
+
def test_get_ploidy_key_error():
    """Looking up a group that was never configured raises KeyError."""
    ploidies = {
        "ref": {"popA": 2},
        "tgt": {"popB": 2},
        "src": {"popC": 2},
        "outgroup": {"popD": 1},
    }
    cfg = PloidyConfig(ploidies)

    # "ghost" is not one of the configured groups.
    with pytest.raises(KeyError):
        cfg.get_ploidy("ghost", "popE")
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# Copyright 2025 Xin Huang
|
2
|
+
#
|
3
|
+
# GNU General Public License v3.0
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, please see
|
17
|
+
#
|
18
|
+
# https://www.gnu.org/licenses/gpl-3.0.en.html
|
19
|
+
|
20
|
+
|
21
|
+
import tempfile
|
22
|
+
import os
|
23
|
+
import pytest
|
24
|
+
from sai.configs import PopConfig
|
25
|
+
|
26
|
+
|
27
|
+
@pytest.fixture
def temp_files():
    """Yield temporary sample-list files keyed by population group.

    Creates one file for each of "ref", "tgt", "src" and "outgroup",
    each containing two sample names, and yields a dict mapping the group
    name to the file path.

    Every file that was created is removed afterwards, including when
    creation fails partway through.
    """
    files = {}
    try:
        for key in ["ref", "tgt", "src", "outgroup"]:
            # delete=False keeps the file on disk after the handle is closed so
            # the code under test can reopen it by path.
            with tempfile.NamedTemporaryFile(delete=False) as f:
                # Record the path before writing so a failed write is still
                # cleaned up in the finally block.
                files[key] = f.name
                f.write(b"sample1\nsample2\n")
        yield files
    finally:
        for path in files.values():
            os.unlink(path)
|
38
|
+
|
39
|
+
|
40
|
+
def test_valid_pop_config_with_outgroup(temp_files):
    """PopConfig with all four groups returns the configured file paths."""
    groups = ("ref", "tgt", "src", "outgroup")
    population_files = {group: temp_files[group] for group in groups}

    cfg = PopConfig(population_files)

    assert cfg.get_population("ref") == temp_files["ref"]
    assert cfg.get_population("outgroup") == temp_files["outgroup"]
|
51
|
+
|
52
|
+
|
53
|
+
def test_valid_pop_config_without_outgroup(temp_files):
    """PopConfig without an outgroup is valid and reports None for "outgroup"."""
    groups = ("ref", "tgt", "src")
    population_files = {group: temp_files[group] for group in groups}

    cfg = PopConfig(population_files)

    assert cfg.get_population("tgt") == temp_files["tgt"]
    assert cfg.get_population("outgroup") is None
|
63
|
+
|
64
|
+
|
65
|
+
def test_missing_required_key(temp_files):
    """Leaving out the "tgt" population file raises a ValueError."""
    # Only "ref" and "src" are supplied; "tgt" is missing.
    without_tgt = {group: temp_files[group] for group in ("ref", "src")}

    with pytest.raises(ValueError, match="Missing required population keys"):
        PopConfig(without_tgt)
|
68
|
+
|
69
|
+
|
70
|
+
def test_invalid_extra_key(temp_files):
    """An unknown population group ("ghost") raises a ValueError."""
    with_unknown_group = {group: temp_files[group] for group in ("ref", "tgt", "src")}
    # Reuse an existing file so only the key itself is invalid.
    with_unknown_group["ghost"] = temp_files["outgroup"]

    with pytest.raises(ValueError, match="Unsupported population keys"):
        PopConfig(with_unknown_group)
|
80
|
+
|
81
|
+
|
82
|
+
def test_file_not_exist():
    """Pointing every required group at a missing file raises a ValueError."""
    missing_everywhere = dict.fromkeys(("ref", "tgt", "src"), "/non/existent/path")

    with pytest.raises(ValueError, match="does not exist"):
        PopConfig(missing_everywhere)
|
@@ -0,0 +1,171 @@
|
|
1
|
+
# Copyright 2025 Xin Huang
|
2
|
+
#
|
3
|
+
# GNU General Public License v3.0
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, please see
|
17
|
+
#
|
18
|
+
# https://www.gnu.org/licenses/gpl-3.0.en.html
|
19
|
+
|
20
|
+
|
21
|
+
import pytest
|
22
|
+
import yaml
|
23
|
+
from sai.configs import StatConfig
|
24
|
+
|
25
|
+
|
26
|
+
def test_stat_config_valid():
    """StatConfig accepts U/Q with parameters and fd/df with none.

    The assertions show that ref/tgt thresholds come back unchanged, while
    each src comparator string (for example ">0.8") comes back as an
    (operator, value) tuple.
    """
    raw = {
        "U": {
            "ref": {"popA": 0.01},
            "tgt": {"popB": 0.5},
            "src": {"Nea": "=1", "Den": ">0.8"},
        },
        "Q": {
            "ref": {"popA": 0.01},
            "tgt": {"popB": 0.95},
            "src": {"Nea": ">0.2", "Den": "<0.8"},
        },
        "fd": {},  # No parameters
        "df": {},  # No parameters
    }
    cfg = StatConfig(raw)

    expected_u = {
        "ref": {"popA": 0.01},
        "tgt": {"popB": 0.5},
        "src": {"Nea": ("=", 1.0), "Den": (">", 0.8)},
    }
    expected_q = {
        "ref": {"popA": 0.01},
        "tgt": {"popB": 0.95},
        "src": {"Nea": (">", 0.2), "Den": ("<", 0.8)},
    }

    assert cfg.get_parameters("U") == expected_u
    assert cfg.get_parameters("Q") == expected_q
    assert cfg.get_parameters("fd") == {}
    assert cfg.get_parameters("df") == {}
|
58
|
+
|
59
|
+
|
60
|
+
def test_stat_config_invalid():
    """Each malformed statistics payload must make StatConfig raise ValueError.

    NOTE(review): most payloads below give scalar ref/tgt values and a list
    for src, whereas test_stat_config_valid uses per-population mappings.
    Confirm that each case is rejected for the reason in its label rather
    than only because of its overall shape.
    """
    # Cases run in this order; the label records the intent of each one.
    invalid_cases = [
        ("unsupported statistic name", {"qq": {}}),
        (
            "src given as a bare string",
            {"U": {"ref": "0.01", "tgt": "0.5", "src": "=1"}},
        ),
        (
            "out-of-range numeric value for 'ref'",
            {"U": {"ref": -0.1, "tgt": 0.5, "src": ["=1", ">0.8"]}},
        ),
        (
            "out-of-range numeric value for 'tgt'",
            {"U": {"ref": 0.01, "tgt": -100, "src": ["=1", ">0.8"]}},
        ),
        (
            "out-of-range numeric value for 'src'",
            {"U": {"ref": 0.01, "tgt": 0.5, "src": ["=-1", ">0.8"]}},
        ),
        (
            "non-numeric string for 'ref'",
            {"U": {"ref": "foo", "tgt": 0.5, "src": ["=1", ">0.8"]}},
        ),
        (
            "non-numeric string for 'tgt'",
            {"U": {"ref": 0.01, "tgt": "foo", "src": ["=1", ">0.8"]}},
        ),
        (
            "non-numeric string after the comparator",
            {"U": {"ref": 0.01, "tgt": 0.5, "src": ["=invalid", ">0.8"]}},
        ),
        ("U without src", {"U": {"ref": "0.01", "tgt": "0.5"}}),
        ("Q without any parameters", {"Q": {}}),
        (
            "src entry without a valid comparator",
            {
                "Q": {
                    "ref": "0.01",
                    "tgt": "0.95",
                    "src": ["invalid_value"],
                }
            },
        ),
    ]

    for _label, payload in invalid_cases:
        with pytest.raises(ValueError):
            StatConfig(payload)
|
137
|
+
|
138
|
+
|
139
|
+
def test_stat_config_from_file():
    """Load statistics from YAML fixtures and validate them with StatConfig.

    The valid fixture must yield the expected U and Q parameters; the
    invalid fixture must make StatConfig raise ValueError.

    Both fixture paths are relative, so the test has to be run from the
    repository root.
    """
    with open("tests/data/test_config.yaml", "r") as f:
        data = yaml.safe_load(f)

    stat_config = StatConfig(data["statistics"])

    stat_names = list(stat_config.root.keys())
    assert "U" in stat_names
    assert "Q" in stat_names

    u_params = stat_config.get_parameters("U")
    assert u_params == {
        "ref": {"popA": 0.01},
        "tgt": {"popB": 0.5},
        "src": {"Nea": ("=", 1), "Den": (">=", 0.8)},
    }

    q_params = stat_config.get_parameters("Q")
    assert q_params == {
        "ref": {"popA": 0.01},
        "tgt": {"popB": 0.95},
        "src": {"Nea": (">", 0.2), "Den": ("<=", 0.8)},
    }

    assert "ref" in u_params
    assert "tgt" in u_params
    assert "src" in u_params

    with open("tests/data/test_invalid_stat_config.yaml", "r") as f:
        invalid_data = yaml.safe_load(f)

    # Pass the mapping positionally, exactly like the valid case above. The
    # keyword form StatConfig(statistics=...) would wrap the payload under a
    # "statistics" key on a root model, so the ValueError would no longer
    # depend on what the invalid fixture actually contains.
    with pytest.raises(ValueError):
        StatConfig(invalid_data["statistics"])
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Copyright 2025 Xin Huang
|
2
|
+
#
|
3
|
+
# GNU General Public License v3.0
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, please see
|
17
|
+
#
|
18
|
+
# https://www.gnu.org/licenses/gpl-3.0.en.html
|
19
|
+
|
20
|
+
|
21
|
+
import pytest
|
22
|
+
from sai.generators import ChunkGenerator
|
23
|
+
|
24
|
+
|
25
|
+
def test_chunk_generator():
    """ChunkGenerator splits chromosome 21 of the test VCF into two chunks."""
    settings = {
        "vcf_file": "tests/data/test.data.vcf",
        "chr_name": "21",
        "step_size": 5000,
        "window_size": 10000,
        "num_chunks": 2,
    }
    chunker = ChunkGenerator(**settings)

    # The generator's length must equal the requested number of chunks.
    assert len(chunker) == 2

    # Expected chunk boundaries for this VCF with the settings above.
    assert chunker.chunks == [(1, 30000), (25001, 55000)]
|
41
|
+
|
42
|
+
|
43
|
+
def test_chunk_generator_chr_not_found():
    """Requesting chromosome "1" from the test VCF raises a ValueError."""
    settings = {
        "vcf_file": "tests/data/test.data.vcf",
        "chr_name": "1",
        "step_size": 10000,
        "window_size": 10000,
        "num_chunks": 2,
    }

    with pytest.raises(ValueError, match="Chromosome 1 not found in VCF."):
        ChunkGenerator(**settings)
|