sai-pg 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. sai/__init__.py +2 -0
  2. sai/__main__.py +6 -3
  3. sai/configs/__init__.py +24 -0
  4. sai/configs/global_config.py +83 -0
  5. sai/configs/ploidy_config.py +94 -0
  6. sai/configs/pop_config.py +82 -0
  7. sai/configs/stat_config.py +220 -0
  8. sai/{utils/generators → generators}/chunk_generator.py +1 -1
  9. sai/{utils/generators → generators}/window_generator.py +81 -37
  10. sai/{utils/multiprocessing → multiprocessing}/mp_manager.py +2 -2
  11. sai/{utils/multiprocessing → multiprocessing}/mp_pool.py +2 -2
  12. sai/parsers/outlier_parser.py +4 -3
  13. sai/parsers/score_parser.py +8 -119
  14. sai/{utils/preprocessors → preprocessors}/chunk_preprocessor.py +21 -15
  15. sai/preprocessors/feature_preprocessor.py +236 -0
  16. sai/registries/__init__.py +22 -0
  17. sai/registries/generic_registry.py +89 -0
  18. sai/registries/stat_registry.py +30 -0
  19. sai/sai.py +124 -220
  20. sai/stats/__init__.py +11 -0
  21. sai/stats/danc_statistic.py +83 -0
  22. sai/stats/dd_statistic.py +77 -0
  23. sai/stats/df_statistic.py +84 -0
  24. sai/stats/dplus_statistic.py +86 -0
  25. sai/stats/fd_statistic.py +92 -0
  26. sai/stats/generic_statistic.py +93 -0
  27. sai/stats/q_statistic.py +104 -0
  28. sai/stats/stat_utils.py +259 -0
  29. sai/stats/u_statistic.py +99 -0
  30. sai/utils/utils.py +213 -142
  31. {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/METADATA +3 -14
  32. sai_pg-1.1.0.dist-info/RECORD +70 -0
  33. {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/WHEEL +1 -1
  34. sai_pg-1.1.0.dist-info/top_level.txt +2 -0
  35. tests/configs/test_global_config.py +163 -0
  36. tests/configs/test_ploidy_config.py +93 -0
  37. tests/configs/test_pop_config.py +90 -0
  38. tests/configs/test_stat_config.py +171 -0
  39. tests/generators/test_chunk_generator.py +51 -0
  40. tests/generators/test_window_generator.py +164 -0
  41. tests/multiprocessing/test_mp_manager.py +92 -0
  42. tests/multiprocessing/test_mp_pool.py +79 -0
  43. tests/parsers/test_argument_validation.py +133 -0
  44. tests/parsers/test_outlier_parser.py +53 -0
  45. tests/parsers/test_score_parser.py +63 -0
  46. tests/preprocessors/test_chunk_preprocessor.py +79 -0
  47. tests/preprocessors/test_feature_preprocessor.py +223 -0
  48. tests/registries/test_registries.py +74 -0
  49. tests/stats/test_danc_statistic.py +51 -0
  50. tests/stats/test_dd_statistic.py +45 -0
  51. tests/stats/test_df_statistic.py +73 -0
  52. tests/stats/test_dplus_statistic.py +79 -0
  53. tests/stats/test_fd_statistic.py +68 -0
  54. tests/stats/test_q_statistic.py +268 -0
  55. tests/stats/test_stat_utils.py +354 -0
  56. tests/stats/test_u_statistic.py +233 -0
  57. tests/test___main__.py +51 -0
  58. tests/test_sai.py +102 -0
  59. tests/utils/test_utils.py +511 -0
  60. sai/parsers/plot_parser.py +0 -152
  61. sai/stats/features.py +0 -302
  62. sai/utils/preprocessors/feature_preprocessor.py +0 -211
  63. sai_pg-1.0.1.dist-info/RECORD +0 -30
  64. sai_pg-1.0.1.dist-info/top_level.txt +0 -1
  65. /sai/{utils/generators → generators}/__init__.py +0 -0
  66. /sai/{utils/generators → generators}/data_generator.py +0 -0
  67. /sai/{utils/multiprocessing → multiprocessing}/__init__.py +0 -0
  68. /sai/{utils/preprocessors → preprocessors}/__init__.py +0 -0
  69. /sai/{utils/preprocessors → preprocessors}/data_preprocessor.py +0 -0
  70. {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/entry_points.txt +0 -0
  71. {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,70 @@
1
+ sai/__init__.py,sha256=vOz5CvYolyCQkKMzSK98WzFFsb6XW3Q7QTMfavmiFVA,747
2
+ sai/__main__.py,sha256=WBtgdvCLW5aRF-z7O30n3PFZxdXNEGwjPvmEHmkTb3Y,2174
3
+ sai/sai.py,sha256=zL0b4d730wAWE214nIcjZ_A8H07XxHeSlOKk-1LOfuU,7510
4
+ sai/configs/__init__.py,sha256=V1uPUfwcjb9FlLBcmVSB6q7_29IlntD60HLeW7GDzTc,876
5
+ sai/configs/global_config.py,sha256=afGJ0Nq9pq6yMIuhT6HmeaM4NevoVmEP5iuZJdfF6j0,3200
6
+ sai/configs/ploidy_config.py,sha256=V51RulhspASeDxRye6gaBRTWrYPWTfRYngLpmdiG1TQ,3339
7
+ sai/configs/pop_config.py,sha256=p_JUMN-UYSdIkaPLjoEQD620Y36JkmLVRX-_TeJaG7c,2659
8
+ sai/configs/stat_config.py,sha256=WbHiGvDm6o1m6uEWXQ9sJfeqQUZiRFOgfNrcNeKO4H0,8037
9
+ sai/generators/__init__.py,sha256=hpE4PUQIOZQXzdpSx7dEllecDoOfxIWXNu1-WHa_VcM,858
10
+ sai/generators/chunk_generator.py,sha256=wL4ZX1yqFpCnQjQF4Wds5Gewo29VE4fG-UkSaGh8TUs,4313
11
+ sai/generators/data_generator.py,sha256=bVz9KPjJSL4becTsVZ-zH4i40y5UDA0JJOtzbwf5n84,1635
12
+ sai/generators/window_generator.py,sha256=XuVEIvI5ns9hfJEWXz9lFcAvld3sWnVWDyDk9dZWZ4M,11088
13
+ sai/multiprocessing/__init__.py,sha256=mI_iVjOJschKjIcneo99utCMVjR0K3UpEjA_PMcmLm4,790
14
+ sai/multiprocessing/mp_manager.py,sha256=6JACefm5em_FCXcAn-JRpo2WmVVVzJG6zEWijAyixnk,10251
15
+ sai/multiprocessing/mp_pool.py,sha256=tdUMOVAPHUmlFavm2-xRNHntCq09Qu1653SeXtsvFEs,2261
16
+ sai/parsers/__init__.py,sha256=ZLPiBk86c9R8ZFx9y5VF7Up2v4JS8WsEMfsVNlj7nXY,724
17
+ sai/parsers/argument_validation.py,sha256=OzkE9Ayr3KCB8rSP8mXnfx4aNOeJuEEFfit9q7RbURU,4374
18
+ sai/parsers/outlier_parser.py,sha256=tQomTsQ16Fqimuw97vbTX3_N2yl5lGRZRrBbF1hALV8,2403
19
+ sai/parsers/score_parser.py,sha256=eAKqSluXJlLm5-k-7H6dU3bD-RUsubSjqD3I9NKl5AU,4429
20
+ sai/preprocessors/__init__.py,sha256=q25uXnq4jTrIVBbl1HFv3ZgEV0d79fCMp4tTZrq4vsM,878
21
+ sai/preprocessors/chunk_preprocessor.py,sha256=YAzhlqkBtwR5RGDOMBDLOT0YSxQcnfnl0YQ3w5InfOA,5601
22
+ sai/preprocessors/data_preprocessor.py,sha256=XSQN_kXUfyCtIpqIf7TcqWIPcSk8kynHPH-KY9gh5VY,3595
23
+ sai/preprocessors/feature_preprocessor.py,sha256=o7qhdJY_5gKcdoRJwY5MUrP2p1lU4gG5oCIXJvgWh0U,8990
24
+ sai/registries/__init__.py,sha256=6_IVIghxB2LN2ukve-8stfs_9_JzvDJMg7DQ4MW8Bnc,812
25
+ sai/registries/generic_registry.py,sha256=qat9xlCaCab_ZyvfUq22TT_Pq2wuuULoUWFFwjwwoDA,2459
26
+ sai/registries/stat_registry.py,sha256=3BHHO_mOd802Yy_RG_uXKiJTsCcYuayFNUYL9NJ6UIQ,919
27
+ sai/stats/__init__.py,sha256=CNP5QmjO-GVYWJI6RRumseAhT0P16lKOJNXSY9SxBGk,1072
28
+ sai/stats/danc_statistic.py,sha256=peMn4nUdek6sksyYJkzxusnsXdIYsYyx8IC2vJWya-s,2878
29
+ sai/stats/dd_statistic.py,sha256=vZmQ_8P25dhg2M9TBcJIRuP6onoxdQ6W8-fRdd6UAyY,2730
30
+ sai/stats/df_statistic.py,sha256=-MK9GxrCk5qBODw4-1jWiFGoVqzNFtMRBFkrMFgjlAo,2996
31
+ sai/stats/dplus_statistic.py,sha256=7WUm-h0MY2P-VhIxKaVfRRFEJmGNufAbjMEWkOpEdfo,3202
32
+ sai/stats/fd_statistic.py,sha256=_uapDkTrpp4xXfOE_qNVTsludtYsk7wEYdHqZcFsC60,3360
33
+ sai/stats/generic_statistic.py,sha256=YMG7P7fTgq6_fC8anziJHYZWgLvLvXSzGcP0SarWhz4,3409
34
+ sai/stats/q_statistic.py,sha256=N1oLZxuu6TxdP_2RlnzfwXCNAhKV1woCVJYWBDOpxHA,4382
35
+ sai/stats/stat_utils.py,sha256=NrukP7mZC3GUr6psSVdoK2Adc6U_7Eynv5BnRbNwWc0,9385
36
+ sai/stats/u_statistic.py,sha256=ezMrUtD2KLej2nTCH4kI_IzH3Z3acNX9WoyUtAYRWgU,3956
37
+ sai/utils/__init__.py,sha256=B3ZcC1ALSWieGHPiqXKBFQRTrnlTX4TaHc3tCx9fj0w,782
38
+ sai/utils/genomic_dataclasses.py,sha256=HBYp2dehdW_y3Pd6Un8XFMnN1Odg1EiZb9ci1syIibU,1443
39
+ sai/utils/utils.py,sha256=DggdghqERdK_bqb-dxX0kGvlqdS89QdOUwBWZxScxDk,25445
40
+ sai_pg-1.1.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
41
+ tests/test___main__.py,sha256=b9-O8rzeYJSsnpO0pjxk8ftfshji0VMq7l34T2DdBw8,1913
42
+ tests/test_sai.py,sha256=Q544XurG5wRZqVPlQ8EOKX05UceKl5EYPuV7i8RIHzA,3042
43
+ tests/configs/test_global_config.py,sha256=NYqMFQK3qOnmmrZ-S7lvRmi9UFwPwApyS0AnvWpJLrM,4485
44
+ tests/configs/test_ploidy_config.py,sha256=KbWNYQUNYZdRsDgkpk-d2Fm3Ip8Zy57KBmE6_xWMgVQ,2609
45
+ tests/configs/test_pop_config.py,sha256=pu7FXFE17x1uH9HLDScQ0XcztIS-tS7mcvm9tWTa3zc,2670
46
+ tests/configs/test_stat_config.py,sha256=BWBYGMjegC1bh83acmg5X3UpCiYI4dT_G9Z_OWzZL9Q,4868
47
+ tests/generators/test_chunk_generator.py,sha256=V6cFufLcKmsAp9krTNPR3yaQWA6rM-G3HYY0O3wsTx4,1583
48
+ tests/generators/test_window_generator.py,sha256=tp-OUc3ROi-wx6c6V39cXH3quneOZOekllzN-1xTMR0,5756
49
+ tests/multiprocessing/test_mp_manager.py,sha256=KYd3MVDPw3egw_mQ2jy9s3_VuJ-uxHfHKoatkxKoi00,2706
50
+ tests/multiprocessing/test_mp_pool.py,sha256=oCIC_BrKsgVpXevFyZ92sSU6SwVLa-jFEZZVg8nIMMk,2261
51
+ tests/parsers/test_argument_validation.py,sha256=Q2WDGxDWjHM3-cJrtjzGGwm2qdI-kFHxU5f1_OFHGW0,4184
52
+ tests/parsers/test_outlier_parser.py,sha256=0_wWIRMQ4qJRKuGfCNkukZrWg9sP6dqzAuMXMYyM6DM,1642
53
+ tests/parsers/test_score_parser.py,sha256=-potixrJhMBm7pIf7R4WFQmdKv6s3wV_7rICO1sj6M4,1921
54
+ tests/preprocessors/test_chunk_preprocessor.py,sha256=AAQBSN5KH3huXoSi-KvKouV839GbTjDfy29rIUz8qi0,2557
55
+ tests/preprocessors/test_feature_preprocessor.py,sha256=nIncvpPJEr88iAPRhv0teL2mqirrZLgjeIA1LPSARmQ,6743
56
+ tests/registries/test_registries.py,sha256=UPMj2tH435AprUBdFW767g77yj2kztchFHiVJvW8SUM,1710
57
+ tests/stats/test_danc_statistic.py,sha256=WOA8GK3FvwWT9wD50v9BMLGc_gi7tGI1u5oRflW69i8,1643
58
+ tests/stats/test_dd_statistic.py,sha256=yhBdTQyCdYcQse_DrAEaK-OPAitCG99KGDcyDSr4Xzs,1356
59
+ tests/stats/test_df_statistic.py,sha256=kJi58bUs5QQGsS7Z2AmTv4ukrHjaGXHwXcjehOxFOqM,2246
60
+ tests/stats/test_dplus_statistic.py,sha256=vIm7DjUZLZMsUWZj1qaV_AyRTnA7r5bjEtiHMoxLzXs,2476
61
+ tests/stats/test_fd_statistic.py,sha256=jq7DGTlHbaIkCEA6KLrUV0vH4Th0gumoKUFWAM15vV8,2042
62
+ tests/stats/test_q_statistic.py,sha256=b-YC66aA_NMNXOXlnot3vU_Lc9z0WGP19YJLp6MCcUc,7943
63
+ tests/stats/test_stat_utils.py,sha256=n4nNCuiG79LOsk7mnpQPbivjgF9vnlWxEdzpJRjCZu0,10646
64
+ tests/stats/test_u_statistic.py,sha256=V_vejLUCKIlFsVtOUEROwNoqNXO7x8rHaxJX0fyGssA,6659
65
+ tests/utils/test_utils.py,sha256=yaoCuDYIqC4mFaJISZ8Juhze2NhUfRSQkGJBBlIJtxg,15673
66
+ sai_pg-1.1.0.dist-info/METADATA,sha256=DG6BjlA--aZsetXCZVEZbny_vXCIoW7NtQrJLwgiMHI,1581
67
+ sai_pg-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
+ sai_pg-1.1.0.dist-info/entry_points.txt,sha256=uK34frE2UhEDNa5ISbGOtjF2HGAXF5uG_EgMocesEPs,42
69
+ sai_pg-1.1.0.dist-info/top_level.txt,sha256=8-AsrbtORPvVD9w1Is6vyxKgJBuuYNrwii1UNi-8Lr4,10
70
+ sai_pg-1.1.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,2 @@
1
+ sai
2
+ tests
@@ -0,0 +1,163 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import pytest
22
+ from sai.configs.global_config import GlobalConfig
23
+ from sai.configs.stat_config import StatConfig
24
+ from sai.configs.ploidy_config import PloidyConfig
25
+ from sai.configs.pop_config import PopConfig
26
+
27
+
28
+ def test_global_config_valid(tmp_path):
29
+ ref_file = tmp_path / "ref.txt"
30
+ tgt_file = tmp_path / "tgt.txt"
31
+ src_file = tmp_path / "src.txt"
32
+
33
+ ref_file.write_text("popA sample1\npopA sample2")
34
+ tgt_file.write_text("popB sample3\npopB sample4")
35
+ src_file.write_text("popC sample5\npopC sample6")
36
+
37
+ stat_cfg = StatConfig.model_validate(
38
+ {
39
+ "Q": {
40
+ "ref": {"popA": 0.3},
41
+ "tgt": {"popB": 0.95},
42
+ "src": {"popC": "=1"},
43
+ }
44
+ }
45
+ )
46
+
47
+ ploidy_cfg = PloidyConfig.model_validate(
48
+ {
49
+ "ref": {"popA": 2},
50
+ "tgt": {"popB": 2},
51
+ "src": {"popC": 2},
52
+ }
53
+ )
54
+
55
+ pop_cfg = PopConfig.model_validate(
56
+ {
57
+ "ref": str(ref_file),
58
+ "tgt": str(tgt_file),
59
+ "src": str(src_file),
60
+ }
61
+ )
62
+
63
+ global_cfg = GlobalConfig(
64
+ statistics=stat_cfg,
65
+ ploidies=ploidy_cfg,
66
+ populations=pop_cfg,
67
+ )
68
+
69
+ assert global_cfg.statistics.get_parameters("Q")["ref"]["popA"] == 0.3
70
+ assert global_cfg.ploidies.get_ploidy("ref", "popA") == 2
71
+ assert global_cfg.populations.get_population("ref") == str(ref_file)
72
+
73
+
74
+ def test_global_config_invalid_ploidy(tmp_path):
75
+ ref_file = tmp_path / "ref.txt"
76
+ tgt_file = tmp_path / "tgt.txt"
77
+ src_file = tmp_path / "src.txt"
78
+
79
+ ref_file.write_text("popA sample1\npopA sample2")
80
+ tgt_file.write_text("popB sample3\npopB sample4")
81
+ src_file.write_text("popC sample5\npopC sample6")
82
+
83
+ stat_cfg = StatConfig.model_validate(
84
+ {
85
+ "Q": {
86
+ "ref": {"popA": 0.3},
87
+ "tgt": {"popB": 0.95},
88
+ "src": {"popC": "=1"},
89
+ }
90
+ }
91
+ )
92
+
93
+ ploidy_cfg = PloidyConfig.model_validate(
94
+ {
95
+ "ref": {"popA": 2},
96
+ "tgt": {"popD": 2},
97
+ "src": {"popC": 2},
98
+ }
99
+ )
100
+
101
+ pop_cfg = PopConfig.model_validate(
102
+ {
103
+ "ref": str(ref_file),
104
+ "tgt": str(tgt_file),
105
+ "src": str(src_file),
106
+ }
107
+ )
108
+
109
+ with pytest.raises(
110
+ ValueError,
111
+ match=r"Population 'popB' used in statistics\[Q\]\[tgt\] is not defined in ploidies\[tgt\]",
112
+ ):
113
+ GlobalConfig(
114
+ statistics=stat_cfg,
115
+ ploidies=ploidy_cfg,
116
+ populations=pop_cfg,
117
+ )
118
+
119
+
120
+ def test_global_config_invalid_population(tmp_path):
121
+ ref_file = tmp_path / "ref.txt"
122
+ tgt_file = tmp_path / "tgt.txt"
123
+ src_file = tmp_path / "src.txt"
124
+
125
+ ref_file.write_text("popA sample1\npopA sample2")
126
+ tgt_file.write_text("popD sample3\npopD sample4")
127
+ src_file.write_text("popC sample5\npopC sample6")
128
+
129
+ stat_cfg = StatConfig.model_validate(
130
+ {
131
+ "Q": {
132
+ "ref": {"popA": 0.3},
133
+ "tgt": {"popB": 0.95},
134
+ "src": {"popC": "=1"},
135
+ }
136
+ }
137
+ )
138
+
139
+ ploidy_cfg = PloidyConfig.model_validate(
140
+ {
141
+ "ref": {"popA": 2},
142
+ "tgt": {"popB": 2},
143
+ "src": {"popC": 2},
144
+ }
145
+ )
146
+
147
+ pop_cfg = PopConfig.model_validate(
148
+ {
149
+ "ref": str(ref_file),
150
+ "tgt": str(tgt_file),
151
+ "src": str(src_file),
152
+ }
153
+ )
154
+
155
+ with pytest.raises(
156
+ ValueError,
157
+ match=r"Population 'popB' used in statistics\[Q\]\[tgt\] is not found in the population file for group 'tgt'",
158
+ ):
159
+ GlobalConfig(
160
+ statistics=stat_cfg,
161
+ ploidies=ploidy_cfg,
162
+ populations=pop_cfg,
163
+ )
@@ -0,0 +1,93 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import pytest
22
+ from pydantic import ValidationError
23
+ from sai.configs import PloidyConfig
24
+
25
+
26
+ def test_valid_config():
27
+ config = PloidyConfig(
28
+ {
29
+ "ref": {"popA": 2},
30
+ "tgt": {"popB": 2},
31
+ "src": {"popC": 2, "popD": 4},
32
+ "outgroup": {"popE": 1},
33
+ }
34
+ )
35
+ assert config.get_ploidy("ref", "popA") == 2
36
+ assert config.get_ploidy("src", "popD") == 4
37
+
38
+
39
+ def test_missing_required_keys():
40
+ with pytest.raises(ValidationError, match="Missing required ploidy keys"):
41
+ PloidyConfig(
42
+ {
43
+ "ref": {"popA": 2},
44
+ "src": {"popC": 2},
45
+ }
46
+ )
47
+
48
+
49
+ def test_extra_keys():
50
+ with pytest.raises(ValidationError, match="Unsupported ploidy keys"):
51
+ PloidyConfig(
52
+ {
53
+ "ref": {"popA": 2},
54
+ "tgt": {"popB": 2},
55
+ "src": {"popC": 2},
56
+ "badkey": {"popD": 3},
57
+ }
58
+ )
59
+
60
+
61
+ def test_invalid_ploidy_values():
62
+ with pytest.raises(ValidationError, match="must be a positive integer"):
63
+ PloidyConfig(
64
+ {
65
+ "ref": {"popA": 2},
66
+ "tgt": {"popB": 0},
67
+ "src": {"popC": 1},
68
+ "outgroup": {"popD": 1},
69
+ }
70
+ )
71
+
72
+ with pytest.raises(ValidationError, match="must be a positive integer"):
73
+ PloidyConfig(
74
+ {
75
+ "ref": {"popA": 2},
76
+ "tgt": {"popB": 2},
77
+ "src": {"popC": -2, "popD": -3},
78
+ "outgroup": {"popE": 1},
79
+ }
80
+ )
81
+
82
+
83
+ def test_get_ploidy_key_error():
84
+ config = PloidyConfig(
85
+ {
86
+ "ref": {"popA": 2},
87
+ "tgt": {"popB": 2},
88
+ "src": {"popC": 2},
89
+ "outgroup": {"popD": 1},
90
+ }
91
+ )
92
+ with pytest.raises(KeyError):
93
+ config.get_ploidy("ghost", "popE")
@@ -0,0 +1,90 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import tempfile
22
+ import os
23
+ import pytest
24
+ from sai.configs import PopConfig
25
+
26
+
27
+ @pytest.fixture
28
+ def temp_files():
29
+ files = {}
30
+ for key in ["ref", "tgt", "src", "outgroup"]:
31
+ f = tempfile.NamedTemporaryFile(delete=False)
32
+ f.write(b"sample1\nsample2\n")
33
+ f.close()
34
+ files[key] = f.name
35
+ yield files
36
+ for path in files.values():
37
+ os.unlink(path)
38
+
39
+
40
+ def test_valid_pop_config_with_outgroup(temp_files):
41
+ config = PopConfig(
42
+ {
43
+ "ref": temp_files["ref"],
44
+ "tgt": temp_files["tgt"],
45
+ "src": temp_files["src"],
46
+ "outgroup": temp_files["outgroup"],
47
+ }
48
+ )
49
+ assert config.get_population("ref") == temp_files["ref"]
50
+ assert config.get_population("outgroup") == temp_files["outgroup"]
51
+
52
+
53
+ def test_valid_pop_config_without_outgroup(temp_files):
54
+ config = PopConfig(
55
+ {
56
+ "ref": temp_files["ref"],
57
+ "tgt": temp_files["tgt"],
58
+ "src": temp_files["src"],
59
+ }
60
+ )
61
+ assert config.get_population("tgt") == temp_files["tgt"]
62
+ assert config.get_population("outgroup") is None
63
+
64
+
65
+ def test_missing_required_key(temp_files):
66
+ with pytest.raises(ValueError, match="Missing required population keys"):
67
+ PopConfig({"ref": temp_files["ref"], "src": temp_files["src"]})
68
+
69
+
70
+ def test_invalid_extra_key(temp_files):
71
+ with pytest.raises(ValueError, match="Unsupported population keys"):
72
+ PopConfig(
73
+ {
74
+ "ref": temp_files["ref"],
75
+ "tgt": temp_files["tgt"],
76
+ "src": temp_files["src"],
77
+ "ghost": temp_files["outgroup"],
78
+ }
79
+ )
80
+
81
+
82
+ def test_file_not_exist():
83
+ with pytest.raises(ValueError, match="does not exist"):
84
+ PopConfig(
85
+ {
86
+ "ref": "/non/existent/path",
87
+ "tgt": "/non/existent/path",
88
+ "src": "/non/existent/path",
89
+ }
90
+ )
@@ -0,0 +1,171 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import pytest
22
+ import yaml
23
+ from sai.configs import StatConfig
24
+
25
+
26
+ def test_stat_config_valid():
27
+ config = StatConfig(
28
+ {
29
+ "U": {
30
+ "ref": {"popA": 0.01},
31
+ "tgt": {"popB": 0.5},
32
+ "src": {"Nea": "=1", "Den": ">0.8"},
33
+ },
34
+ "Q": {
35
+ "ref": {"popA": 0.01},
36
+ "tgt": {"popB": 0.95},
37
+ "src": {"Nea": ">0.2", "Den": "<0.8"},
38
+ },
39
+ "fd": {}, # No parameters
40
+ "df": {}, # No parameters
41
+ }
42
+ )
43
+
44
+ # Test valid configuration
45
+ assert config.get_parameters("U") == {
46
+ "ref": {"popA": 0.01},
47
+ "tgt": {"popB": 0.5},
48
+ "src": {"Nea": ("=", 1.0), "Den": (">", 0.8)},
49
+ }
50
+
51
+ assert config.get_parameters("Q") == {
52
+ "ref": {"popA": 0.01},
53
+ "tgt": {"popB": 0.95},
54
+ "src": {"Nea": (">", 0.2), "Den": ("<", 0.8)},
55
+ }
56
+ assert config.get_parameters("fd") == {}
57
+ assert config.get_parameters("df") == {}
58
+
59
+
60
+ def test_stat_config_invalid():
61
+ # Test unsupported statistics
62
+ with pytest.raises(ValueError):
63
+ StatConfig({"qq": {}})
64
+
65
+ # Test invalid src format (not a list)
66
+ with pytest.raises(ValueError):
67
+ StatConfig(
68
+ {"U": {"ref": "0.01", "tgt": "0.5", "src": "=1"}} # src should be a list
69
+ )
70
+
71
+ # Test out-of-range numeric values for 'ref'
72
+ with pytest.raises(ValueError):
73
+ StatConfig(
74
+ {
75
+ "U": {"ref": -0.1, "tgt": 0.5, "src": ["=1", ">0.8"]},
76
+ }
77
+ )
78
+
79
+ # Test out-of-range numeric values for 'tgt'
80
+ with pytest.raises(ValueError):
81
+ StatConfig(
82
+ {
83
+ "U": {"ref": 0.01, "tgt": -100, "src": ["=1", ">0.8"]},
84
+ }
85
+ )
86
+
87
+ # Test out-of-range numeric values for 'src'
88
+ with pytest.raises(ValueError):
89
+ StatConfig(
90
+ {
91
+ "U": {"ref": 0.01, "tgt": 0.5, "src": ["=-1", ">0.8"]},
92
+ }
93
+ )
94
+
95
+ # Test non-numeric string values for 'ref'
96
+ with pytest.raises(ValueError):
97
+ StatConfig(
98
+ {
99
+ "U": {"ref": "foo", "tgt": 0.5, "src": ["=1", ">0.8"]},
100
+ }
101
+ )
102
+
103
+ # Test non-numeric string values for 'tgt'
104
+ with pytest.raises(ValueError):
105
+ StatConfig(
106
+ {
107
+ "U": {"ref": 0.01, "tgt": "foo", "src": ["=1", ">0.8"]},
108
+ }
109
+ )
110
+
111
+ # Test non-numeric string values after comparator
112
+ with pytest.raises(ValueError):
113
+ StatConfig(
114
+ {
115
+ "U": {"ref": 0.01, "tgt": 0.5, "src": ["=invalid", ">0.8"]},
116
+ }
117
+ )
118
+
119
+ # Test missing ref, tgt, src in U and Q statistics
120
+ with pytest.raises(ValueError):
121
+ StatConfig({"U": {"ref": "0.01", "tgt": "0.5"}}) # src is missing
122
+
123
+ with pytest.raises(ValueError):
124
+ StatConfig({"Q": {}})
125
+
126
+ # Test invalid src comparator (invalid value)
127
+ with pytest.raises(ValueError):
128
+ StatConfig(
129
+ {
130
+ "Q": {
131
+ "ref": "0.01",
132
+ "tgt": "0.95",
133
+ "src": ["invalid_value"],
134
+ }
135
+ }
136
+ )
137
+
138
+
139
+ def test_stat_config_from_file():
140
+ with open("tests/data/test_config.yaml", "r") as f:
141
+ data = yaml.safe_load(f)
142
+
143
+ stat_config = StatConfig(data["statistics"])
144
+
145
+ stat_names = list(stat_config.root.keys())
146
+ assert "U" in stat_names
147
+ assert "Q" in stat_names
148
+
149
+ u_params = stat_config.get_parameters("U")
150
+ assert u_params == {
151
+ "ref": {"popA": 0.01},
152
+ "tgt": {"popB": 0.5},
153
+ "src": {"Nea": ("=", 1), "Den": (">=", 0.8)},
154
+ }
155
+
156
+ q_params = stat_config.get_parameters("Q")
157
+ assert q_params == {
158
+ "ref": {"popA": 0.01},
159
+ "tgt": {"popB": 0.95},
160
+ "src": {"Nea": (">", 0.2), "Den": ("<=", 0.8)},
161
+ }
162
+
163
+ assert "ref" in u_params
164
+ assert "tgt" in u_params
165
+ assert "src" in u_params
166
+
167
+ with open("tests/data/test_invalid_stat_config.yaml", "r") as f:
168
+ invalid_data = yaml.safe_load(f)
169
+
170
+ with pytest.raises(ValueError):
171
+ StatConfig(statistics=invalid_data["statistics"])
@@ -0,0 +1,51 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import pytest
22
+ from sai.generators import ChunkGenerator
23
+
24
+
25
+ def test_chunk_generator():
26
+ # Initialize
27
+ generator = ChunkGenerator(
28
+ vcf_file="tests/data/test.data.vcf",
29
+ chr_name="21",
30
+ step_size=5000,
31
+ window_size=10000,
32
+ num_chunks=2,
33
+ )
34
+
35
+ # Check that length is calculated properly (mocked to 3 records)
36
+ assert len(generator) == 2 # num_workers
37
+
38
+ # Check that chunks were split correctly
39
+ expected_chunks = [(1, 30000), (25001, 55000)]
40
+ assert generator.chunks == expected_chunks
41
+
42
+
43
+ def test_chunk_generator_chr_not_found():
44
+ with pytest.raises(ValueError, match="Chromosome 1 not found in VCF."):
45
+ ChunkGenerator(
46
+ vcf_file="tests/data/test.data.vcf",
47
+ chr_name="1",
48
+ step_size=10000,
49
+ window_size=10000,
50
+ num_chunks=2,
51
+ )