sai-pg 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. sai/__init__.py +2 -0
  2. sai/__main__.py +6 -3
  3. sai/configs/__init__.py +24 -0
  4. sai/configs/global_config.py +83 -0
  5. sai/configs/ploidy_config.py +94 -0
  6. sai/configs/pop_config.py +82 -0
  7. sai/configs/stat_config.py +220 -0
  8. sai/{utils/generators → generators}/chunk_generator.py +1 -1
  9. sai/{utils/generators → generators}/window_generator.py +81 -37
  10. sai/{utils/multiprocessing → multiprocessing}/mp_manager.py +2 -2
  11. sai/{utils/multiprocessing → multiprocessing}/mp_pool.py +2 -2
  12. sai/parsers/outlier_parser.py +4 -3
  13. sai/parsers/score_parser.py +8 -119
  14. sai/{utils/preprocessors → preprocessors}/chunk_preprocessor.py +21 -15
  15. sai/preprocessors/feature_preprocessor.py +236 -0
  16. sai/registries/__init__.py +22 -0
  17. sai/registries/generic_registry.py +89 -0
  18. sai/registries/stat_registry.py +30 -0
  19. sai/sai.py +124 -220
  20. sai/stats/__init__.py +11 -0
  21. sai/stats/danc_statistic.py +83 -0
  22. sai/stats/dd_statistic.py +77 -0
  23. sai/stats/df_statistic.py +84 -0
  24. sai/stats/dplus_statistic.py +86 -0
  25. sai/stats/fd_statistic.py +92 -0
  26. sai/stats/generic_statistic.py +93 -0
  27. sai/stats/q_statistic.py +104 -0
  28. sai/stats/stat_utils.py +259 -0
  29. sai/stats/u_statistic.py +99 -0
  30. sai/utils/utils.py +213 -142
  31. {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/METADATA +3 -14
  32. sai_pg-1.1.0.dist-info/RECORD +70 -0
  33. {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/WHEEL +1 -1
  34. sai_pg-1.1.0.dist-info/top_level.txt +2 -0
  35. tests/configs/test_global_config.py +163 -0
  36. tests/configs/test_ploidy_config.py +93 -0
  37. tests/configs/test_pop_config.py +90 -0
  38. tests/configs/test_stat_config.py +171 -0
  39. tests/generators/test_chunk_generator.py +51 -0
  40. tests/generators/test_window_generator.py +164 -0
  41. tests/multiprocessing/test_mp_manager.py +92 -0
  42. tests/multiprocessing/test_mp_pool.py +79 -0
  43. tests/parsers/test_argument_validation.py +133 -0
  44. tests/parsers/test_outlier_parser.py +53 -0
  45. tests/parsers/test_score_parser.py +63 -0
  46. tests/preprocessors/test_chunk_preprocessor.py +79 -0
  47. tests/preprocessors/test_feature_preprocessor.py +223 -0
  48. tests/registries/test_registries.py +74 -0
  49. tests/stats/test_danc_statistic.py +51 -0
  50. tests/stats/test_dd_statistic.py +45 -0
  51. tests/stats/test_df_statistic.py +73 -0
  52. tests/stats/test_dplus_statistic.py +79 -0
  53. tests/stats/test_fd_statistic.py +68 -0
  54. tests/stats/test_q_statistic.py +268 -0
  55. tests/stats/test_stat_utils.py +354 -0
  56. tests/stats/test_u_statistic.py +233 -0
  57. tests/test___main__.py +51 -0
  58. tests/test_sai.py +102 -0
  59. tests/utils/test_utils.py +511 -0
  60. sai/parsers/plot_parser.py +0 -152
  61. sai/stats/features.py +0 -302
  62. sai/utils/preprocessors/feature_preprocessor.py +0 -211
  63. sai_pg-1.0.1.dist-info/RECORD +0 -30
  64. sai_pg-1.0.1.dist-info/top_level.txt +0 -1
  65. /sai/{utils/generators → generators}/__init__.py +0 -0
  66. /sai/{utils/generators → generators}/data_generator.py +0 -0
  67. /sai/{utils/multiprocessing → multiprocessing}/__init__.py +0 -0
  68. /sai/{utils/preprocessors → preprocessors}/__init__.py +0 -0
  69. /sai/{utils/preprocessors → preprocessors}/data_preprocessor.py +0 -0
  70. {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/entry_points.txt +0 -0
  71. {sai_pg-1.0.1.dist-info → sai_pg-1.1.0.dist-info}/licenses/LICENSE +0 -0
sai/__init__.py CHANGED
@@ -16,3 +16,5 @@
16
16
  # along with this program. If not, please see
17
17
  #
18
18
  # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+ __version__ = "1.1.0"
sai/__main__.py CHANGED
@@ -19,9 +19,10 @@
19
19
 
20
20
 
21
21
  import argparse
22
+ import sai.stats
23
+ from sai import __version__
22
24
  from sai.parsers.score_parser import add_score_parser
23
25
  from sai.parsers.outlier_parser import add_outlier_parser
24
- from sai.parsers.plot_parser import add_plot_parser
25
26
 
26
27
 
27
28
  def _set_sigpipe_handler() -> None:
@@ -47,13 +48,15 @@ def _sai_cli_parser() -> argparse.ArgumentParser:
47
48
  top_parser : argparse.ArgumentParser
48
49
  A configured command-line interface parser.
49
50
  """
50
- top_parser = argparse.ArgumentParser()
51
+ top_parser = argparse.ArgumentParser(
52
+ description="SAI: Statistics for Adaptive Introgression"
53
+ )
54
+ top_parser.add_argument("--version", action="version", version=f"{__version__}")
51
55
  subparsers = top_parser.add_subparsers(dest="subcommand")
52
56
  subparsers.required = True
53
57
 
54
58
  add_score_parser(subparsers)
55
59
  add_outlier_parser(subparsers)
56
- add_plot_parser(subparsers)
57
60
 
58
61
  return top_parser
59
62
 
@@ -0,0 +1,24 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ from .global_config import GlobalConfig
22
+ from .ploidy_config import PloidyConfig
23
+ from .pop_config import PopConfig
24
+ from .stat_config import StatConfig
@@ -0,0 +1,83 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ from pydantic import BaseModel
22
+ from pydantic import model_validator
23
+ from typing import Optional
24
+ from sai.configs.stat_config import StatConfig
25
+ from sai.configs.ploidy_config import PloidyConfig
26
+ from sai.configs.pop_config import PopConfig
27
+
28
+
29
class GlobalConfig(BaseModel):
    # Aggregates the three sub-configurations. Each field is validated by its
    # own model first; the two "after" validators below then cross-check the
    # population names used by the statistics against the other two sections.
    statistics: StatConfig
    ploidies: PloidyConfig
    populations: PopConfig

    @model_validator(mode="after")
    def validate_population_in_ploidies(self) -> "GlobalConfig":
        """
        Check that every population named in a statistic has a ploidy entry.

        For each statistic and each of the groups "ref", "tgt" and "src", every
        population key listed under that group must also be a key of the same
        group in the ploidy configuration.

        Returns
        -------
        GlobalConfig
            The instance itself, unchanged.

        Raises
        ------
        ValueError
            If a population used by a statistic is missing from the matching
            ploidy group.
        """
        stat_data = self.statistics.root  # stat name -> group -> population -> value
        ploidy_data = self.ploidies.root  # group -> population -> ploidy

        for stat_name, params in stat_data.items():
            for group in ("ref", "tgt", "src"):
                # A group absent from either side is treated as empty.
                known_pops = ploidy_data.get(group, {})
                for pop in params.get(group, {}):
                    if pop in known_pops:
                        continue
                    raise ValueError(
                        f"Population '{pop}' used in statistics[{stat_name}][{group}] "
                        f"is not defined in ploidies[{group}]"
                    )
        return self

    @model_validator(mode="after")
    def validate_population_in_populations(self) -> "GlobalConfig":
        """
        Check that every population named in a statistic occurs in the sample files.

        Each configured population file is parsed with ``parse_ind_file`` and
        the keys of the parsed result are taken as the populations available
        for that group (presumably population name -> samples; confirm against
        ``sai.utils.parse_ind_file``).

        Returns
        -------
        GlobalConfig
            The instance itself, unchanged.

        Raises
        ------
        ValueError
            If a population used by a statistic is not among the keys parsed
            from the file of the matching group.
        """
        # Imported inside the method, as in the original (NOTE(review): likely
        # to avoid an import cycle with sai.utils -- confirm).
        from sai.utils import parse_ind_file

        stat_data = self.statistics.root  # stat name -> group -> population -> value

        categories_per_group = {}
        for group, path in self.populations.root.items():
            categories_per_group[group] = set(parse_ind_file(path).keys())

        for stat_name, params in stat_data.items():
            for group in ("ref", "tgt", "src"):
                expected_categories = categories_per_group.get(group, set())
                for pop in params.get(group, {}):
                    if pop in expected_categories:
                        continue
                    raise ValueError(
                        f"Population '{pop}' used in statistics[{stat_name}][{group}] "
                        f"is not found in the population file for group '{group}'."
                    )
        return self
@@ -0,0 +1,94 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ from pydantic import RootModel, field_validator
22
+ from typing import Dict, Union
23
+
24
+
25
class PloidyConfig(RootModel[Dict[str, Dict[str, int]]]):
    """
    Per-population ploidy levels, organised by population group.

    The root value maps a group name to a mapping of population name to
    ploidy. Validation enforces that:

    - only the groups "ref", "tgt", "src" and "outgroup" appear,
    - the groups "ref", "tgt" and "src" are all present,
    - every ploidy is a positive integer.
    """

    @field_validator("root")
    def validate_ploidy_dict(
        cls, v: Dict[str, Dict[str, int]]
    ) -> Dict[str, Dict[str, int]]:
        """
        Validate the group names and the ploidy values of the root mapping.

        Parameters
        ----------
        v : Dict[str, Dict[str, int]]
            Mapping of group name to a mapping of population name to ploidy.

        Returns
        -------
        Dict[str, Dict[str, int]]
            The input mapping, unchanged.

        Raises
        ------
        ValueError
            If an unsupported group is present, a required group is missing,
            a group value is not a dictionary, or a ploidy is not a positive
            integer.
        """
        allowed_keys = {"ref", "tgt", "src", "outgroup"}
        required_keys = {"ref", "tgt", "src"}
        present_keys = set(v)

        # Unsupported groups are reported before missing ones.
        extra_keys = present_keys - allowed_keys
        if extra_keys:
            raise ValueError(
                f"Unsupported ploidy keys: {extra_keys}. Allowed keys are {allowed_keys}."
            )

        missing_keys = required_keys - present_keys
        if missing_keys:
            raise ValueError(f"Missing required ploidy keys: {missing_keys}.")

        for group, subdict in v.items():
            if not isinstance(subdict, dict):
                raise ValueError(
                    f"Value for '{group}' must be a dictionary of population -> ploidy."
                )
            for pop, ploidy in subdict.items():
                if isinstance(ploidy, int) and ploidy > 0:
                    continue
                raise ValueError(
                    f"Ploidy for '{group}:{pop}' must be a positive integer."
                )

        return v

    def get_ploidy(self, group: str, population: str = None) -> Union[int, list[int]]:
        """
        Look up the ploidy of one population, or of all populations in a group.

        Parameters
        ----------
        group : str
            One of "ref", "tgt", "src", or "outgroup".
        population : str, optional
            Population name within the group. When None, the ploidies of all
            populations in the group are returned.

        Returns
        -------
        int or list[int]
            The ploidy of ``population`` when it is given; otherwise the list
            of ploidies of the group, in the order stored in the mapping.

        Raises
        ------
        KeyError
            If the group, or the population within the group, is not configured.
        """
        groups = self.root
        if group not in groups:
            raise KeyError(f"Group '{group}' not found in configuration.")

        members = groups[group]
        if population is None:
            return list(members.values())

        if population in members:
            return members[population]

        raise KeyError(
            f"Population '{population}' not found under group '{group}'."
        )
@@ -0,0 +1,82 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
+ import os
22
+ from typing import Dict
23
+ from pydantic import RootModel, field_validator
24
+
25
+
26
# Population groups that every population configuration must provide.
REQUIRED_KEYS = {"ref", "tgt", "src"}
# Population groups that may be omitted.
OPTIONAL_KEYS = {"outgroup"}
# Every group name accepted in a population configuration.
ALLOWED_KEYS = REQUIRED_KEYS.union(OPTIONAL_KEYS)
29
+
30
+
31
class PopConfig(RootModel[Dict[str, str]]):
    """
    Paths of the files that list the samples of each population group.

    Required groups:
    - ref: file with the reference population sample names.
    - tgt: file with the target population sample names.
    - src: file with the source population sample names.

    Optional group:
    - outgroup: file with the outgroup sample names.
    """

    @field_validator("root")
    def validate_population_keys_and_paths(cls, v: Dict[str, str]) -> Dict[str, str]:
        """
        Validate the group names and check that every configured file exists.

        Parameters
        ----------
        v : Dict[str, str]
            Mapping of group name to file path.

        Returns
        -------
        Dict[str, str]
            The input mapping, unchanged.

        Raises
        ------
        ValueError
            If a required group is missing, an unsupported group is present,
            or a path does not refer to an existing regular file.
        """
        present = set(v)

        # Missing groups are reported before unsupported ones.
        missing = REQUIRED_KEYS - present
        if missing:
            raise ValueError(f"Missing required population keys: {missing}")

        invalid = present - ALLOWED_KEYS
        if invalid:
            raise ValueError(f"Unsupported population keys: {invalid}")

        for name, path in v.items():
            if os.path.isfile(path):
                continue
            raise ValueError(f"{name} file does not exist: {path}")

        return v

    def get_population(self, group: str) -> str:
        """
        Return the file path configured for a population group.

        Parameters
        ----------
        group : str
            The population group name (e.g., 'ref', 'tgt', 'src', or 'outgroup').

        Returns
        -------
        str
            The file path of the group. For the group 'outgroup', None is
            returned when no outgroup file is configured.

        Raises
        ------
        ValueError
            If a group other than 'outgroup' is not present in the configuration.
        """
        if group in self.root:
            return self.root[group]

        # Only the outgroup may be absent without this being an error.
        if group == "outgroup":
            return None

        raise ValueError(f"Population group '{group}' not found in config.")
@@ -0,0 +1,220 @@
1
+ # Copyright 2025 Xin Huang
2
+ #
3
+ # GNU General Public License v3.0
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, please see
17
+ #
18
+ # https://www.gnu.org/licenses/gpl-3.0.en.html
19
+
20
+
21
# Statistic names accepted as top-level keys of a statistics configuration.
SUPPORTED_STATISTICS = ["Danc", "DD", "df", "Dplus", "fd", "U", "Q"]
30
+
31
+
32
+ from pydantic import RootModel, field_validator, ValidationError
33
+ from typing import Dict, Literal, List, Optional, Union
34
+
35
+
36
class StatConfig(
    RootModel[
        Dict[
            str,
            Dict[str, Dict[str, Union[float, str]]],
        ]
    ]
):
    """
    Configuration of the statistics to compute and their per-population parameters.

    The root value maps a statistic name (one of ``SUPPORTED_STATISTICS``) to
    parameter groups, and each parameter group maps population names to values.
    Only the "U" and "Q" statistics have their parameters validated here: they
    must define exactly the groups `ref`, `tgt` and `src`, where `ref` and
    `tgt` hold frequencies between 0 and 1 and `src` holds comparator
    expressions such as "=0.5" or ">0.2" (one per population).
    """

    @field_validator("root")
    def check_valid_stat_types(
        cls, v: Dict[str, Dict[str, Dict[str, Union[float, str]]]]
    ) -> Dict[
        str,
        Dict[str, Dict[str, Union[float, tuple[str, float]]]],
    ]:
        """
        Validates the statistic names and the parameters of the U and Q statistics.

        Parameters
        ----------
        v : Dict[str, Dict[str, Dict[str, Union[float, str]]]]
            A dictionary mapping statistic names (e.g., "U", "Q") to parameter groups
            ("ref", "tgt", "src"), where each group maps population names to values.

            - Outer dict key: statistic name (e.g., "U", "Q", "fd")
            - Middle dict key: parameter group ("ref", "tgt", or "src")
            - Inner dict key: population name (e.g., "AFR", "CHB")
            - Inner dict value:
                - For "ref" and "tgt": frequency between 0 and 1
                - For "src": string comparator expression (e.g., ">=0.2", "=1")

        Returns
        -------
        Dict[str, Dict[str, Dict[str, Union[float, tuple[str, float]]]]]
            The same dictionary object. For the "U" and "Q" statistics the "src"
            group has been replaced in place by a mapping of population name to
            a (comparator, threshold) tuple, e.g., (">=", 0.2). Other statistics
            are returned without parameter validation.

        Raises
        ------
        ValueError
            If a statistic name is not supported, or if the parameters of a
            "U" or "Q" statistic are invalid.
        """
        for stat_name, params in v.items():
            if stat_name not in SUPPORTED_STATISTICS:
                raise ValueError(f"The {stat_name} statistic is not supported.")
            if stat_name in ["U", "Q"]:
                # Only U and Q carry thresholds that need range checking.
                cls.check_range_for_u_q(stat_name, params)
        return v

    @staticmethod
    def check_range_for_u_q(
        stat_name: str, params: Dict[str, Dict[str, Union[float, str]]]
    ) -> None:
        """
        Validates the parameters for U and Q statistics.
        ref and tgt must be between 0 and 1, and src must contain a valid comparator
        with a frequency value. On success, ``params["src"]`` is replaced in place
        by a mapping of population name to a (comparator, threshold) tuple.

        Parameters
        ----------
        stat_name : str
            The name of the statistic (e.g., "U" or "Q"). Any other name makes
            this function a no-op.
        params : Dict[str, Dict[str, Union[float, str]]]
            A dictionary containing the parameters for the statistic, such as ref,
            tgt, and src.

        Raises
        ------
        ValueError
            If the parameter groups are not exactly ref, tgt and src, or if any
            of the values are outside the valid range or in an incorrect format.
        """
        if stat_name not in ("U", "Q"):
            return

        required_keys = {"ref", "tgt", "src"}
        param_keys = set(params.keys())
        if param_keys != required_keys:
            raise ValueError(
                f"{stat_name} must have exactly the keys: {required_keys}, but got {param_keys}."
            )

        for param in ("ref", "tgt"):
            for pop, value in params[param].items():
                try:
                    num = float(value)
                except (TypeError, ValueError):
                    # Report which entry is malformed instead of leaking the
                    # bare float() conversion error.
                    raise ValueError(
                        f"{param}[{pop}] value must be a number between 0 and 1 "
                        f"for {stat_name}, got {value!r}."
                    ) from None
                if not (0 <= num <= 1):
                    # The original formatted an undefined name (`val`) here,
                    # which raised NameError instead of this ValueError.
                    raise ValueError(
                        f"{param}[{pop}] value must be between 0 and 1 for {stat_name}, got {value}."
                    )

        # Build the normalized src mapping separately and assign it once all
        # entries are valid, so `params` is never modified while being iterated.
        new_src: Dict[str, tuple[str, float]] = {}
        for pop, expr in params["src"].items():
            if not isinstance(expr, str):
                raise ValueError(
                    f"src[{pop}] value must be a comparator string for {stat_name}."
                )
            new_src[pop] = StatConfig.check_comparator(
                expr, stat_name, f"src[{pop}]"
            )
        params["src"] = new_src

    @staticmethod
    def check_comparator(value: str, stat_name: str, param: str) -> tuple[str, float]:
        """
        Validates that the src parameter starts with a valid comparator
        (e.g., "=0.5", ">=0.2") and that the number following it is between 0 and 1.

        Parameters
        ----------
        value : str
            The value of the src parameter, which should start with a comparator
            (e.g., "=0.5").
        stat_name : str
            The name of the statistic (e.g., "U" or "Q"); used in error messages.
        param : str
            The parameter label (e.g., "src[pop]"); used in error messages.

        Returns
        -------
        tuple[str, float]
            A tuple containing:
            - A string representing the comparison operator (`=`, `<`, `>`, `<=`, `>=`).
            - A float representing the threshold value.

        Raises
        ------
        ValueError
            If the value does not start with a valid comparator, the remainder
            is not a number, or the number is not in the range 0-1.
        """
        # Two-character operators come first so that "<=" is not read as "<".
        valid_comparators = ["<=", ">=", "=", "<", ">"]
        comparator = next(
            (comp for comp in valid_comparators if value.startswith(comp)), None
        )
        if comparator is None:
            raise ValueError(
                f"{param} for {stat_name} must contain a valid comparator (e.g., '=0.5', '>=0.2')."
            )

        # Everything after the comparator must parse as the threshold.
        try:
            num = float(value[len(comparator) :])
        except ValueError:
            raise ValueError(
                f"{param} value for {stat_name} must be a valid number after the comparator."
            ) from None

        if not (0 <= num <= 1):
            raise ValueError(
                f"{param} value must be between 0 and 1 for {stat_name}, but got {num}."
            )

        return comparator, num

    def get_parameters(
        self, stat_name: str
    ) -> Optional[Dict[str, Dict[str, Union[float, tuple[str, float]]]]]:
        """
        Retrieves the parameters for a specific statistic.

        Parameters
        ----------
        stat_name : str
            The name of the statistic whose parameters are to be retrieved.

        Returns
        -------
        Optional[Dict[str, Dict[str, Union[float, tuple[str, float]]]]]
            A dictionary containing the parameters for the specified statistic,
            or None if the statistic is not configured.
        """
        return self.root.get(stat_name)
@@ -21,7 +21,7 @@
21
21
  import pysam
22
22
  from typing import Iterator
23
23
  from sai.utils import split_genome
24
- from sai.utils.generators import DataGenerator
24
+ from sai.generators import DataGenerator
25
25
 
26
26
 
27
27
  class ChunkGenerator(DataGenerator):