fastdfe 0.1.1b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fastdfe/__init__.py ADDED
@@ -0,0 +1,85 @@
1
+ """
2
+ fastDFE package.
3
+ """
4
+
5
__author__ = "Janek Sendrowski"
__contact__ = "sendrowski.janek@gmail.com"
__date__ = "2023-03-10"

# PEP 440 compliant version string; keep it in sync with the version of the
# released distribution (the previous placeholder 'beta' was not a valid
# version and did not identify the release).
__version__ = '0.1.1b0'
10
+
11
+ import logging
12
+ import sys
13
+ import warnings
14
+
15
+ import jsonpickle
16
+ import numpy as np
17
+ import pandas as pd
18
+
19
+ from .json_handlers import DataframeHandler, SpectrumHandler, SpectraHandler, NumpyArrayHandler
20
+ from .spectrum import Spectrum, Spectra
21
+
22
# register the custom jsonpickle handlers, one per serialized type
# (registration order is the same as listed here)
for _target, _custom_handler in (
        (pd.DataFrame, DataframeHandler),
        (Spectrum, SpectrumHandler),
        (Spectra, SpectraHandler),
        (np.ndarray, NumpyArrayHandler),
):
    jsonpickle.handlers.registry.register(_target, _custom_handler)

# do not leave the loop temporaries behind in the package namespace
del _target, _custom_handler
27
+
28
# package-wide logger: INFO and above, written to standard output
logger = logging.getLogger('fastdfe')
logger.setLevel(logging.INFO)

# stream handler emitting records as ``LEVEL:logger-name:message``
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(logging.Formatter(fmt='%(levelname)s:%(name)s:%(message)s'))
logger.addHandler(handler)
34
+
35
+
36
def raise_on_warning(message, category, filename, lineno, file=None, line=None):
    """
    Turn a warning into an exception.

    The signature mirrors that of ``warnings.showwarning`` so that the
    function can be installed as a replacement for it.

    :param message: Warning message
    :param category: Warning category
    :param filename: Name of the file the warning originated from
    :param lineno: Line number the warning originated from
    :param file: Unused, only present for signature compatibility
    :param line: Source line to include in the formatted warning
    :raises Exception: Always, carrying the formatted warning text
    """
    formatted = warnings.formatwarning(message, category, filename, lineno, line)

    raise Exception(formatted)
41
+
42
+
43
+ # warnings.showwarning = raise_on_warning
44
+
45
+ # configure default colormap
46
+ # plt.rcParams['image.cmap'] = 'Dark2'
47
+ # plt.rcParams['axes.prop_cycle'] = cycler('color', plt.get_cmap('Dark2').colors)
48
+
49
+ # load class from modules
50
+ from .parametrization import Parametrization, GammaExpParametrization, DiscreteParametrization, \
51
+ GammaDiscreteParametrization, DisplacedGammaParametrization
52
+ from .config import Config
53
+ from .abstract_inference import Inference
54
+ from .base_inference import BaseInference, InferenceResults
55
+ from .shared_inference import SharedInference, SharedParams
56
+ from .optimization import Covariate
57
+ from .polydfe import PolyDFE, PolyDFEResult
58
+ from .visualization import Visualization
59
+ from .spectrum import Spectrum, Spectra
60
+ from .parser import Parser, BaseTransitionStratification, BaseContextStratification, DegeneracyStratification, \
61
+ TransitionTransversionStratification, ReferenceBaseStratification
62
+
63
# names exported by ``from fastdfe import *``
__all__ = [
    # DFE parametrizations
    'Parametrization',
    'GammaExpParametrization',
    'DiscreteParametrization',
    'GammaDiscreteParametrization',
    'DisplacedGammaParametrization',

    # configuration and inference
    'Config',
    'Inference',
    'BaseInference',
    'SharedInference',
    'SharedParams',
    'Covariate',
    'PolyDFE',

    # visualization and spectra
    'Visualization',
    'Spectrum',
    'Spectra',

    # parser and stratifications
    'Parser',
    'BaseTransitionStratification',
    'BaseContextStratification',
    'DegeneracyStratification',
    'TransitionTransversionStratification',
    'ReferenceBaseStratification',
]
@@ -0,0 +1,426 @@
1
+ __author__ = "Janek Sendrowski"
2
+ __contact__ = "sendrowski.janek@gmail.com"
3
+ __date__ = "2023-03-12"
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import List, Optional, Literal
7
+ from typing_extensions import Self
8
+
9
+ import jsonpickle
10
+ import numpy as np
11
+ import pandas as pd
12
+ from matplotlib import pyplot as plt
13
+
14
+ from .bootstrap import Bootstrap
15
+ from .parametrization import Parametrization, from_string
16
+ from .visualization import Visualization
17
+
18
+
19
class Inference:
    """
    Static helpers to discretize DFEs and to visualize the results
    of one or several inference objects.
    """

    @staticmethod
    def plot_discretized(
            inferences: List['AbstractInference'],
            intervals: np.ndarray = np.array([-np.inf, -100, -10, -1, 0, 1, np.inf]),
            confidence_intervals: bool = True,
            ci_level: float = 0.05,
            bootstrap_type: Literal['percentile', 'bca'] = 'percentile',
            file: str = None,
            show: bool = True,
            title: str = 'discretized DFEs',
            labels: list | np.ndarray = None,
            **kwargs

    ) -> plt.Axes:
        """
        Visualize several discretized DFEs given by the list of inference objects.

        :param inferences: List of inference objects.
        :param intervals: Intervals to use for discretization.
        :param confidence_intervals: Whether to plot confidence intervals.
        :param ci_level: Confidence level for confidence intervals.
        :param bootstrap_type: Type of bootstrap to use for confidence intervals.
        :param file: Path to file to save the plot to.
        :param show: Whether to show the plot.
        :param title: Title of the plot.
        :param labels: Labels for the DFEs.
        :param kwargs: Additional keyword arguments. NOTE(review): these are accepted
            but not forwarded to the plotting function -- confirm whether that is intended.
        :return: Axis of the plot.
        """
        # discretize the DFE of every inference object
        discretized = [
            inference.get_discretized(
                intervals=intervals,
                confidence_intervals=confidence_intervals,
                ci_level=ci_level,
                bootstrap_type=bootstrap_type
            )
            for inference in inferences
        ]

        # separate the (values, errors) pairs into two parallel lists
        values = [val for val, _ in discretized]
        errors = [errs for _, errs in discretized]

        # plot DFEs
        return Visualization.plot_discretized(
            values=values,
            errors=errors,
            labels=labels,
            file=file,
            show=show,
            intervals=intervals,
            title=title
        )

    @staticmethod
    def plot_continuous(
            inferences: List['AbstractInference'],
            intervals: np.ndarray = np.array([-np.inf, -100, -10, -1, 0, 1, np.inf]),
            confidence_intervals: bool = True,
            ci_level: float = 0.05,
            bootstrap_type: Literal['percentile', 'bca'] = 'percentile',
            file: str = None,
            show: bool = True,
            title: str = 'continuous DFEs',
            labels: list | np.ndarray = None,
            scale: Literal['lin', 'log'] = 'lin',
            scale_density: bool = False,
            **kwargs

    ) -> plt.Axes:
        """
        Visualize several DFEs given by the list of inference objects.
        By default, the PDF is plotted as is. Due to the logarithmic scale on
        the x-axis, we may get a wrong intuition on how the mass is distributed,
        however. To get a better intuition, we can optionally scale the density
        by the x-axis interval size using ``scale_density = True``. This has the
        disadvantage that the density now changes for x, so that even a constant
        density will look warped.

        :param inferences: List of inference objects.
        :param intervals: Intervals to use for discretization.
        :param confidence_intervals: Whether to plot confidence intervals.
        :param ci_level: Confidence level for confidence intervals.
        :param bootstrap_type: Type of bootstrap to use for confidence intervals.
        :param file: Path to file to save the plot to.
        :param show: Whether to show the plot.
        :param title: Title of the plot.
        :param labels: Labels for the DFEs.
        :param scale: y-scale of the plot.
        :param scale_density: Whether to scale the density by the x-axis interval size.
        :param kwargs: Additional arguments for the plot.
        :return: Axis of the plot.
        """
        # NOTE(review): every name bound in this scope (the parameters, ``values``,
        # ``errors``, the loop temporaries and ``kwargs`` as a single dict) is
        # forwarded to the plotting function through ``**locals()`` below. Local
        # names are therefore part of the call and must not be renamed or removed
        # without checking the signature of ``Visualization.plot_continuous``.

        # get data from inference objects
        values = []
        errors = []
        for i, inference in enumerate(inferences):
            val, errs = inference.get_discretized(
                intervals=intervals,
                confidence_intervals=confidence_intervals,
                ci_level=ci_level,
                bootstrap_type=bootstrap_type
            )

            values.append(val)
            errors.append(errs)

        # plot DFEs
        return Visualization.plot_continuous(
            bins=intervals,
            **locals()
        )

    @staticmethod
    def plot_inferred_parameters(
            inferences: List['AbstractInference'],
            confidence_intervals: bool = True,
            ci_level: float = 0.05,
            bootstrap_type: Literal['percentile', 'bca'] = 'percentile',
            file: str = None,
            show: bool = True,
            title: str = 'parameter estimates',
            labels: list | np.ndarray = None,
            scale: Literal['lin', 'log'] = 'log',
            legend: bool = True,
            **kwargs

    ) -> plt.Axes:
        """
        Visualize the inferred parameters of the given list of inference objects.
        Note that the DFE parametrization needs to be the same for all inference objects.

        :param scale: y-scale of the plot.
        :param legend: Whether to show a legend.
        :param inferences: List of inference objects.
        :param confidence_intervals: Whether to plot confidence intervals.
        :param ci_level: Confidence level for confidence intervals.
        :param bootstrap_type: Type of bootstrap to use for confidence intervals.
        :param file: Path to file to save the plot to.
        :param show: Whether to show the plot.
        :param title: Title of the plot.
        :param labels: Labels for the DFEs.
        :param kwargs: Additional arguments for the plot.
        :return: Axis of the plot.
        """
        # NOTE(review): all parameters of this function (including ``inferences``
        # and ``kwargs`` as a single dict) are forwarded through ``**locals()``;
        # do not introduce additional local names before this call.
        return Visualization.plot_inferred_parameters(
            params=[inf.get_bootstrap_params() for inf in inferences],
            bootstraps=[inf.bootstraps for inf in inferences],
            **locals()
        )

    @staticmethod
    def get_discretized(
            bootstraps: pd.DataFrame,
            params: dict,
            model: Parametrization,
            intervals: np.ndarray = np.array([-np.inf, -100, -10, -1, 0, 1, np.inf]),
            confidence_intervals: bool = True,
            ci_level: float = 0.05,
            bootstrap_type: Literal['percentile', 'bca'] = 'percentile',

    ) -> tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Get discretized DFE.

        :param bootstraps: Bootstrap samples
        :param params: Parameters of the model
        :param model: DFE parametrization
        :param bootstrap_type: Type of bootstrap to use
        :param ci_level: Confidence interval level
        :param confidence_intervals: Whether to compute confidence intervals
        :param intervals: Array of interval boundaries yielding ``intervals.shape[0] - 1`` bars.
        :return: Values, errors. The errors are ``None`` if no confidence intervals
            were requested or no bootstraps are available.
        """
        # without bootstraps (or if not requested) there are no errors to
        # report, so just return the discretized values
        if not confidence_intervals or bootstraps is None:
            values = Inference.compute_histogram(
                params=params,
                model=model,
                intervals=intervals
            )

            return values, None

        # get errors, bootstrap means and discretized values
        errors, _, _, means, values = Inference.get_errors_discretized_dfe(
            params=params,
            bootstraps=bootstraps,
            model=model,
            ci_level=ci_level,
            intervals=intervals,
            bootstrap_type=bootstrap_type
        )

        # for percentile bootstraps, report the mean over all bootstrap
        # samples instead of the original values
        if bootstrap_type == 'percentile':
            values = means

        return values, errors

    @staticmethod
    def get_errors_discretized_dfe(
            params: dict,
            bootstraps: pd.DataFrame,
            model: Parametrization | str,
            ci_level: float = 0.05,
            intervals: np.ndarray = np.array([-np.inf, -100, -10, -1, 0, 1, np.inf]),
            bootstrap_type: Literal['percentile', 'bca'] = 'percentile'

    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """
        Compute errors and confidence interval for a discretized DFE.

        :param params: Parameters of the model
        :param bootstraps: Bootstrapped samples
        :param model: DFE parametrization
        :param ci_level: Confidence interval level
        :param intervals: Array of interval boundaries yielding ``intervals.shape[0] - 1`` bins.
        :param bootstrap_type: Type of bootstrap
        :return: Arrays of errors, confidence intervals, bootstraps, means and values
        """
        # discretize MLE DFE
        values = Inference.compute_histogram(model, params, intervals)

        # get discretized DFE per bootstrap sample (one row per sample)
        bs = np.array([Inference.compute_histogram(model, dict(r), intervals) for _, r in bootstraps.iterrows()])

        errors, cis = Bootstrap.get_errors(
            values=values,
            bs=bs,
            bootstrap_type=bootstrap_type,
            ci_level=ci_level
        )

        # calculate mean values over the bootstrap samples
        means = np.mean(bs, axis=0)

        return errors, cis, bs, means, values

    @staticmethod
    def compute_histogram(
            model: Parametrization | str,
            params: dict,
            intervals: np.ndarray
    ) -> np.ndarray:
        """
        Discretize the DFE given the DFE parametrization and the parameters.

        :param model: DFE parametrization
        :param params: Parameters of the model
        :param intervals: Array of interval boundaries yielding ``intervals.shape[0] - 1`` bins.
        :return: Discretized DFE
        """
        # discrete DFE
        y = from_string(model).discretize(params, intervals)

        # return normalized histogram
        return y / y.sum()
281
+
282
+
283
class AbstractInference(ABC):
    """
    Base class for main Inference and polyDFE wrapper.
    """

    def __init__(self, **kwargs):
        """
        Initialize the inference.

        :param kwargs: Keyword arguments (not used by this base implementation)
        """
        # bootstrap samples, if any
        self.bootstraps: Optional[pd.DataFrame] = None

        # maximum likelihood parameter estimates
        self.params_mle: Optional[dict] = None

        # DFE parametrization
        self.model: Optional[Parametrization] = None

    @abstractmethod
    def get_bootstrap_params(self) -> dict:
        """
        Get the parameters to be included in the bootstraps.

        :return: Parameters to be included in the bootstraps
        """
        pass

    def get_errors_discretized_dfe(
            self,
            ci_level: float = 0.05,
            intervals: np.ndarray = np.array([-np.inf, -100, -10, -1, 0, 1, np.inf]),
            bootstrap_type: Literal['percentile', 'bca'] = 'percentile'
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """
        Compute errors and confidence interval for a discretized DFE.

        :param ci_level: Confidence interval level
        :param intervals: Array of interval boundaries yielding ``intervals.shape[0] - 1`` bins.
        :param bootstrap_type: Type of bootstrap
        :return: Arrays of errors, confidence intervals, bootstraps, means and values
        """
        return Inference.get_errors_discretized_dfe(
            params=self.get_bootstrap_params(),
            bootstraps=self.bootstraps,
            model=self.model,
            ci_level=ci_level,
            intervals=intervals,
            bootstrap_type=bootstrap_type
        )

    def get_discretized(
            self,
            intervals: np.ndarray = np.array([-np.inf, -100, -10, -1, 0, 1, np.inf]),
            confidence_intervals: bool = True,
            ci_level: float = 0.05,
            bootstrap_type: Literal['percentile', 'bca'] = 'percentile'

    ) -> tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Get discretized DFE.

        :param bootstrap_type: Type of bootstrap
        :param ci_level: Confidence interval level
        :param confidence_intervals: Whether to return confidence intervals
        :param intervals: Array of interval boundaries yielding ``intervals.shape[0] - 1`` bins.
        :return: Discretized DFE values and their errors
        """
        return Inference.get_discretized(
            bootstraps=self.bootstraps,
            params=self.get_bootstrap_params(),
            model=self.model,
            intervals=intervals,
            confidence_intervals=confidence_intervals,
            ci_level=ci_level,
            bootstrap_type=bootstrap_type
        )

    def plot_discretized(
            self,
            file: str = None,
            show: bool = True,
            intervals: np.ndarray = np.array([-np.inf, -100, -10, -1, 0, 1, np.inf]),
            confidence_intervals: bool = True,
            ci_level: float = 0.05,
            bootstrap_type: Literal['percentile', 'bca'] = 'percentile',
            title: str = 'discretized DFE'
    ) -> plt.Axes:
        """
        Plot discretized DFE.

        :param title: Title of the plot
        :param bootstrap_type: Type of bootstrap
        :param ci_level: Confidence interval level
        :param confidence_intervals: Whether to plot confidence intervals
        :param file: File to save the plot to
        :param show: Whether to show the plot
        :param intervals: Array of interval boundaries yielding ``intervals.shape[0] - 1`` bars.
        :return: Axis
        """
        # delegate to the multi-inference plot with this object as only entry
        return Inference.plot_discretized(
            inferences=[self],
            file=file,
            show=show,
            intervals=intervals,
            confidence_intervals=confidence_intervals,
            ci_level=ci_level,
            bootstrap_type=bootstrap_type,
            title=title
        )

    def to_json(self) -> str:
        """
        Serialize object.

        :return: JSON string
        """
        return jsonpickle.encode(self, indent=4, warn=True)

    def to_file(self, file: str):
        """
        Save object to file.

        :param file: File to save to
        """
        # use an explicit encoding so that files are portable across platforms
        with open(file, 'w', encoding='utf-8') as fh:
            fh.write(self.to_json())

    @classmethod
    def from_json(cls, json: str, classes=None) -> Self:
        """
        Unserialize object.

        .. warning::
            ``jsonpickle.decode`` can instantiate arbitrary classes and thereby
            execute arbitrary code. Only decode data from trusted sources.

        :param classes: Classes to be used for unserialization
        :param json: JSON string
        :return: Unserialized object
        """
        # SECURITY: unsafe on untrusted input, see the warning above
        return jsonpickle.decode(json, classes=classes)

    @classmethod
    def from_file(cls, file: str, classes=None) -> Self:
        """
        Load object from file.

        The file content is passed to :meth:`from_json`, so the file must
        come from a trusted source.

        :param classes: Classes to be used for unserialization
        :param file: File to load from
        :return: Unserialized object
        """
        with open(file, 'r', encoding='utf-8') as fh:
            return cls.from_json(fh.read(), classes)