pg-sui 1.0.2.1__py3-none-any.whl → 1.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

This version of pg-sui might be problematic.
Files changed (112)
  1. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/METADATA +51 -70
  2. pg_sui-1.6.8.dist-info/RECORD +78 -0
  3. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/WHEEL +1 -1
  4. pg_sui-1.6.8.dist-info/entry_points.txt +4 -0
  5. pg_sui-1.6.8.dist-info/top_level.txt +1 -0
  6. pgsui/__init__.py +35 -54
  7. pgsui/_version.py +34 -0
  8. pgsui/cli.py +635 -0
  9. pgsui/data_processing/config.py +576 -0
  10. pgsui/data_processing/containers.py +1782 -0
  11. pgsui/data_processing/transformers.py +121 -1103
  12. pgsui/electron/app/__main__.py +5 -0
  13. pgsui/electron/app/icons/icons/1024x1024.png +0 -0
  14. pgsui/electron/app/icons/icons/128x128.png +0 -0
  15. pgsui/electron/app/icons/icons/16x16.png +0 -0
  16. pgsui/electron/app/icons/icons/24x24.png +0 -0
  17. pgsui/electron/app/icons/icons/256x256.png +0 -0
  18. pgsui/electron/app/icons/icons/32x32.png +0 -0
  19. pgsui/electron/app/icons/icons/48x48.png +0 -0
  20. pgsui/electron/app/icons/icons/512x512.png +0 -0
  21. pgsui/electron/app/icons/icons/64x64.png +0 -0
  22. pgsui/electron/app/icons/icons/icon.icns +0 -0
  23. pgsui/electron/app/icons/icons/icon.ico +0 -0
  24. pgsui/electron/app/main.js +189 -0
  25. pgsui/electron/app/package-lock.json +6893 -0
  26. pgsui/electron/app/package.json +50 -0
  27. pgsui/electron/app/preload.js +15 -0
  28. pgsui/electron/app/server.py +146 -0
  29. pgsui/electron/app/ui/logo.png +0 -0
  30. pgsui/electron/app/ui/renderer.js +130 -0
  31. pgsui/electron/app/ui/styles.css +59 -0
  32. pgsui/electron/app/ui/ui_shim.js +72 -0
  33. pgsui/electron/bootstrap.py +43 -0
  34. pgsui/electron/launch.py +59 -0
  35. pgsui/electron/package.json +14 -0
  36. pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
  37. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
  38. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
  39. pgsui/impute/deterministic/imputers/allele_freq.py +691 -0
  40. pgsui/impute/deterministic/imputers/mode.py +679 -0
  41. pgsui/impute/deterministic/imputers/nmf.py +221 -0
  42. pgsui/impute/deterministic/imputers/phylo.py +971 -0
  43. pgsui/impute/deterministic/imputers/ref_allele.py +530 -0
  44. pgsui/impute/supervised/base.py +339 -0
  45. pgsui/impute/supervised/imputers/hist_gradient_boosting.py +293 -0
  46. pgsui/impute/supervised/imputers/random_forest.py +287 -0
  47. pgsui/impute/unsupervised/base.py +924 -0
  48. pgsui/impute/unsupervised/callbacks.py +89 -263
  49. pgsui/impute/unsupervised/imputers/autoencoder.py +972 -0
  50. pgsui/impute/unsupervised/imputers/nlpca.py +1264 -0
  51. pgsui/impute/unsupervised/imputers/ubp.py +1288 -0
  52. pgsui/impute/unsupervised/imputers/vae.py +957 -0
  53. pgsui/impute/unsupervised/loss_functions.py +158 -0
  54. pgsui/impute/unsupervised/models/autoencoder_model.py +208 -558
  55. pgsui/impute/unsupervised/models/nlpca_model.py +149 -468
  56. pgsui/impute/unsupervised/models/ubp_model.py +198 -1317
  57. pgsui/impute/unsupervised/models/vae_model.py +259 -618
  58. pgsui/impute/unsupervised/nn_scorers.py +215 -0
  59. pgsui/utils/classification_viz.py +591 -0
  60. pgsui/utils/misc.py +35 -480
  61. pgsui/utils/plotting.py +514 -824
  62. pgsui/utils/scorers.py +212 -438
  63. pg_sui-1.0.2.1.dist-info/RECORD +0 -75
  64. pg_sui-1.0.2.1.dist-info/top_level.txt +0 -3
  65. pgsui/example_data/phylip_files/test_n10.phy +0 -118
  66. pgsui/example_data/phylip_files/test_n100.phy +0 -118
  67. pgsui/example_data/phylip_files/test_n2.phy +0 -118
  68. pgsui/example_data/phylip_files/test_n500.phy +0 -118
  69. pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
  70. pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
  71. pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
  72. pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
  73. pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
  74. pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
  75. pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
  76. pgsui/example_data/trees/test.iqtree +0 -376
  77. pgsui/example_data/trees/test.qmat +0 -5
  78. pgsui/example_data/trees/test.rate +0 -2033
  79. pgsui/example_data/trees/test.tre +0 -1
  80. pgsui/example_data/trees/test_n10.rate +0 -19
  81. pgsui/example_data/trees/test_n100.rate +0 -109
  82. pgsui/example_data/trees/test_n500.rate +0 -509
  83. pgsui/example_data/trees/test_siterates.txt +0 -2024
  84. pgsui/example_data/trees/test_siterates_n10.txt +0 -10
  85. pgsui/example_data/trees/test_siterates_n100.txt +0 -100
  86. pgsui/example_data/trees/test_siterates_n500.txt +0 -500
  87. pgsui/example_data/vcf_files/test.vcf +0 -244
  88. pgsui/example_data/vcf_files/test.vcf.gz +0 -0
  89. pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
  90. pgsui/impute/estimators.py +0 -735
  91. pgsui/impute/impute.py +0 -1486
  92. pgsui/impute/simple_imputers.py +0 -1439
  93. pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -785
  94. pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1027
  95. pgsui/impute/unsupervised/keras_classifiers.py +0 -702
  96. pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
  97. pgsui/impute/unsupervised/neural_network_imputers.py +0 -1424
  98. pgsui/impute/unsupervised/neural_network_methods.py +0 -1549
  99. pgsui/pg_sui.py +0 -261
  100. pgsui/utils/sequence_tools.py +0 -407
  101. simulation/sim_benchmarks.py +0 -333
  102. simulation/sim_treeparams.py +0 -475
  103. test/__init__.py +0 -0
  104. test/pg_sui_simtest.py +0 -215
  105. test/pg_sui_testing.py +0 -523
  106. test/test.py +0 -297
  107. test/test_pgsui.py +0 -374
  108. test/test_tkc.py +0 -214
  109. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info/licenses}/LICENSE +0 -0
  110. /pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
  111. /pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
  112. {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
@@ -1,286 +1,112 @@
-import math
-import sys
+from snpio.utils.logging import LoggerManager

-import numpy as np
-import tensorflow as tf

+class EarlyStopping:
+    """Class to stop the training when a monitored metric has stopped improving.

-class CyclicalAnnealingCallback(tf.keras.callbacks.Callback):
-    """Perform cyclical annealing with KL Divergence weights.
-
-    The dynamically changing weight (beta) is multiplied with the KL Divergence loss.
-
-    This process is supposed to improve the latent distribution sampling for the variational autoencoder model and eliminate the KL vanishing issue.
-
-    Three types of cycle curves can be used that determine how the weight increases: 'linear', 'sigmoid', and 'cosine'..
-
-    Code is adapted from: https://github.com/haofuml/cyclical_annealing
-
-    The cyclical annealing process was first described in the following paper: https://aclanthology.org/N19-1021.pdf
-
-    Args:
-        n_iter (int): Number of iterations (epochs) being used in training.
-        start (float, optional): Where to start cycles. Defaults to 0.0.
-        stop (float, optional): Where to stop cycles. Defaults to 1.0.
-        n_cycle (int, optional): How many cycles to use across all the epochs. Defaults to 4.
-        ratio (float, optional): Ratio to determine proportion used to increase beta. Defaults to 0.5.
-        schedule_type (str, optional): Type of curve to use for scheduler. Possible options include: 'linear', 'sigmoid', or 'cosine'. Defaults to 'linear'.
+    This class is used to stop the training of a model when a monitored metric has stopped improving (such as validation loss or accuracy). If the metric does not improve for `patience` epochs, and we have already passed the `min_epochs` epoch threshold, training is halted. The best model checkpoint is reloaded when early stopping is triggered.

+    Example:
+        >>> early_stopping = EarlyStopping(patience=25, verbose=1, min_epochs=100)
+        >>> for epoch in range(1, 1001):
+        >>>     val_loss = train_epoch(...)
+        >>>     early_stopping(val_loss, model)
+        >>>     if early_stopping.early_stop:
+        >>>         break
    """

    def __init__(
        self,
-        n_iter,
-        start=0.0,
-        stop=1.0,
-        n_cycle=4,
-        ratio=0.5,
-        schedule_type="linear",
+        patience: int = 25,
+        delta: float = 0.0,
+        verbose: int = 0,
+        mode: str = "min",
+        min_epochs: int = 100,
+        prefix: str = "pgsui_output",
+        debug: bool = False,
    ):
-        self.n_iter = n_iter
-        self.start = start
-        self.stop = stop
-        self.n_cycle = n_cycle
-        self.ratio = ratio
-        self.schedule_type = schedule_type
-
-        self.arr = None
+        """Early stopping callback for PyTorch training.

-    def on_train_begin(self, logs=None):
-        """Executes on training begin.
+        This class is used to stop the training of a model when a monitored metric has stopped improving (such as validation loss or accuracy). If the metric does not improve for `patience` epochs, and we have already passed the `min_epochs` epoch threshold, training is halted. The best model checkpoint is reloaded when early stopping is triggered. The `mode` parameter can be set to "min" or "max" to indicate whether the metric should be minimized or maximized, respectively.

-        Here, the cycle curve is generated and stored as a class variable.
+        Args:
+            patience (int): Number of epochs to wait after the last time the monitored metric improved.
+            delta (float): Minimum change in the monitored metric to qualify as an improvement.
+            verbose (int): Verbosity level (0 = silent, 1 = improvement messages, 2+ = more).
+            mode (str): "min" or "max" to indicate how improvement is defined.
+            prefix (str): Prefix for directory naming.
+            output_dir (Path): Directory in which to create subfolders/checkpoints.
+            min_epochs (int): Minimum epoch count before early stopping can take effect.
+            debug (bool): Debug mode for logging messages
+
+        Raises:
+            ValueError: If an invalid mode is provided. Must be "min" or "max".
        """
-        if self.schedule_type == "linear":
-            cycle_func = self._linear_cycle_range
-        elif self.schedule_type == "sigmoid":
-            cycle_func = self._sigmoid_cycle_range
-        elif self.schedule_type == "cosine":
-            cycle_func = self._cosine_cycle_range
+        self.patience = patience
+        self.delta = delta
+        self.verbose = verbose >= 2 or debug
+        self.debug = debug
+        self.mode = mode
+        self.counter = 0
+        self.epoch_count = 0
+        self.best_score = None
+        self.early_stop = False
+        self.best_model = None
+        self.min_epochs = min_epochs
+
+        is_verbose = verbose >= 2 or debug
+        logman = LoggerManager(name=__name__, prefix=prefix, verbose=is_verbose)
+        self.logger = logman.get_logger()
+
+        # Define the comparison function for the monitored metric
+        if mode == "min":
+            self.monitor = lambda current, best: current < best - self.delta
+        elif mode == "max":
+            self.monitor = lambda current, best: current > best + self.delta
        else:
-            raise ValueError(
-                f"Invalid schedule_type value provided: {self.schedule_type}"
-            )
+            msg = f"Invalid mode provided: '{mode}'. Use 'min' or 'max'."
+            self.logger.error(msg)
+            raise ValueError(msg)

-        self.arr = cycle_func()
-
-    def on_epoch_begin(self, epoch, logs=None):
-        """Executes each time an epoch begins.
-
-        Here, the new kl_beta weight is set.
+    def __call__(self, score, model):
+        """Checks if early stopping condition is met and checkpoints model accordingly.

        Args:
-            epoch (int): Current epoch iteration.
-            logs (None, optional): For compatibility. Not used. Defaults to None.
-        """
-        idx = epoch - 1
-        new_weight = self.arr[idx]
-
-        tf.keras.backend.set_value(self.model.kl_beta, new_weight)
-
-    def _linear_cycle_range(self):
-        """Get an array with a linear cycle curve ranging from 0 to 1 for n_iter epochs.
-
-        The amount of time cycling and spent at 1.0 is determined by the ratio variable.
-
-        Returns:
-            numpy.ndarray: Linear cycle range.
-        """
-        L = np.ones(self.n_iter) * self.stop
-        period = self.n_iter / self.n_cycle
-
-        # Linear schedule
-        step = (self.stop - self.start) / (
-            period * self.ratio
-        )  # linear schedule
-
-        for c in range(self.n_cycle):
-            v, i = self.start, 0
-            while v <= self.stop and (int(i + c * period) < self.n_iter):
-                L[int(i + c * period)] = v
-                v += step
-                i += 1
-
-        return L
-
-    def _sigmoid_cycle_range(self):
-        """Get sigmoidal curve cycle ranging from 0 to 1 for n_iter epochs.
-
-        The amount of time cycling and spent at 1.0 is determined by the ratio variable.
-
-        Returns:
-            numpy.ndarray: Sigmoidal cycle range.
+            score (float): The current metric value (e.g., validation loss/accuracy).
+            model (torch.nn.Module): The model being trained.
        """
-        L = np.ones(self.n_iter)
-        period = self.n_iter / self.n_cycle
-        step = (self.stop - self.start) / (
-            period * self.ratio
-        )  # step is in [0,1]
-
-        for c in range(self.n_cycle):
-            v, i = self.start, 0
+        # Increment the epoch count each time we call this function
+        self.epoch_count += 1
+
+        # If this is the first epoch, initialize best_score and save model
+        if self.best_score is None:
+            self.best_score = score
+            return
+
+        # Check if there is improvement
+        if self.monitor(score, self.best_score):
+            # If improved, reset counter and update the best score/model
+            self.best_score = score
+            self.best_model = model
+            self.counter = 0
+        else:
+            # No improvement: increase counter
+            self.counter += 1

-            while v <= self.stop:
-                L[int(i + c * period)] = 1.0 / (
-                    1.0 + np.exp(-(v * 12.0 - 6.0))
+            if self.verbose:
+                self.logger.info(
+                    f"EarlyStopping counter: {self.counter}/{self.patience}"
                )
-                v += step
-                i += 1
-        return L
-
-    def _cosine_cycle_range(self):
-        """Get cosine curve cycle ranging from 0 to 1 for n_iter epochs.

-        The amount of time cycling and spent at 1.0 is determined by the ratio variable.
-
-        Returns:
-            numpy.ndarray: Cosine cycle range.
-        """
-        L = np.ones(self.n_iter)
-        period = self.n_iter / self.n_cycle
-        step = (self.stop - self.start) / (
-            period * self.ratio
-        )  # step is in [0,1]
-
-        for c in range(self.n_cycle):
-            v, i = self.start, 0
-
-            while v <= self.stop:
-                L[int(i + c * period)] = 0.5 - 0.5 * math.cos(v * math.pi)
-                v += step
-                i += 1
-        return L
-
-
-class VAECallbacks(tf.keras.callbacks.Callback):
-    """Custom callbacks to use with subclassed VAE Keras model.
-
-    Requires y, missing_mask, and sample_weight to be input variables to be properties with setters in the subclassed model.
-    """
+            # Now check if we surpass patience AND have reached min_epochs
+            if self.counter >= self.patience and self.epoch_count >= self.min_epochs:

-    def __init__(self):
-        self.indices = None
+                if self.best_model is None:
+                    self.best_model = model

-    def on_epoch_begin(self, epoch, logs=None):
-        """Shuffle input and target at start of epoch."""
-        y = self.model.y.copy()
-        missing_mask = self.model.missing_mask
-        sample_weight = self.model.sample_weight
-
-        n_samples = len(y)
-        self.indices = np.arange(n_samples)
-        np.random.shuffle(self.indices)
-
-        self.model.y = y[self.indices]
-        self.model.missing_mask = missing_mask[self.indices]
-
-        if sample_weight is not None:
-            self.model.sample_weight = sample_weight[self.indices]
-
-    def on_train_batch_begin(self, batch, logs=None):
-        """Get batch index."""
-        self.model.batch_idx = batch
-
-    def on_epoch_end(self, epoch, logs=None):
-        """Unsort the row indices."""
-        unshuffled = np.argsort(self.indices)
-
-        self.model.y = self.model.y[unshuffled]
-        self.model.missing_mask = self.model.missing_mask[unshuffled]
-
-        if self.model.sample_weight is not None:
-            self.model.sample_weight = self.model.sample_weight[unshuffled]
-
-
-class UBPCallbacks(tf.keras.callbacks.Callback):
-    """Custom callbacks to use with subclassed NLPCA/ UBP Keras models.
-
-    Requires y, missing_mask, V_latent, and sample_weight to be input variables to be properties with setters in the subclassed model.
-    """
-
-    def __init__(self):
-        self.indices = None
-
-    def on_epoch_begin(self, epoch, logs=None):
-        """Shuffle input and target at start of epoch."""
-        y = self.model.y.copy()
-        missing_mask = self.model.missing_mask
-        sample_weight = self.model.sample_weight
-
-        n_samples = len(y)
-        self.indices = np.arange(n_samples)
-        np.random.shuffle(self.indices)
-
-        self.model.y = y[self.indices]
-        self.model.V_latent = self.model.V_latent[self.indices]
-        self.model.missing_mask = missing_mask[self.indices]
-
-        if sample_weight is not None:
-            self.model.sample_weight = sample_weight[self.indices]
-
-    def on_train_batch_begin(self, batch, logs=None):
-        """Get batch index."""
-        self.model.batch_idx = batch
-
-    def on_epoch_end(self, epoch, logs=None):
-        """Unsort the row indices."""
-        unshuffled = np.argsort(self.indices)
-
-        self.model.y = self.model.y[unshuffled]
-        self.model.V_latent = self.model.V_latent[unshuffled]
-        self.model.missing_mask = self.model.missing_mask[unshuffled]
-
-        if self.model.sample_weight is not None:
-            self.model.sample_weight = self.model.sample_weight[unshuffled]
-
-
-class UBPEarlyStopping(tf.keras.callbacks.Callback):
-    """Stop training when the loss is at its min, i.e. the loss stops decreasing.
-
-    Args:
-        patience (int, optional): Number of epochs to wait after min has been hit. After this
-            number of no improvement, training stops. Defaults to 0.
-
-        phase (int, optional): Current UBP Phase. Defaults to 3.
-    """
-
-    def __init__(self, patience=0, phase=3):
-        super(UBPEarlyStopping, self).__init__()
-        self.patience = patience
-        self.phase = phase
-
-        # best_weights to store the weights at which the minimum loss occurs.
-        self.best_weights = None
-
-        # In UBP, the input gets refined during training.
-        # So we have to revert it too.
-        self.best_input = None
-
-    def on_train_begin(self, logs=None):
-        # The number of epoch it has waited when loss is no longer minimum.
-        self.wait = 0
-        # The epoch the training stops at.
-        self.stopped_epoch = 0
-        # Initialize the best as infinity.
-        self.best = np.Inf
-
-    def on_epoch_end(self, epoch, logs=None):
-        current = logs.get("loss")
-        if np.less(current, self.best):
-            self.best = current
-            self.wait = 0
-            # Record the best weights if current results is better (less).
-            self.best_weights = self.model.get_weights()
-
-            if self.phase != 2:
-                # Only refine input in phase 2.
-                self.best_input = self.model.V_latent
-        else:
-            self.wait += 1
-            if self.wait >= self.patience:
-                self.stopped_epoch = epoch
-                self.model.stop_training = True
-                self.model.set_weights(self.best_weights)
+                self.early_stop = True

-                if self.phase != 2:
-                    self.model.V_latent = self.best_input
+                if self.verbose:
+                    self.logger.info(
+                        f"Early stopping triggered at epoch {self.epoch_count}"
+                    )
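
The rewritten callbacks.py replaces the TensorFlow/Keras callbacks with the PyTorch-oriented EarlyStopping class shown above. A minimal usage sketch, following the Example in its docstring; the torch.nn.Linear model and the train_one_epoch function are hypothetical stand-ins for the caller's own model and per-epoch training/validation step:

import torch

from pgsui.impute.unsupervised.callbacks import EarlyStopping


def train_one_epoch(model: torch.nn.Module, epoch: int) -> float:
    # Hypothetical stand-in: run one epoch and return the validation loss.
    return 1.0 / epoch


model = torch.nn.Linear(4, 2)  # any torch.nn.Module works here
early_stopping = EarlyStopping(patience=25, verbose=1, min_epochs=100)

for epoch in range(1, 1001):
    val_loss = train_one_epoch(model, epoch)
    early_stopping(val_loss, model)  # updates best score/model, bumps the counter
    if early_stopping.early_stop:
        model = early_stopping.best_model  # callback retains the best-scoring model
        break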
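
For comparison with what was removed: the deleted CyclicalAnnealingCallback ramped the KL beta weight along a cyclic schedule before multiplying it into the KL Divergence loss. A minimal sketch of its linear variant, adapted from the deleted _linear_cycle_range helper (the standalone function name here is illustrative, not part of the released API):

import numpy as np


def linear_cycle_range(n_iter, start=0.0, stop=1.0, n_cycle=4, ratio=0.5):
    # Mirrors the removed _linear_cycle_range: ramp beta from `start` to `stop`
    # over `ratio` of each cycle, then hold at `stop` for the remainder.
    L = np.ones(n_iter) * stop
    period = n_iter / n_cycle
    step = (stop - start) / (period * ratio)
    for c in range(n_cycle):
        v, i = start, 0
        while v <= stop and int(i + c * period) < n_iter:
            L[int(i + c * period)] = v
            v += step
            i += 1
    return L


print(linear_cycle_range(8, n_cycle=2))  # [0.  0.5 1.  1.  0.  0.5 1.  1. ]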