pg-sui 0.2.0__py3-none-any.whl → 1.6.14.dev9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/METADATA +101 -79
- pg_sui-1.6.14.dev9.dist-info/RECORD +81 -0
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/WHEEL +1 -1
- pg_sui-1.6.14.dev9.dist-info/entry_points.txt +4 -0
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info/licenses}/LICENSE +0 -0
- pg_sui-1.6.14.dev9.dist-info/top_level.txt +1 -0
- pgsui/__init__.py +35 -54
- pgsui/_version.py +34 -0
- pgsui/cli.py +909 -0
- pgsui/data_processing/__init__.py +0 -0
- pgsui/data_processing/config.py +565 -0
- pgsui/data_processing/containers.py +1424 -0
- pgsui/data_processing/transformers.py +557 -907
- pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
- pgsui/electron/app/__main__.py +5 -0
- pgsui/electron/app/extra-resources/.gitkeep +1 -0
- pgsui/electron/app/icons/icons/1024x1024.png +0 -0
- pgsui/electron/app/icons/icons/128x128.png +0 -0
- pgsui/electron/app/icons/icons/16x16.png +0 -0
- pgsui/electron/app/icons/icons/24x24.png +0 -0
- pgsui/electron/app/icons/icons/256x256.png +0 -0
- pgsui/electron/app/icons/icons/32x32.png +0 -0
- pgsui/electron/app/icons/icons/48x48.png +0 -0
- pgsui/electron/app/icons/icons/512x512.png +0 -0
- pgsui/electron/app/icons/icons/64x64.png +0 -0
- pgsui/electron/app/icons/icons/icon.icns +0 -0
- pgsui/electron/app/icons/icons/icon.ico +0 -0
- pgsui/electron/app/main.js +227 -0
- pgsui/electron/app/package-lock.json +6894 -0
- pgsui/electron/app/package.json +51 -0
- pgsui/electron/app/preload.js +15 -0
- pgsui/electron/app/server.py +157 -0
- pgsui/electron/app/ui/logo.png +0 -0
- pgsui/electron/app/ui/renderer.js +131 -0
- pgsui/electron/app/ui/styles.css +59 -0
- pgsui/electron/app/ui/ui_shim.js +72 -0
- pgsui/electron/bootstrap.py +43 -0
- pgsui/electron/launch.py +57 -0
- pgsui/electron/package.json +14 -0
- pgsui/example_data/__init__.py +0 -0
- pgsui/example_data/phylip_files/__init__.py +0 -0
- pgsui/example_data/phylip_files/test.phy +0 -0
- pgsui/example_data/popmaps/__init__.py +0 -0
- pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
- pgsui/example_data/structure_files/__init__.py +0 -0
- pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
- pgsui/impute/__init__.py +0 -0
- pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
- pgsui/impute/deterministic/imputers/mode.py +844 -0
- pgsui/impute/deterministic/imputers/nmf.py +221 -0
- pgsui/impute/deterministic/imputers/phylo.py +973 -0
- pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
- pgsui/impute/supervised/__init__.py +0 -0
- pgsui/impute/supervised/base.py +343 -0
- pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
- pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
- pgsui/impute/supervised/imputers/random_forest.py +291 -0
- pgsui/impute/unsupervised/__init__.py +0 -0
- pgsui/impute/unsupervised/base.py +1118 -0
- pgsui/impute/unsupervised/callbacks.py +92 -262
- {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
- pgsui/impute/unsupervised/imputers/autoencoder.py +1285 -0
- pgsui/impute/unsupervised/imputers/nlpca.py +1554 -0
- pgsui/impute/unsupervised/imputers/ubp.py +1575 -0
- pgsui/impute/unsupervised/imputers/vae.py +1228 -0
- pgsui/impute/unsupervised/loss_functions.py +261 -0
- pgsui/impute/unsupervised/models/__init__.py +0 -0
- pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
- pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
- pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
- pgsui/impute/unsupervised/models/vae_model.py +269 -630
- pgsui/impute/unsupervised/nn_scorers.py +255 -0
- pgsui/utils/__init__.py +0 -0
- pgsui/utils/classification_viz.py +608 -0
- pgsui/utils/logging_utils.py +22 -0
- pgsui/utils/misc.py +35 -480
- pgsui/utils/plotting.py +996 -829
- pgsui/utils/pretty_metrics.py +290 -0
- pgsui/utils/scorers.py +213 -666
- pg_sui-0.2.0.dist-info/RECORD +0 -75
- pg_sui-0.2.0.dist-info/top_level.txt +0 -3
- pgsui/example_data/phylip_files/test_n10.phy +0 -118
- pgsui/example_data/phylip_files/test_n100.phy +0 -118
- pgsui/example_data/phylip_files/test_n2.phy +0 -118
- pgsui/example_data/phylip_files/test_n500.phy +0 -118
- pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
- pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
- pgsui/example_data/trees/test.iqtree +0 -376
- pgsui/example_data/trees/test.qmat +0 -5
- pgsui/example_data/trees/test.rate +0 -2033
- pgsui/example_data/trees/test.tre +0 -1
- pgsui/example_data/trees/test_n10.rate +0 -19
- pgsui/example_data/trees/test_n100.rate +0 -109
- pgsui/example_data/trees/test_n500.rate +0 -509
- pgsui/example_data/trees/test_siterates.txt +0 -2024
- pgsui/example_data/trees/test_siterates_n10.txt +0 -10
- pgsui/example_data/trees/test_siterates_n100.txt +0 -100
- pgsui/example_data/trees/test_siterates_n500.txt +0 -500
- pgsui/example_data/vcf_files/test.vcf +0 -244
- pgsui/example_data/vcf_files/test.vcf.gz +0 -0
- pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
- pgsui/impute/estimators.py +0 -1268
- pgsui/impute/impute.py +0 -1463
- pgsui/impute/simple_imputers.py +0 -1431
- pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
- pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
- pgsui/impute/unsupervised/keras_classifiers.py +0 -697
- pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
- pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
- pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
- pgsui/pg_sui.py +0 -261
- pgsui/utils/sequence_tools.py +0 -407
- simulation/sim_benchmarks.py +0 -333
- simulation/sim_treeparams.py +0 -475
- test/__init__.py +0 -0
- test/pg_sui_simtest.py +0 -215
- test/pg_sui_testing.py +0 -523
- test/test.py +0 -151
- test/test_pgsui.py +0 -374
- test/test_tkc.py +0 -185
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
# Standard library
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Literal
|
|
5
|
+
|
|
6
|
+
# Third-party
|
|
7
|
+
import numpy as np
|
|
8
|
+
from sklearn.ensemble import RandomForestClassifier
|
|
9
|
+
from sklearn.exceptions import NotFittedError
|
|
10
|
+
from sklearn.experimental import enable_iterative_imputer # noqa
|
|
11
|
+
from sklearn.impute import IterativeImputer
|
|
12
|
+
from sklearn.model_selection import train_test_split
|
|
13
|
+
|
|
14
|
+
# Project
|
|
15
|
+
from snpio.analysis.genotype_encoder import GenotypeEncoder
|
|
16
|
+
from snpio.utils.logging import LoggerManager
|
|
17
|
+
|
|
18
|
+
from pgsui.data_processing.config import apply_dot_overrides, load_yaml_to_dataclass
|
|
19
|
+
from pgsui.data_processing.containers import (
|
|
20
|
+
RFConfig,
|
|
21
|
+
_ImputerParams,
|
|
22
|
+
_RFParams,
|
|
23
|
+
_SimParams,
|
|
24
|
+
)
|
|
25
|
+
from pgsui.data_processing.transformers import SimGenotypeDataTransformer
|
|
26
|
+
from pgsui.impute.supervised.base import BaseImputer
|
|
27
|
+
from pgsui.utils.logging_utils import configure_logger
|
|
28
|
+
from pgsui.utils.plotting import Plotting
|
|
29
|
+
from pgsui.utils.scorers import Scorer
|
|
30
|
+
|
|
31
|
+
if TYPE_CHECKING:
|
|
32
|
+
from snpio.read_input.genotype_data import GenotypeData
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def ensure_rf_config(config: RFConfig | Dict | str | None) -> RFConfig:
    """Normalize any supported configuration input into an ``RFConfig``.

    Args:
        config: One of

            * ``None`` -- a default-constructed ``RFConfig`` is returned.
            * ``RFConfig`` -- returned unchanged (same object).
            * ``str`` -- forwarded to ``load_yaml_to_dataclass`` together
              with ``RFConfig``.
            * ``dict`` -- a (possibly nested) mapping of overrides. An
              optional top-level ``"preset"`` entry selects the starting
              configuration via ``RFConfig.from_preset``; the remaining
              entries are flattened to dotted keys and handed to
              ``apply_dot_overrides``.

    Returns:
        RFConfig: The resolved configuration.

    Raises:
        TypeError: If ``config`` is none of the supported types.
    """

    def _iter_dotted(mapping: Dict[str, Any], parent: str = ""):
        # Depth-first walk yielding ("a.b.c", leaf_value) pairs. Nested
        # dicts are descended into; an empty nested dict yields nothing.
        for name, item in mapping.items():
            path = f"{parent}.{name}" if parent else name
            if isinstance(item, dict):
                yield from _iter_dotted(item, path)
            else:
                yield path, item

    if config is None:
        return RFConfig()
    if isinstance(config, RFConfig):
        return config
    if isinstance(config, str):
        return load_yaml_to_dataclass(config, RFConfig)
    if not isinstance(config, dict):
        raise TypeError("config must be an RFConfig, dict, YAML path, or None.")

    # Work on a shallow copy so popping "preset" never mutates the caller's
    # dictionary.
    remaining = dict(config)
    preset_name = remaining.pop("preset", None)
    if preset_name:
        starting_cfg = RFConfig.from_preset(preset_name)
    else:
        starting_cfg = RFConfig()

    dotted_overrides: Dict[str, Any] = dict(_iter_dotted(remaining))
    return apply_dot_overrides(starting_cfg, dotted_overrides)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class ImputeRandomForest(BaseImputer):
    """Supervised RF imputer driven by :class:`RFConfig`.

    The imputer wraps scikit-learn's ``IterativeImputer`` around a
    ``RandomForestClassifier`` estimator. ``fit()`` trains on one subset of
    the samples and scores the model on simulated-missing genotypes in the
    held-out subset; ``transform()`` imputes the full 0/1/2 matrix and
    decodes it through the ``GenotypeEncoder``.

    Attributes with a trailing underscore are populated by ``fit()``.
    """

    def __init__(
        self,
        genotype_data: "GenotypeData",
        *,
        config: RFConfig | Dict | str | None = None,
        overrides: Dict | None = None,
    ) -> None:
        """Resolve the configuration and cache the settings used by fit().

        Args:
            genotype_data: Genotype container; stored on the instance and
                wrapped in a ``GenotypeEncoder``.
            config: Anything accepted by ``ensure_rf_config`` (``RFConfig``,
                dict, YAML path string, or ``None`` for defaults).
            overrides: Optional overrides passed to ``cfg.apply_overrides``
                after ``config`` has been resolved. Ignored when empty or
                ``None``.
        """
        self.model_name = "ImputeRandomForest"
        self.Model = RandomForestClassifier

        cfg = ensure_rf_config(config)
        if overrides:
            cfg = cfg.apply_overrides(overrides)
        self.cfg = cfg

        self.genotype_data = genotype_data
        self.pgenc = GenotypeEncoder(genotype_data)

        # I/O and runtime settings.
        self.prefix = cfg.io.prefix
        self.seed = cfg.io.seed
        self.n_jobs = cfg.io.n_jobs
        self.verbose = cfg.io.verbose
        self.debug = cfg.io.debug

        super().__init__(verbose=self.verbose, debug=self.debug)

        logman = LoggerManager(
            __name__, prefix=self.prefix, verbose=self.verbose, debug=self.debug
        )
        self.logger = configure_logger(
            logman.get_logger(), verbose=self.verbose, debug=self.debug
        )

        # NOTE(review): _create_model_directories is not defined in this
        # class; presumably inherited from BaseImputer -- confirm.
        self._create_model_directories(
            self.prefix, ["models", "plots", "metrics", "optimize", "parameters"]
        )

        # Plot settings. The title font size deliberately mirrors the source
        # and reuses cfg.plot.fontsize.
        self.plot_format: Literal["png", "pdf", "svg", "jpg", "jpeg"] = cfg.plot.fmt
        self.plot_fontsize = cfg.plot.fontsize
        self.title_fontsize = cfg.plot.fontsize
        self.plot_dpi = cfg.plot.dpi
        self.despine = cfg.plot.despine
        self.show_plots = cfg.plot.show

        self.validation_split = cfg.train.validation_split

        # Keyword arguments for the RandomForestClassifier estimator.
        self.params = _RFParams(
            n_estimators=cfg.model.n_estimators,
            max_depth=cfg.model.max_depth,
            min_samples_split=cfg.model.min_samples_split,
            min_samples_leaf=cfg.model.min_samples_leaf,
            max_features=cfg.model.max_features,
            criterion=cfg.model.criterion,
            class_weight=cfg.model.class_weight,
        )

        # Keyword arguments for the IterativeImputer wrapper.
        self.imputer_params = _ImputerParams(
            n_nearest_features=cfg.imputer.n_nearest_features,
            max_iter=cfg.imputer.max_iter,
            random_state=self.seed,
            verbose=self.verbose,
        )

        # Keyword arguments for SimGenotypeDataTransformer (simulated
        # missingness used for evaluation).
        self.sim_params = _SimParams(
            prop_missing=cfg.sim.prop_missing,
            strategy=cfg.sim.strategy,
            missing_val=cfg.sim.missing_val,
            het_boost=cfg.sim.het_boost,
            seed=self.seed,
        )

        self.max_iter = cfg.imputer.max_iter
        self.n_nearest_features = cfg.imputer.n_nearest_features

        # Will be set in fit()
        self.is_haploid_: bool | None = None
        self.num_classes_: int | None = None
        self.num_features_: int | None = None
        self.rf_models_: List[RandomForestClassifier | None] | None = None
        self.is_fit_: bool = False

    def fit(self) -> "BaseImputer":
        """Fit the imputer on ``self.genotype_data``; takes no arguments.

        Steps:
            1) Take the encoder's 0/1/2 matrix and convert negative
               (missing) codes to NaN.
            2) Split samples into train/test.
            3) Simulate additional missing genotypes on the test samples.
            4) Fit ``IterativeImputer`` (random-forest estimator) on the
               training samples.
            5) Impute the simulated test matrix and evaluate predictions at
               the simulated-missing positions, both as 0/1/2 classes and
               as IUPAC codes, via the evaluation/plotting helpers.
            6) Save the combined model, imputer and simulation parameters.

        Returns:
            BaseImputer: self.
        """
        # Prepare utilities & metadata
        self.scorers_ = Scorer(
            prefix=self.prefix, average="macro", verbose=self.verbose, debug=self.debug
        )

        pf: Literal["png", "pdf", "svg", "jpg", "jpeg"] = self.plot_format

        self.plotter_ = Plotting(
            self.model_name,
            prefix=self.prefix,
            plot_format=pf,
            plot_dpi=self.plot_dpi,
            plot_fontsize=self.plot_fontsize,
            title_fontsize=self.title_fontsize,
            despine=self.despine,
            show_plots=self.show_plots,
            verbose=self.verbose,
            debug=self.debug,
            multiqc=True,
            multiqc_section=f"PG-SUI: {self.model_name} Model Imputation",
        )

        X_int = self.pgenc.genotypes_012
        self.X012_ = X_int.astype(float)
        self.X012_[self.X012_ < 0] = np.nan  # Ensure missing are NaN

        # Treated as haploid when no entry equals 1 (the heterozygous code
        # in 0/1/2 encoding); haploid data therefore has two classes.
        self.is_haploid_ = np.count_nonzero(self.X012_ == 1) == 0
        self.num_classes_ = 2 if self.is_haploid_ else 3
        self.n_samples_, self.n_features_ = X_int.shape
        # Keep the attribute declared in __init__ in sync with n_features_;
        # previously it stayed None after fitting.
        self.num_features_ = self.n_features_

        # Split
        X_train, X_test = train_test_split(
            self.X012_,
            test_size=self.validation_split,
            random_state=self.seed,
            shuffle=True,
        )

        # Simulate missing values on test set.
        sim_transformer = SimGenotypeDataTransformer(**self.sim_params.to_dict())

        X_test = np.nan_to_num(X_test, nan=-1)  # ensure missing are -1
        sim_transformer.fit(X_test)
        X_test_sim, missing_masks = sim_transformer.transform(X_test)
        sim_mask = missing_masks["simulated"]
        X_test_sim[X_test_sim < 0] = np.nan  # ensure missing are NaN

        # Estimator kwargs: configured RF params plus runtime settings.
        self.model_params_ = self.params.to_dict()
        self.model_params_["n_jobs"] = self.n_jobs
        self.model_params_["random_state"] = self.seed

        # Train IterativeImputer
        est = self.Model(**self.model_params_)
        self.imputer_ = IterativeImputer(estimator=est, **self.imputer_params.to_dict())
        self.imputer_.fit(X_train)
        self.is_fit_ = True

        X_test_imputed = self.imputer_.transform(X_test_sim)

        # Predict on simulated test set
        y_true_flat = X_test[sim_mask].copy()
        y_pred_flat = X_test_imputed[sim_mask].copy()

        # Round and clip predictions to valid {0,1,2} or {0,1} if haploid.
        max_code = 1 if self.is_haploid_ else 2
        y_pred_flat = np.clip(np.rint(y_pred_flat), 0, max_code).astype(
            int, copy=False
        )
        y_true_flat = np.clip(np.rint(y_true_flat), 0, max_code).astype(
            int, copy=False
        )

        # Evaluate (012 / zygosity)
        # NOTE(review): the _evaluate_* and _save_best_params helpers are not
        # defined in this class; presumably inherited from BaseImputer.
        self._evaluate_012_and_plot(y_true_flat.copy(), y_pred_flat.copy())

        # Evaluate (IUPAC)
        encodings_dict = {
            "A": 0,
            "C": 1,
            "G": 2,
            "T": 3,
            "W": 4,
            "R": 5,
            "M": 6,
            "K": 7,
            "Y": 8,
            "S": 9,
            "N": -1,
        }

        y_true_iupac_tmp = self.pgenc.decode_012(y_true_flat)
        y_pred_iupac_tmp = self.pgenc.decode_012(y_pred_flat)
        y_true_iupac = self.pgenc.convert_int_iupac(
            y_true_iupac_tmp, encodings_dict=encodings_dict
        )
        y_pred_iupac = self.pgenc.convert_int_iupac(
            y_pred_iupac_tmp, encodings_dict=encodings_dict
        )
        self._evaluate_iupac10_and_plot(y_true_iupac, y_pred_iupac)

        # Build the saved parameter set as a NEW dict. Aliasing
        # self.model_params_ here would leak imputer/simulation keys into the
        # estimator kwargs stored on the instance.
        self.best_params_ = {
            **self.model_params_,
            **self.imputer_params.to_dict(),
            **self.sim_params.to_dict(),
        }
        self._save_best_params(self.best_params_)

        return self

    def transform(self) -> np.ndarray:
        """Impute all samples and return the decoded genotypes.

        Applies the fitted ``IterativeImputer`` to a copy of the full 0/1/2
        matrix stored by ``fit()``. Any value that is still negative or NaN
        after imputation is logged and set to -9 before the matrix is passed
        to ``GenotypeEncoder.decode_012``.

        Returns:
            np.ndarray: Output of ``decode_012`` for the imputed
            (n_samples, n_loci) matrix.

        Raises:
            NotFittedError: If ``fit()`` has not been called.
        """
        if not self.is_fit_:
            msg = "Imputer has not been fit; call fit() before transform()."
            self.logger.error(msg)
            raise NotFittedError(msg)

        X_imp = self.imputer_.transform(self.X012_.copy())

        # Compute the leftover-missing mask once and reuse it.
        still_missing = np.isnan(X_imp) | (X_imp < 0)
        if still_missing.any():
            self.logger.warning("Some imputed values are still missing; setting to -9.")
            X_imp[still_missing] = -9

        return self.pgenc.decode_012(X_imp)
|
|
File without changes
|