machinegnostics 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Gnostic - Homoscedasticity and Heteroscedasticity
|
|
3
|
+
|
|
4
|
+
This module to check for homoscedasticity and heteroscedasticity in data.
|
|
5
|
+
|
|
6
|
+
Author: Nirmal Parmar
|
|
7
|
+
Machine Gnostics
|
|
8
|
+
'''
|
|
9
|
+
import numpy as np
|
|
10
|
+
import logging
|
|
11
|
+
from machinegnostics.magcal.util.logging import get_logger
|
|
12
|
+
|
|
13
|
+
class DataScedasticity:
|
|
14
|
+
"""
|
|
15
|
+
Gnostic Scedasticity Test for Homoscedasticity and Heteroscedasticity
|
|
16
|
+
|
|
17
|
+
This class provides a method to check for homoscedasticity and heteroscedasticity in data,
|
|
18
|
+
inspired by fundamental principles rather than standard statistical tests. Unlike classical
|
|
19
|
+
approaches, this implementation uses gnostic variance and gnostic linear regression, which are
|
|
20
|
+
based on the Machine Gnostics framework.
|
|
21
|
+
|
|
22
|
+
Key Differences from Standard Methods:
|
|
23
|
+
- **Variance Calculation:** The variance used here is the gnostic variance, which may differ in
|
|
24
|
+
definition and properties from classical statistical variance. It is designed to capture
|
|
25
|
+
uncertainty and spread in a way that aligns with gnostic principles.
|
|
26
|
+
- **Regression Model:** The linear regression model employed is a gnostic linear regression,
|
|
27
|
+
not the standard least squares regression. This model is tailored to the gnostic approach and
|
|
28
|
+
may use different loss functions, optimization criteria, or regularization.
|
|
29
|
+
- **Test Philosophy:** This is not a formal statistical test (such as Breusch-Pagan or White's test),
|
|
30
|
+
but rather a diagnostic inspired by the fundamentals of the gnostic framework. The method splits
|
|
31
|
+
residuals based on the median of the independent variable and compares the gnostic variances of
|
|
32
|
+
the squared residuals in each half.
|
|
33
|
+
|
|
34
|
+
Usage:
|
|
35
|
+
1. Initialize the class with desired gnostic regression parameters.
|
|
36
|
+
2. Call `fit(x, y)` with your data.
|
|
37
|
+
3. Check the `is_homoscedastic` attribute or returned value to determine if the data is
|
|
38
|
+
homoscedastic (equal gnostic variance across splits) or heteroscedastic.
|
|
39
|
+
|
|
40
|
+
Attributes:
|
|
41
|
+
x (np.ndarray): Independent variable data.
|
|
42
|
+
y (np.ndarray): Dependent variable data.
|
|
43
|
+
model (LinearRegressor): Gnostic linear regression model.
|
|
44
|
+
residuals (np.ndarray): Residuals from the fitted model.
|
|
45
|
+
params (dict): Stores calculated variances and variance ratio.
|
|
46
|
+
variance_ratio (float): Ratio of gnostic variances between data splits.
|
|
47
|
+
is_homoscedastic (bool): True if data is homoscedastic under gnostic test, else False.
|
|
48
|
+
|
|
49
|
+
Example:
|
|
50
|
+
>>> import numpy as np
|
|
51
|
+
>>> from machinegnostics.magcal import DataScedasticity
|
|
52
|
+
>>> x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
|
|
53
|
+
>>> y = np.array([2.1, 4.2, 6.1, 8.3, 10.2, 12.1, 14.2, 16.1, 18.2, 20.1])
|
|
54
|
+
>>> sced = DataScedasticity()
|
|
55
|
+
>>> is_homo = sced.fit(x, y)
|
|
56
|
+
>>> print(f"Is data homoscedastic? {is_homo}")
|
|
57
|
+
>>> print(f"Variance ratio: {sced.variance_ratio}")
|
|
58
|
+
|
|
59
|
+
Note:
|
|
60
|
+
This class is intended for users interested in gnostic data analysis. Results and interpretations
|
|
61
|
+
may not align with classical statistical methods. For more details on gnostic variance and regression,
|
|
62
|
+
refer to the Machine Gnostics documentation.
|
|
63
|
+
"""
|
|
64
|
+
|
|
65
|
+
def __init__(self,
|
|
66
|
+
scale: str | int | float = 'auto',
|
|
67
|
+
max_iter: int = 100,
|
|
68
|
+
tol: float = 0.001,
|
|
69
|
+
mg_loss: str = 'hi',
|
|
70
|
+
early_stopping: bool = True,
|
|
71
|
+
verbose: bool = False,
|
|
72
|
+
data_form: str = 'a',
|
|
73
|
+
gnostic_characteristics: bool = True,
|
|
74
|
+
history: bool = True):
|
|
75
|
+
|
|
76
|
+
from machinegnostics.models.regression import LinearRegressor
|
|
77
|
+
self.x = None
|
|
78
|
+
self.y = None
|
|
79
|
+
self.model = LinearRegressor(scale=scale,
|
|
80
|
+
max_iter=max_iter,
|
|
81
|
+
tol=tol,
|
|
82
|
+
mg_loss=mg_loss,
|
|
83
|
+
early_stopping=early_stopping,
|
|
84
|
+
verbose=verbose,
|
|
85
|
+
data_form=data_form,
|
|
86
|
+
gnostic_characteristics=gnostic_characteristics,
|
|
87
|
+
history=history)
|
|
88
|
+
self.residuals = None
|
|
89
|
+
self.params = {}
|
|
90
|
+
self.logger = get_logger(self.__class__.__name__, logging.DEBUG if verbose else logging.WARNING)
|
|
91
|
+
self.logger.debug(f"{self.__class__.__name__} initialized:")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _split_residuals(self):
|
|
95
|
+
"""
|
|
96
|
+
Split residuals into two halves based on the median of x. zip x and residuals.
|
|
97
|
+
sorted(zip(x, residuals))
|
|
98
|
+
"""
|
|
99
|
+
self.logger.info("Splitting residuals based on median of x.")
|
|
100
|
+
median_x = np.median(self.x)
|
|
101
|
+
left_half = [(xi, ri) for xi, ri in zip(self.x, self.residuals) if xi <= median_x]
|
|
102
|
+
right_half = [(xi, ri) for xi, ri in zip(self.x, self.residuals) if xi > median_x]
|
|
103
|
+
return left_half, right_half
|
|
104
|
+
|
|
105
|
+
def _variance_ratio(self):
|
|
106
|
+
"""
|
|
107
|
+
Calculate the variance ratio of the squared residuals in the two halves.
|
|
108
|
+
|
|
109
|
+
Returns:
|
|
110
|
+
float: Variance ratio of the squared residuals.
|
|
111
|
+
"""
|
|
112
|
+
from machinegnostics import variance
|
|
113
|
+
self.logger.info("Calculating variance ratio.")
|
|
114
|
+
left_half, right_half = self._split_residuals()
|
|
115
|
+
left_residuals = np.array([ri for xi, ri in left_half])
|
|
116
|
+
right_residuals = np.array([ri for xi, ri in right_half])
|
|
117
|
+
var_left = variance(left_residuals ** 2)
|
|
118
|
+
var_right = variance(right_residuals ** 2)
|
|
119
|
+
|
|
120
|
+
self.logger.debug(f"Left variance: {var_left}, Right variance: {var_right}")
|
|
121
|
+
# cap values between [1, 1e-9]
|
|
122
|
+
var_left = float(var_left)
|
|
123
|
+
var_right = float(np.maximum(var_right, 1e-9)) # to avoid division by zero
|
|
124
|
+
if var_right == 0 and var_left == 0:
|
|
125
|
+
variance_ratio = 1.0
|
|
126
|
+
elif var_right == 0:
|
|
127
|
+
variance_ratio = np.inf
|
|
128
|
+
else:
|
|
129
|
+
variance_ratio = var_left / var_right
|
|
130
|
+
|
|
131
|
+
# params
|
|
132
|
+
self.logger.info(f"Variance ratio calculated: {variance_ratio}")
|
|
133
|
+
self.params['var_left'] = var_left
|
|
134
|
+
self.params['var_right'] = var_right
|
|
135
|
+
self.params['variance_ratio'] = variance_ratio
|
|
136
|
+
return variance_ratio
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _is_homoscedastic(self, threshold: float = 0.001):
|
|
140
|
+
"""
|
|
141
|
+
Check if the data is homoscedastic based on the variance ratio.
|
|
142
|
+
|
|
143
|
+
Args:
|
|
144
|
+
threshold (float): Threshold to determine homoscedasticity.
|
|
145
|
+
|
|
146
|
+
Returns:
|
|
147
|
+
bool: True if homoscedastic, False if heteroscedastic.
|
|
148
|
+
"""
|
|
149
|
+
if self.variance_ratio is None:
|
|
150
|
+
self.logger.error("Variance ratio not calculated. Please run fit() first.")
|
|
151
|
+
raise ValueError("Variance ratio not calculated. Please run fit() first.")
|
|
152
|
+
return abs(self.variance_ratio - 1) < threshold
|
|
153
|
+
|
|
154
|
+
def fit(self, x: np.ndarray, y: np.ndarray) -> bool:
|
|
155
|
+
"""
|
|
156
|
+
Fit the gnostic linear regression model to the data and assess scedasticity.
|
|
157
|
+
|
|
158
|
+
This method fits the gnostic linear regression model to the provided data, computes the residuals,
|
|
159
|
+
and evaluates homoscedasticity or heteroscedasticity using the gnostic variance approach. Unlike
|
|
160
|
+
standard statistical tests, this method uses gnostic variance and gnostic regression, which are
|
|
161
|
+
based on the Machine Gnostics framework and may yield different results from classical methods.
|
|
162
|
+
|
|
163
|
+
The method splits the data based on the median of the independent variable, calculates the gnostic
|
|
164
|
+
variance of squared residuals in each half, and determines if the data is homoscedastic (equal
|
|
165
|
+
gnostic variance) or heteroscedastic.
|
|
166
|
+
|
|
167
|
+
Args:
|
|
168
|
+
x (np.ndarray): Independent variable data.
|
|
169
|
+
y (np.ndarray): Dependent variable data.
|
|
170
|
+
|
|
171
|
+
Returns:
|
|
172
|
+
bool: True if data is homoscedastic under the gnostic test, False if heteroscedastic.
|
|
173
|
+
|
|
174
|
+
Note:
|
|
175
|
+
This is not a standard statistical test. For details on the gnostic approach, see the
|
|
176
|
+
Machine Gnostics documentation.
|
|
177
|
+
"""
|
|
178
|
+
self.logger.info("Fitting DataScedasticity model...")
|
|
179
|
+
self.x = x
|
|
180
|
+
self.y = y
|
|
181
|
+
|
|
182
|
+
self.logger.info("Fitting gnostic regression model.")
|
|
183
|
+
self.model.fit(x, y)
|
|
184
|
+
self.logger.debug(f"Model calculations complete.")
|
|
185
|
+
|
|
186
|
+
self.logger.info("Calculating residuals.")
|
|
187
|
+
self.residuals = y - self.model.predict(x)
|
|
188
|
+
|
|
189
|
+
# calculate variance ratio
|
|
190
|
+
self.logger.info("Calculating variance ratio.")
|
|
191
|
+
self.variance_ratio = self._variance_ratio()
|
|
192
|
+
|
|
193
|
+
# check
|
|
194
|
+
self.logger.info("Checking homoscedasticity.")
|
|
195
|
+
self.is_homoscedastic = self._is_homoscedastic()
|
|
196
|
+
self.logger.info(f"Homoscedasticity check result - is_homoscedastic: {self.is_homoscedastic}")
|
|
197
|
+
return self.is_homoscedastic
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
from machinegnostics.magcal.util.logging import get_logger
|
|
2
|
+
import numpy as np
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
class WEDF:
    """
    Weighted Empirical Distribution Function (WEDF)

    This class implements the WEDF that accounts for data weights, which is useful
    when dealing with repeated values or data points of varying importance.
    """

    def __init__(self, data, weights=None, data_lb=None, data_ub=None, verbose=False):
        """
        Initialize the WEDF with data points and optional weights.

        Parameters
        ----------
        data : array-like
            Input data values
        weights : array-like, optional
            A priori weights for each data point. If None, equal weights are assigned.
        data_lb : float, optional
            Lower bound for the data range
        data_ub : float, optional
            Upper bound for the data range
        verbose : bool, optional
            If True, set logging level to DEBUG. Default is False.

        Raises
        ------
        ValueError
            If data is empty or non-numeric, if weights has a different length
            than data, or if the weights do not have a positive sum.
        """
        self.logger = get_logger(self.__class__.__name__, logging.DEBUG if verbose else logging.WARNING)
        self.logger.debug(f"{self.__class__.__name__} initialized with parameters: %s", self.__dict__)

        # Convert input to a numpy array and validate it BEFORE taking
        # min/max: np.min/np.max on an empty array would otherwise raise
        # numpy's internal error and hide the intended message.
        self.data = np.asarray(data)
        if self.data.size == 0:
            self.logger.error("data must contain at least one element")
            raise ValueError("data must contain at least one element")
        if not np.issubdtype(self.data.dtype, np.number):
            self.logger.error("data must be numeric")
            raise ValueError("data must be numeric")

        # Resolve data-range bounds, defaulting to the observed extremes.
        self.data_lb = np.min(self.data) if data_lb is None else data_lb
        self.data_ub = np.max(self.data) if data_ub is None else data_ub
        if self.data_lb >= self.data_ub:
            self.logger.info("data_lb must be less than data_ub")

        # Sort data and carry the corresponding weights along.
        sort_idx = np.argsort(self.data)
        self.data = self.data[sort_idx]

        if weights is None:
            # Equal weights if none provided
            self.weights = np.ones_like(self.data)
        else:
            weights = np.asarray(weights)
            if weights.shape[0] != sort_idx.shape[0]:
                self.logger.error("weights must have the same length as data")
                raise ValueError("weights must have the same length as data")
            self.weights = weights[sort_idx]

        # Normalize weights; a non-positive total would silently produce
        # NaN/inf probabilities, so reject it explicitly.
        total_weight = np.sum(self.weights)
        if total_weight <= 0:
            self.logger.error("weights must have a positive sum")
            raise ValueError("weights must have a positive sum")
        self.normalized_weights = self.weights / total_weight

        # Calculate WEDF values
        self._calculate_wedf()

    def _calculate_wedf(self):
        """Calculate the WEDF values at each (sorted) data point.

        Uses the midpoint convention: the first value is w_0/2 and each
        subsequent value adds the average of two consecutive weights.
        """
        n = len(self.data)
        self.wedf_values = np.zeros(n)

        # First value
        self.wedf_values[0] = self.normalized_weights[0] / 2

        # Remaining values using recursive relation
        for k in range(1, n):
            self.wedf_values[k] = (self.wedf_values[k-1] +
                                   (self.normalized_weights[k-1] + self.normalized_weights[k]) / 2)

    def fit(self, z):
        """
        Evaluate the WEDF at given points.

        Parameters
        ----------
        z : float or array-like
            Points at which to fit the WEDF

        Returns
        -------
        float or ndarray
            WEDF values at the given points: 0.0 at or below the smallest
            data point, 1.0 at or above the largest, otherwise the WEDF value
            of the largest data point strictly below the query.
        """
        self.logger.info("Fitting WEDF at given points.")
        z = np.asarray(z)
        single_value = z.ndim == 0

        if single_value:
            z = np.array([z])

        result = np.zeros_like(z, dtype=float)

        for i, point in enumerate(z):
            if point <= self.data[0]:
                result[i] = 0.0
            elif point >= self.data[-1]:
                result[i] = 1.0
            else:
                # Find the index of the largest data point less than z
                idx = np.searchsorted(self.data, point) - 1
                result[i] = self.wedf_values[idx]

        self.logger.info("WEDF fitting completed.")
        return result[0] if single_value else result

    def plot(self, ax=None):
        """
        Plot the WEDF as a step function.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            Axes to plot on. If None, a new figure and axes are created.

        Returns
        -------
        matplotlib.axes.Axes or None
            The axes containing the plot, or None if matplotlib is not
            installed.
        """
        try:
            import matplotlib.pyplot as plt
            if ax is None:
                fig, ax = plt.subplots()

            # Create a step function representation
            x = np.repeat(self.data, 2)[1:]
            y = np.repeat(self.wedf_values, 2)[:-1]

            # Add endpoints for proper step function
            x = np.concatenate([[self.data[0]], x, [self.data[-1]]])
            y = np.concatenate([[0], y, [1]])

            ax.plot(x, y, 'b-', label='WEDF')
            ax.set_xlabel('Data Value')
            ax.set_ylabel('Cumulative Probability')
            ax.set_title('Weighted Empirical Distribution Function')
            ax.grid(True)
            return ax

        except ImportError:
            self.logger.warning("Matplotlib is required for plotting.")
            return None

    def generate_ks_points(self, num_points=None):
        """
        Generate Kolmogorov-Smirnov points for distribution fitting.

        Parameters
        ----------
        num_points : int, optional
            Number of K-S points to generate. If None, uses the length of the data.

        Returns
        -------
        Z0 : ndarray
            Generated K-S points, evenly spread over [data_lb, data_ub]
        ks_probs : ndarray
            Corresponding probabilities for the K-S points
        """
        # Use data length if not specified
        L = num_points if num_points is not None else len(self.data)

        # Generate K-S probabilities: (2k-1)/(2L) for k = 1..L
        ks_probs = np.arange(1, 2*L, 2) / (2*L)

        # Generate corresponding points
        data_range = self.data_ub - self.data_lb
        Z0 = self.data_lb + data_range * ks_probs

        return Z0, ks_probs