machinegnostics 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- machinegnostics/__init__.py +24 -0
- machinegnostics/magcal/__init__.py +37 -0
- machinegnostics/magcal/characteristics.py +460 -0
- machinegnostics/magcal/criteria_eval.py +268 -0
- machinegnostics/magcal/criterion.py +140 -0
- machinegnostics/magcal/data_conversion.py +381 -0
- machinegnostics/magcal/gcor.py +64 -0
- machinegnostics/magcal/gdf/__init__.py +2 -0
- machinegnostics/magcal/gdf/base_df.py +39 -0
- machinegnostics/magcal/gdf/base_distfunc.py +1202 -0
- machinegnostics/magcal/gdf/base_egdf.py +823 -0
- machinegnostics/magcal/gdf/base_eldf.py +830 -0
- machinegnostics/magcal/gdf/base_qgdf.py +1234 -0
- machinegnostics/magcal/gdf/base_qldf.py +1019 -0
- machinegnostics/magcal/gdf/cluster_analysis.py +456 -0
- machinegnostics/magcal/gdf/data_cluster.py +975 -0
- machinegnostics/magcal/gdf/data_intervals.py +853 -0
- machinegnostics/magcal/gdf/data_membership.py +536 -0
- machinegnostics/magcal/gdf/der_egdf.py +243 -0
- machinegnostics/magcal/gdf/distfunc_engine.py +841 -0
- machinegnostics/magcal/gdf/egdf.py +324 -0
- machinegnostics/magcal/gdf/eldf.py +297 -0
- machinegnostics/magcal/gdf/eldf_intv.py +609 -0
- machinegnostics/magcal/gdf/eldf_ma.py +627 -0
- machinegnostics/magcal/gdf/homogeneity.py +1218 -0
- machinegnostics/magcal/gdf/intv_engine.py +1523 -0
- machinegnostics/magcal/gdf/marginal_intv_analysis.py +558 -0
- machinegnostics/magcal/gdf/qgdf.py +289 -0
- machinegnostics/magcal/gdf/qldf.py +296 -0
- machinegnostics/magcal/gdf/scedasticity.py +197 -0
- machinegnostics/magcal/gdf/wedf.py +181 -0
- machinegnostics/magcal/gdf/z0_estimator.py +1047 -0
- machinegnostics/magcal/layer_base.py +42 -0
- machinegnostics/magcal/layer_history_base.py +74 -0
- machinegnostics/magcal/layer_io_process_base.py +238 -0
- machinegnostics/magcal/layer_param_base.py +448 -0
- machinegnostics/magcal/mg_weights.py +36 -0
- machinegnostics/magcal/sample_characteristics.py +532 -0
- machinegnostics/magcal/scale_optimization.py +185 -0
- machinegnostics/magcal/scale_param.py +313 -0
- machinegnostics/magcal/util/__init__.py +0 -0
- machinegnostics/magcal/util/dis_docstring.py +18 -0
- machinegnostics/magcal/util/logging.py +24 -0
- machinegnostics/magcal/util/min_max_float.py +34 -0
- machinegnostics/magnet/__init__.py +0 -0
- machinegnostics/metrics/__init__.py +28 -0
- machinegnostics/metrics/accu.py +61 -0
- machinegnostics/metrics/accuracy.py +67 -0
- machinegnostics/metrics/auto_correlation.py +183 -0
- machinegnostics/metrics/auto_covariance.py +204 -0
- machinegnostics/metrics/cls_report.py +130 -0
- machinegnostics/metrics/conf_matrix.py +93 -0
- machinegnostics/metrics/correlation.py +178 -0
- machinegnostics/metrics/cross_variance.py +167 -0
- machinegnostics/metrics/divi.py +82 -0
- machinegnostics/metrics/evalmet.py +109 -0
- machinegnostics/metrics/f1_score.py +128 -0
- machinegnostics/metrics/gmmfe.py +108 -0
- machinegnostics/metrics/hc.py +141 -0
- machinegnostics/metrics/mae.py +72 -0
- machinegnostics/metrics/mean.py +117 -0
- machinegnostics/metrics/median.py +122 -0
- machinegnostics/metrics/mg_r2.py +167 -0
- machinegnostics/metrics/mse.py +78 -0
- machinegnostics/metrics/precision.py +119 -0
- machinegnostics/metrics/r2.py +122 -0
- machinegnostics/metrics/recall.py +108 -0
- machinegnostics/metrics/rmse.py +77 -0
- machinegnostics/metrics/robr2.py +119 -0
- machinegnostics/metrics/std.py +144 -0
- machinegnostics/metrics/variance.py +101 -0
- machinegnostics/models/__init__.py +2 -0
- machinegnostics/models/classification/__init__.py +1 -0
- machinegnostics/models/classification/layer_history_log_reg.py +121 -0
- machinegnostics/models/classification/layer_io_process_log_reg.py +98 -0
- machinegnostics/models/classification/layer_mlflow_log_reg.py +107 -0
- machinegnostics/models/classification/layer_param_log_reg.py +275 -0
- machinegnostics/models/classification/mg_log_reg.py +273 -0
- machinegnostics/models/cross_validation.py +118 -0
- machinegnostics/models/data_split.py +106 -0
- machinegnostics/models/regression/__init__.py +2 -0
- machinegnostics/models/regression/layer_histroy_rob_reg.py +139 -0
- machinegnostics/models/regression/layer_io_process_rob_rig.py +88 -0
- machinegnostics/models/regression/layer_mlflow_rob_reg.py +134 -0
- machinegnostics/models/regression/layer_param_rob_reg.py +212 -0
- machinegnostics/models/regression/mg_lin_reg.py +253 -0
- machinegnostics/models/regression/mg_poly_reg.py +258 -0
- machinegnostics-0.0.1.dist-info/METADATA +246 -0
- machinegnostics-0.0.1.dist-info/RECORD +93 -0
- machinegnostics-0.0.1.dist-info/WHEEL +5 -0
- machinegnostics-0.0.1.dist-info/licenses/LICENSE +674 -0
- machinegnostics-0.0.1.dist-info/top_level.txt +2 -0
machinegnostics/magcal/sample_characteristics.py

@@ -0,0 +1,532 @@

'''
ManGo - Machine Gnostics Library
Copyright (C) 2025 ManGo Team

Author: Nirmal Parmar

This class is deprecated. Use GnosticsCharacteristics class instead.
'''

import numpy as np
from scipy.optimize import root_scalar
from machinegnostics.magcal.characteristics import GnosticsCharacteristics
from machinegnostics.magcal.scale_param import ScaleParam
import warnings

class GnosticCharacteristicsSample:
    '''
    For internal use only.

    Estimates the location parameter Z0 (gnostic median), the tolerance interval,
    and the interval of typical data.
    '''

    def __init__(self,
                 data: np.ndarray,
                 tol: float = 1e-8):
        self.data = data
        self.tol = tol

    def _gnostic_median(self, case='i', z_range=None):
        """
        Calculate the gnostic median (G-median) of the data sample.

        The G-median is the value Z_med for which the sum of irrelevances equals zero.
        Both cases are implemented, based on equations 14.23 and 14.24.

        Parameters
        ----------
        case : str, default='i'
            The type of G-median to calculate:
            - 'i': estimating case (equation 14.24)
            - 'j': quantifying case (equation 14.23)
        z_range : tuple, optional
            Initial search range (min, max) for Z_med. If None, it is taken from the data.

        Returns
        -------
        RootResults
            Root-finding result; its ``root`` attribute holds the G-median value.

        References
        ----------
        .. [1] Kovanic P., Humber M.B. (2015) The Economics of Information - Mathematical
               Gnostics for Data Analysis. http://www.math-gnostics.eu/books/
        """
        # If all data are identical, return the common value immediately
        if np.all(self.data == self.data[0]):
            class Result:
                def __init__(self, root):
                    self.root = root
                    self.converged = True
            return Result(self.data[0])

        if z_range is None:
            z_range = (np.min(self.data), np.max(self.data))

        z_min, z_max = np.min(self.data), np.max(self.data)

        def _hc_sum(z_med):
            # Gnostic characteristics of the data scaled by the candidate median
            gc = GnosticsCharacteristics(self.data / z_med)
            q, q1 = gc._get_q_q1()

            if case == 'i':
                fi = gc._fi()
                scale = ScaleParam()
                s = scale._gscale_loc(np.mean(fi))
                s = np.where(s > self.tol, s, 1)  # NOTE: can be improved later
                q, q1 = gc._get_q_q1(S=s)
                hi = gc._hi(q, q1)
                return np.sum(hi)
            elif case == 'j':
                fj = gc._fj()
                scale = ScaleParam()
                s = scale._gscale_loc(np.mean(fj))
                s = np.where(s > self.tol, s, 1)  # NOTE: can be improved later
                q, q1 = gc._get_q_q1(S=s)
                hj = gc._hj(q, q1)
                return np.sum(hj)

        # Find the root of the irrelevance sum to get the G-median.
        # If brentq fails or does not converge, expand the bracket in 1% increments.
        expansion_steps = 5
        expansion_factor = 0.01
        for step in range(expansion_steps + 1):
            try:
                result = root_scalar(_hc_sum, bracket=(z_min, z_max), method='brentq', rtol=self.tol)
                if result.converged:
                    return result
            except Exception:
                pass  # Try expanding the bracket

            # Expand z_min and z_max by 1% of the range per step, on each side
            range_width = z_max - z_min
            z_min_exp = z_min - expansion_factor * (step + 1) * range_width
            z_max_exp = z_max + expansion_factor * (step + 1) * range_width
            # Avoid a negative or zero z_med if the data are strictly positive
            if np.all(self.data > 0):
                z_min = max(z_min_exp, self.tol)
                z_max = max(z_max_exp, z_min + self.tol)
            else:
                z_min = z_min_exp
                z_max = z_max_exp

        raise RuntimeError("G-median calculation did not converge after expanding the search bracket.")
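The G-median search above is a root-finding problem: `_hc_sum` sums the irrelevances of the data scaled by a candidate Z_med, and `root_scalar` (Brent's method) looks for the value where that sum crosses zero, widening the bracket in 1% steps when the initial bracket fails. A minimal standalone sketch of the same pattern, using a smoothed sign-sum whose root is the ordinary median instead of the package's gnostic irrelevances (so none of the machinegnostics internals are assumed):

import numpy as np
from scipy.optimize import root_scalar

def median_by_root(x, tol=1e-8, eps=1e-6, expansion_steps=5, expansion_factor=0.01):
    """Illustrative sketch: find the median as the root of a smoothed sign-sum,
    mirroring the bracket-expansion retry used by _gnostic_median."""
    x = np.asarray(x, dtype=float)

    # Smoothed version of sum(sign(x - m)); its root approximates the median.
    def criterion(m):
        r = x - m
        return np.sum(r / np.sqrt(r**2 + eps))

    z_min, z_max = x.min(), x.max()
    for step in range(expansion_steps + 1):
        try:
            res = root_scalar(criterion, bracket=(z_min, z_max), method='brentq', rtol=tol)
            if res.converged:
                return res.root
        except ValueError:
            pass  # same sign at both bracket ends; widen and retry
        width = z_max - z_min
        z_min -= expansion_factor * (step + 1) * width
        z_max += expansion_factor * (step + 1) * width
    raise RuntimeError("root search did not converge")

# Example: lands close to np.median for a small sample
print(median_by_root([1.0, 2.0, 3.5, 10.0]))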
    def _calculate_modulus(self, case='i'):
        """
        Calculate the modulus of the data sample using equation 14.8:
        M_Z,c = sqrt(F_c^2 - c^2 * H_c^2)

        Parameters
        ----------
        case : str, default='i'
            The type of modulus to calculate:
            - 'i': Uses irrelevance Hi (estimation case)
            - 'j': Uses irrelevance Hj (quantification case)

        Returns
        -------
        float
            The calculated modulus value M_Z,c

        Notes
        -----
        This implementation follows Theorem 15 from the reference, which states that
        the modulus of a data sample can be calculated using the relation

            M_Z,c = sqrt(F_c^2 - c^2 * H_c^2)

        where:
        - F_c is the relevance function
        - H_c is the irrelevance function
        - c is the case parameter ('i' or 'j')

        References
        ----------
        Equation 14.8 in Mathematical Gnostics
        """
        # Validate case parameter
        if case not in ['i', 'j']:
            raise ValueError("case must be either 'i' or 'j'")

        z_min, z_max = np.min(self.data), np.max(self.data)
        if z_min == z_max:
            return 1

        # G-median as the location parameter
        z0_result = self._gnostic_median(case=case)
        z0 = z0_result.root
        # Get the gnostic characteristics
        gc = GnosticsCharacteristics(self.data / z0)
        q, q1 = gc._get_q_q1()

        # Calculate relevance (F) and irrelevance (H) based on case
        if case == 'i':
            # Estimation case
            fi = gc._fi()
            scale = ScaleParam()
            s = scale._gscale_loc(np.mean(fi))
            s = np.where(s > self.tol, s, 1)
            q, q1 = gc._get_q_q1(S=s)
            F = np.mean(gc._fi(q, q1))
            H = np.mean(gc._hi(q, q1))
            c = -1  # For case 'i'
        else:
            # Quantification case ('j'; case already validated above)
            fj = gc._fj()
            scale = ScaleParam()
            s = scale._gscale_loc(np.mean(fj))
            s = np.where(s > self.tol, s, 1)
            q, q1 = gc._get_q_q1(S=s)
            F = np.mean(gc._fj(q, q1))
            H = np.mean(gc._hj(q, q1))
            c = 1  # For case 'j'

        # Calculate modulus using equation 14.8
        M_Z = np.sqrt(np.abs(F**2 - (c**2 * H**2)))
        return M_Z
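With the mean relevance F and mean irrelevance H in hand, equation 14.8 reduces to a single expression. A tiny numeric sketch with placeholder F and H values (chosen for illustration only, not produced by the package):

import numpy as np

# Hypothetical mean relevance / irrelevance values for illustration
F, H = 0.92, 0.18

for case, c in (('i', -1), ('j', 1)):
    # Equation 14.8: M_Z,c = sqrt(F_c^2 - c^2 * H_c^2); abs() guards tiny negative round-off
    M_Z = np.sqrt(np.abs(F**2 - (c**2) * H**2))
    print(case, M_Z)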
    def _calculate_detailed_modulus(self, Z0, S=None, case='i'):  # NOTE: not in current use
        """
        Calculate the detailed modulus of the data sample using equation 14.12:

            M_Z,c = sqrt(1 + (c^2 / N^2) * sum_{k,l} (f_k * f_l)^((1-c)/2)
                         * ((Z_k/Z_l)^(1/S) - (Z_l/Z_k)^(1/S)))

        Parameters
        ----------
        Z0 : float
            Location parameter (usually the G-median)
        S : float, optional
            Scale parameter. If None, it is calculated from the data.
        case : str, default='i'
            The type of modulus to calculate:
            - 'i': Uses irrelevance Hi (estimation case)
            - 'j': Uses irrelevance Hj (quantification case)

        Returns
        -------
        float
            The calculated detailed modulus value M_Z,c

        Notes
        -----
        This implementation follows equation 14.12, which gives a more detailed
        calculation of the modulus when all data in the sample Z satisfy
        Z_0,k = Z_0 and S_k = S.
        """
        # Input validation
        if case not in ['i', 'j']:
            raise ValueError("case must be either 'i' or 'j'")

        # Get gnostic characteristics
        gc = GnosticsCharacteristics(self.data)

        # Get scale parameter if not provided
        if S is None:
            if case == 'i':
                fi = gc._fi()
                scale = ScaleParam()
                S = scale._gscale_loc(np.mean(fi))
            else:
                fj = gc._fj()
                scale = ScaleParam()
                S = scale._gscale_loc(np.mean(fj))

        # Ensure S is positive and above tolerance
        S = max(S, self.tol)

        # Get number of samples
        N = len(self.data)

        # Set c based on case
        c = -1 if case == 'i' else 1

        # Calculate f_k values based on case
        if case == 'i':
            f_values = gc._fi()
        else:
            f_values = gc._fj()

        # Initialize sum
        sum_term = 0.0

        # Calculate double sum term
        for k in range(N):
            for l in range(N):
                # Calculate f_k * f_l term
                f_product = f_values[k] * f_values[l]

                # Calculate power term (f_k * f_l)^((1-c)/2)
                f_power = np.power(f_product, (1 - c) / 2)

                # Calculate Z_k/Z_l and Z_l/Z_k ratios
                Z_ratio_k_l = self.data[k] / self.data[l]
                Z_ratio_l_k = 1 / Z_ratio_k_l

                # Calculate the difference term
                diff_term = (np.power(Z_ratio_k_l, 1 / S) -
                             np.power(Z_ratio_l_k, 1 / S))

                # Add to sum
                sum_term += f_power * diff_term

        # Calculate final modulus using equation 14.12
        try:
            M_Z = np.sqrt(1 + (c**2 / N**2) * sum_term)

            # Handle potential numerical issues
            if np.isnan(M_Z) or np.isinf(M_Z):
                warnings.warn("Invalid modulus value encountered. Returning 0.0")
                return 0.0

            return float(M_Z)
        except ValueError as e:
            warnings.warn(f"Error in modulus calculation: {str(e)}. Returning 0.0")
            return 0.0
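The double loop above is O(N^2) in pure Python. The same sum can be formed with NumPy outer products; the sketch below checks the equivalence on placeholder arrays standing in for the data and the f_k values (which in the package come from GnosticsCharacteristics):

import numpy as np

rng = np.random.default_rng(0)
Z = rng.uniform(1.0, 2.0, size=50)         # stand-in for self.data
f_values = rng.uniform(0.5, 1.0, size=50)  # stand-in for gc._fi() / gc._fj()
S, c, N = 1.0, -1, len(Z)

# Loop form, as in the method above
loop_sum = 0.0
for k in range(N):
    for l in range(N):
        f_power = (f_values[k] * f_values[l]) ** ((1 - c) / 2)
        ratio = Z[k] / Z[l]
        loop_sum += f_power * (ratio ** (1 / S) - (1 / ratio) ** (1 / S))

# Vectorized form: outer products build the (k, l) grids in one shot
f_power = np.outer(f_values, f_values) ** ((1 - c) / 2)
ratios = np.outer(Z, 1.0 / Z) ** (1 / S)   # ratios[k, l] = (Z_k / Z_l)^(1/S)
vec_sum = np.sum(f_power * (ratios - ratios.T))

print(loop_sum, vec_sum, np.isclose(loop_sum, vec_sum))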
    def _gnostic_variance(self, data: np.ndarray = None, case: str = 'i'):
        """
        Calculate the gnostic variance of a data sample (defaults to self.data).

        For internal use only
        """
        if data is None:
            data = self.data
        data = np.asarray(data)

        # Validate case parameter
        if case not in ['i', 'j']:
            raise ValueError("case must be either 'i' or 'j'")

        z_min, z_max = np.min(data), np.max(data)
        if z_min == z_max:
            return 0

        # G-median of the sample being evaluated
        z0_result = GnosticCharacteristicsSample(data, tol=self.tol)._gnostic_median(case=case)
        z0 = z0_result.root
        # Get the gnostic characteristics
        gc = GnosticsCharacteristics(data / z0)
        q, q1 = gc._get_q_q1()

        # Calculate irrelevance (H) based on case
        if case == 'i':
            # Estimation case
            fi = gc._fi()
            scale = ScaleParam()
            s = scale._gscale_loc(np.mean(fi))
            s = np.where(s > self.tol, s, 1)
            q, q1 = gc._get_q_q1(S=s)
            H = np.mean(gc._hi(q, q1))
        else:
            # Quantification case ('j'; case already validated above)
            fj = gc._fj()
            scale = ScaleParam()
            s = scale._gscale_loc(np.mean(fj))
            s = np.where(s > self.tol, s, 1)
            q, q1 = gc._get_q_q1(S=s)
            H = np.mean(gc._hj(q, q1))

        return H
    def _gnostic_autocovariance(self, K: int, case: str = 'i') -> float:
        """
        Calculate the gnostic autocovariance according to equation 14.19.

        Autocovariance measures the correlation between data points separated by K positions
        within the same data sample.

        Parameters
        ----------
        K : int
            Lag parameter, must be between 1 and N-1
        case : str, default='i'
            The type of covariance to calculate:
            - 'i': Estimation case using Hi irrelevance
            - 'j': Quantification case using Hj irrelevance

        Returns
        -------
        float
            The calculated autocovariance value

        Notes
        -----
        Implementation of equation 14.19:

            acov_c := 1/(N-K) * sum_i h_c(2*Omega_i) * h_c(2*Omega_(i+K))

        where:
        - N is the sample size
        - K is the lag parameter
        - h_c is the irrelevance function (Hi or Hj)
        - Omega_i are the data angles

        References
        ----------
        Equation 14.19 in Mathematical Gnostics
        """
        # Validate inputs
        N = len(self.data)
        if not 1 <= K <= N - 1:
            raise ValueError(f"K must be between 1 and {N-1}")

        # Get G-median for angle calculations
        z0_result = self._gnostic_median(case=case)
        z0 = z0_result.root

        # Calculate characteristics
        gc = GnosticsCharacteristics(self.data / z0)
        q, q1 = gc._get_q_q1()

        # Get irrelevance values based on case
        if case == 'i':
            h_values = gc._hi(q, q1)
        elif case == 'j':
            h_values = gc._hj(q, q1)
        else:
            raise ValueError("case must be either 'i' or 'j'")

        # Calculate autocovariance using equation 14.19
        acov = 0.0
        for i in range(N - K):
            acov += h_values[i] * h_values[i + K]

        return acov / (N - K)
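The lag-K product sum in `_gnostic_autocovariance` follows the familiar autocovariance pattern and can also be written with array slicing instead of an explicit loop. A small sketch on a placeholder array of irrelevance-like values (the real h values come from GnosticsCharacteristics):

import numpy as np

h = np.array([0.3, -0.1, 0.4, 0.0, -0.2, 0.1])  # stand-in for h_values
K = 2
N = len(h)

# Loop form, as in the method above
acov_loop = sum(h[i] * h[i + K] for i in range(N - K)) / (N - K)

# Equivalent vectorized form using slicing
acov_vec = np.dot(h[:N - K], h[K:]) / (N - K)

print(acov_loop, acov_vec, np.isclose(acov_loop, acov_vec))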
    def _gnostic_crosscovariance(self, other_data: np.ndarray, case: str = 'i') -> float:
        """
        Calculate the gnostic crosscovariance according to equation 14.20.

        Crosscovariance measures the correlation between two different data samples
        of the same size.

        Parameters
        ----------
        other_data : np.ndarray
            Second data sample to compare with self.data
        case : str, default='i'
            The type of covariance to calculate:
            - 'i': Estimation case using Hi irrelevance
            - 'j': Quantification case using Hj irrelevance

        Returns
        -------
        float
            The calculated crosscovariance value

        Notes
        -----
        Implementation of equation 14.20:

            ccov_c := 1/N * sum_n h_c(2*Omega_n,A) * h_c(2*Omega_n,B)

        where:
        - N is the sample size
        - h_c is the irrelevance function (Hi or Hj)
        - Omega_n,A and Omega_n,B are angles from samples A and B

        References
        ----------
        Equation 14.20 in Mathematical Gnostics
        """
        other_data = np.asarray(other_data)
        if len(self.data) != len(other_data):
            raise ValueError("Both data samples must have the same length")

        N = len(self.data)

        # Calculate G-medians for both samples
        z0_A = self._gnostic_median(case=case).root
        gcs_B = GnosticCharacteristicsSample(other_data)
        z0_B = gcs_B._gnostic_median(case=case).root

        # Calculate characteristics for both samples
        z_A = self.data / z0_A
        z_B = other_data / z0_B
        gc_A = GnosticsCharacteristics(z_A)
        gc_B = GnosticsCharacteristics(z_B)
        q_A, q1_A = gc_A._get_q_q1()
        q_B, q1_B = gc_B._get_q_q1()

        # Get irrelevance values based on case (full arrays, not means)
        if case == 'i':
            fi_A = gc_A._fi(q_A, q1_A)
            scale_A = ScaleParam()
            s_A = scale_A._gscale_loc(np.mean(fi_A))
            s_A = np.where(s_A > self.tol, s_A, 1)
            q_A, q1_A = gc_A._get_q_q1(S=s_A)
            h_values_A = gc_A._hi(q_A, q1_A)

            fi_B = gc_B._fi(q_B, q1_B)
            scale_B = ScaleParam()
            s_B = scale_B._gscale_loc(np.mean(fi_B))
            s_B = np.where(s_B > self.tol, s_B, 1)
            q_B, q1_B = gc_B._get_q_q1(S=s_B)
            h_values_B = gc_B._hi(q_B, q1_B)
        elif case == 'j':
            fj_A = gc_A._fj(q_A, q1_A)
            scale_A = ScaleParam()
            s_A = scale_A._gscale_loc(np.mean(fj_A))
            s_A = np.where(s_A > self.tol, s_A, 1)
            q_A, q1_A = gc_A._get_q_q1(S=s_A)
            h_values_A = gc_A._hj(q_A, q1_A)

            fj_B = gc_B._fj(q_B, q1_B)
            scale_B = ScaleParam()
            s_B = scale_B._gscale_loc(np.mean(fj_B))
            s_B = np.where(s_B > self.tol, s_B, 1)
            q_B, q1_B = gc_B._get_q_q1(S=s_B)
            h_values_B = gc_B._hj(q_B, q1_B)
        else:
            raise ValueError("case must be either 'i' or 'j'")

        # Calculate crosscovariance using equation 14.20
        ccov = np.sum(h_values_A * h_values_B) / N

        return ccov
    def _gnostic_correlation(self, other_data: np.ndarray, case: str = 'i') -> float:
        '''
        Calculate the gnostic correlation from the gnostic variances and cross-covariance.
        '''
        data = self.data
        # If data / other_data are pandas objects, convert to numpy arrays first
        if hasattr(data, "values"):
            data = data.values
        if hasattr(other_data, "values"):
            other_data = other_data.values
        data = np.asarray(data)
        other_data = np.asarray(other_data)

        # If other_data is 2D with one column, reduce to 1D
        if other_data.ndim == 2 and other_data.shape[1] == 1:
            other_data = other_data.ravel()

        # If data is 1D, compute a single correlation coefficient
        if data.ndim == 1:
            d_vars_1 = self._gnostic_variance(case=case, data=data)
            d_vars_2 = self._gnostic_variance(case=case, data=other_data)
            n_ccov_12 = self._gnostic_crosscovariance(other_data=other_data, case=case)
            cor = n_ccov_12 / np.sqrt(d_vars_1 * d_vars_2)
            return cor

        # If data is 2D, compute the correlation of each column with other_data
        corrs = []
        for i in range(data.shape[1]):
            xi = data[:, i]
            gcs_xi = self.__class__(xi, tol=self.tol)
            d_vars_1 = gcs_xi._gnostic_variance(case=case, data=xi)
            d_vars_2 = gcs_xi._gnostic_variance(case=case, data=other_data)
            n_ccov_12 = gcs_xi._gnostic_crosscovariance(other_data=other_data, case=case)
            cor = n_ccov_12 / np.sqrt(d_vars_1 * d_vars_2)
            corrs.append(cor)
        return np.array(corrs)
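Taken together, these helpers derive a robust correlation by normalizing the gnostic cross-covariance by the two gnostic variances, in analogy with the Pearson coefficient. A usage sketch of the published class (the methods are underscore-prefixed and documented as internal, so this is illustrative only; it assumes two strictly positive, equal-length 1-D samples):

import numpy as np
from machinegnostics.magcal.sample_characteristics import GnosticCharacteristicsSample

# Two small, strictly positive samples of equal length
x = np.array([1.1, 1.9, 3.2, 4.1, 5.3])
y = np.array([0.9, 2.1, 2.8, 4.4, 5.0])

gcs = GnosticCharacteristicsSample(x)

z0 = gcs._gnostic_median(case='i').root          # location parameter (G-median)
var_x = gcs._gnostic_variance(data=x, case='i')  # gnostic variance of x
ccov = gcs._gnostic_crosscovariance(other_data=y, case='i')
corr = gcs._gnostic_correlation(other_data=y, case='i')

print(z0, var_x, ccov, corr)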